apple/xnu (xnu-2782.20.48) - osfmk/i386/fpu.c
/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*
 * @OSF_COPYRIGHT@
 */
/*
 * Mach Operating System
 * Copyright (c) 1992-1990 Carnegie Mellon University
 * All Rights Reserved.
 *
 * Permission to use, copy, modify and distribute this software and its
 * documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie Mellon
 * the rights to redistribute these changes.
 */

#include <mach/exception_types.h>
#include <mach/i386/thread_status.h>
#include <mach/i386/fp_reg.h>
#include <mach/branch_predicates.h>

#include <kern/mach_param.h>
#include <kern/processor.h>
#include <kern/thread.h>
#include <kern/zalloc.h>
#include <kern/misc_protos.h>
#include <kern/spl.h>
#include <kern/assert.h>

#include <libkern/OSAtomic.h>

#include <architecture/i386/pio.h>
#include <i386/cpuid.h>
#include <i386/fpu.h>
#include <i386/proc_reg.h>
#include <i386/misc_protos.h>
#include <i386/thread.h>
#include <i386/trap.h>

int    fp_kind = FP_NO;    /* not inited */
zone_t ifps_zone;          /* zone for FPU save area */

#define ALIGNED(addr,size) (((uintptr_t)(addr)&((size)-1))==0)

/* Forward */

extern void fpinit(void);
extern void fp_save(
    thread_t thr_act);
extern void fp_load(
    thread_t thr_act);

static void configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps);

struct x86_avx_thread_state initial_fp_state __attribute((aligned(64)));


/* Global MXCSR capability bitmask */
static unsigned int mxcsr_capability_mask;

#define fninit() \
    __asm__ volatile("fninit")

#define fnstcw(control) \
    __asm__("fnstcw %0" : "=m" (*(unsigned short *)(control)))

#define fldcw(control) \
    __asm__ volatile("fldcw %0" : : "m" (*(unsigned short *) &(control)) )

#define fnclex() \
    __asm__ volatile("fnclex")

#define fnsave(state) \
    __asm__ volatile("fnsave %0" : "=m" (*state))

#define frstor(state) \
    __asm__ volatile("frstor %0" : : "m" (state))
#define fwait() \
    __asm__("fwait")

#define fxrstor(addr) __asm__ __volatile__("fxrstor %0" : : "m" (*(addr)))
#define fxsave(addr)  __asm__ __volatile__("fxsave %0" : "=m" (*(addr)))

static uint32_t fp_register_state_size = 0;
static uint32_t fpu_YMM_present = FALSE;
static uint32_t cpuid_reevaluated = 0;

static void fpu_store_registers(void *, boolean_t);
static void fpu_load_registers(void *);

extern void xsave64o(void);
extern void xrstor64o(void);

#define XMASK ((uint32_t) (XFEM_X87 | XFEM_SSE | XFEM_YMM))
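
/*
 * XMASK selects the x87 (bit 0), SSE (bit 1) and YMM (bit 2) state
 * components, i.e. the value 0x7. The same mask is programmed into
 * XCR0 via xsetbv() below and passed in EDX:EAX as the
 * requested-feature bitmap of the xsave/xrstor instructions; the
 * DEBUG check in fpu_load_registers() (xsbv & ~7) relies on this.
 */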

static inline void xsetbv(uint32_t mask_hi, uint32_t mask_lo) {
    __asm__ __volatile__("xsetbv" :: "a"(mask_lo), "d"(mask_hi), "c" (XCR0));
}

static inline void xsave(struct x86_fx_thread_state *a) {
    __asm__ __volatile__("xsave %0" : "=m" (*a) : "a"(XMASK), "d"(0));
}

static inline void xrstor(struct x86_fx_thread_state *a) {
    __asm__ __volatile__("xrstor %0" :: "m" (*a), "a"(XMASK), "d"(0));
}

#if DEBUG
static inline unsigned short
fnstsw(void)
{
    unsigned short status;
    __asm__ volatile("fnstsw %0" : "=ma" (status));
    return(status);
}
#endif

/*
 * Configure the initial FPU state presented to new threads.
 * Determine the MXCSR capability mask, which allows us to mask off any
 * potentially unsafe "reserved" bits before restoring the FPU context.
 * *Not* per-cpu, assumes symmetry.
 */

static void
configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps)
{
    /* XSAVE requires a 64 byte aligned store */
    assert(ALIGNED(fps, 64));
    /* Clear, to prepare for the diagnostic FXSAVE */
    bzero(fps, sizeof(*fps));

    fpinit();
    fpu_store_registers(fps, FALSE);

    mxcsr_capability_mask = fps->fx_MXCSR_MASK;

    /* Set default mask value if necessary */
    if (mxcsr_capability_mask == 0)
        mxcsr_capability_mask = 0xffbf;

    /* Clear vector register store */
    bzero(&fps->fx_XMM_reg[0][0], sizeof(fps->fx_XMM_reg));
    bzero(&fps->x_YMMH_reg[0][0], sizeof(fps->x_YMMH_reg));

    fps->fp_valid = TRUE;
    fps->fp_save_layout = fpu_YMM_present ? XSAVE32 : FXSAVE32;
    fpu_load_registers(fps);

    /* Poison values to trap unsafe usage */
    fps->fp_valid = 0xFFFFFFFF;
    fps->fp_save_layout = FP_UNUSED;

    /* Re-enable FPU/SSE DNA exceptions */
    set_ts();
}
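
/*
 * Note: the diagnostic FXSAVE above stores MXCSR_MASK into bytes
 * 28..31 of the save image (fx_MXCSR_MASK). Per the Intel SDM, a
 * stored mask of zero means the processor did not report one and
 * software should assume the architectural default of 0xFFBF, which
 * is exactly the fallback applied above. Setting an MXCSR bit that is
 * clear in this mask would raise #GP on a subsequent FXRSTOR/XRSTOR.
 */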


/*
 * Look for FPU and initialize it.
 * Called on each CPU.
 */
void
init_fpu(void)
{
#if DEBUG
    unsigned short status;
    unsigned short control;
#endif
    /*
     * Check for FPU by initializing it,
     * then trying to read the correct bit patterns from
     * the control and status registers.
     */
    set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE);    /* allow use of FPU */
    fninit();
#if DEBUG
    status = fnstsw();
    fnstcw(&control);

    assert(((status & 0xff) == 0) && ((control & 0x103f) == 0x3f));
#endif
    /* Advertise SSE support */
    if (cpuid_features() & CPUID_FEATURE_FXSR) {
        fp_kind = FP_FXSR;
        set_cr4(get_cr4() | CR4_OSFXS);
        /* And allow SIMD exceptions if present */
        if (cpuid_features() & CPUID_FEATURE_SSE) {
            set_cr4(get_cr4() | CR4_OSXMM);
        }
        fp_register_state_size = sizeof(struct x86_fx_thread_state);

    } else
        panic("fpu is not FP_FXSR");

    /* Configure the XSAVE context mechanism if the processor supports
     * AVX/YMM registers
     */
    if (cpuid_features() & CPUID_FEATURE_XSAVE) {
        cpuid_xsave_leaf_t *xsp = &cpuid_info()->cpuid_xsave_leaf;
        if (xsp->extended_state[0] & (uint32_t)XFEM_YMM) {
            assert(xsp->extended_state[0] & (uint32_t)XFEM_SSE);
            /* XSAVE container size for all features */
            if (xsp->extended_state[2] != sizeof(struct x86_avx_thread_state))
                kprintf("sizeof(struct x86_avx_thread_state)=%lu != xsp->extended_state[2]=%u\n",
                    sizeof(struct x86_avx_thread_state), xsp->extended_state[2]);
            fp_register_state_size = sizeof(struct x86_avx_thread_state);
            fpu_YMM_present = TRUE;
            set_cr4(get_cr4() | CR4_OSXSAVE);
            xsetbv(0, XMASK);
            /* Re-evaluate CPUID, once, to reflect OSXSAVE */
            if (OSCompareAndSwap(0, 1, &cpuid_reevaluated))
                cpuid_set_info();
            /* DRK: consider verifying AVX offset with cpuid(d, ECX:2) */
        }
    }
    else
        fpu_YMM_present = FALSE;
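
    /*
     * Ordering note: CR4.OSXSAVE must be set before executing xsetbv,
     * which raises #UD otherwise, and the CPUID.1:ECX.OSXSAVE bit
     * simply reflects CR4.OSXSAVE. That is why CPUID information is
     * re-evaluated once after the block above enables XSAVE.
     */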

    fpinit();

    /*
     * Trap wait instructions. Turn off FPU for now.
     */
    set_cr0(get_cr0() | CR0_TS | CR0_MP);
}
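
/*
 * With CR0.TS set (and CR0.MP set so that WAIT/FWAIT also checks TS),
 * the first floating-point or SSE instruction a thread executes takes
 * a #NM device-not-available trap, which lands in fpnoextflt() below.
 * That handler allocates and loads the thread's save area on demand,
 * so FPU state is managed lazily.
 */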

/*
 * Allocate and initialize FP state for current thread.
 * Don't load state.
 */
static void *
fp_state_alloc(void)
{
    struct x86_fx_thread_state *ifps = zalloc(ifps_zone);

#if DEBUG
    if (!(ALIGNED(ifps, 64))) {
        panic("fp_state_alloc: %p, %u, %p, %u",
            ifps, (unsigned) ifps_zone->elem_size,
            (void *) ifps_zone->free_elements,
            (unsigned) ifps_zone->alloc_size);
    }
#endif
    bzero(ifps, sizeof(*ifps));
    return ifps;
}

static inline void
fp_state_free(void *ifps)
{
    zfree(ifps_zone, ifps);
}

void clear_fpu(void)
{
    set_ts();
}


static void fpu_load_registers(void *fstate) {
    struct x86_fx_thread_state *ifps = fstate;
    fp_save_layout_t layout = ifps->fp_save_layout;

    assert(layout == FXSAVE32 || layout == FXSAVE64 || layout == XSAVE32 || layout == XSAVE64);
    assert(ALIGNED(ifps, 64));
    assert(ml_get_interrupts_enabled() == FALSE);

#if DEBUG
    if (layout == XSAVE32 || layout == XSAVE64) {
        struct x86_avx_thread_state *iavx = fstate;
        unsigned i;
        /* Verify reserved bits in the XSAVE header */
        if (iavx->_xh.xsbv & ~7)
            panic("iavx->_xh.xsbv: 0x%llx", iavx->_xh.xsbv);
        for (i = 0; i < sizeof(iavx->_xh.xhrsvd); i++)
            if (iavx->_xh.xhrsvd[i])
                panic("Reserved bit set");
    }
    if (fpu_YMM_present) {
        if (layout != XSAVE32 && layout != XSAVE64)
            panic("Inappropriate layout: %u\n", layout);
    }
#endif /* DEBUG */

    if ((layout == XSAVE64) || (layout == XSAVE32))
        xrstor(ifps);
    else
        fxrstor(ifps);
}
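
/*
 * The DEBUG checks above guard against a corrupt XSAVE header: per
 * the Intel SDM, xrstor raises #GP if XSTATE_BV requests components
 * beyond those enabled in XCR0 or if any reserved header bytes are
 * nonzero, so catching that here gives a clearer panic than a fault
 * on the restore itself.
 */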

static void fpu_store_registers(void *fstate, boolean_t is64) {
    struct x86_fx_thread_state *ifps = fstate;
    assert(ALIGNED(ifps, 64));
    if (fpu_YMM_present) {
        xsave(ifps);
        ifps->fp_save_layout = is64 ? XSAVE64 : XSAVE32;
    }
    else {
        fxsave(ifps);
        ifps->fp_save_layout = is64 ? FXSAVE64 : FXSAVE32;
    }
}

/*
 * Initialize FP handling.
 */

void
fpu_module_init(void)
{
    if ((fp_register_state_size != sizeof(struct x86_fx_thread_state)) &&
        (fp_register_state_size != sizeof(struct x86_avx_thread_state)))
        panic("fpu_module_init: incorrect savearea size %u\n", fp_register_state_size);

    assert(fpu_YMM_present != 0xFFFFFFFF);

    /* We explicitly choose an allocation chunk of 64 save areas to
     * eliminate waste for the 832 byte AVX XSAVE register save area:
     * 64 * 832 = 53248 bytes, exactly 13 pages, so nothing is left
     * over at the end of a chunk.
     */
    ifps_zone = zinit(fp_register_state_size,
                      thread_max * fp_register_state_size,
                      64 * fp_register_state_size,
                      "x86 fpsave state");

    /* To maintain the required alignment, disable
     * zone debugging for this zone as that appends
     * 16 bytes to each element.
     */
    zone_change(ifps_zone, Z_ALIGNMENT_REQUIRED, TRUE);
    /* Determine MXCSR reserved bits and configure initial FPU state */
    configure_mxcsr_capability_mask(&initial_fp_state);
}
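
/*
 * For reference (assumption: the standard xnu zalloc interface of this
 * era), the zinit() arguments above are element size, maximum zone
 * size in bytes, size of each allocation chunk in bytes, and a
 * debugging name:
 *
 *	zone_t zinit(vm_size_t size, vm_size_t max,
 *	             vm_size_t alloc, const char *name);
 */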

/*
 * Save thread's FPU context.
 */
void
fpu_save_context(thread_t thread)
{
    struct x86_fx_thread_state *ifps;

    assert(ml_get_interrupts_enabled() == FALSE);
    ifps = (thread)->machine.ifps;
#if DEBUG
    if (ifps && ((ifps->fp_valid != FALSE) && (ifps->fp_valid != TRUE))) {
        panic("ifps->fp_valid: %u\n", ifps->fp_valid);
    }
#endif
    if (ifps != 0 && (ifps->fp_valid == FALSE)) {
        /* Clear CR0.TS in preparation for the FP context save. In
         * theory, this shouldn't be necessary since a live FPU should
         * indicate that TS is clear. However, various routines
         * (such as sendsig & sigreturn) manipulate TS directly.
         */
        clear_ts();
        /* registers are in FPU - save to memory */
        fpu_store_registers(ifps, (thread_is_64bit(thread) && is_saved_state64(thread->machine.iss)));
        ifps->fp_valid = TRUE;
    }
    set_ts();
}
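
/*
 * fpu_save_context() runs on the context-switch path with interrupts
 * disabled. The save is lazy: fp_valid == FALSE means the thread's
 * state is live in the FPU and must be written back; if it is already
 * TRUE the memory image is current and only CR0.TS is re-set.
 */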


/*
 * Free a FPU save area.
 * Called only when thread terminating - no locking necessary.
 */
void
fpu_free(void *fps)
{
    fp_state_free(fps);
}

/*
 * Set the floating-point state for a thread based
 * on the FXSave formatted data. This is basically
 * the same as fpu_set_state except it uses the
 * expanded data structure.
 * If the thread is not the current thread, it is
 * not running (held). Locking needed against
 * concurrent fpu_set_state or fpu_get_state.
 */
kern_return_t
fpu_set_fxstate(
    thread_t        thr_act,
    thread_state_t  tstate,
    thread_flavor_t f)
{
    struct x86_fx_thread_state *ifps;
    struct x86_fx_thread_state *new_ifps;
    x86_float_state64_t *state;
    pcb_t pcb;
    size_t state_size = sizeof(struct x86_fx_thread_state);
    boolean_t old_valid, fresh_state = FALSE;

    if (fp_kind == FP_NO)
        return KERN_FAILURE;

    if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
        !ml_fpu_avx_enabled())
        return KERN_FAILURE;

    state = (x86_float_state64_t *)tstate;

    assert(thr_act != THREAD_NULL);
    pcb = THREAD_TO_PCB(thr_act);

    if (state == NULL) {
        /*
         * new FPU state is 'invalid'.
         * Deallocate the fp state if it exists.
         */
        simple_lock(&pcb->lock);

        ifps = pcb->ifps;
        pcb->ifps = 0;

        simple_unlock(&pcb->lock);

        if (ifps != 0) {
            fp_state_free(ifps);
        }
    } else {
        /*
         * Valid incoming state. Allocate the fp state if there is none.
         */
        new_ifps = 0;
    Retry:
        simple_lock(&pcb->lock);

        ifps = pcb->ifps;
        if (ifps == 0) {
            if (new_ifps == 0) {
                simple_unlock(&pcb->lock);
                new_ifps = fp_state_alloc();
                goto Retry;
            }
            ifps = new_ifps;
            new_ifps = 0;
            pcb->ifps = ifps;
            fresh_state = TRUE;
        }

        /*
         * now copy over the new data.
         */

        old_valid = ifps->fp_valid;

#if DEBUG || DEVELOPMENT
        if ((fresh_state == FALSE) && (old_valid == FALSE) && (thr_act != current_thread())) {
            panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act);
        }
#endif
        /*
         * Clear any reserved bits in the MXCSR to prevent a GPF
         * when issuing an FXRSTOR.
         */

        state->fpu_mxcsr &= mxcsr_capability_mask;

        bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size);

        if (fpu_YMM_present) {
            struct x86_avx_thread_state *iavx = (void *) ifps;
            uint32_t fpu_nyreg = 0;

            if (f == x86_AVX_STATE32)
                fpu_nyreg = 8;
            else if (f == x86_AVX_STATE64)
                fpu_nyreg = 16;

            if (fpu_nyreg) {
                x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
                bcopy(&ystate->__fpu_ymmh0, &iavx->x_YMMH_reg[0][0], fpu_nyreg * sizeof(_STRUCT_XMM_REG));
            }

            iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32;
            /* Sanitize XSAVE header */
            bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd));
            if (fpu_nyreg)
                iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87);
            else
                iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87);
        } else {
            ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32;
        }
        ifps->fp_valid = old_valid;

        if (old_valid == FALSE) {
            boolean_t istate = ml_set_interrupts_enabled(FALSE);
            ifps->fp_valid = TRUE;
            /* If altering the current thread's state, disable FPU */
            if (thr_act == current_thread())
                set_ts();

            ml_set_interrupts_enabled(istate);
        }

        simple_unlock(&pcb->lock);

        if (new_ifps != 0)
            fp_state_free(new_ifps);
    }
    return KERN_SUCCESS;
}
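
/*
 * A minimal usage sketch from user space (assumption: this routine is
 * reached through machine_thread_set_state() when a Mach
 * thread_set_state() call carries a floating-point flavor):
 *
 *	x86_float_state64_t fs;
 *	// ... fill in fs.fpu_fcw, fs.fpu_mxcsr, register contents ...
 *	kern_return_t kr = thread_set_state(thread, x86_FLOAT_STATE64,
 *	    (thread_state_t)&fs, x86_FLOAT_STATE64_COUNT);
 */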

/*
 * Get the floating-point state for a thread.
 * If the thread is not the current thread, it is
 * not running (held). Locking needed against
 * concurrent fpu_set_state or fpu_get_state.
 */
kern_return_t
fpu_get_fxstate(
    thread_t        thr_act,
    thread_state_t  tstate,
    thread_flavor_t f)
{
    struct x86_fx_thread_state *ifps;
    x86_float_state64_t *state;
    kern_return_t ret = KERN_FAILURE;
    pcb_t pcb;
    size_t state_size = sizeof(struct x86_fx_thread_state);

    if (fp_kind == FP_NO)
        return KERN_FAILURE;

    if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
        !ml_fpu_avx_enabled())
        return KERN_FAILURE;

    state = (x86_float_state64_t *)tstate;

    assert(thr_act != THREAD_NULL);
    pcb = THREAD_TO_PCB(thr_act);

    simple_lock(&pcb->lock);

    ifps = pcb->ifps;
    if (ifps == 0) {
        /*
         * No valid floating-point state.
         */

        bcopy((char *)&initial_fp_state, (char *)&state->fpu_fcw,
            state_size);

        simple_unlock(&pcb->lock);

        return KERN_SUCCESS;
    }
    /*
     * Make sure we've got the latest fp state info, in case the
     * live FPU state belongs to our target.
     */
    if (thr_act == current_thread()) {
        boolean_t intr;

        intr = ml_set_interrupts_enabled(FALSE);

        clear_ts();
        fp_save(thr_act);
        clear_fpu();

        (void)ml_set_interrupts_enabled(intr);
    }
    if (ifps->fp_valid) {
        bcopy((char *)ifps, (char *)&state->fpu_fcw, state_size);
        if (fpu_YMM_present) {
            struct x86_avx_thread_state *iavx = (void *) ifps;
            uint32_t fpu_nyreg = 0;

            if (f == x86_AVX_STATE32)
                fpu_nyreg = 8;
            else if (f == x86_AVX_STATE64)
                fpu_nyreg = 16;

            if (fpu_nyreg) {
                x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
                bcopy(&iavx->x_YMMH_reg[0][0], &ystate->__fpu_ymmh0, fpu_nyreg * sizeof(_STRUCT_XMM_REG));
            }
        }

        ret = KERN_SUCCESS;
    }
    simple_unlock(&pcb->lock);

    return ret;
}



/*
 * The child thread is 'stopped' with the thread mutex held and is not
 * yet known to anyone else, so its FPU state cannot be manipulated by
 * an outside agency; hence the child's pcb lock is not needed.
 */

void
fpu_dup_fxstate(
    thread_t parent,
    thread_t child)
{
    struct x86_fx_thread_state *new_ifps = NULL;
    boolean_t intr;
    pcb_t ppcb;

    ppcb = THREAD_TO_PCB(parent);

    if (ppcb->ifps == NULL)
        return;

    if (child->machine.ifps)
        panic("fpu_dup_fxstate: child's ifps non-null");

    new_ifps = fp_state_alloc();

    simple_lock(&ppcb->lock);

    if (ppcb->ifps != NULL) {
        struct x86_fx_thread_state *ifps = ppcb->ifps;
        /*
         * Make sure we've got the latest fp state info
         */
        intr = ml_set_interrupts_enabled(FALSE);
        assert(current_thread() == parent);
        clear_ts();
        fp_save(parent);
        clear_fpu();

        (void)ml_set_interrupts_enabled(intr);

        if (ifps->fp_valid) {
            child->machine.ifps = new_ifps;
            assert((fp_register_state_size == sizeof(struct x86_fx_thread_state)) ||
                (fp_register_state_size == sizeof(struct x86_avx_thread_state)));
            bcopy((char *)(ppcb->ifps),
                (char *)(child->machine.ifps), fp_register_state_size);

            /* Mark the new fp saved state as non-live. */
            /* Temporarily disabled: radar 4647827
             * new_ifps->fp_valid = TRUE;
             */

            /*
             * Clear any reserved bits in the MXCSR to prevent a GPF
             * when issuing an FXRSTOR.
             */
            new_ifps->fx_MXCSR &= mxcsr_capability_mask;
            new_ifps = NULL;
        }
    }
    simple_unlock(&ppcb->lock);

    if (new_ifps != NULL)
        fp_state_free(new_ifps);
}
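
/*
 * Presumably reached on the fork path (in this xnu version,
 * machine_thread_dup() is the expected caller), with the parent as
 * current_thread(), per the assert above.
 */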


/*
 * Initialize FPU.
 *
 */

void
fpinit(void)
{
    unsigned short control;

    clear_ts();
    fninit();
    fnstcw(&control);
    control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */
    control |= (FPC_PC_64 |      /* Set precision */
            FPC_RC_RN |          /* round-to-nearest */
            FPC_ZE |             /* Suppress zero-divide */
            FPC_OE |             /*  and overflow */
            FPC_UE |             /* underflow */
            FPC_IE |             /* Allow NaNQs and +-INF */
            FPC_DE |             /* Allow denorms as operands */
            FPC_PE);             /* No trap for precision loss */
    fldcw(control);

    /* Initialize SSE/SSE2 */
    __builtin_ia32_ldmxcsr(0x1f80);
}
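
/*
 * 0x1f80 is the architectural reset value of MXCSR: bits 7..12 set
 * all six SIMD exception masks (IM, DM, ZM, OM, UM, PM), the rounding
 * control is 00 (round-to-nearest), and all sticky exception flags
 * are clear, matching the x87 control word configured just above.
 */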

/*
 * Coprocessor not present.
 */

void
fpnoextflt(void)
{
    boolean_t intr;
    thread_t thr_act;
    pcb_t pcb;
    struct x86_fx_thread_state *ifps = 0;

    thr_act = current_thread();
    pcb = THREAD_TO_PCB(thr_act);

    assert(fp_register_state_size != 0);

    if (pcb->ifps == 0 && !get_interrupt_level()) {
        ifps = fp_state_alloc();
        bcopy((char *)&initial_fp_state, (char *)ifps,
            fp_register_state_size);
        if (!thread_is_64bit(thr_act)) {
            ifps->fp_save_layout = fpu_YMM_present ? XSAVE32 : FXSAVE32;
        }
        else
            ifps->fp_save_layout = fpu_YMM_present ? XSAVE64 : FXSAVE64;
        ifps->fp_valid = TRUE;
    }
    intr = ml_set_interrupts_enabled(FALSE);

    clear_ts();    /* Enable FPU use */

    if (__improbable(get_interrupt_level())) {
        /*
         * Save current coprocessor context if valid
         * Initialize coprocessor live context
         */
        fp_save(thr_act);
        fpinit();
    } else {
        if (pcb->ifps == 0) {
            pcb->ifps = ifps;
            ifps = 0;
        }
        /*
         * Load this thread's state into coprocessor live context.
         */
        fp_load(thr_act);
    }
    (void)ml_set_interrupts_enabled(intr);

    if (ifps)
        fp_state_free(ifps);
}
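
/*
 * fpnoextflt() is the #NM (device-not-available) handler: it is what
 * the CR0.TS set in init_fpu() and clear_fpu() funnels into. At
 * interrupt level there is no thread context to bind, so the
 * interrupted state is saved and a fresh FPU is initialized for the
 * interrupt's own use; otherwise the faulting thread's save area is
 * allocated (if needed) and loaded.
 */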

/*
 * FPU overran end of segment.
 * Re-initialize FPU. Floating point state is not valid.
 */

void
fpextovrflt(void)
{
    thread_t thr_act = current_thread();
    pcb_t pcb;
    struct x86_fx_thread_state *ifps;
    boolean_t intr;

    intr = ml_set_interrupts_enabled(FALSE);

    if (get_interrupt_level())
        panic("FPU segment overrun exception at interrupt context\n");
    if (current_task() == kernel_task)
        panic("FPU segment overrun exception in kernel thread context\n");

    /*
     * This is a non-recoverable error.
     * Invalidate the thread's FPU state.
     */
    pcb = THREAD_TO_PCB(thr_act);
    simple_lock(&pcb->lock);
    ifps = pcb->ifps;
    pcb->ifps = 0;
    simple_unlock(&pcb->lock);

    /*
     * Re-initialize the FPU.
     */
    clear_ts();
    fninit();

    /*
     * And disable access.
     */
    clear_fpu();

    (void)ml_set_interrupts_enabled(intr);

    if (ifps)
        zfree(ifps_zone, ifps);

    /*
     * Raise exception.
     */
    i386_exception(EXC_BAD_ACCESS, VM_PROT_READ|VM_PROT_EXECUTE, 0);
    /*NOTREACHED*/
}

/*
 * FPU error. Called by AST.
 */

void
fpexterrflt(void)
{
    thread_t thr_act = current_thread();
    struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
    boolean_t intr;

    intr = ml_set_interrupts_enabled(FALSE);

    if (get_interrupt_level())
        panic("FPU error exception at interrupt context\n");
    if (current_task() == kernel_task)
        panic("FPU error exception in kernel thread context\n");

    /*
     * Save the FPU state and turn off the FPU.
     */
    fp_save(thr_act);

    (void)ml_set_interrupts_enabled(intr);

    /*
     * Raise FPU exception.
     * Locking not needed on pcb->ifps,
     * since thread is running.
     */
    i386_exception(EXC_ARITHMETIC,
        EXC_I386_EXTERR,
        ifps->fx_status);

    /*NOTREACHED*/
}

/*
 * Save FPU state.
 *
 * Locking not needed:
 * .    if called from fpu_get_state, pcb already locked.
 * .    if called from fpnoextflt or fp_intr, we are single-cpu
 * .    otherwise, thread is running.
 * N.B.: Must be called with interrupts disabled
 */

void
fp_save(
    thread_t thr_act)
{
    pcb_t pcb = THREAD_TO_PCB(thr_act);
    struct x86_fx_thread_state *ifps = pcb->ifps;

    assert(ifps != 0);
    if (ifps != 0 && !ifps->fp_valid) {
        assert((get_cr0() & CR0_TS) == 0);
        /* registers are in FPU */
        ifps->fp_valid = TRUE;
        fpu_store_registers(ifps, thread_is_64bit(thr_act));
    }
}

/*
 * Restore FPU state from PCB.
 *
 * Locking not needed; always called on the current thread.
 */

void
fp_load(
    thread_t thr_act)
{
    pcb_t pcb = THREAD_TO_PCB(thr_act);
    struct x86_fx_thread_state *ifps = pcb->ifps;

    assert(ifps);
#if DEBUG
    if (ifps->fp_valid != FALSE && ifps->fp_valid != TRUE) {
        panic("fp_load() invalid fp_valid: %u, fp_save_layout: %u\n",
            ifps->fp_valid, ifps->fp_save_layout);
    }
#endif

    if (ifps->fp_valid == FALSE) {
        fpinit();
    } else {
        fpu_load_registers(ifps);
    }
    ifps->fp_valid = FALSE;    /* in FPU */
}
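
/*
 * fp_valid acts as the "live in FPU" flag throughout this file: FALSE
 * means the architectural registers hold the thread's state (and the
 * memory image may be stale); TRUE means the memory image is current.
 * fp_load() therefore clears it, and fp_save() / fpu_save_context()
 * set it after writing the registers back to memory.
 */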

/*
 * SSE arithmetic exception handling code.
 * Basically the same as the x87 exception handler with a different subtype
 */

void
fpSSEexterrflt(void)
{
    thread_t thr_act = current_thread();
    struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
    boolean_t intr;

    intr = ml_set_interrupts_enabled(FALSE);

    if (get_interrupt_level())
        panic("SSE exception at interrupt context\n");
    if (current_task() == kernel_task)
        panic("SSE exception in kernel thread context\n");

    /*
     * Save the FPU state and turn off the FPU.
     */
    fp_save(thr_act);

    (void)ml_set_interrupts_enabled(intr);
    /*
     * Raise FPU exception.
     * Locking not needed on pcb->ifps,
     * since thread is running.
     */
    i386_exception(EXC_ARITHMETIC,
        EXC_I386_SSEEXTERR,
        ifps->fx_MXCSR);
    /*NOTREACHED*/
}

void
fp_setvalid(boolean_t value) {
    thread_t thr_act = current_thread();
    struct x86_fx_thread_state *ifps = thr_act->machine.ifps;

    if (ifps) {
        ifps->fp_valid = value;

        if (value == TRUE) {
            boolean_t istate = ml_set_interrupts_enabled(FALSE);
            clear_fpu();
            ml_set_interrupts_enabled(istate);
        }
    }
}

boolean_t
ml_fpu_avx_enabled(void) {
    return (fpu_YMM_present == TRUE);
}