]> git.saurik.com Git - apple/xnu.git/blame - osfmk/i386/fpu.c
xnu-2782.10.72.tar.gz
[apple/xnu.git] / osfmk / i386 / fpu.c
CommitLineData
1c79356b 1/*
39236c6e 2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
1c79356b 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/*
29 * @OSF_COPYRIGHT@
30 */
31/*
32 * Mach Operating System
33 * Copyright (c) 1992-1990 Carnegie Mellon University
34 * All Rights Reserved.
35 *
36 * Permission to use, copy, modify and distribute this software and its
37 * documentation is hereby granted, provided that both the copyright
38 * notice and this permission notice appear in all copies of the
39 * software, derivative works or modified versions, and any portions
40 * thereof, and that both notices appear in supporting documentation.
41 *
42 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
43 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
44 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
45 *
46 * Carnegie Mellon requests users of this software to return to
47 *
48 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
49 * School of Computer Science
50 * Carnegie Mellon University
51 * Pittsburgh PA 15213-3890
52 *
53 * any improvements or extensions that they make and grant Carnegie Mellon
54 * the rights to redistribute these changes.
55 */
56/*
57 */
58
1c79356b
A
59
60#include <mach/exception_types.h>
61#include <mach/i386/thread_status.h>
62#include <mach/i386/fp_reg.h>
6d2010ae 63#include <mach/branch_predicates.h>
1c79356b
A
64
65#include <kern/mach_param.h>
91447636 66#include <kern/processor.h>
1c79356b
A
67#include <kern/thread.h>
68#include <kern/zalloc.h>
69#include <kern/misc_protos.h>
70#include <kern/spl.h>
71#include <kern/assert.h>
72
060df5ea
A
73#include <libkern/OSAtomic.h>
74
0c530ab8 75#include <architecture/i386/pio.h>
55e303ae 76#include <i386/cpuid.h>
b0d623f7 77#include <i386/fpu.h>
0c530ab8 78#include <i386/proc_reg.h>
b0d623f7
A
79#include <i386/misc_protos.h>
80#include <i386/thread.h>
81#include <i386/trap.h>
1c79356b 82
0c530ab8 83int fp_kind = FP_NO; /* not inited */
1c79356b
A
84zone_t ifps_zone; /* zone for FPU save area */
85
b0d623f7 86#define ALIGNED(addr,size) (((uintptr_t)(addr)&((size)-1))==0)
1c79356b
A
87
88/* Forward */
89
90extern void fpinit(void);
91extern void fp_save(
91447636 92 thread_t thr_act);
1c79356b 93extern void fp_load(
91447636 94 thread_t thr_act);
1c79356b 95
060df5ea 96static void configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps);
0c530ab8 97
060df5ea 98struct x86_avx_thread_state initial_fp_state __attribute((aligned(64)));
0c530ab8
A
99
100
101/* Global MXCSR capability bitmask */
102static unsigned int mxcsr_capability_mask;
103
060df5ea
A
104#define fninit() \
105 __asm__ volatile("fninit")
106
107#define fnstcw(control) \
108 __asm__("fnstcw %0" : "=m" (*(unsigned short *)(control)))
109
110#define fldcw(control) \
111 __asm__ volatile("fldcw %0" : : "m" (*(unsigned short *) &(control)) )
112
113#define fnclex() \
114 __asm__ volatile("fnclex")
115
116#define fnsave(state) \
117 __asm__ volatile("fnsave %0" : "=m" (*state))
118
119#define frstor(state) \
120 __asm__ volatile("frstor %0" : : "m" (state))
121
122#define fwait() \
123 __asm__("fwait");
124
125#define fxrstor(addr) __asm__ __volatile__("fxrstor %0" : : "m" (*(addr)))
126#define fxsave(addr) __asm__ __volatile__("fxsave %0" : "=m" (*(addr)))
127
128static uint32_t fp_register_state_size = 0;
129static uint32_t fpu_YMM_present = FALSE;
130static uint32_t cpuid_reevaluated = 0;
131
132static void fpu_store_registers(void *, boolean_t);
133static void fpu_load_registers(void *);
134
135extern void xsave64o(void);
136extern void xrstor64o(void);
137
138#define XMASK ((uint32_t) (XFEM_X87 | XFEM_SSE | XFEM_YMM))
139
060df5ea 140static inline void xsetbv(uint32_t mask_hi, uint32_t mask_lo) {
fe8ab488 141 __asm__ __volatile__("xsetbv" :: "a"(mask_lo), "d"(mask_hi), "c" (XCR0));
060df5ea
A
142}
143
fe8ab488
A
144static inline void xsave(struct x86_fx_thread_state *a) {
145 __asm__ __volatile__("xsave %0" :"=m" (*a) : "a"(XMASK), "d"(0));
060df5ea
A
146}
147
fe8ab488
A
148static inline void xrstor(struct x86_fx_thread_state *a) {
149 __asm__ __volatile__("xrstor %0" :: "m" (*a), "a"(XMASK), "d"(0));
060df5ea
A
150}
151
#if DEBUG
/* Read the x87 status word (DEBUG-only; used by init_fpu sanity check). */
static inline unsigned short
fnstsw(void)
{
	unsigned short status;

	__asm__ volatile("fnstsw %0" : "=ma" (status));
	return status;
}
#endif
060df5ea 161
0c530ab8 162/*
060df5ea 163 * Configure the initial FPU state presented to new threads.
0c530ab8
A
164 * Determine the MXCSR capability mask, which allows us to mask off any
165 * potentially unsafe "reserved" bits before restoring the FPU context.
166 * *Not* per-cpu, assumes symmetry.
167 */
060df5ea 168
0c530ab8 169static void
060df5ea 170configure_mxcsr_capability_mask(struct x86_avx_thread_state *fps)
0c530ab8 171{
060df5ea
A
172 /* XSAVE requires a 64 byte aligned store */
173 assert(ALIGNED(fps, 64));
0c530ab8 174 /* Clear, to prepare for the diagnostic FXSAVE */
060df5ea
A
175 bzero(fps, sizeof(*fps));
176
177 fpinit();
178 fpu_store_registers(fps, FALSE);
179
180 mxcsr_capability_mask = fps->fx_MXCSR_MASK;
0c530ab8
A
181
182 /* Set default mask value if necessary */
183 if (mxcsr_capability_mask == 0)
184 mxcsr_capability_mask = 0xffbf;
185
060df5ea
A
186 /* Clear vector register store */
187 bzero(&fps->fx_XMM_reg[0][0], sizeof(fps->fx_XMM_reg));
188 bzero(&fps->x_YMMH_reg[0][0], sizeof(fps->x_YMMH_reg));
0c530ab8 189
060df5ea
A
190 fps->fp_valid = TRUE;
191 fps->fp_save_layout = fpu_YMM_present ? XSAVE32: FXSAVE32;
192 fpu_load_registers(fps);
0c530ab8 193
060df5ea
A
194 /* Poison values to trap unsafe usage */
195 fps->fp_valid = 0xFFFFFFFF;
196 fps->fp_save_layout = FP_UNUSED;
0c530ab8 197
060df5ea
A
198 /* Re-enable FPU/SSE DNA exceptions */
199 set_ts();
0c530ab8
A
200}
201
202
1c79356b
A
203/*
204 * Look for FPU and initialize it.
205 * Called on each CPU.
206 */
207void
208init_fpu(void)
209{
060df5ea
A
210#if DEBUG
211 unsigned short status;
212 unsigned short control;
213#endif
1c79356b
A
214 /*
215 * Check for FPU by initializing it,
216 * then trying to read the correct bit patterns from
217 * the control and status registers.
218 */
91447636 219 set_cr0((get_cr0() & ~(CR0_EM|CR0_TS)) | CR0_NE); /* allow use of FPU */
1c79356b 220 fninit();
060df5ea 221#if DEBUG
1c79356b
A
222 status = fnstsw();
223 fnstcw(&control);
060df5ea
A
224
225 assert(((status & 0xff) == 0) && ((control & 0x103f) == 0x3f));
226#endif
227 /* Advertise SSE support */
228 if (cpuid_features() & CPUID_FEATURE_FXSR) {
229 fp_kind = FP_FXSR;
230 set_cr4(get_cr4() | CR4_OSFXS);
231 /* And allow SIMD exceptions if present */
232 if (cpuid_features() & CPUID_FEATURE_SSE) {
233 set_cr4(get_cr4() | CR4_OSXMM);
234 }
235 fp_register_state_size = sizeof(struct x86_fx_thread_state);
1c79356b 236
060df5ea
A
237 } else
238 panic("fpu is not FP_FXSR");
55e303ae 239
060df5ea
A
240 /* Configure the XSAVE context mechanism if the processor supports
241 * AVX/YMM registers
242 */
243 if (cpuid_features() & CPUID_FEATURE_XSAVE) {
244 cpuid_xsave_leaf_t *xsp = &cpuid_info()->cpuid_xsave_leaf;
245 if (xsp->extended_state[0] & (uint32_t)XFEM_YMM) {
246 assert(xsp->extended_state[0] & (uint32_t) XFEM_SSE);
247 /* XSAVE container size for all features */
a1c7dba1
A
248 if (xsp->extended_state[2] != sizeof(struct x86_avx_thread_state))
249 kprintf("sizeof(struct x86_avx_thread_state)=%lu != xsp->extended_state[2]=%u\n",
250 sizeof(struct x86_avx_thread_state), xsp->extended_state[2]);
060df5ea
A
251 fp_register_state_size = sizeof(struct x86_avx_thread_state);
252 fpu_YMM_present = TRUE;
253 set_cr4(get_cr4() | CR4_OSXSAVE);
254 xsetbv(0, XMASK);
255 /* Re-evaluate CPUID, once, to reflect OSXSAVE */
256 if (OSCompareAndSwap(0, 1, &cpuid_reevaluated))
257 cpuid_set_info();
258 /* DRK: consider verifying AVX offset with cpuid(d, ECX:2) */
259 }
260 }
261 else
262 fpu_YMM_present = FALSE;
263
264 fpinit();
265
266 /*
267 * Trap wait instructions. Turn off FPU for now.
268 */
269 set_cr0(get_cr0() | CR0_TS | CR0_MP);
270}
271
272/*
273 * Allocate and initialize FP state for current thread.
274 * Don't load state.
275 */
276static void *
277fp_state_alloc(void)
278{
fe8ab488 279 struct x86_fx_thread_state *ifps = zalloc(ifps_zone);
0c530ab8 280
060df5ea
A
281#if DEBUG
282 if (!(ALIGNED(ifps,64))) {
283 panic("fp_state_alloc: %p, %u, %p, %u", ifps, (unsigned) ifps_zone->elem_size, (void *) ifps_zone->free_elements, (unsigned) ifps_zone->alloc_size);
1c79356b 284 }
060df5ea 285#endif
fe8ab488 286 bzero(ifps, sizeof(*ifps));
060df5ea
A
287 return ifps;
288}
289
290static inline void
291fp_state_free(void *ifps)
292{
293 zfree(ifps_zone, ifps);
294}
295
/* Disable FPU access: the next FP instruction will take a DNA fault. */
void
clear_fpu(void)
{
	set_ts();
}
300
301
302static void fpu_load_registers(void *fstate) {
303 struct x86_fx_thread_state *ifps = fstate;
304 fp_save_layout_t layout = ifps->fp_save_layout;
305
306 assert(layout == FXSAVE32 || layout == FXSAVE64 || layout == XSAVE32 || layout == XSAVE64);
307 assert(ALIGNED(ifps, 64));
308 assert(ml_get_interrupts_enabled() == FALSE);
309
310#if DEBUG
311 if (layout == XSAVE32 || layout == XSAVE64) {
312 struct x86_avx_thread_state *iavx = fstate;
313 unsigned i;
314 /* Verify reserved bits in the XSAVE header*/
315 if (iavx->_xh.xsbv & ~7)
316 panic("iavx->_xh.xsbv: 0x%llx", iavx->_xh.xsbv);
317 for (i = 0; i < sizeof(iavx->_xh.xhrsvd); i++)
318 if (iavx->_xh.xhrsvd[i])
319 panic("Reserved bit set");
320 }
321 if (fpu_YMM_present) {
322 if (layout != XSAVE32 && layout != XSAVE64)
323 panic("Inappropriate layout: %u\n", layout);
324 }
325#endif /* DEBUG */
326
060df5ea
A
327 if ((layout == XSAVE64) || (layout == XSAVE32))
328 xrstor(ifps);
1c79356b 329 else
060df5ea 330 fxrstor(ifps);
060df5ea
A
331}
332
333static void fpu_store_registers(void *fstate, boolean_t is64) {
334 struct x86_fx_thread_state *ifps = fstate;
335 assert(ALIGNED(ifps, 64));
060df5ea
A
336 if (fpu_YMM_present) {
337 xsave(ifps);
338 ifps->fp_save_layout = is64 ? XSAVE64 : XSAVE32;
1c79356b 339 }
060df5ea
A
340 else {
341 fxsave(ifps);
342 ifps->fp_save_layout = is64 ? FXSAVE64 : FXSAVE32;
343 }
1c79356b
A
344}
345
346/*
347 * Initialize FP handling.
348 */
060df5ea 349
1c79356b
A
350void
351fpu_module_init(void)
352{
060df5ea
A
353 if ((fp_register_state_size != sizeof(struct x86_fx_thread_state)) &&
354 (fp_register_state_size != sizeof(struct x86_avx_thread_state)))
355 panic("fpu_module_init: incorrect savearea size %u\n", fp_register_state_size);
356
357 assert(fpu_YMM_present != 0xFFFFFFFF);
358
359 /* We explicitly choose an allocation size of 64
360 * to eliminate waste for the 832 byte sized
361 * AVX XSAVE register save area.
362 */
363 ifps_zone = zinit(fp_register_state_size,
364 thread_max * fp_register_state_size,
365 64 * fp_register_state_size,
0c530ab8 366 "x86 fpsave state");
060df5ea 367
060df5ea
A
368 /* To maintain the required alignment, disable
369 * zone debugging for this zone as that appends
370 * 16 bytes to each element.
371 */
316670eb 372 zone_change(ifps_zone, Z_ALIGNMENT_REQUIRED, TRUE);
060df5ea
A
373 /* Determine MXCSR reserved bits and configure initial FPU state*/
374 configure_mxcsr_capability_mask(&initial_fp_state);
375}
376
377/*
378 * Save thread`s FPU context.
379 */
380void
381fpu_save_context(thread_t thread)
382{
383 struct x86_fx_thread_state *ifps;
384
385 assert(ml_get_interrupts_enabled() == FALSE);
6d2010ae 386 ifps = (thread)->machine.ifps;
060df5ea
A
387#if DEBUG
388 if (ifps && ((ifps->fp_valid != FALSE) && (ifps->fp_valid != TRUE))) {
389 panic("ifps->fp_valid: %u\n", ifps->fp_valid);
390 }
391#endif
392 if (ifps != 0 && (ifps->fp_valid == FALSE)) {
393 /* Clear CR0.TS in preparation for the FP context save. In
394 * theory, this shouldn't be necessary since a live FPU should
395 * indicate that TS is clear. However, various routines
396 * (such as sendsig & sigreturn) manipulate TS directly.
397 */
398 clear_ts();
399 /* registers are in FPU - save to memory */
6d2010ae 400 fpu_store_registers(ifps, (thread_is_64bit(thread) && is_saved_state64(thread->machine.iss)));
060df5ea
A
401 ifps->fp_valid = TRUE;
402 }
403 set_ts();
1c79356b
A
404}
405
060df5ea 406
1c79356b
A
/*
 * Free a FPU save area.
 * Called only when thread terminating - no locking necessary.
 */
void
fpu_free(void *fps)
{
	fp_state_free(fps);
}
416
55e303ae
A
417/*
418 * Set the floating-point state for a thread based
419 * on the FXSave formatted data. This is basically
420 * the same as fpu_set_state except it uses the
421 * expanded data structure.
422 * If the thread is not the current thread, it is
423 * not running (held). Locking needed against
424 * concurrent fpu_set_state or fpu_get_state.
425 */
426kern_return_t
427fpu_set_fxstate(
060df5ea
A
428 thread_t thr_act,
429 thread_state_t tstate,
430 thread_flavor_t f)
55e303ae 431{
060df5ea
A
432 struct x86_fx_thread_state *ifps;
433 struct x86_fx_thread_state *new_ifps;
0c530ab8
A
434 x86_float_state64_t *state;
435 pcb_t pcb;
7ddcb079 436 size_t state_size = sizeof(struct x86_fx_thread_state);
fe8ab488
A
437 boolean_t old_valid, fresh_state = FALSE;
438
55e303ae 439 if (fp_kind == FP_NO)
fe8ab488 440 return KERN_FAILURE;
0c530ab8 441
bd504ef0
A
442 if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
443 !ml_fpu_avx_enabled())
fe8ab488 444 return KERN_FAILURE;
bd504ef0 445
0c530ab8 446 state = (x86_float_state64_t *)tstate;
55e303ae 447
91447636 448 assert(thr_act != THREAD_NULL);
6d2010ae 449 pcb = THREAD_TO_PCB(thr_act);
55e303ae 450
0c530ab8 451 if (state == NULL) {
fe8ab488
A
452 /*
453 * new FPU state is 'invalid'.
454 * Deallocate the fp state if it exists.
455 */
456 simple_lock(&pcb->lock);
0c530ab8
A
457
458 ifps = pcb->ifps;
459 pcb->ifps = 0;
4452a7af 460
fe8ab488 461 simple_unlock(&pcb->lock);
0c530ab8 462
fe8ab488
A
463 if (ifps != 0) {
464 fp_state_free(ifps);
465 }
0c530ab8 466 } else {
fe8ab488
A
467 /*
468 * Valid incoming state. Allocate the fp state if there is none.
469 */
470 new_ifps = 0;
471 Retry:
472 simple_lock(&pcb->lock);
0c530ab8
A
473
474 ifps = pcb->ifps;
fe8ab488
A
475 if (ifps == 0) {
476 if (new_ifps == 0) {
477 simple_unlock(&pcb->lock);
478 new_ifps = fp_state_alloc();
479 goto Retry;
480 }
481 ifps = new_ifps;
482 new_ifps = 0;
483 pcb->ifps = ifps;
484 fresh_state = TRUE;
485 }
486
487 /*
488 * now copy over the new data.
489 */
490
491 old_valid = ifps->fp_valid;
492
493#if DEBUG || DEVELOPMENT
494 if ((fresh_state == FALSE) && (old_valid == FALSE) && (thr_act != current_thread())) {
495 panic("fpu_set_fxstate inconsistency, thread: %p not stopped", thr_act);
55e303ae 496 }
060df5ea 497#endif
fe8ab488
A
498 /*
499 * Clear any reserved bits in the MXCSR to prevent a GPF
500 * when issuing an FXRSTOR.
501 */
7ddcb079 502
fe8ab488 503 state->fpu_mxcsr &= mxcsr_capability_mask;
060df5ea 504
fe8ab488 505 bcopy((char *)&state->fpu_fcw, (char *)ifps, state_size);
060df5ea 506
fe8ab488
A
507 if (fpu_YMM_present) {
508 struct x86_avx_thread_state *iavx = (void *) ifps;
509 uint32_t fpu_nyreg = 0;
7ddcb079 510
fe8ab488
A
511 if (f == x86_AVX_STATE32)
512 fpu_nyreg = 8;
513 else if (f == x86_AVX_STATE64)
514 fpu_nyreg = 16;
7ddcb079 515
fe8ab488
A
516 if (fpu_nyreg) {
517 x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
518 bcopy(&ystate->__fpu_ymmh0, &iavx->x_YMMH_reg[0][0], fpu_nyreg * sizeof(_STRUCT_XMM_REG));
519 }
520
521 iavx->fp_save_layout = thread_is_64bit(thr_act) ? XSAVE64 : XSAVE32;
522 /* Sanitize XSAVE header */
523 bzero(&iavx->_xh.xhrsvd[0], sizeof(iavx->_xh.xhrsvd));
524 if (fpu_nyreg)
525 iavx->_xh.xsbv = (XFEM_YMM | XFEM_SSE | XFEM_X87);
526 else
527 iavx->_xh.xsbv = (XFEM_SSE | XFEM_X87);
528 } else {
529 ifps->fp_save_layout = thread_is_64bit(thr_act) ? FXSAVE64 : FXSAVE32;
7ddcb079 530 }
fe8ab488 531 ifps->fp_valid = old_valid;
7ddcb079 532
fe8ab488
A
533 if (old_valid == FALSE) {
534 boolean_t istate = ml_set_interrupts_enabled(FALSE);
535 ifps->fp_valid = TRUE;
536 /* If altering the current thread's state, disable FPU */
537 if (thr_act == current_thread())
538 set_ts();
539
540 ml_set_interrupts_enabled(istate);
541 }
542
543 simple_unlock(&pcb->lock);
544
545 if (new_ifps != 0)
546 fp_state_free(new_ifps);
0c530ab8 547 }
55e303ae
A
548 return KERN_SUCCESS;
549}
550
551/*
552 * Get the floating-point state for a thread.
553 * If the thread is not the current thread, it is
554 * not running (held). Locking needed against
555 * concurrent fpu_set_state or fpu_get_state.
556 */
557kern_return_t
558fpu_get_fxstate(
060df5ea
A
559 thread_t thr_act,
560 thread_state_t tstate,
561 thread_flavor_t f)
55e303ae 562{
060df5ea 563 struct x86_fx_thread_state *ifps;
0c530ab8
A
564 x86_float_state64_t *state;
565 kern_return_t ret = KERN_FAILURE;
566 pcb_t pcb;
7ddcb079 567 size_t state_size = sizeof(struct x86_fx_thread_state);
55e303ae 568
0c530ab8 569 if (fp_kind == FP_NO)
2d21ac55 570 return KERN_FAILURE;
0c530ab8 571
bd504ef0
A
572 if ((f == x86_AVX_STATE32 || f == x86_AVX_STATE64) &&
573 !ml_fpu_avx_enabled())
574 return KERN_FAILURE;
575
0c530ab8 576 state = (x86_float_state64_t *)tstate;
55e303ae 577
91447636 578 assert(thr_act != THREAD_NULL);
6d2010ae 579 pcb = THREAD_TO_PCB(thr_act);
55e303ae
A
580
581 simple_lock(&pcb->lock);
0c530ab8
A
582
583 ifps = pcb->ifps;
55e303ae 584 if (ifps == 0) {
2d21ac55 585 /*
0c530ab8
A
586 * No valid floating-point state.
587 */
060df5ea
A
588
589 bcopy((char *)&initial_fp_state, (char *)&state->fpu_fcw,
590 state_size);
0c530ab8
A
591
592 simple_unlock(&pcb->lock);
6601e61a 593
0c530ab8
A
594 return KERN_SUCCESS;
595 }
596 /*
597 * Make sure we`ve got the latest fp state info
598 * If the live fpu state belongs to our target
599 */
2d21ac55
A
600 if (thr_act == current_thread()) {
601 boolean_t intr;
8f6c56a5 602
0c530ab8 603 intr = ml_set_interrupts_enabled(FALSE);
89b3af67 604
0c530ab8
A
605 clear_ts();
606 fp_save(thr_act);
607 clear_fpu();
6601e61a 608
0c530ab8 609 (void)ml_set_interrupts_enabled(intr);
6601e61a 610 }
0c530ab8 611 if (ifps->fp_valid) {
060df5ea 612 bcopy((char *)ifps, (char *)&state->fpu_fcw, state_size);
7ddcb079
A
613 if (fpu_YMM_present) {
614 struct x86_avx_thread_state *iavx = (void *) ifps;
615 uint32_t fpu_nyreg = 0;
616
617 if (f == x86_AVX_STATE32)
618 fpu_nyreg = 8;
619 else if (f == x86_AVX_STATE64)
620 fpu_nyreg = 16;
621
622 if (fpu_nyreg) {
623 x86_avx_state64_t *ystate = (x86_avx_state64_t *) state;
624 bcopy(&iavx->x_YMMH_reg[0][0], &ystate->__fpu_ymmh0, fpu_nyreg * sizeof(_STRUCT_XMM_REG));
625 }
626 }
627
0c530ab8 628 ret = KERN_SUCCESS;
6601e61a 629 }
0c530ab8 630 simple_unlock(&pcb->lock);
21362eb3 631
0c530ab8 632 return ret;
6601e61a 633}
21362eb3 634
0c530ab8 635
2d21ac55 636
6601e61a 637/*
0c530ab8
A
638 * the child thread is 'stopped' with the thread
639 * mutex held and is currently not known by anyone
640 * so no way for fpu state to get manipulated by an
641 * outside agency -> no need for pcb lock
6601e61a 642 */
0c530ab8
A
643
644void
645fpu_dup_fxstate(
646 thread_t parent,
647 thread_t child)
6601e61a 648{
060df5ea
A
649 struct x86_fx_thread_state *new_ifps = NULL;
650 boolean_t intr;
0c530ab8 651 pcb_t ppcb;
21362eb3 652
6d2010ae 653 ppcb = THREAD_TO_PCB(parent);
21362eb3 654
0c530ab8
A
655 if (ppcb->ifps == NULL)
656 return;
4452a7af 657
6d2010ae 658 if (child->machine.ifps)
0c530ab8 659 panic("fpu_dup_fxstate: child's ifps non-null");
4452a7af 660
0c530ab8 661 new_ifps = fp_state_alloc();
5d5c5d0d 662
0c530ab8 663 simple_lock(&ppcb->lock);
6601e61a 664
0c530ab8 665 if (ppcb->ifps != NULL) {
060df5ea 666 struct x86_fx_thread_state *ifps = ppcb->ifps;
0c530ab8
A
667 /*
668 * Make sure we`ve got the latest fp state info
669 */
670 intr = ml_set_interrupts_enabled(FALSE);
060df5ea 671 assert(current_thread() == parent);
0c530ab8
A
672 clear_ts();
673 fp_save(parent);
674 clear_fpu();
6601e61a 675
0c530ab8 676 (void)ml_set_interrupts_enabled(intr);
6601e61a 677
060df5ea 678 if (ifps->fp_valid) {
6d2010ae 679 child->machine.ifps = new_ifps;
060df5ea
A
680 assert((fp_register_state_size == sizeof(struct x86_fx_thread_state)) ||
681 (fp_register_state_size == sizeof(struct x86_avx_thread_state)));
682 bcopy((char *)(ppcb->ifps),
6d2010ae 683 (char *)(child->machine.ifps), fp_register_state_size);
0c530ab8 684
2d21ac55
A
685 /* Mark the new fp saved state as non-live. */
686 /* Temporarily disabled: radar 4647827
687 * new_ifps->fp_valid = TRUE;
688 */
060df5ea 689
0c530ab8
A
690 /*
691 * Clear any reserved bits in the MXCSR to prevent a GPF
692 * when issuing an FXRSTOR.
693 */
060df5ea 694 new_ifps->fx_MXCSR &= mxcsr_capability_mask;
0c530ab8
A
695 new_ifps = NULL;
696 }
6601e61a 697 }
0c530ab8 698 simple_unlock(&ppcb->lock);
89b3af67 699
0c530ab8
A
700 if (new_ifps != NULL)
701 fp_state_free(new_ifps);
6601e61a 702}
4452a7af 703
0c530ab8 704
1c79356b
A
705/*
706 * Initialize FPU.
707 *
1c79356b 708 */
060df5ea 709
1c79356b
A
710void
711fpinit(void)
712{
713 unsigned short control;
714
1c79356b
A
715 clear_ts();
716 fninit();
717 fnstcw(&control);
718 control &= ~(FPC_PC|FPC_RC); /* Clear precision & rounding control */
0c530ab8 719 control |= (FPC_PC_64 | /* Set precision */
1c79356b
A
720 FPC_RC_RN | /* round-to-nearest */
721 FPC_ZE | /* Suppress zero-divide */
722 FPC_OE | /* and overflow */
723 FPC_UE | /* underflow */
724 FPC_IE | /* Allow NaNQs and +-INF */
725 FPC_DE | /* Allow denorms as operands */
726 FPC_PE); /* No trap for precision loss */
727 fldcw(control);
0c530ab8
A
728
729 /* Initialize SSE/SSE2 */
060df5ea 730 __builtin_ia32_ldmxcsr(0x1f80);
b0d623f7 731}
1c79356b
A
732
733/*
734 * Coprocessor not present.
735 */
736
737void
738fpnoextflt(void)
739{
0c530ab8 740 boolean_t intr;
2d21ac55
A
741 thread_t thr_act;
742 pcb_t pcb;
060df5ea 743 struct x86_fx_thread_state *ifps = 0;
2d21ac55
A
744
745 thr_act = current_thread();
6d2010ae 746 pcb = THREAD_TO_PCB(thr_act);
2d21ac55 747
060df5ea 748 assert(fp_register_state_size != 0);
4452a7af 749
060df5ea
A
750 if (pcb->ifps == 0 && !get_interrupt_level()) {
751 ifps = fp_state_alloc();
752 bcopy((char *)&initial_fp_state, (char *)ifps,
753 fp_register_state_size);
754 if (!thread_is_64bit(thr_act)) {
755 ifps->fp_save_layout = fpu_YMM_present ? XSAVE32 : FXSAVE32;
756 }
757 else
758 ifps->fp_save_layout = fpu_YMM_present ? XSAVE64 : FXSAVE64;
759 ifps->fp_valid = TRUE;
760 }
0c530ab8
A
761 intr = ml_set_interrupts_enabled(FALSE);
762
763 clear_ts(); /* Enable FPU use */
764
6d2010ae 765 if (__improbable(get_interrupt_level())) {
0c530ab8
A
766 /*
767 * Save current coprocessor context if valid
768 * Initialize coprocessor live context
769 */
2d21ac55 770 fp_save(thr_act);
0c530ab8
A
771 fpinit();
772 } else {
2d21ac55
A
773 if (pcb->ifps == 0) {
774 pcb->ifps = ifps;
775 ifps = 0;
776 }
0c530ab8
A
777 /*
778 * Load this thread`s state into coprocessor live context.
779 */
2d21ac55 780 fp_load(thr_act);
0c530ab8 781 }
0c530ab8 782 (void)ml_set_interrupts_enabled(intr);
2d21ac55
A
783
784 if (ifps)
785 fp_state_free(ifps);
1c79356b
A
786}
787
788/*
789 * FPU overran end of segment.
790 * Re-initialize FPU. Floating point state is not valid.
791 */
792
793void
794fpextovrflt(void)
795{
0c530ab8
A
796 thread_t thr_act = current_thread();
797 pcb_t pcb;
060df5ea 798 struct x86_fx_thread_state *ifps;
0c530ab8
A
799 boolean_t intr;
800
801 intr = ml_set_interrupts_enabled(FALSE);
802
803 if (get_interrupt_level())
2d21ac55 804 panic("FPU segment overrun exception at interrupt context\n");
0c530ab8
A
805 if (current_task() == kernel_task)
806 panic("FPU segment overrun exception in kernel thread context\n");
1c79356b 807
1c79356b
A
808 /*
809 * This is a non-recoverable error.
810 * Invalidate the thread`s FPU state.
811 */
6d2010ae 812 pcb = THREAD_TO_PCB(thr_act);
1c79356b 813 simple_lock(&pcb->lock);
0c530ab8
A
814 ifps = pcb->ifps;
815 pcb->ifps = 0;
1c79356b
A
816 simple_unlock(&pcb->lock);
817
818 /*
819 * Re-initialize the FPU.
820 */
821 clear_ts();
822 fninit();
823
824 /*
825 * And disable access.
826 */
827 clear_fpu();
828
0c530ab8
A
829 (void)ml_set_interrupts_enabled(intr);
830
1c79356b 831 if (ifps)
91447636 832 zfree(ifps_zone, ifps);
1c79356b
A
833
834 /*
835 * Raise exception.
836 */
837 i386_exception(EXC_BAD_ACCESS, VM_PROT_READ|VM_PROT_EXECUTE, 0);
838 /*NOTREACHED*/
839}
840
841/*
842 * FPU error. Called by AST.
843 */
844
845void
846fpexterrflt(void)
847{
0c530ab8 848 thread_t thr_act = current_thread();
6d2010ae 849 struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
0c530ab8
A
850 boolean_t intr;
851
852 intr = ml_set_interrupts_enabled(FALSE);
853
854 if (get_interrupt_level())
855 panic("FPU error exception at interrupt context\n");
856 if (current_task() == kernel_task)
857 panic("FPU error exception in kernel thread context\n");
1c79356b 858
1c79356b
A
859 /*
860 * Save the FPU state and turn off the FPU.
861 */
862 fp_save(thr_act);
1c79356b 863
0c530ab8
A
864 (void)ml_set_interrupts_enabled(intr);
865
1c79356b
A
866 /*
867 * Raise FPU exception.
0c530ab8 868 * Locking not needed on pcb->ifps,
1c79356b
A
869 * since thread is running.
870 */
871 i386_exception(EXC_ARITHMETIC,
872 EXC_I386_EXTERR,
060df5ea 873 ifps->fx_status);
0c530ab8 874
1c79356b
A
875 /*NOTREACHED*/
876}
877
878/*
879 * Save FPU state.
880 *
881 * Locking not needed:
882 * . if called from fpu_get_state, pcb already locked.
883 * . if called from fpnoextflt or fp_intr, we are single-cpu
884 * . otherwise, thread is running.
0c530ab8 885 * N.B.: Must be called with interrupts disabled
1c79356b 886 */
0c530ab8 887
1c79356b
A
888void
889fp_save(
91447636 890 thread_t thr_act)
1c79356b 891{
6d2010ae 892 pcb_t pcb = THREAD_TO_PCB(thr_act);
060df5ea 893 struct x86_fx_thread_state *ifps = pcb->ifps;
0c530ab8 894
060df5ea 895 assert(ifps != 0);
1c79356b 896 if (ifps != 0 && !ifps->fp_valid) {
0c530ab8
A
897 assert((get_cr0() & CR0_TS) == 0);
898 /* registers are in FPU */
899 ifps->fp_valid = TRUE;
060df5ea 900 fpu_store_registers(ifps, thread_is_64bit(thr_act));
1c79356b
A
901 }
902}
903
904/*
905 * Restore FPU state from PCB.
906 *
907 * Locking not needed; always called on the current thread.
908 */
909
910void
911fp_load(
91447636 912 thread_t thr_act)
1c79356b 913{
6d2010ae 914 pcb_t pcb = THREAD_TO_PCB(thr_act);
060df5ea 915 struct x86_fx_thread_state *ifps = pcb->ifps;
0c530ab8 916
060df5ea 917 assert(ifps);
39236c6e
A
918#if DEBUG
919 if (ifps->fp_valid != FALSE && ifps->fp_valid != TRUE) {
920 panic("fp_load() invalid fp_valid: %u, fp_save_layout: %u\n",
921 ifps->fp_valid, ifps->fp_save_layout);
922 }
923#endif
060df5ea
A
924
925 if (ifps->fp_valid == FALSE) {
0c530ab8 926 fpinit();
1c79356b 927 } else {
060df5ea 928 fpu_load_registers(ifps);
1c79356b
A
929 }
930 ifps->fp_valid = FALSE; /* in FPU */
931}
932
1c79356b 933/*
0c530ab8
A
934 * SSE arithmetic exception handling code.
935 * Basically the same as the x87 exception handler with a different subtype
1c79356b
A
936 */
937
938void
0c530ab8 939fpSSEexterrflt(void)
1c79356b 940{
0c530ab8 941 thread_t thr_act = current_thread();
6d2010ae 942 struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
0c530ab8 943 boolean_t intr;
4452a7af 944
0c530ab8
A
945 intr = ml_set_interrupts_enabled(FALSE);
946
947 if (get_interrupt_level())
948 panic("SSE exception at interrupt context\n");
949 if (current_task() == kernel_task)
950 panic("SSE exception in kernel thread context\n");
1c79356b
A
951
952 /*
0c530ab8 953 * Save the FPU state and turn off the FPU.
1c79356b 954 */
1c79356b 955 fp_save(thr_act);
1c79356b 956
0c530ab8 957 (void)ml_set_interrupts_enabled(intr);
1c79356b 958 /*
0c530ab8
A
959 * Raise FPU exception.
960 * Locking not needed on pcb->ifps,
961 * since thread is running.
1c79356b 962 */
fe8ab488 963
0c530ab8
A
964 i386_exception(EXC_ARITHMETIC,
965 EXC_I386_SSEEXTERR,
060df5ea 966 ifps->fx_MXCSR);
0c530ab8
A
967 /*NOTREACHED*/
968}
969
0c530ab8
A
970void
971fp_setvalid(boolean_t value) {
972 thread_t thr_act = current_thread();
6d2010ae 973 struct x86_fx_thread_state *ifps = thr_act->machine.ifps;
0c530ab8
A
974
975 if (ifps) {
976 ifps->fp_valid = value;
977
060df5ea
A
978 if (value == TRUE) {
979 boolean_t istate = ml_set_interrupts_enabled(FALSE);
0c530ab8 980 clear_fpu();
060df5ea
A
981 ml_set_interrupts_enabled(istate);
982 }
0c530ab8 983 }
1c79356b 984}
060df5ea 985
316670eb 986boolean_t
060df5ea
A
987ml_fpu_avx_enabled(void) {
988 return (fpu_YMM_present == TRUE);
989}