+
+ fpu_capability = fpu_default = FP;
+
+ PE_parse_boot_argn("fpsimd_fault_popc", &fpsimd_fault_popc, sizeof(fpsimd_fault_popc));
+
+#if !defined(RC_HIDE_XNU_J137)
+ static boolean_t is_avx512_enabled = TRUE;
+ if (cpu_number() == master_cpu) {
+ if (cpuid_leaf7_features() & CPUID_LEAF7_FEATURE_AVX512F) {
+ PE_parse_boot_argn("avx512", &is_avx512_enabled, sizeof(boolean_t));
+ kprintf("AVX512 supported %s\n",
+ is_avx512_enabled ? "and enabled" : "but disabled");
+ }
+ }
+#endif
+
+ /* Configure the XSAVE context mechanism if the processor supports
+ * AVX/YMM registers
+ */
+ if (cpuid_features() & CPUID_FEATURE_XSAVE) {
+ cpuid_xsave_leaf_t *xs0p = &cpuid_info()->cpuid_xsave_leaf[0];
+#if !defined(RC_HIDE_XNU_J137)
+ if (is_avx512_enabled &&
+ (xs0p->extended_state[eax] & XFEM_ZMM) == XFEM_ZMM) {
+ assert(xs0p->extended_state[eax] & XFEM_SSE);
+ assert(xs0p->extended_state[eax] & XFEM_YMM);
+ fpu_capability = AVX512;
+ /* XSAVE container size for all features */
+ set_cr4(get_cr4() | CR4_OSXSAVE);
+ xsetbv(0, AVX512_XMASK);
+ /* Re-evaluate CPUID, once, to reflect OSXSAVE */
+ if (OSCompareAndSwap(0, 1, &cpuid_reevaluated)) {
+ cpuid_set_info();
+ }
+ /* Verify that now selected state can be accommodated */
+ assert(xs0p->extended_state[ebx] == fp_state_size[AVX512]);
+ /*
+ * AVX set until AVX512 is used.
+ * See comment above about on-demand AVX512 support.
+ */
+ xsetbv(0, AVX_XMASK);
+ fpu_default = AVX;
+ } else
+#endif
+ if (xs0p->extended_state[eax] & XFEM_YMM) {
+ assert(xs0p->extended_state[eax] & XFEM_SSE);
+ fpu_capability = AVX;
+ fpu_default = AVX;
+ /* XSAVE container size for all features */
+ set_cr4(get_cr4() | CR4_OSXSAVE);
+ xsetbv(0, AVX_XMASK);
+ /* Re-evaluate CPUID, once, to reflect OSXSAVE */
+ if (OSCompareAndSwap(0, 1, &cpuid_reevaluated)) {
+ cpuid_set_info();
+ }
+ /* Verify that now selected state can be accommodated */
+ assert(xs0p->extended_state[ebx] == fp_state_size[AVX]);
+ }
+ }
+
+ if (cpu_number() == master_cpu) {
+ kprintf("fpu_state: %s, state_size: %d\n",
+ xstate_name[fpu_capability],
+ fp_state_size[fpu_capability]);
+ }
+
+ fpinit();
+ current_cpu_datap()->cpu_xstate = fpu_default;
+
+ /*
+ * Trap wait instructions. Turn off FPU for now.
+ */
+ set_cr0(get_cr0() | CR0_TS | CR0_MP);
+}
+
+/*
+ * fp_state_alloc: obtain a cleared FP save area for the given xstate.
+ *
+ * An element is taken from ifps_zone[xs] (asserted to be non-NULL) and
+ * its first fp_state_size[xs] bytes are zeroed.  Nothing is loaded into
+ * the FPU here; the caller simply receives the blank save area.
+ *
+ * DEBUG kernels panic if the element is not 64-byte aligned.
+ *
+ * NOTE(review): the zalloc() result is used without a NULL check;
+ * presumably this zone allocation cannot fail -- confirm.
+ */
+static void *
+fp_state_alloc(xstate_t xs)
+{
+	assert(ifps_zone[xs] != NULL);
+
+	struct x86_fx_thread_state *save_area = zalloc(ifps_zone[xs]);
+
+#if DEBUG
+	/* Report the zone's parameters when the element is misaligned. */
+	if (!(ALIGNED(save_area, 64))) {
+		panic("fp_state_alloc: %p, %u, %p, %u",
+		    save_area,
+		    (unsigned) ifps_zone[xs]->elem_size,
+		    (void *) ifps_zone[xs]->free_elements,
+		    (unsigned) ifps_zone[xs]->alloc_size);
+	}
+#endif
+
+	bzero(save_area, fp_state_size[xs]);
+	return save_area;
+}
+/*
+ * fp_state_free: hand the FP save area `ifps` back to ifps_zone[xs].
+ *
+ *   ifps  save area to release.
+ *   xs    xstate index selecting the zone; the zone entry is asserted
+ *         to be non-NULL before the area is freed into it.
+ *
+ * NOTE(review): presumably `xs` has to be the xstate the area was
+ * obtained with from fp_state_alloc() -- confirm against callers.
+ */
+static inline void
+fp_state_free(void *ifps, xstate_t xs)
+{
+ assert(ifps_zone[xs] != NULL);
+ zfree(ifps_zone[xs], ifps);
+}
+/*
+ * clear_fpu: calls set_ts() and does nothing else.
+ *
+ * NOTE(review): set_ts() is defined outside this chunk; presumably it
+ * sets CR0.TS so that the next FP/SIMD instruction traps -- confirm.
+ */
+void
+clear_fpu(void)
+{
+ set_ts();
+}
+
+
+/*
+ * fpu_load_registers: restore register state from the save area `fstate`.
+ *
+ * The fp_save_layout value recorded in the save area selects the restore
+ * primitive: fxrstor()/fxrstor64() for the FXSAVE layouts, and
+ * xrstor()/xrstor64() with the xstate_xmask[] entry of the current xstate
+ * for the XSAVE layouts.  Any other layout value panics.
+ *
+ * Asserted preconditions: the save area is 64-byte aligned, interrupts
+ * are disabled and, when there is a current task, the 32/64-bit flavour
+ * of the layout agrees with thread_is_64bit_addr(current_thread()).
+ */
+static void
+fpu_load_registers(void *fstate)
+{
+	struct x86_fx_thread_state *ifps = fstate;
+	fp_save_layout_t layout = ifps->fp_save_layout;
+
+	assert(current_task() == NULL ||
+	    (thread_is_64bit_addr(current_thread()) ?
+	    (layout == FXSAVE64 || layout == XSAVE64) :
+	    (layout == FXSAVE32 || layout == XSAVE32)));
+	assert(ALIGNED(ifps, 64));
+	assert(ml_get_interrupts_enabled() == FALSE);
+
+#if DEBUG
+	const int xsave_layout = (layout == XSAVE32 || layout == XSAVE64);
+
+	if (xsave_layout) {
+		struct x86_avx_thread_state *xsave_area = fstate;
+
+		/* xstate_bv may only carry bits covered by the current xstate's mask. */
+		if (xsave_area->_xh.xstate_bv & ~xstate_xmask[current_xstate()]) {
+			panic("iavx->_xh.xstate_bv: 0x%llx", xsave_area->_xh.xstate_bv);
+		}
+		/* The reserved part of the XSAVE header has to be clear. */
+		for (unsigned idx = 0; idx < sizeof(xsave_area->_xh.xhrsvd); idx++) {
+			if (xsave_area->_xh.xhrsvd[idx]) {
+				panic("Reserved bit set");
+			}
+		}
+	}
+	/* When fpu_YMM_capable is set, only the XSAVE layouts are acceptable. */
+	if (fpu_YMM_capable) {
+		if (!xsave_layout) {
+			panic("Inappropriate layout: %u\n", layout);
+		}
+	}
+#endif /* DEBUG */
+
+	switch (layout) {
+	case XSAVE64:
+		xrstor64(ifps, xstate_xmask[current_xstate()]);
+		break;
+	case XSAVE32:
+		xrstor(ifps, xstate_xmask[current_xstate()]);
+		break;
+	case FXSAVE64:
+		fxrstor64(ifps);
+		break;
+	case FXSAVE32:
+		fxrstor(ifps);
+		break;
+	default:
+		panic("fpu_load_registers() bad layout: %d\n", layout);
+	}
+}
+
+static void
+fpu_store_registers(void *fstate, boolean_t is64)
+{
+ struct x86_fx_thread_state *ifps = fstate;
+ assert(ALIGNED(ifps, 64));
+ xstate_t xs = current_xstate();
+ switch (xs) {
+ case FP:
+ if (is64) {
+ fxsave64(fstate);
+ ifps->fp_save_layout = FXSAVE64;
+ } else {
+ fxsave(fstate);
+ ifps->fp_save_layout = FXSAVE32;
+ }
+ break;
+ case AVX:
+#if !defined(RC_HIDE_XNU_J137)
+ case AVX512:
+#endif
+ if (is64) {
+ xsave64(ifps, xstate_xmask[xs]);
+ ifps->fp_save_layout = XSAVE64;
+ } else {
+ xsave(ifps, xstate_xmask[xs]);
+ ifps->fp_save_layout = XSAVE32;
+ }
+ break;
+ default:
+ panic("fpu_store_registers() bad xstate: %d\n", xs);
+ }