- if (is_boot_cpu) {
- /*
- * Master CPU uses the tables built at boot time.
- * Just set the TSS and GDT pointers.
- */
- cdt->cdi_ktss = &ktss;
-#if MACH_KDB
- cdt->cdi_dbtss = &dbtss;
-#endif /* MACH_KDB */
- cdt->cdi_gdt = gdt;
- cdt->cdi_idt = idt;
- cdt->cdi_ldt = ldt;
-
- } else {
-
- cdt->cdi_ktss = &mpt->ktss;
- cdt->cdi_gdt = mpt->gdt;
- cdt->cdi_idt = mpt->idt;
- cdt->cdi_ldt = mpt->ldt;
-
- /*
- * Copy the tables
- */
- bcopy((char *)idt,
- (char *)mpt->idt,
- sizeof(idt));
- bcopy((char *)gdt,
- (char *)mpt->gdt,
- sizeof(gdt));
- bcopy((char *)ldt,
- (char *)mpt->ldt,
- sizeof(ldt));
- bzero((char *)&mpt->ktss,
- sizeof(struct i386_tss));
-
-#if MACH_KDB
- cdt->cdi_dbtss = &dbtss;
- bcopy((char *)&dbtss,
- (char *)&mpt->dbtss,
- sizeof(struct i386_tss));
-#endif /* MACH_KDB */
-
- /*
- * Fix up the entries in the GDT to point to
- * this LDT and this TSS.
- */
- mpt->gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern;
- mpt->gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) mpt->ldt;
- fix_desc(&mpt->gdt[sel_idx(KERNEL_LDT)], 1);
-
- mpt->gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern;
- mpt->gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) &mpt->ktss;
- fix_desc(&mpt->gdt[sel_idx(KERNEL_TSS)], 1);
-
- mpt->gdt[sel_idx(CPU_DATA_GS)] = cpudata_desc_pattern;
- mpt->gdt[sel_idx(CPU_DATA_GS)].offset = (vm_offset_t) cdp;
- fix_desc(&mpt->gdt[sel_idx(CPU_DATA_GS)], 1);
-
-#if MACH_KDB
- mpt->gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern;
- mpt->gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) &mpt->dbtss;
- fix_desc(&mpt->gdt[sel_idx(DEBUG_TSS)], 1);
-
- mpt->dbtss.esp0 = (int)(db_task_stack_store +
- (INTSTACK_SIZE * (cpu + 1)) - sizeof (natural_t));
- mpt->dbtss.esp = mpt->dbtss.esp0;
- mpt->dbtss.eip = (int)&db_task_start;
-#endif /* MACH_KDB */
-
- mpt->ktss.ss0 = KERNEL_DS;
- mpt->ktss.io_bit_map_offset = 0x0FFF; /* no IO bitmap */
+ /*
+ * Now copy back over the fake structure.
+ */
+ bcopy((void *) &real, (void *) fakep, sizeof(real));
+ }
+}
+
+extern unsigned mldtsz;
+void
+cpu_desc_init(cpu_data_t *cdp)
+{
+ cpu_desc_index_t *cdi = &cdp->cpu_desc_index;
+
+ if (cdp == cpu_data_master) {
+ /*
+ * Populate the double-mapped 'u' (DBLMAP alias) and kernel-base 'b'
+ * fields of the descriptor index with the KTSS, sysenter stack,
+ * GDT, IDT and LDT addresses.
+ */
+ cdi->cdi_ktssu = (void *)DBLMAP(&master_ktss64);
+ cdi->cdi_ktssb = (void *)&master_ktss64;
+ cdi->cdi_sstku = (vm_offset_t) DBLMAP(&master_sstk.top);
+ cdi->cdi_sstkb = (vm_offset_t) &master_sstk.top;
+
+ cdi->cdi_gdtu.ptr = (void *)DBLMAP((uintptr_t) &master_gdt);
+ cdi->cdi_gdtb.ptr = (void *)&master_gdt;
+ cdi->cdi_idtu.ptr = (void *)DBLMAP((uintptr_t) &master_idt64);
+ cdi->cdi_idtb.ptr = (void *)((uintptr_t) &master_idt64);
+ cdi->cdi_ldtu = (struct real_descriptor *)DBLMAP((uintptr_t)&master_ldt[0]);
+ cdi->cdi_ldtb = &master_ldt[0];
+
+ /* Replace the expanded LDTs and TSS slots in the GDT */
+ kernel_ldt_desc64.offset64 = (uintptr_t) cdi->cdi_ldtu;
+ *(struct fake_descriptor64 *) &master_gdt[sel_idx(KERNEL_LDT)] =
+ kernel_ldt_desc64;
+ *(struct fake_descriptor64 *) &master_gdt[sel_idx(USER_LDT)] =
+ kernel_ldt_desc64;
+ kernel_tss_desc64.offset64 = (uintptr_t) DBLMAP(&master_ktss64);
+ *(struct fake_descriptor64 *) &master_gdt[sel_idx(KERNEL_TSS)] =
+ kernel_tss_desc64;
+
+ /* Fix up the expanded descriptors for 64-bit. */
+ fix_desc64((void *) &master_idt64, IDTSZ);
+ fix_desc64((void *) &master_gdt[sel_idx(KERNEL_LDT)], 1);
+ fix_desc64((void *) &master_gdt[sel_idx(USER_LDT)], 1);
+ fix_desc64((void *) &master_gdt[sel_idx(KERNEL_TSS)], 1);
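+ /*
+ * (fix_desc64() converts the compact 'fake' descriptor entries above
+ * into the hardware 64-bit descriptor format in place.)
+ */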
+
+ /*
+ * Set the NMI/fault stacks as IST2/IST1 in the 64-bit TSS
+ */
+ master_ktss64.ist2 = (uintptr_t) low_eintstack;
+ master_ktss64.ist1 = (uintptr_t) low_eintstack - sizeof(x86_64_intr_stack_frame_t);
+ } else if (cdi->cdi_ktssu == NULL) { /* Skipping re-init on wake */
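+ /*
+ * Secondary CPUs build their descriptor tables in the per-cpu
+ * descriptor table area. A non-NULL cdi_ktssu means this CPU is
+ * waking from sleep, so the tables built at boot are reused.
+ */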
+ cpu_desc_table64_t *cdt = (cpu_desc_table64_t *) cdp->cpu_desc_tablep;
+
+ cdi->cdi_idtu.ptr = (void *)DBLMAP((uintptr_t) &master_idt64);
+
+ cdi->cdi_ktssu = (void *)DBLMAP(&cdt->ktss);
+ cdi->cdi_ktssb = (void *)(&cdt->ktss);
+ cdi->cdi_sstku = (vm_offset_t)DBLMAP(&cdt->sstk.top);
+ cdi->cdi_sstkb = (vm_offset_t)(&cdt->sstk.top);
+ cdi->cdi_ldtu = (void *)LDTALIAS(cdp->cpu_ldtp);
+ cdi->cdi_ldtb = (void *)(cdp->cpu_ldtp);
+
+ /*
+ * Copy the tables
+ */
+ bcopy((char *)master_gdt, (char *)cdt->gdt, sizeof(master_gdt));
+ bcopy((char *)master_ldt, (char *)cdp->cpu_ldtp, mldtsz);
+ bcopy((char *)&master_ktss64, (char *)&cdt->ktss, sizeof(struct x86_64_tss));
+ cdi->cdi_gdtu.ptr = (void *)DBLMAP(cdt->gdt);
+ cdi->cdi_gdtb.ptr = (void *)(cdt->gdt);
+ /*
+ * Fix up the entries in the GDT to point to
+ * this LDT and this TSS.
+ * Note reuse of the global 'kernel_ldt_desc64', which is not
+ * concurrency-safe. Higher-level synchronization is expected.
+ */
+ kernel_ldt_desc64.offset64 = (uintptr_t) cdi->cdi_ldtu;
+ *(struct fake_descriptor64 *) &cdt->gdt[sel_idx(KERNEL_LDT)] =
+ kernel_ldt_desc64;
+ fix_desc64(&cdt->gdt[sel_idx(KERNEL_LDT)], 1);
+
+ kernel_ldt_desc64.offset64 = (uintptr_t) cdi->cdi_ldtu;
+ *(struct fake_descriptor64 *) &cdt->gdt[sel_idx(USER_LDT)] =
+ kernel_ldt_desc64;
+ fix_desc64(&cdt->gdt[sel_idx(USER_LDT)], 1);
+
+ kernel_tss_desc64.offset64 = (uintptr_t) cdi->cdi_ktssu;
+ *(struct fake_descriptor64 *) &cdt->gdt[sel_idx(KERNEL_TSS)] =
+ kernel_tss_desc64;
+ fix_desc64(&cdt->gdt[sel_idx(KERNEL_TSS)], 1);
+
+ /* Set (zeroed) fault stack as IST1, NMI intr stack IST2 */
+ uint8_t *cfstk = &scfstks[cdp->cpu_number].fstk[0];
+ cdt->fstkp = cfstk;
+ bzero((void *) cfstk, FSTK_SZ);
+ cdt->ktss.ist2 = DBLMAP((uint64_t)cdt->fstkp + FSTK_SZ);
+ cdt->ktss.ist1 = cdt->ktss.ist2 - sizeof(x86_64_intr_stack_frame_t);
+ }
+
+ /* Require that the top of the sysenter stack is 16-byte aligned */
+ if ((cdi->cdi_sstku % 16) != 0) {
+ panic("cpu_desc_init() sysenter stack not 16-byte aligned");
+ }
+}
+
+void
+cpu_desc_load(cpu_data_t *cdp)
+{
+ cpu_desc_index_t *cdi = &cdp->cpu_desc_index;
+
+ postcode(CPU_DESC_LOAD_ENTRY);
+
+ /* Stuff the kernel per-cpu data area address into the MSRs */
+ postcode(CPU_DESC_LOAD_GS_BASE);
+ wrmsr64(MSR_IA32_GS_BASE, (uintptr_t) cdp);
+ postcode(CPU_DESC_LOAD_KERNEL_GS_BASE);
+ wrmsr64(MSR_IA32_KERNEL_GS_BASE, (uintptr_t) cdp);
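+ /*
+ * MSR_IA32_KERNEL_GS_BASE is the value swapgs exchanges with the
+ * active GS base; writing cdp to both keeps the per-cpu pointer
+ * reachable on either side of a swapgs during bring-up.
+ */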
+
+ /*
+ * Ensure the TSS segment's busy bit is clear. This is required
+ * for the case of reloading descriptors at wake to avoid
+ * their complete re-initialization.
+ */
+ gdt_desc_p(KERNEL_TSS)->access &= ~ACC_TSS_BUSY;
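+ /* (ltr, executed by set_tr() below, takes a #GP if the TSS descriptor is already marked busy.) */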
+
+ /* Load the GDT, LDT, IDT and TSS */
+ cdi->cdi_gdtb.size = sizeof(struct real_descriptor) * GDTSZ - 1;
+ cdi->cdi_gdtu.size = cdi->cdi_gdtb.size;
+ cdi->cdi_idtb.size = 0x1000 + cdp->cpu_number;
+ cdi->cdi_idtu.size = cdi->cdi_idtb.size;
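+ /*
+ * The .size fields are the limits of the pseudo-descriptors loaded
+ * by lgdt/lidt below. The IDT limit is padded to 0x1000 plus the
+ * cpu number, presumably so the owning CPU can be identified from
+ * its IDTR value.
+ */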
+
+ postcode(CPU_DESC_LOAD_GDT);
+ lgdt((uintptr_t *) &cdi->cdi_gdtu);
+ postcode(CPU_DESC_LOAD_IDT);
+ lidt((uintptr_t *) &cdi->cdi_idtu);
+ postcode(CPU_DESC_LOAD_LDT);
+ lldt(KERNEL_LDT);
+ postcode(CPU_DESC_LOAD_TSS);
+ set_tr(KERNEL_TSS);
+
+ postcode(CPU_DESC_LOAD_EXIT);
+}
+
+/*
+ * Set MSRs for sysenter/sysexit and syscall/sysret for 64-bit.
+ */
+void
+cpu_syscall_init(cpu_data_t *cdp)
+{
+#pragma unused(cdp)
+
+ wrmsr64(MSR_IA32_SYSENTER_CS, SYSENTER_CS);
+ wrmsr64(MSR_IA32_SYSENTER_EIP, DBLMAP((uintptr_t) hi64_sysenter));
+ wrmsr64(MSR_IA32_SYSENTER_ESP, current_cpu_datap()->cpu_desc_index.cdi_sstku);
+ /* Enable syscall/sysret */
+ wrmsr64(MSR_IA32_EFER, rdmsr64(MSR_IA32_EFER) | MSR_IA32_EFER_SCE);
+
+ /*
+ * MSRs for 64-bit syscall/sysret
+ * Note USER_CS because sysret uses this + 16 when returning to
+ * 64-bit code.
+ */
+ wrmsr64(MSR_IA32_LSTAR, DBLMAP((uintptr_t) hi64_syscall));
+ wrmsr64(MSR_IA32_STAR, (((uint64_t)USER_CS) << 48) | (((uint64_t)KERNEL64_CS) << 32));
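+ /*
+ * IA32_STAR layout: bits 63:48 hold the selector base used by
+ * sysret (CS = base + 16 for 64-bit returns, SS = base + 8);
+ * bits 47:32 hold the base used by syscall (CS = base, SS = base + 8).
+ */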
+ /*
+ * Emulate eflags cleared by sysenter but note that
+ * we also clear the trace trap to avoid the complications
+ * of single-stepping into a syscall. The nested task bit
+ * is also cleared to avoid a spurious "task switch"
+ * should we choose to return via an IRET.
+ */
+ wrmsr64(MSR_IA32_FMASK, EFL_DF | EFL_IF | EFL_TF | EFL_NT);
+}
+
+extern vm_offset_t dyn_dblmap(vm_offset_t, vm_offset_t);
+uint64_t ldt_alias_offset;
+
+__startup_func
+static void
+cpu_data_startup_init(void)
+{
+ int flags = KMA_GUARD_FIRST | KMA_GUARD_LAST | KMA_PERMANENT |
+ KMA_ZERO | KMA_KOBJECT;
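+ /*
+ * Per the KMA flag names: guard pages at both ends of the
+ * allocation, permanent (never freed), zero-filled, and backed by
+ * the kernel object.
+ */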
+ uint32_t cpus = max_cpus_from_firmware;
+ vm_size_t size = percpu_section_size() * cpus;
+ kern_return_t kr;
+
+ percpu_base.size = percpu_section_size();
+ if (cpus == 0) {
+ panic("percpu: max_cpus_from_firmware not yet initialized");
+ }
+ if (cpus == 1) {
+ percpu_base.start = VM_MAX_KERNEL_ADDRESS;
+ return;