X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/94ff46dc2849db4d43eaaf144872decc522aafb4..refs/heads/master:/osfmk/i386/i386_init.c

diff --git a/osfmk/i386/i386_init.c b/osfmk/i386/i386_init.c
index aef78bcbd..ad1f15c1f 100644
--- a/osfmk/i386/i386_init.c
+++ b/osfmk/i386/i386_init.c
@@ -99,9 +99,14 @@
 #include
 #include
 #include	/* LcksOpts */
+#include
 #if DEBUG
 #include
 #endif
+extern void xcpm_bootstrap(void);
+#if DEVELOPMENT || DEBUG
+#include
+#endif
 
 #if MONOTONIC
 #include
@@ -109,16 +114,23 @@
 
 #include
 
-#if DEBUG
-#define DBG(x ...)	kprintf(x)
+#if DEBUG || DEVELOPMENT
+#define DBG(x, ...)	kprintf(x, ##__VA_ARGS__)
+#define dyldLogFunc(x, ...)	kprintf(x, ##__VA_ARGS__)
 #else
 #define DBG(x ...)
 #endif
 
+#include
+#include
+
+
 int			debug_task;
 
 int			early_boot = 1;
 
+bool			serial_console_enabled = false;
+
 static boot_args	*kernelBootArgs;
 
 extern int		disableConsoleOutput;
@@ -148,6 +160,19 @@ int kernPhysPML4EntryCount;
 ppnum_t released_PT_ppn = 0;
 uint32_t released_PT_cnt = 0;
 
+#if DEVELOPMENT || DEBUG
+int panic_on_cacheline_mismatch = -1;
+char panic_on_trap_procname[64];
+uint32_t panic_on_trap_mask;
+#endif
+bool last_branch_support_enabled;
+int insn_copyin_count;
+#if DEVELOPMENT || DEBUG
+#define DEFAULT_INSN_COPYIN_COUNT x86_INSTRUCTION_STATE_MAX_INSN_BYTES
+#else
+#define DEFAULT_INSN_COPYIN_COUNT 192
+#endif
+
 char *physfree;
 void idt64_remap(void);
 
@@ -259,7 +284,7 @@ physmap_init_L3(int startIndex, uint64_t highest_phys, uint64_t *physStart, pt_e
 }
 
 static void
-physmap_init(uint8_t phys_random_L3)
+physmap_init(uint8_t phys_random_L3, uint64_t *new_physmap_base, uint64_t *new_physmap_max)
 {
 	pt_entry_t *l3pte;
 	int pml4_index, i;
@@ -341,14 +366,14 @@ physmap_init(uint8_t phys_random_L3)
 		    | INTEL_PTE_WRITE;
 	}
 
-	physmap_base = KVADDR(kernPhysPML4Index, phys_random_L3, 0, 0);
+	*new_physmap_base = KVADDR(kernPhysPML4Index, phys_random_L3, 0, 0);
 	/*
 	 * physAddr contains the last-mapped physical address, so that's what we
 	 * add to physmap_base to derive the ending VA for the physmap.
 	 */
-	physmap_max = physmap_base + physAddr;
+	*new_physmap_max = *new_physmap_base + physAddr;
 
-	DBG("Physical address map base: 0x%qx\n", physmap_base);
+	DBG("Physical address map base: 0x%qx\n", *new_physmap_base);
 	for (i = kernPhysPML4Index; i < (kernPhysPML4Index + kernPhysPML4EntryCount); i++) {
 		DBG("Physical map idlepml4[%d]: 0x%llx\n", i, IdlePML4[i]);
 	}
@@ -360,6 +385,7 @@ static void
 Idle_PTs_init(void)
 {
 	uint64_t	rand64;
+	uint64_t	new_physmap_base, new_physmap_max;
 
 	/* Allocate the "idle" kernel page tables: */
 	KPTphys  = ALLOCPAGES(NKPT);		/* level 1 */
@@ -391,13 +417,22 @@ Idle_PTs_init(void)
 	 * two 8-bit entropy values needed for address randomization.
 	 */
 	rand64 = early_random();
-	physmap_init(rand64 & 0xFF);
+	physmap_init(rand64 & 0xFF, &new_physmap_base, &new_physmap_max);
 	doublemap_init((rand64 >> 8) & 0xFF);
 	idt64_remap();
 
 	postcode(VSTART_SET_CR3);
 
-	// Switch to the page tables..
+	/*
+	 * Switch to the page tables. We set physmap_base and physmap_max just
+	 * before switching to the new page tables to avoid someone calling
+	 * kprintf() or otherwise using physical memory in between.
+	 * This is needed because kprintf() writes to physical memory using
+	 * ml_phys_read_data and PHYSMAP_PTOV, which requires physmap_base to be
+	 * set correctly.
+	 */
+	physmap_base = new_physmap_base;
+	physmap_max = new_physmap_max;
 	set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
 }
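The physmap_init() change above moves the randomized physmap bounds into out-parameters so that the physmap_base/physmap_max globals are published only at the last moment before the CR3 switch, as the new comment in Idle_PTs_init() explains. A minimal stand-alone sketch of the same pattern follows; the names (physmap_bounds_init, the base constant, the 1 GiB slot arithmetic) are invented for illustration, and only the shape of the code mirrors the patch:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-ins for the kernel's globals (hypothetical, for illustration). */
    static uint64_t physmap_base_g, physmap_max_g;

    /* Compute randomized bounds into out-parameters; touch no globals. */
    static void
    physmap_bounds_init(uint8_t rand_l3, uint64_t span,
        uint64_t *out_base, uint64_t *out_max)
    {
        /* Each L3 slot covers 1 GiB; the 8-bit value picks one of 256 slots. */
        uint64_t base = 0xffff800000000000ULL + ((uint64_t)rand_l3 << 30);
        *out_base = base;
        *out_max  = base + span;
    }

    int
    main(void)
    {
        uint64_t rand64 = 0x1122334455667788ULL;  /* early_random() stand-in */
        uint64_t base, max;

        /* Low byte randomizes the physmap; the next byte is kept for doublemap. */
        physmap_bounds_init(rand64 & 0xFF, 1ULL << 34, &base, &max);

        /* Publish both globals together, only once the values are final. */
        physmap_base_g = base;
        physmap_max_g  = max;
        printf("physmap: 0x%llx..0x%llx\n",
            (unsigned long long)physmap_base_g, (unsigned long long)physmap_max_g);
        return 0;
    }

Keeping the computation side-effect free means nothing can observe a half-initialized physmap while the boot page tables are still live.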
@@ -536,17 +571,105 @@ __attribute__((aligned(PAGE_SIZE))) = {
 };
 
 static void
-vstart_idt_init(void)
+vstart_idt_init(boolean_t master)
 {
 	x86_64_desc_register_t	vstart_idt = {sizeof(master_boot_idt64),
 		                              master_boot_idt64};
 
-	fix_desc64(master_boot_idt64, 32);
+	if (master) {
+		fix_desc64(master_boot_idt64, 32);
+	}
 	lidt((void *)&vstart_idt);
 }
 
+extern void *collection_base_pointers[KCNumKinds];
+
+kern_return_t
+i386_slide_individual_kext(kernel_mach_header_t *mh, uintptr_t slide)
+{
+	int ret = kernel_collection_slide(mh, (const void **) (void *)collection_base_pointers);
+	if (ret != 0) {
+		printf("Sliding pageable kc was stopped\n");
+		return KERN_FAILURE;
+	}
+
+	kernel_collection_adjust_fileset_entry_addrs(mh, slide);
+	return KERN_SUCCESS;
+}
+
+kern_return_t
+i386_slide_kext_collection_mh_addrs(kernel_mach_header_t *mh, uintptr_t slide, bool adjust_mach_headers)
+{
+	int ret = kernel_collection_slide(mh, (const void **) (void *)collection_base_pointers);
+	if (ret != KERN_SUCCESS) {
+		printf("Kernel Collection slide was stopped with value %d\n", ret);
+		return KERN_FAILURE;
+	}
+
+	kernel_collection_adjust_mh_addrs(mh, slide, adjust_mach_headers,
+	    NULL, NULL, NULL, NULL, NULL, NULL, NULL);
+
+	return KERN_SUCCESS;
+}
+
+static void
+i386_slide_and_rebase_image(uintptr_t kstart_addr)
+{
+	extern uintptr_t kc_highest_nonlinkedit_vmaddr;
+	kernel_mach_header_t *k_mh, *kc_mh = NULL;
+	kernel_segment_command_t *seg;
+	uintptr_t slide;
+
+	k_mh = &_mh_execute_header;
+	/*
+	 * If we're not booting an MH_FILESET, we don't need to slide
+	 * anything because EFI has done that for us. When booting an
+	 * MH_FILESET, EFI will slide the kernel proper, but not the kexts.
+	 * Below, we infer the slide by comparing the slid address of the
+	 * kernel's mach-o header and the unslid vmaddr of the first segment
+	 * of the mach-o (which is assumed to always point to the mach-o
+	 * header).
+	 */
+	if (!kernel_mach_header_is_in_fileset(k_mh)) {
+		DBG("[MH] kcgen-style KC\n");
+		return;
+	}
+
+	/*
+	 * The kernel is part of an MH_FILESET kernel collection: determine the
+	 * slide based on the first segment's mach-o vmaddr.
+	 */
+	seg = (kernel_segment_command_t *)((uintptr_t)k_mh + sizeof(*k_mh));
+	assert(seg->cmd == LC_SEGMENT_KERNEL);
+	slide = (uintptr_t)k_mh - seg->vmaddr;
+	DBG("[MH] Sliding new-style KC: %llu\n", (unsigned long long)slide);
+
+	/*
+	 * The kernel collection mach-o header should be the start address
+	 * passed to us by EFI.
+	 */
+	kc_mh = (kernel_mach_header_t *)(kstart_addr);
+	assert(kc_mh->filetype == MH_FILESET);
+
+	PE_set_kc_header(KCKindPrimary, kc_mh, slide);
+
+	/*
+	 * Rebase/slide all the kexts in the collection
+	 * (EFI should have already rebased the kernel).
+	 */
+	kernel_collection_slide(kc_mh, (const void **) (void *)collection_base_pointers);
+
+
+	/*
+	 * Now adjust the vmaddr fields of all mach-o headers
+	 * and symbols in this MH_FILESET.
+	 */
+	kernel_collection_adjust_mh_addrs(kc_mh, slide, false,
+	    NULL, NULL, NULL, NULL, NULL, NULL, &kc_highest_nonlinkedit_vmaddr);
+}
+
 /*
  * vstart() is called in the natural mode (64bit for K64, 32 for K32)
  * on a set of bootstrap pagetables which use large, 2MB pages to map
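i386_slide_and_rebase_image() infers the KASLR slide as the difference between the slid address of the kernel's mach-o header and the unslid vmaddr recorded in its first load command. A self-contained sketch of that arithmetic, using a mock structure rather than the real kernel_mach_header_t/kernel_segment_command_t definitions (all values are invented):

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Minimal mock of the fields the slide computation reads. */
    struct mock_segment_command {
        uint32_t cmd;
        uint64_t vmaddr;            /* unslid, as written by the linker */
    };

    #define LC_SEGMENT_KERNEL_MOCK 0x19

    int
    main(void)
    {
        struct mock_segment_command seg = {
            .cmd = LC_SEGMENT_KERNEL_MOCK,
            .vmaddr = 0xffffff8000200000ULL,   /* linked (unslid) address */
        };
        /* Where the loader actually placed the header at boot: */
        uint64_t slid_header = 0xffffff8011a00000ULL;

        assert(seg.cmd == LC_SEGMENT_KERNEL_MOCK);
        uint64_t slide = slid_header - seg.vmaddr;
        printf("inferred slide: 0x%llx\n", (unsigned long long)slide);
        return 0;
    }

In the real code, the assumption that the first segment's vmaddr points at the mach-o header itself is what makes this subtraction valid.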
@@ -569,14 +692,19 @@ vstart(vm_offset_t boot_args_start)
 	boolean_t	is_boot_cpu = !(boot_args_start == 0);
 	int		cpu = 0;
 	uint32_t	lphysfree;
+#if DEBUG
+	uint64_t	gsbase;
+#endif
+
 
 	postcode(VSTART_ENTRY);
 
+	/*
+	 * Set-up temporary trap handlers during page-table set-up.
+	 */
+
 	if (is_boot_cpu) {
-		/*
-		 * Set-up temporary trap handlers during page-table set-up.
-		 */
-		vstart_idt_init();
+		vstart_idt_init(TRUE);
 		postcode(VSTART_IDT_INIT);
 
 		/*
@@ -607,6 +735,18 @@ vstart(vm_offset_t boot_args_start)
 		    &kernelBootArgs->ksize,
 		    &kernelBootArgs->kaddr);
 		DBG("SMBIOS mem sz 0x%llx\n", kernelBootArgs->PhysicalMemorySize);
+		DBG("KC_hdrs_vaddr %p\n", (void *)kernelBootArgs->KC_hdrs_vaddr);
+
+		if (kernelBootArgs->Version >= 2 && kernelBootArgs->Revision >= 1 &&
+		    kernelBootArgs->KC_hdrs_vaddr != 0) {
+			/*
+			 * slide the header addresses in all mach-o segments and sections, and
+			 * perform any new-style chained-fixup sliding for kexts, as necessary.
+			 * Note that efiboot has already loaded the kernel and all LC_SEGMENT_64s
+			 * that correspond to the kexts present in the primary KC, into slid addresses.
+			 */
+			i386_slide_and_rebase_image((uintptr_t)ml_static_ptovirt(kernelBootArgs->KC_hdrs_vaddr));
+		}
 
 		/*
 		 * Setup boot args given the physical start address.
@@ -653,12 +793,28 @@ vstart(vm_offset_t boot_args_start)
 		 * via i386_init_slave()
 		 */
 	} else {
+		/* Slave CPUs should use the basic IDT until i386_init_slave() */
+		vstart_idt_init(FALSE);
+
 		/* Switch to kernel's page tables (from the Boot PTs) */
 		set_cr3_raw((uintptr_t)ID_MAP_VTOP(IdlePML4));
+
 		/* Find our logical cpu number */
-		cpu = lapic_to_cpu[(LAPIC_READ(ID) >> LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
-		DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, rdmsr64(MSR_IA32_GS_BASE));
+		cpu = lapic_to_cpu[lapic_safe_apicid()];
+#if DEBUG
+		gsbase = rdmsr64(MSR_IA32_GS_BASE);
+#endif
 		cpu_desc_load(cpu_datap(cpu));
+#if DEBUG
+		DBG("CPU: %d, GSBASE initial value: 0x%llx\n", cpu, (unsigned long long)gsbase);
+#endif
+
+		/*
+		 * Before we can discover our local APIC ID, we need to potentially
+		 * initialize X2APIC, if it's enabled and firmware started us with
+		 * the APIC in legacy mode.
+		 */
+		lapic_init_slave();
 	}
 
 	early_boot = 0;
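In the hunks above, only the boot CPU calls vstart_idt_init(TRUE): fix_desc64() rewrites the boot IDT's descriptors in place, so the conversion should happen exactly once, while slave CPUs merely lidt the already-fixed table. A small sketch of that run-once, in-place fixup pattern; the table contents and the XOR "conversion" are stand-ins, not the real descriptor format:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* A table whose entries need a one-time, in-place format conversion. */
    static uint64_t idt_mock[4] = { 0x1111, 0x2222, 0x3333, 0x4444 };

    static void
    fix_table_once(bool master)
    {
        /* Converting twice would corrupt entries, so only the master fixes. */
        if (!master) {
            return;
        }
        for (int i = 0; i < 4; i++) {
            idt_mock[i] ^= 0xFF00;   /* stand-in for the real conversion */
        }
    }

    int
    main(void)
    {
        fix_table_once(true);    /* boot CPU: convert entries in place */
        fix_table_once(false);   /* slave CPUs: just reuse the fixed table */
        printf("entry 0 after fixup: 0x%llx\n", (unsigned long long)idt_mock[0]);
        return 0;
    }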
@@ -690,7 +846,7 @@ i386_init(void)
 	pal_i386_init();
 	tsc_init();
-	rtclock_early_init();	/* mach_absolute_time() now functionsl */
+	rtclock_early_init();	/* mach_absolute_time() now functional */
 
 	kernel_debug_string_early("i386_init");
 	pstate_trace();
@@ -702,7 +858,8 @@ i386_init(void)
 
 	master_cpu = 0;
 
-	lck_mod_init();
+	kernel_debug_string_early("kernel_startup_bootstrap");
+	kernel_startup_bootstrap();
 
 	/*
 	 * Initialize the timer callout world
@@ -713,20 +870,54 @@ i386_init(void)
 
 	postcode(CPU_INIT_D);
 
-	printf_init();	/* Init this in case we need debugger */
-	panic_init();	/* Init this in case we need debugger */
-
-	/* setup debugging output if one has been chosen */
-	kernel_debug_string_early("PE_init_kprintf");
-	PE_init_kprintf(FALSE);
-
-	kernel_debug_string_early("kernel_early_bootstrap");
-	kernel_early_bootstrap();
+	kernel_startup_initialize_upto(STARTUP_SUB_KPRINTF);
+	kprintf("kprintf initialized\n");
 
 	if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof(dgWork.dgFlags))) {
 		dgWork.dgFlags = 0;
 	}
 
+	if (PE_parse_boot_argn("insn_capcnt", &insn_copyin_count, sizeof(insn_copyin_count))) {
+		/*
+		 * Enforce max and min values (allowing 0 to disable copying completely)
+		 * for the instruction copyin count
+		 */
+		if (insn_copyin_count > x86_INSTRUCTION_STATE_MAX_INSN_BYTES ||
+		    (insn_copyin_count != 0 && insn_copyin_count < 64)) {
+			insn_copyin_count = DEFAULT_INSN_COPYIN_COUNT;
+		}
+	} else {
+		insn_copyin_count = DEFAULT_INSN_COPYIN_COUNT;
+	}
+
+#if DEVELOPMENT || DEBUG
+	if (!PE_parse_boot_argn("panic_clmismatch", &panic_on_cacheline_mismatch,
+	    sizeof(panic_on_cacheline_mismatch))) {
+		panic_on_cacheline_mismatch = 0;
+	}
+
+	if (!PE_parse_boot_argn("panic_on_trap_procname", &panic_on_trap_procname[0],
+	    sizeof(panic_on_trap_procname))) {
+		panic_on_trap_procname[0] = 0;
+	}
+
+	if (!PE_parse_boot_argn("panic_on_trap_mask", &panic_on_trap_mask,
+	    sizeof(panic_on_trap_mask))) {
+		if (panic_on_trap_procname[0] != 0) {
+			panic_on_trap_mask = DEFAULT_PANIC_ON_TRAP_MASK;
+		} else {
+			panic_on_trap_mask = 0;
+		}
+	}
+#endif
+	/* But allow that to be overridden via boot-arg: */
+	if (!PE_parse_boot_argn("lbr_support", &last_branch_support_enabled,
+	    sizeof(last_branch_support_enabled))) {
+		/* Disable LBR support by default due to its high context switch overhead */
+		last_branch_support_enabled = false;
+	}
+
 	serialmode = 0;
 	if (PE_parse_boot_argn("serial", &serialmode, sizeof(serialmode))) {
 		/* We want a serial keyboard and/or console */
@@ -743,6 +934,7 @@
 		}
 	}
 	if (serialmode & SERIALMODE_OUTPUT) {
+		serial_console_enabled = true;
 		(void)switch_to_serial_console();
 		disableConsoleOutput = FALSE;	/* Allow printfs to happen */
 	}
@@ -760,6 +952,8 @@
 		maxmemtouse = ((uint64_t)maxmem) * MB;
 	}
 
+	max_cpus_from_firmware = acpi_count_enabled_logical_processors();
+
 	if (PE_parse_boot_argn("cpus", &cpus, sizeof(cpus))) {
 		if ((0 < cpus) && (cpus < max_ncpus)) {
 			max_ncpus = cpus;
@@ -805,13 +999,15 @@ i386_init(void)
 
 	kernel_debug_string_early("power_management_init");
 	power_management_init();
+	xcpm_bootstrap();
 
 #if MONOTONIC
 	mt_cpu_up(cpu_datap(0));
 #endif /* MONOTONIC */
 
 	processor_bootstrap();
-	thread_bootstrap();
+	thread_t thread = thread_bootstrap();
+	machine_set_current_thread(thread);
 
 	pstate_trace();
 	kernel_debug_string_early("machine_startup");
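The insn_capcnt handling above accepts 0 (instruction copyin disabled) or any value in [64, x86_INSTRUCTION_STATE_MAX_INSN_BYTES], and falls back to the default otherwise. The same validation, reduced to a testable function; 2048 is an arbitrary stand-in for the kernel constant:

    #include <stdbool.h>
    #include <stdio.h>

    #define MAX_INSN_BYTES     2048   /* stand-in for the real kernel constant */
    #define DEFAULT_INSN_BYTES 192

    /* Accept 0 (disabled) or anything in [64, MAX]; else use the default. */
    static int
    clamp_insn_copyin(bool arg_present, int requested)
    {
        if (!arg_present) {
            return DEFAULT_INSN_BYTES;
        }
        if (requested > MAX_INSN_BYTES || (requested != 0 && requested < 64)) {
            return DEFAULT_INSN_BYTES;
        }
        return requested;
    }

    int
    main(void)
    {
        printf("%d\n", clamp_insn_copyin(true, 0));     /* 0: disabled */
        printf("%d\n", clamp_insn_copyin(true, 32));    /* 192: too small */
        printf("%d\n", clamp_insn_copyin(true, 4096));  /* 192: too large */
        printf("%d\n", clamp_insn_copyin(false, 0));    /* 192: default */
        return 0;
    }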
@@ -843,7 +1039,15 @@ do_init_slave(boolean_t fast_restart)
 #endif
 
 		LAPIC_INIT();
-		lapic_configure();
+		/*
+		 * Note that the true argument here does not necessarily mean we're
+		 * here from a resume (this code path is also executed on boot).
+		 * The implementation of lapic_configure checks to see if the
+		 * state variable has been initialized, as it would be before
+		 * sleep. If it has not been, it's construed as an indicator of
+		 * first boot.
+		 */
+		lapic_configure(true);
 		LAPIC_DUMP();
 		LAPIC_CPU_MAP_DUMP();
 
@@ -852,8 +1056,11 @@ do_init_slave(boolean_t fast_restart)
 #if CONFIG_MTRR
 		mtrr_update_cpu();
 #endif
-		/* update CPU microcode */
-		ucode_update_wake();
+		/* update CPU microcode and apply CPU workarounds */
+		ucode_update_wake_and_apply_cpu_was();
+
+		/* Enable LBRs on non-boot CPUs */
+		i386_lbr_init(cpuid_info(), false);
 	} else {
 		init_param = FAST_SLAVE_INIT;
 	}
@@ -907,7 +1114,6 @@ i386_init_slave_fast(void)
 	do_init_slave(TRUE);
 }
 
-#include
 
 /* TODO: Evaluate global PTEs for the double-mapped translations */
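The comment added above lapic_configure(true) notes that the argument alone does not distinguish cold boot from wake; the function also checks whether its saved state was ever initialized. A sketch of that disambiguation idiom, with illustrative names rather than the kernel's:

    #include <stdbool.h>
    #include <stdio.h>

    static bool state_saved = false;   /* set by the sleep path before suspend */

    static void
    lapic_configure_sketch(bool for_wake)
    {
        /*
         * Even when called with for_wake == true on the boot path, the
         * absence of saved state reveals this is first boot, so program
         * the hardware from scratch instead of restoring.
         */
        if (for_wake && state_saved) {
            printf("restoring pre-sleep LAPIC state\n");
        } else {
            printf("programming LAPIC from scratch (first boot)\n");
        }
    }

    int
    main(void)
    {
        lapic_configure_sketch(true);   /* boot: no saved state yet */
        state_saved = true;             /* pretend we went through sleep */
        lapic_configure_sketch(true);   /* wake: restore */
        return 0;
    }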