+/*
+ * Called once VM is fully initialized so that we can release unused
+ * sections of low memory to the general pool.
+ * Also complete the set-up of identity-mapped sections of the kernel:
+ * 1) write-protect kernel text
+ * 2) map kernel text using large pages if possible
+ * 3) read and write-protect page zero (for K32)
+ * 4) map the global page at the appropriate virtual address.
+ *
+ * Use of large pages
+ * ------------------
+ * To effectively map and write-protect all kernel text pages, the text
+ * must be 2M-aligned at the base, and the data section above must also be
+ * 2M-aligned. That is, there's padding below and above. This is achieved
+ * through linker directives. Large pages are used only if this alignment
+ * exists (and is not overridden by the -kernel_text_ps_4K boot-arg). The
+ * memory layout is:
+ *
+ * : :
+ * | __DATA |
+ * sdata: ================== 2Meg
+ * | |
+ * | zero-padding |
+ * | |
+ * etext: ------------------
+ * | |
+ * : :
+ * | |
+ * | __TEXT |
+ * | |
+ * : :
+ * | |
+ * stext: ================== 2Meg
+ * | |
+ * | zero-padding |
+ * | |
+ * eHIB: ------------------
+ * | __HIB |
+ * : :
+ *
+ * Prior to changing the mapping from 4K to 2M, the zero-padding pages
+ * [eHIB,stext] and [etext,sdata] are ml_static_mfree()'d. Then all the
+ * 4K pages covering [stext,etext] are coalesced as 2M large pages.
+ * The now unused level-1 PTE pages are also freed.
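+ *
+ * As a rough sketch of the coalescing step performed below: each new 2M
+ * PDE keeps the attribute bits of the old PDE, sets the page-size bit,
+ * and takes its page frame from the first 4K PTE it replaces:
+ *
+ *	pde = (*pdep & PTMASK) | INTEL_PTE_PS | (*ptep & PG_FRAME);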
+ */
+extern ppnum_t vm_kernel_base_page;
+static uint32_t dataptes = 0;
+
+void
+pmap_lowmem_finalize(void)
+{
+ spl_t spl;
+ int i;
+
+ /*
+ * Update wired memory statistics for early boot pages
+ */
+ PMAP_ZINFO_PALLOC(kernel_pmap, bootstrap_wired_pages * PAGE_SIZE);
+
+ /*
+ * Free pages in pmap regions below the base:
+ * rdar://6332712
+ * We can't free all the pages to VM that EFI reports available.
+ * Pages in the range 0xc0000-0xff000 aren't safe over sleep/wake.
+ * There's also a size miscalculation here: pend is one page less
+ * than it should be, but this is left unfixed for backwards
+ * compatibility.
+ * This is important for KASLR because up to 256*2MB = 512MB of space
+ * has to be released to VM.
+ */
+ for (i = 0;
+ pmap_memory_regions[i].end < vm_kernel_base_page;
+ i++) {
+ vm_offset_t pbase = i386_ptob(pmap_memory_regions[i].base);
+ vm_offset_t pend = i386_ptob(pmap_memory_regions[i].end + 1);
+
+ DBG("pmap region %d [%p..[%p\n",
+ i, (void *) pbase, (void *) pend);
+
+ if (pmap_memory_regions[i].attribute & EFI_MEMORY_KERN_RESERVED) {
+ continue;
+ }
+ /*
+ * rdar://6332712
+ * Adjust limits not to free pages in range 0xc0000-0xff000.
+ */
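+		/* region falls entirely within the unsafe range: skip it */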
+ if (pbase >= 0xc0000 && pend <= 0x100000) {
+ continue;
+ }
+ if (pbase < 0xc0000 && pend > 0x100000) {
+			/* unsafe range entirely within this region: free the part below it */
+ DBG("- ml_static_mfree(%p,%p)\n",
+ (void *) ml_static_ptovirt(pbase),
+ (void *) (0xc0000 - pbase));
+ ml_static_mfree(ml_static_ptovirt(pbase), 0xc0000 - pbase);
+ pbase = 0x100000;
+ }
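+		/* otherwise clamp pbase/pend to exclude the unsafe range */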
+ if (pbase < 0xc0000) {
+ pend = MIN(pend, 0xc0000);
+ }
+ if (pend > 0x100000) {
+ pbase = MAX(pbase, 0x100000);
+ }
+ DBG("- ml_static_mfree(%p,%p)\n",
+ (void *) ml_static_ptovirt(pbase),
+ (void *) (pend - pbase));
+ ml_static_mfree(ml_static_ptovirt(pbase), pend - pbase);
+ }
+
+	/*
+	 * A final pass to get rid of all initial identity mappings to
+	 * low pages.
+	 */
+ DPRINTF("%s: Removing mappings from 0->0x%lx\n", __FUNCTION__, vm_kernel_base);
+
+ /*
+ * Remove all mappings past the boot-cpu descriptor aliases and low globals.
+ * Non-boot-cpu GDT aliases will be remapped later as needed.
+ */
+ pmap_remove(kernel_pmap, LOWGLOBAL_ALIAS + PAGE_SIZE, vm_kernel_base);
+
+ /*
+ * Release any memory for early boot 4K page table pages that got replaced
+ * with large page mappings for vm_pages[]. We know this memory is part of
+ * the KPTphys[] array that was allocated in Idle_PTs_init(), so we can free
+ * it using that address.
+ */
+ pmap_free_early_PT(released_PT_ppn, released_PT_cnt);
+
+ /*
+ * If text and data are both 2MB-aligned,
+ * we can map text with large-pages,
+ * unless the -kernel_text_ps_4K boot-arg overrides.
+ */
+ if ((stext & I386_LPGMASK) == 0 && (sdata & I386_LPGMASK) == 0) {
+ kprintf("Kernel text is 2MB aligned");
+ kernel_text_ps_4K = FALSE;
+ if (PE_parse_boot_argn("-kernel_text_ps_4K",
+ &kernel_text_ps_4K,
+ sizeof(kernel_text_ps_4K))) {
+ kprintf(" but will be mapped with 4K pages\n");
+ } else {
+ kprintf(" and will be mapped with 2M pages\n");
+ }
+ }
+#if DEVELOPMENT || DEBUG
+ (void) PE_parse_boot_argn("wpkernel", &wpkernel, sizeof(wpkernel));
+#endif
+ if (wpkernel) {
+ kprintf("Kernel text %p-%p to be write-protected\n",
+ (void *) stext, (void *) etext);
+ }
+
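+	/* Raise the interrupt level while kernel mappings are rewritten. */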
+ spl = splhigh();
+
+ /*
+ * Scan over text if mappings are to be changed:
+ * - Remap kernel text read-only unless the "wpkernel" boot-arg is 0
+ * - Change to large pages if possible and not overridden.
+ */
+ if (kernel_text_ps_4K && wpkernel) {
+ vm_offset_t myva;
+ for (myva = stext; myva < etext; myva += PAGE_SIZE) {
+ pt_entry_t *ptep;
+
+ ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
+ if (ptep) {
+ pmap_store_pte(ptep, *ptep & ~INTEL_PTE_WRITE);
+ }
+ }
+ }
+
+ if (!kernel_text_ps_4K) {
+ vm_offset_t myva;
+
+ /*
+ * Release zero-filled page padding used for 2M-alignment.
+ */
+ DBG("ml_static_mfree(%p,%p) for padding below text\n",
+ (void *) eHIB, (void *) (stext - eHIB));
+ ml_static_mfree(eHIB, stext - eHIB);
+ DBG("ml_static_mfree(%p,%p) for padding above text\n",
+ (void *) etext, (void *) (sdata - etext));
+ ml_static_mfree(etext, sdata - etext);
+
+ /*
+ * Coalesce text pages into large pages.
+ */
+ for (myva = stext; myva < sdata; myva += I386_LPGBYTES) {
+ pt_entry_t *ptep;
+ vm_offset_t pte_phys;
+ pt_entry_t *pdep;
+ pt_entry_t pde;
+ ppnum_t KPT_ppn;
+
+ pdep = pmap_pde(kernel_pmap, (vm_map_offset_t)myva);
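+			/* page number of the 4K page table about to be superseded */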
+ KPT_ppn = (ppnum_t)((*pdep & PG_FRAME) >> PAGE_SHIFT);
+ ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva);
+ DBG("myva: %p pdep: %p ptep: %p\n",
+ (void *) myva, (void *) pdep, (void *) ptep);
+ if ((*ptep & INTEL_PTE_VALID) == 0) {
+ continue;
+ }
+ pte_phys = (vm_offset_t)(*ptep & PG_FRAME);
+ pde = *pdep & PTMASK; /* page attributes from pde */
+ pde |= INTEL_PTE_PS; /* make it a 2M entry */
+ pde |= pte_phys; /* take page frame from pte */
+
+ if (wpkernel) {
+ pde &= ~INTEL_PTE_WRITE;
+ }
+ DBG("pmap_store_pte(%p,0x%llx)\n",
+ (void *)pdep, pde);
+ pmap_store_pte(pdep, pde);
+
+ /*
+ * Free the now-unused level-1 pte.
+ */
+ pmap_free_early_PT(KPT_ppn, 1);
+ }
+
+ /* Change variable read by sysctl machdep.pmap */
+ pmap_kernel_text_ps = I386_LPGBYTES;
+ }
+
+ vm_offset_t dva;
+
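+	/* Set NX on every 4K mapping of __DATA. */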
+	assert(((sdata | edata) & PAGE_MASK) == 0);
+	for (dva = sdata; dva < edata; dva += I386_PGBYTES) {
+		pt_entry_t dpte, *dptep = pmap_pte(kernel_pmap, dva);
+
+ dpte = *dptep;
+ assert((dpte & INTEL_PTE_VALID));
+ dpte |= INTEL_PTE_NX;
+ pmap_store_pte(dptep, dpte);
+ dataptes++;
+ }
+ assert(dataptes > 0);
+
+ kernel_segment_command_t * seg;
+ kernel_section_t * sec;
+
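+	/*
+	 * Walk the remaining Mach-O segments. __TEXT and __DATA were handled
+	 * above and __KLD is skipped; within __HIB, __text stays executable
+	 * but becomes read-only, while every other section and segment is
+	 * made non-executable.
+	 */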
+ for (seg = firstseg(); seg != NULL; seg = nextsegfromheader(&_mh_execute_header, seg)) {
+ if (!strcmp(seg->segname, "__TEXT") ||
+ !strcmp(seg->segname, "__DATA")) {
+ continue;
+ }
+ //XXX
+ if (!strcmp(seg->segname, "__KLD")) {
+ continue;
+ }
+ if (!strcmp(seg->segname, "__HIB")) {
+ for (sec = firstsect(seg); sec != NULL; sec = nextsect(seg, sec)) {
+ if (sec->addr & PAGE_MASK) {
+ panic("__HIB segment's sections misaligned");
+ }
+ if (!strcmp(sec->sectname, "__text")) {
+ pmap_mark_range(kernel_pmap, sec->addr, round_page(sec->size), FALSE, TRUE);
+ } else {
+ pmap_mark_range(kernel_pmap, sec->addr, round_page(sec->size), TRUE, FALSE);
+ }
+ }
+ } else {
+ pmap_mark_range(kernel_pmap, seg->vmaddr, round_page_64(seg->vmsize), TRUE, FALSE);
+ }
+ }
+
+ /*
+ * If we're debugging, map the low global vector page at the fixed
+ * virtual address. Otherwise, remove the mapping for this.
+ */
+ if (debug_boot_arg) {
+ pt_entry_t *pte = NULL;
+ if (0 == (pte = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS))) {
+ panic("lowmem pte");
+ }
+ /* make sure it is defined on page boundary */
+ assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK));
+ pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)
+ | INTEL_PTE_REF
+ | INTEL_PTE_MOD
+ | INTEL_PTE_WIRED
+ | INTEL_PTE_VALID
+ | INTEL_PTE_WRITE
+ | INTEL_PTE_NX);
+ } else {
+ pmap_remove(kernel_pmap,
+ LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE);
+ }
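+	/* flush stale TLB entries (including global mappings) for everything remapped above */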
+ pmap_tlbi_range(0, ~0ULL, true, 0);
+ splx(spl);
+}
+
+/*
+ * Mark the const data segment as read-only, non-executable.
+ */
+void
+x86_64_protect_data_const()
+{
+ boolean_t doconstro = TRUE;
+#if DEVELOPMENT || DEBUG
+ (void) PE_parse_boot_argn("dataconstro", &doconstro, sizeof(doconstro));
+#endif
+ if (doconstro) {
+ if (sconst & PAGE_MASK) {
+ panic("CONST segment misaligned 0x%lx 0x%lx\n",
+ sconst, econst);
+ }
+ kprintf("Marking const DATA read-only\n");
+ pmap_protect(kernel_pmap, sconst, econst, VM_PROT_READ);
+ }
+}
+
+/*
+ * This function is only used for debugging from the VM layer.
+ */
+boolean_t
+pmap_verify_free(
+ ppnum_t pn)
+{
+ pv_rooted_entry_t pv_h;
+ int pai;
+ boolean_t result;
+
+ assert(pn != vm_page_fictitious_addr);
+
+ if (!pmap_initialized) {
+ return TRUE;
+ }
+
+ if (pn == vm_page_guard_addr) {
+ return TRUE;
+ }
+
+ pai = ppn_to_pai(pn);
+ if (!IS_MANAGED_PAGE(pai)) {
+ return FALSE;
+ }
+	pv_h = pai_to_pvh(pai);
+ result = (pv_h->pmap == PMAP_NULL);
+ return result;
+}
+
+#if MACH_ASSERT
+void
+pmap_assert_free(ppnum_t pn)
+{
+ int pai;
+ pv_rooted_entry_t pv_h = NULL;
+ pmap_t pmap = NULL;
+ vm_offset_t va = 0;
+ static char buffer[32];
+ static char *pr_name = "not managed pn";
+ uint_t attr;
+ pt_entry_t *ptep;
+ pt_entry_t pte = -1ull;
+
+ if (pmap_verify_free(pn)) {
+ return;
+ }
+
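+	/* beyond the managed range: no phys attributes or pv entry to report */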
+ if (pn > last_managed_page) {
+ attr = 0xff;
+ goto done;
+ }
+
+ pai = ppn_to_pai(pn);
+ attr = pmap_phys_attributes[pai];
+ pv_h = pai_to_pvh(pai);
+ va = pv_h->va_and_flags;
+ pmap = pv_h->pmap;
+ if (pmap == kernel_pmap) {
+ pr_name = "kernel";
+ } else if (pmap == NULL) {
+ pr_name = "pmap NULL";
+ } else if (pmap->pmap_procname[0] != 0) {
+ pr_name = &pmap->pmap_procname[0];
+ } else {
+ snprintf(buffer, sizeof(buffer), "pmap %p", pv_h->pmap);
+ pr_name = buffer;
+ }
+
+ if (pmap != NULL) {
+ ptep = pmap_pte(pmap, va);
+ if (ptep != NULL) {
+ pte = (uintptr_t)*ptep;
+ }
+ }
+
+done:
+ panic("page not FREE page: 0x%lx attr: 0x%x %s va: 0x%lx PTE: 0x%llx",
+ (ulong_t)pn, attr, pr_name, va, pte);
+}
+#endif /* MACH_ASSERT */
+
+boolean_t
+pmap_is_empty(
+ pmap_t pmap,
+ vm_map_offset_t va_start,
+ vm_map_offset_t va_end)
+{
+ vm_map_offset_t offset;
+ ppnum_t phys_page;
+
+ if (pmap == PMAP_NULL) {
+ return TRUE;