X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/15129b1c8dbb3650c63b70adb1cad9af601c6c17..ecc0ceb4089d506a0b8d16686a95817b331af9cb:/osfmk/i386/pmap_x86_common.c

diff --git a/osfmk/i386/pmap_x86_common.c b/osfmk/i386/pmap_x86_common.c
index cc584a9a1..c6352893a 100644
--- a/osfmk/i386/pmap_x86_common.c
+++ b/osfmk/i386/pmap_x86_common.c
@@ -25,6 +25,9 @@
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
+
+#include 
+
 #include 
 #include 
 #include 
@@ -91,6 +94,9 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
 	unsigned int	i;
 	uint64_t	num_pde;
 
+	assert(!is_ept_pmap(grand));
+	assert(!is_ept_pmap(subord));
+
 	if ((size & (pmap_nesting_size_min-1)) ||
 	    (va_start & (pmap_nesting_size_min-1)) ||
 	    (nstart & (pmap_nesting_size_min-1)) ||
@@ -227,6 +233,8 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
 		   grand, vaddr, size);
 	}
 
+	assert(!is_ept_pmap(grand));
+
 	/* align everything to PDE boundaries */
 	va_start = vaddr & ~(NBPDE-1);
 	va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
@@ -265,6 +273,15 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
 	return KERN_SUCCESS;
 }
 
+kern_return_t
+pmap_unnest_options(
+	pmap_t grand,
+	addr64_t vaddr,
+	__unused uint64_t size,
+	__unused unsigned int options) {
+	return pmap_unnest(grand, vaddr, size);
+}
+
 /* Invoked by the Mach VM to determine the platform specific unnest region */
 
 boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
@@ -308,6 +325,9 @@ pmap_find_phys(pmap_t pmap, addr64_t va)
 	ppnum_t		ppn = 0;
 	pd_entry_t	pde;
 	pt_entry_t	pte;
+	boolean_t	is_ept;
+
+	is_ept = is_ept_pmap(pmap);
 
 	mp_disable_preemption();
 
@@ -321,14 +341,14 @@ pmap_find_phys(pmap_t pmap, addr64_t va)
 
 	pdep = pmap_pde(pmap, va);
 
-	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
-		if (pde & INTEL_PTE_PS) {
+	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
+		if (pde & PTE_PS) {
 			ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
 			ppn += (ppnum_t) ptenum(va);
 		}
 		else {
 			ptp = pmap_pte(pmap, va);
-			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
+			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
 				ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
 			}
 		}
@@ -342,9 +362,13 @@ pfp_exit:
 /*
  * Update cache attributes for all extant managed mappings.
  * Assumes PV for this page is locked, and that the page
- * is managed.
+ * is managed. We assume that this physical page may be mapped in
+ * both EPT and normal Intel PTEs, so we convert the attributes
+ * to the corresponding format for each pmap.
+ *
+ * We assert that the passed set of attributes is a subset of the
+ * PHYS_CACHEABILITY_MASK.
  */
-
 void
 pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
 	pv_rooted_entry_t	pv_h, pv_e;
@@ -352,8 +376,17 @@ pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
 	vm_map_offset_t		vaddr;
 	pmap_t			pmap;
 	pt_entry_t		*ptep;
+	boolean_t		is_ept;
+	unsigned		ept_attributes;
 
 	assert(IS_MANAGED_PAGE(pn));
+	assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);
+
+	/* We don't support the PTA bit for EPT PTEs */
+	if (attributes & INTEL_PTE_NCACHE)
+		ept_attributes = INTEL_EPT_NCACHE;
+	else
+		ept_attributes = INTEL_EPT_WB;
 
 	pv_h = pai_to_pvh(pn);
 	/* TODO: translate the PHYS_* bits to PTE bits, while they're
@@ -374,12 +407,18 @@ pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
 		pmap = pv_e->pmap;
 		vaddr = pv_e->va;
 		ptep = pmap_pte(pmap, vaddr);
-
+
 		if (0 == ptep)
 			panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
 
+		is_ept = is_ept_pmap(pmap);
+
 		nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
-		pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
+		if (!is_ept) {
+			pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
+		} else {
+			pmap_update_pte(ptep, INTEL_EPT_CACHE_MASK, ept_attributes);
+		}
 		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
 		pvh_e = nexth;
 	} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
@@ -454,15 +493,18 @@ pmap_enter_options(
 	/* 2MiB mappings are confined to x86_64 by VM */
 	boolean_t		superpage = flags & VM_MEM_SUPERPAGE;
 	vm_object_t		delpage_pm_obj = NULL;
-	int			delpage_pde_index = 0;
+	uint64_t		delpage_pde_index = 0;
 	pt_entry_t		old_pte;
 	kern_return_t		kr_expand;
+	boolean_t		is_ept;
 
 	pmap_intr_assert();
 
 	if (pmap == PMAP_NULL)
 		return KERN_INVALID_ARGUMENT;
 
+	is_ept = is_ept_pmap(pmap);
+
 	/* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
 	 * unused value for that scenario.
 	 */
@@ -531,14 +573,14 @@ Retry:
 		return KERN_SUCCESS;
 	}
 
-	if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
+	if (superpage && *pte && !(*pte & PTE_PS)) {
 		/*
 		 * There is still an empty page table mapped that
 		 * was used for a previous base page mapping.
 		 * Remember the PDE and the PDE index, so that we
 		 * can free the page at the end of this function.
 		 */
-		delpage_pde_index = (int)pdeidx(pmap, vaddr);
+		delpage_pde_index = pdeidx(pmap, vaddr);
 		delpage_pm_obj = pmap->pm_obj;
 		*pte = 0;
 	}
@@ -548,7 +590,7 @@ Retry:
 	old_pa_locked = FALSE;
 
 	if (old_pa == 0 &&
-	    (*pte & INTEL_PTE_COMPRESSED)) {
+	    (*pte & PTE_COMPRESSED)) {
 		/* one less "compressed" */
 		OSAddAtomic64(-1, &pmap->stats.compressed);
 		/* marker will be cleared below */
@@ -575,32 +617,58 @@ Retry:
 	 */
 	if (old_pa == pa) {
 		pt_entry_t old_attributes =
-		    *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);
+		    *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept));
 
 		/*
		 * May be changing its wired attribute or protection
 		 */
 
-		template = pa_to_pte(pa) | INTEL_PTE_VALID;
-		template |= pmap_get_cache_attributes(pa_index(pa));
+		template = pa_to_pte(pa);
+
+		/* ?: WORTH ASSERTING THAT AT LEAST ONE RWX (implicit valid) PASSED FOR EPT? */
+		if (!is_ept) {
+			template |= INTEL_PTE_VALID;
+		} else {
+			template |= INTEL_EPT_IPTA;
+		}
+
+		template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
 
-		if (VM_MEM_NOT_CACHEABLE ==
-		    (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
+		/*
+		 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
+		 */
+		if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
+		    (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
 			if (!(flags & VM_MEM_GUARDED))
 				template |= INTEL_PTE_PTA;
 			template |= INTEL_PTE_NCACHE;
 		}
-		if (pmap != kernel_pmap)
+		if (pmap != kernel_pmap && !is_ept)
 			template |= INTEL_PTE_USER;
+
+		if (prot & VM_PROT_READ)
+			template |= PTE_READ(is_ept);
+
 		if (prot & VM_PROT_WRITE) {
-			template |= INTEL_PTE_WRITE;
+			template |= PTE_WRITE(is_ept);
+			if (is_ept && !pmap_ept_support_ad) {
+				template |= PTE_MOD(is_ept);
+				if (old_pa_locked) {
+					assert(IS_MANAGED_PAGE(pai));
+					pmap_phys_attributes[pai] |= PHYS_MODIFIED;
+				}
+			}
+		}
+		if (prot & VM_PROT_EXECUTE) {
+			assert(set_NX == 0);
+			template = pte_set_ex(template, is_ept);
 		}
 
 		if (set_NX)
-			template |= INTEL_PTE_NX;
+			template = pte_remove_ex(template, is_ept);
 
 		if (wired) {
-			template |= INTEL_PTE_WIRED;
+			template |= PTE_WIRED;
 			if (!iswired(old_attributes))  {
 				OSAddAtomic(+1, &pmap->stats.wired_count);
 				pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
@@ -612,26 +680,36 @@ Retry:
 				pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
 			}
 		}
+
 		if (superpage)		/* this path can not be used */
-			template |= INTEL_PTE_PS;	/* to change the page size! */
+			template |= PTE_PS;	/* to change the page size! */
 
 		if (old_attributes == template)
 			goto dont_update_pte;
 
 		/* Determine delta, PV locked */
 		need_tlbflush =
-		    ((old_attributes ^ template) != INTEL_PTE_WIRED);
+		    ((old_attributes ^ template) != PTE_WIRED);
 
-		if (need_tlbflush == TRUE && !(old_attributes & INTEL_PTE_WRITE)) {
-			if ((old_attributes ^ template) == INTEL_PTE_WRITE)
+		if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
+			if ((old_attributes ^ template) == PTE_WRITE(is_ept))
 				need_tlbflush = FALSE;
 		}
 
+		/* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
+		if (is_ept && !pmap_ept_support_ad) {
+			template |= PTE_REF(is_ept);
+			if (old_pa_locked) {
+				assert(IS_MANAGED_PAGE(pai));
+				pmap_phys_attributes[pai] |= PHYS_REFERENCED;
+			}
+		}
+
 		/* store modified PTE and preserve RC bits */
 		pt_entry_t npte, opte;;
 		do {
 			opte = *pte;
-			npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD));
+			npte = template | (opte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
 		} while (!pmap_cmpx_pte(pte, opte, npte));
 dont_update_pte:
 		if (old_pa_locked) {
@@ -663,12 +741,12 @@ dont_update_pte:
 		 */
 
 		/* invalidate the PTE */
-		pmap_update_pte(pte, INTEL_PTE_VALID, 0);
+		pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
 		/* propagate invalidate everywhere */
 		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
 		/* remember reference and change */
 		old_pte	= *pte;
-		oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
+		oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
 		/* completely invalidate the PTE */
 		pmap_store_pte(pte, 0);
 
@@ -696,7 +774,12 @@ dont_update_pte:
 			pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
 		}
 
-		pmap_phys_attributes[pai] |= oattr;
+
+		if (!is_ept) {
+			pmap_phys_attributes[pai] |= oattr;
+		} else {
+			pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
+		}
 
 		/*
 		 * Remove the mapping from the pvlist for
@@ -851,25 +934,49 @@ dont_update_pte:
 	 *	Build a template to speed up entering -
 	 *	only the pfn changes.
 	 */
-	template = pa_to_pte(pa) | INTEL_PTE_VALID;
+	template = pa_to_pte(pa);
+
+	if (!is_ept) {
+		template |= INTEL_PTE_VALID;
+	} else {
+		template |= INTEL_EPT_IPTA;
+	}
+
+
 	/*
 	 * DRK: It may be worth asserting on cache attribute flags that diverge
 	 * from the existing physical page attributes.
 	 */
-	template |= pmap_get_cache_attributes(pa_index(pa));
-
-	if (flags & VM_MEM_NOT_CACHEABLE) {
+	template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
+
+	/*
+	 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
+	 */
+	if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
 		if (!(flags & VM_MEM_GUARDED))
 			template |= INTEL_PTE_PTA;
 		template |= INTEL_PTE_NCACHE;
 	}
-	if (pmap != kernel_pmap)
+	if (pmap != kernel_pmap && !is_ept)
 		template |= INTEL_PTE_USER;
-	if (prot & VM_PROT_WRITE)
-		template |= INTEL_PTE_WRITE;
+	if (prot & VM_PROT_READ)
+		template |= PTE_READ(is_ept);
+	if (prot & VM_PROT_WRITE) {
+		template |= PTE_WRITE(is_ept);
+		if (is_ept && !pmap_ept_support_ad) {
+			template |= PTE_MOD(is_ept);
+			if (IS_MANAGED_PAGE(pai))
+				pmap_phys_attributes[pai] |= PHYS_MODIFIED;
+		}
+	}
+	if (prot & VM_PROT_EXECUTE) {
+		assert(set_NX == 0);
+		template = pte_set_ex(template, is_ept);
+	}
+
 	if (set_NX)
-		template |= INTEL_PTE_NX;
+		template = pte_remove_ex(template, is_ept);
 	if (wired) {
 		template |= INTEL_PTE_WIRED;
 		OSAddAtomic(+1, & pmap->stats.wired_count);
@@ -877,6 +984,14 @@ dont_update_pte:
 	}
 	if (superpage)
 		template |= INTEL_PTE_PS;
+
+	/* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
+	if (is_ept && !pmap_ept_support_ad) {
+		template |= PTE_REF(is_ept);
+		if (IS_MANAGED_PAGE(pai))
+			pmap_phys_attributes[pai] |= PHYS_REFERENCED;
+	}
+
 	pmap_store_pte(pte, template);
 
 	/*
@@ -906,11 +1021,11 @@ Done:
 		vm_page_t m;
 
 		vm_object_lock(delpage_pm_obj);
-		m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
+		m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
 		if (m == VM_PAGE_NULL)
 		    panic("pmap_enter: pte page not in object");
-		vm_object_unlock(delpage_pm_obj);
 		VM_PAGE_FREE(m);
+		vm_object_unlock(delpage_pm_obj);
 		OSAddAtomic(-1,  &inuse_ptepages_count);
 		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
 	}
@@ -938,7 +1053,8 @@ pmap_remove_range(
 	pt_entry_t		*spte,
 	pt_entry_t		*epte)
 {
-	pmap_remove_range_options(pmap, start_vaddr, spte, epte, 0);
+	pmap_remove_range_options(pmap, start_vaddr, spte, epte,
+				  PMAP_OPTIONS_REMOVE);
 }
 
 void
@@ -960,6 +1076,7 @@ pmap_remove_range_options(
 	ppnum_t			pai;
 	pmap_paddr_t		pa;
 	vm_map_offset_t		vaddr;
+	boolean_t		is_ept = is_ept_pmap(pmap);
 
 	num_removed = 0;
 	num_unwired = 0;
@@ -980,12 +1097,12 @@ pmap_remove_range_options(
 		if (pa == 0) {
 			if (pmap != kernel_pmap &&
 			    (options & PMAP_OPTIONS_REMOVE) &&
-			    (p & INTEL_PTE_COMPRESSED)) {
+			    (p & PTE_COMPRESSED)) {
 				/* one less "compressed" */
 				num_compressed++;
 				/* clear marker */
 				/* XXX probably does not need to be atomic! */
-				pmap_update_pte(cpte, INTEL_PTE_COMPRESSED, 0);
+				pmap_update_pte(cpte, PTE_COMPRESSED, 0);
 			}
 			continue;
 		}
@@ -1006,11 +1123,11 @@ pmap_remove_range_options(
 			continue;
 		}
 
-		if ((p & INTEL_PTE_VALID) == 0)
+		if ((p & PTE_VALID_MASK(is_ept)) == 0)
 			num_invalid++;
 
 		/* invalidate the PTE */
-		pmap_update_pte(cpte, INTEL_PTE_VALID, 0);
+		pmap_update_pte(cpte, PTE_VALID_MASK(is_ept), 0);
 	}
 
 	if (num_found == 0) {
@@ -1138,7 +1255,7 @@ pmap_remove(
 	addr64_t	s64,
 	addr64_t	e64)
 {
-	pmap_remove_options(map, s64, e64, 0);
+	pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
 }
 
 void
@@ -1152,12 +1269,15 @@ pmap_remove_options(
 	pt_entry_t	*spte, *epte;
 	addr64_t	l64;
 	uint64_t	deadline;
+	boolean_t	is_ept;
 
 	pmap_intr_assert();
 
 	if (map == PMAP_NULL || s64 == e64)
 		return;
 
+	is_ept = is_ept_pmap(map);
+
 	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
 		   map,
 		   (uint32_t) (s64 >> 32), s64,
@@ -1206,8 +1326,8 @@ pmap_remove_options(
 			l64 = e64;
 		pde = pmap_pde(map, s64);
 
-		if (pde && (*pde & INTEL_PTE_VALID)) {
-			if (*pde & INTEL_PTE_PS) {
+		if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
+			if (*pde & PTE_PS) {
 				/*
 				 * If we're removing a superpage, pmap_remove_range()
 				 * must work on level 2 instead of level 1; and we're
@@ -1228,6 +1348,14 @@ pmap_remove_options(
 
 		if (s64 < e64 && rdtsc64() >= deadline) {
 			PMAP_UNLOCK(map)
+			/* TODO: Rapid release/reacquisition can defeat
+			 * the "backoff" intent here; either consider a
+			 * fair spinlock, or a scheme whereby each lock
+			 * attempt marks the processor as within a spinlock
+			 * acquisition, and scan CPUs here to determine
+			 * if a backoff is necessary, to avoid sacrificing
+			 * performance in the common case.
+			 */
 			PMAP_LOCK(map)
 			deadline = rdtsc64() + max_preemption_latency_tsc;
 		}
@@ -1274,6 +1402,7 @@ pmap_page_protect_options(
 	pmap_t			pmap;
 	boolean_t		remove;
 	pt_entry_t		new_pte_value;
+	boolean_t		is_ept;
 
 	pmap_intr_assert();
 	assert(pn != vm_page_fictitious_addr);
@@ -1323,7 +1452,15 @@ pmap_page_protect_options(
 	do {
 		vm_map_offset_t vaddr;
 
+		if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
+		    (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
+			/* page was modified, so it will be compressed */
+			options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+			options |= PMAP_OPTIONS_COMPRESSOR;
+		}
+
 		pmap = pv_e->pmap;
+		is_ept = is_ept_pmap(pmap);
 		vaddr = pv_e->va;
 		pte = pmap_pte(pmap, vaddr);
 
@@ -1351,12 +1488,8 @@ pmap_page_protect_options(
 		if (pmap != kernel_pmap &&
 		    (options & PMAP_OPTIONS_COMPRESSOR) &&
 		    IS_INTERNAL_PAGE(pai)) {
-			/* adjust "reclaimed" stats */
-			OSAddAtomic64(+1, &pmap->stats.compressed);
-			PMAP_STATS_PEAK(pmap->stats.compressed);
-			pmap->stats.compressed_lifetime++;
 			/* mark this PTE as having been "reclaimed" */
-			new_pte_value = INTEL_PTE_COMPRESSED;
+			new_pte_value = PTE_COMPRESSED;
 		} else {
 			new_pte_value = 0;
 		}
@@ -1372,13 +1505,40 @@ pmap_page_protect_options(
 			/*
 			 * Remove the mapping, collecting dirty bits.
 			 */
-			pmap_update_pte(pte, INTEL_PTE_VALID, 0);
+			pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
 			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
 
-			pmap_phys_attributes[pai] |=
-				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+			if ((options &
+			     PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
+			    ! (pmap_phys_attributes[pai] &
+			       PHYS_MODIFIED) &&
+			    (*pte & PHYS_MODIFIED)) {
+				/*
+				 * Page is actually "modified" and
+				 * will be compressed. Start
+				 * accounting for it as "compressed".
+				 */
+				options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+				options |= PMAP_OPTIONS_COMPRESSOR;
+				new_pte_value = PTE_COMPRESSED;
+			}
+			if (!is_ept) {
+				pmap_phys_attributes[pai] |=
+					*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+			} else {
+				pmap_phys_attributes[pai] |=
+					ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
+			}
 			pmap_store_pte(pte, new_pte_value);
 		}
+
+		if (new_pte_value == PTE_COMPRESSED) {
+			/* one more "compressed" page */
+			OSAddAtomic64(+1, &pmap->stats.compressed);
+			PMAP_STATS_PEAK(pmap->stats.compressed);
+			pmap->stats.compressed_lifetime++;
+		}
+
 #if TESTING
 		if (pmap->stats.resident_count < 1)
 			panic("pmap_page_protect: resident_count");
@@ -1391,7 +1551,7 @@ pmap_page_protect_options(
 				 * This removal is only being done so we can send this page to
 				 * the compressor; therefore it mustn't affect total task footprint.
 				 */
-				pmap_ledger_credit(pmap, task_ledgers.phys_compressed, PAGE_SIZE);
+				pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
 			} else {
 				pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
 			}
@@ -1434,9 +1594,14 @@ pmap_page_protect_options(
 			/*
 			 * Write-protect, after opportunistic refmod collect
 			 */
-			pmap_phys_attributes[pai] |=
-				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
-			pmap_update_pte(pte, INTEL_PTE_WRITE, 0);
+			if (!is_ept) {
+				pmap_phys_attributes[pai] |=
+					*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+			} else {
+				pmap_phys_attributes[pai] |=
+					ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
+			}
+			pmap_update_pte(pte, PTE_WRITE(is_ept), 0);
 
 			if (options & PMAP_OPTIONS_NOFLUSH)
 				PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
@@ -1492,7 +1657,23 @@ phys_attribute_clear(
 	int			pai;
 	pmap_t			pmap;
 	char			attributes = 0;
-
+	boolean_t		is_internal, is_reusable, is_ept;
+	int			ept_bits_to_clear;
+	boolean_t		ept_keep_global_mod = FALSE;
+
+	if ((bits & PHYS_MODIFIED) &&
+	    (options & PMAP_OPTIONS_NOFLUSH) &&
+	    arg == NULL) {
+		panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
+		      "should not clear 'modified' without flushing TLBs\n",
+		      pn, bits, options, arg);
+	}
+
+	/* We only support converting MOD and REF bits for EPT PTEs in this function */
+	assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);
+
+	ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));
+
 	pmap_intr_assert();
 	assert(pn != vm_page_fictitious_addr);
 	if (pn == vm_page_guard_addr)
@@ -1514,6 +1695,7 @@ phys_attribute_clear(
 
 	LOCK_PVH(pai);
 
+
 	/*
 	 * Walk down PV list, clearing all modify or reference bits.
 	 * We do not have to lock the pv_list because we have
@@ -1524,31 +1706,102 @@ phys_attribute_clear(
 		 * There are some mappings.
 		 */
 
+		is_internal = IS_INTERNAL_PAGE(pai);
+		is_reusable = IS_REUSABLE_PAGE(pai);
+
 		pv_e = (pv_hashed_entry_t)pv_h;
 
 		do {
 			vm_map_offset_t va;
+			char pte_bits;
 
 			pmap = pv_e->pmap;
+			is_ept = is_ept_pmap(pmap);
 			va = pv_e->va;
+			pte_bits = 0;
+
+			if (bits) {
+				pte = pmap_pte(pmap, va);
+				/* grab ref/mod bits from this PTE */
+				pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
+				/* propagate to page's global attributes */
+				if (!is_ept) {
+					attributes |= pte_bits;
+				} else {
+					attributes |= ept_refmod_to_physmap(pte_bits);
+					if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
+						ept_keep_global_mod = TRUE;
+					}
+				}
+				/* which bits to clear for this PTE? */
+				if (!is_ept) {
+					pte_bits &= bits;
+				} else {
+					pte_bits &= ept_bits_to_clear;
+				}
+			}
 			 /*
 			  * Clear modify and/or reference bits.
 			  */
-			pte = pmap_pte(pmap, va);
-			attributes |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
-			pmap_update_pte(pte, bits, 0);
-			/* Ensure all processors using this translation
-			 * invalidate this TLB entry. The invalidation *must*
-			 * follow the PTE update, to ensure that the TLB
-			 * shadow of the 'D' bit (in particular) is
-			 * synchronized with the updated PTE.
-			 */
-			if (options & PMAP_OPTIONS_NOFLUSH) {
-				if (arg)
-					PMAP_UPDATE_TLBS_DELAYED(pmap, va, va + PAGE_SIZE, (pmap_flush_context *)arg);
-			} else
-				PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
+			if (pte_bits) {
+				pmap_update_pte(pte, bits, 0);
+
+				/* Ensure all processors using this translation
+				 * invalidate this TLB entry. The invalidation
+				 * *must* follow the PTE update, to ensure that
+				 * the TLB shadow of the 'D' bit (in particular)
+				 * is synchronized with the updated PTE.
+				 */
+				if (! (options & PMAP_OPTIONS_NOFLUSH)) {
+					/* flush TLBS now */
+					PMAP_UPDATE_TLBS(pmap,
+							 va,
+							 va + PAGE_SIZE);
+				} else if (arg) {
+					/* delayed TLB flush: add "pmap" info */
+					PMAP_UPDATE_TLBS_DELAYED(
+						pmap,
+						va,
+						va + PAGE_SIZE,
+						(pmap_flush_context *)arg);
+				} else {
+					/* no TLB flushing at all */
+				}
+			}
+
+			/* update pmap "reusable" stats */
+			if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
+			    is_reusable &&
+			    pmap != kernel_pmap) {
+				/* one less "reusable" */
+				assert(pmap->stats.reusable > 0);
+				OSAddAtomic(-1, &pmap->stats.reusable);
+				if (is_internal) {
+					/* one more "internal" */
+					OSAddAtomic(+1, &pmap->stats.internal);
+					PMAP_STATS_PEAK(pmap->stats.internal);
+				} else {
+					/* one more "external" */
+					OSAddAtomic(+1, &pmap->stats.external);
+					PMAP_STATS_PEAK(pmap->stats.external);
+				}
+			} else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
+				   !is_reusable &&
+				   pmap != kernel_pmap) {
+				/* one more "reusable" */
+				OSAddAtomic(+1, &pmap->stats.reusable);
+				PMAP_STATS_PEAK(pmap->stats.reusable);
+				if (is_internal) {
+					/* one less "internal" */
+					assert(pmap->stats.internal > 0);
+					OSAddAtomic(-1, &pmap->stats.internal);
+				} else {
+					/* one less "external" */
+					assert(pmap->stats.external > 0);
+					OSAddAtomic(-1, &pmap->stats.external);
+				}
+			}
 
 			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
 
@@ -1559,7 +1812,24 @@ phys_attribute_clear(
 	 */
 
 	pmap_phys_attributes[pai] |= attributes;
-	pmap_phys_attributes[pai] &= (~bits);
+
+	if (ept_keep_global_mod) {
+		/*
+		 * If the hardware doesn't support AD bits for EPT PTEs and someone is
+		 * requesting that we clear the modified bit for a phys page, we need
+		 * to ensure that there are no EPT mappings for the page with the
+		 * modified bit set. If there are, we cannot clear the global modified bit.
+		 */
+		bits &= ~PHYS_MODIFIED;
+	}
+	pmap_phys_attributes[pai] &= ~(bits);
+
+	/* update this page's "reusable" status */
+	if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
+		pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
+	} else if (options & PMAP_OPTIONS_SET_REUSABLE) {
+		pmap_phys_attributes[pai] |= PHYS_REUSABLE;
+	}
 
 	UNLOCK_PVH(pai);
 
@@ -1581,9 +1851,11 @@ phys_attribute_test(
 	int			pai;
 	pmap_t			pmap;
 	int			attributes = 0;
+	boolean_t		is_ept;
 
 	pmap_intr_assert();
 	assert(pn != vm_page_fictitious_addr);
+	assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
 	if (pn == vm_page_guard_addr)
 		return 0;
 
@@ -1627,13 +1899,19 @@ phys_attribute_test(
 			vm_map_offset_t va;
 
 			pmap = pv_e->pmap;
+			is_ept = is_ept_pmap(pmap);
 			va = pv_e->va;
 			/*
 			 * pick up modify and/or reference bits from mapping
 			 */
 
 			pte = pmap_pte(pmap, va);
-			attributes |= (int)(*pte & bits);
+			if (!is_ept) {
+				attributes |= (int)(*pte & bits);
+			} else {
+				attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
+
+			}
 
 			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
 
@@ -1664,7 +1942,8 @@ pmap_change_wiring(
 	PMAP_LOCK(map);
 
 	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
-		panic("pmap_change_wiring: pte missing");
+		panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
+		      map, vaddr, wired);
 
 	if (wired && !iswired(*pte)) {
 		/*
@@ -1672,7 +1951,7 @@ pmap_change_wiring(
 		 */
 		pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
 		OSAddAtomic(+1,  &map->stats.wired_count);
-		pmap_update_pte(pte, 0, INTEL_PTE_WIRED);
+		pmap_update_pte(pte, 0, PTE_WIRED);
 	}
 	else if (!wired && iswired(*pte)) {
 		/*
@@ -1681,7 +1960,7 @@ pmap_change_wiring(
 		assert(map->stats.wired_count >= 1);
 		OSAddAtomic(-1,  &map->stats.wired_count);
 		pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
-		pmap_update_pte(pte, INTEL_PTE_WIRED, 0);
+		pmap_update_pte(pte, PTE_WIRED, 0);
 	}
 
 	PMAP_UNLOCK(map);
@@ -1742,185 +2021,39 @@ pmap_map_bd(
 	return(virt);
 }
 
-void
-pmap_reusable(
+mach_vm_size_t
+pmap_query_resident(
 	pmap_t		pmap,
 	addr64_t	s64,
 	addr64_t	e64,
-	boolean_t	reusable)
+	mach_vm_size_t	*compressed_bytes_p)
 {
 	pt_entry_t	*pde;
 	pt_entry_t	*spte, *epte;
 	addr64_t	l64;
 	uint64_t	deadline;
+	mach_vm_size_t	resident_bytes;
+	mach_vm_size_t	compressed_bytes;
+	boolean_t	is_ept;
 
 	pmap_intr_assert();
 
-	if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
-		return;
-
-	PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_START,
-		   pmap,
-		   (uint32_t) (s64 >> 32), s64,
-		   (uint32_t) (e64 >> 32), e64);
-
-	PMAP_LOCK(pmap);
-
-	deadline = rdtsc64() + max_preemption_latency_tsc;
-
-	while (s64 < e64) {
-		l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
-		if (l64 > e64)
-			l64 = e64;
-		pde = pmap_pde(pmap, s64);
-
-		if (pde && (*pde & INTEL_PTE_VALID)) {
-			if (*pde & INTEL_PTE_PS) {
-				/* superpage: not supported */
-			} else {
-				spte = pmap_pte(pmap,
-						(s64 & ~(pde_mapped_size - 1)));
-				spte = &spte[ptenum(s64)];
-				epte = &spte[intel_btop(l64 - s64)];
-				pmap_reusable_range(pmap, s64, spte, epte,
-						    reusable);
-			}
+	if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
+		if (compressed_bytes_p) {
+			*compressed_bytes_p = 0;
 		}
-		s64 = l64;
-
-		if (s64 < e64 && rdtsc64() >= deadline) {
-			PMAP_UNLOCK(pmap);
-			PMAP_LOCK(pmap);
-			deadline = rdtsc64() + max_preemption_latency_tsc;
-		}
-	}
-
-	PMAP_UNLOCK(pmap);
-
-	PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_END,
-		   pmap, reusable, 0, 0, 0);
-}
-
-void
-pmap_reusable_range(
-	pmap_t			pmap,
-	vm_map_offset_t		start_vaddr,
-	pt_entry_t		*spte,
-	pt_entry_t		*epte,
-	boolean_t		reusable)
-{
-	pt_entry_t	*cpte;
-	int		num_external, num_internal, num_reusable;
-	ppnum_t		pai;
-	pmap_paddr_t	pa;
-	vm_map_offset_t	vaddr;
-
-	num_external = 0;
-	num_internal = 0;
-	num_reusable = 0;
-
-	for (cpte = spte, vaddr = start_vaddr;
-	     cpte < epte;
-	     cpte++, vaddr += PAGE_SIZE_64) {
-
-		pa = pte_to_pa(*cpte);
-		if (pa == 0)
-			continue;
-
-		pai = pa_index(pa);
-
-		LOCK_PVH(pai);
-
-		pa = pte_to_pa(*cpte);
-		if (pa == 0) {
-			UNLOCK_PVH(pai);
-			continue;
-		}
-		if (reusable) {
-			/* we want to set "reusable" */
-			if (IS_REUSABLE_PAGE(pai)) {
-				/* already reusable: no change */
-			} else {
-				pmap_phys_attributes[pai] |= PHYS_REUSABLE;
-				/* one more "reusable" */
-				num_reusable++;
-				if (IS_INTERNAL_PAGE(pai)) {
-					/* one less "internal" */
-					num_internal--;
-				} else {
-					/* one less "external" */
-					num_external--;
-				}
-			}
-		} else {
-			/* we want to clear "reusable" */
-			if (IS_REUSABLE_PAGE(pai)) {
-				pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
-				/* one less "reusable" */
-				num_reusable--;
-				if (IS_INTERNAL_PAGE(pai)) {
-					/* one more "internal" */
-					num_internal++;
-				} else {
-					/* one more "external" */
-					num_external++;
-				}
-			} else {
-				/* already not reusable: no change */
-			}
-		}
-
-		UNLOCK_PVH(pai);
-
-	} /* for loop */
-
-	/*
-	 * Update the counts
-	 */
-	if (pmap != kernel_pmap) {
-		if (num_external) {
-			OSAddAtomic(num_external, &pmap->stats.external);
-			PMAP_STATS_PEAK(pmap->stats.external);
-		}
-		assert(pmap->stats.external >= 0);
-		if (num_internal) {
-			OSAddAtomic(num_internal, &pmap->stats.internal);
-			PMAP_STATS_PEAK(pmap->stats.internal);
-		}
-		assert(pmap->stats.internal >= 0);
-		if (num_reusable) {
-			OSAddAtomic(num_reusable, &pmap->stats.reusable);
-			PMAP_STATS_PEAK(pmap->stats.reusable);
-		}
-		assert(pmap->stats.reusable >= 0);
+		return 0;
 	}
-	return;
-}
-
-unsigned int
-pmap_query_resident(
-	pmap_t		pmap,
-	addr64_t	s64,
-	addr64_t	e64)
-{
-	pt_entry_t	*pde;
-	pt_entry_t	*spte, *epte;
-	addr64_t	l64;
-	uint64_t	deadline;
-	unsigned int	result;
-
-	pmap_intr_assert();
-
-	if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
-		return 0;
+	is_ept = is_ept_pmap(pmap);
 
 	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
 		   pmap,
 		   (uint32_t) (s64 >> 32), s64,
 		   (uint32_t) (e64 >> 32), e64);
 
-	result = 0;
+	resident_bytes = 0;
+	compressed_bytes = 0;
 
 	PMAP_LOCK(pmap);
 
@@ -1932,8 +2065,8 @@ pmap_query_resident(
 			l64 = e64;
 		pde = pmap_pde(pmap, s64);
 
-		if (pde && (*pde & INTEL_PTE_VALID)) {
-			if (*pde & INTEL_PTE_PS) {
+		if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
+			if (*pde & PTE_PS) {
 				/* superpage: not supported */
 			} else {
 				spte = pmap_pte(pmap,
@@ -1943,7 +2076,9 @@ pmap_query_resident(
 
 				for (; spte < epte; spte++) {
 					if (pte_to_pa(*spte) != 0) {
-						result++;
+						resident_bytes += PAGE_SIZE;
+					} else if (*spte & PTE_COMPRESSED) {
+						compressed_bytes += PAGE_SIZE;
 					}
 				}
 
@@ -1963,5 +2098,18 @@ pmap_query_resident(
 	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
 		   pmap, 0, 0, 0, 0);
 
-	return result;
+	if (compressed_bytes_p) {
+		*compressed_bytes_p = compressed_bytes;
+	}
+	return resident_bytes;
+}
+
+#if MACH_ASSERT
+void
+pmap_set_process(
+	__unused pmap_t pmap,
+	__unused int pid,
+	__unused char *procname)
+{
 }
+#endif /* MACH_ASSERT */
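
Note (not part of the diff above): the recurring pattern in this change is that every PTE bit is selected through an is_ept flag rather than hard-coded to the legacy IA-32e encoding, because EPT entries use a different bit layout and have no explicit "present" bit. The sketch below illustrates that pattern in isolation. The constants and the pte_template() helper are simplified, hypothetical stand-ins, not the real definitions from osfmk/i386/pmap_internal.h; treat the values and names as assumptions for illustration only.

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t pt_entry_t;

/* Legacy IA-32e page-table bits (simplified). */
#define INTEL_PTE_VALID  0x001ULL
#define INTEL_PTE_WRITE  0x002ULL
#define INTEL_PTE_REF    0x020ULL
#define INTEL_PTE_MOD    0x040ULL

/* EPT bits: R/W/X in bits 0-2, accessed/dirty in bits 8-9. */
#define INTEL_EPT_READ   0x001ULL
#define INTEL_EPT_WRITE  0x002ULL
#define INTEL_EPT_EX     0x004ULL
#define INTEL_EPT_REF    0x100ULL
#define INTEL_EPT_MOD    0x200ULL

/* An EPT entry is "valid" if any of R/W/X is set; legacy PTEs have a present bit. */
#define PTE_VALID_MASK(is_ept) \
	((is_ept) ? (INTEL_EPT_READ | INTEL_EPT_WRITE | INTEL_EPT_EX) : INTEL_PTE_VALID)
#define PTE_WRITE(is_ept) ((is_ept) ? INTEL_EPT_WRITE : INTEL_PTE_WRITE)
#define PTE_REF(is_ept)   ((is_ept) ? INTEL_EPT_REF   : INTEL_PTE_REF)
#define PTE_MOD(is_ept)   ((is_ept) ? INTEL_EPT_MOD   : INTEL_PTE_MOD)

/*
 * Build a minimal mapping template in the style pmap_enter_options() uses
 * after this change: choose the encoding by pmap type, and pre-set the
 * dirty bit for writable EPT mappings when the CPU lacks EPT A/D support,
 * since hardware will not maintain it.
 */
static pt_entry_t
pte_template(uint64_t pa, bool writable, bool is_ept, bool ept_has_ad)
{
	pt_entry_t t = pa;

	if (!is_ept)
		t |= INTEL_PTE_VALID;   /* legacy: explicit present bit */
	else
		t |= INTEL_EPT_READ;    /* EPT: validity implied by R/W/X */

	if (writable) {
		t |= PTE_WRITE(is_ept);
		if (is_ept && !ept_has_ad)
			t |= PTE_MOD(is_ept);   /* software-maintained dirty bit */
	}
	return t;
}

The same selection shows up throughout the diff: tests like (*pde & PTE_VALID_MASK(is_ept)), write-protection via PTE_WRITE(is_ept), and ref/mod harvesting via PTE_REF(is_ept)/PTE_MOD(is_ept), with ept_refmod_to_physmap() converting EPT accessed/dirty bits back into the PHYS_REFERENCED/PHYS_MODIFIED attributes.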