diff --git a/osfmk/i386/pmap_x86_common.c b/osfmk/i386/pmap_x86_common.c
index cc584a9a11b85a3b198788c2da2129031e798d1f..c6352893ad9e7aa33354290850b26fb30e7558e0 100644
--- a/osfmk/i386/pmap_x86_common.c
+++ b/osfmk/i386/pmap_x86_common.c
@@ -25,6 +25,9 @@
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
+
+#include <mach_assert.h>
+
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
 #include <kern/ledger.h>
@@ -91,6 +94,9 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
        unsigned int    i;
        uint64_t        num_pde;
 
+       assert(!is_ept_pmap(grand));
+       assert(!is_ept_pmap(subord));
+
        if ((size & (pmap_nesting_size_min-1)) ||
            (va_start & (pmap_nesting_size_min-1)) ||
            (nstart & (pmap_nesting_size_min-1)) ||
@@ -227,6 +233,8 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
                    grand, vaddr, size);
        }
 
+       assert(!is_ept_pmap(grand));
+
        /* align everything to PDE boundaries */
        va_start = vaddr & ~(NBPDE-1);
        va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
@@ -265,6 +273,15 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
        return KERN_SUCCESS;
 }
 
+kern_return_t
+pmap_unnest_options(
+       pmap_t grand,
+       addr64_t vaddr,
+       __unused uint64_t size,
+       __unused unsigned int options) {
+       return pmap_unnest(grand, vaddr, size);
+}
+
 /* Invoked by the Mach VM to determine the platform specific unnest region */
 
 boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
@@ -308,6 +325,9 @@ pmap_find_phys(pmap_t pmap, addr64_t va)
        ppnum_t         ppn = 0;
        pd_entry_t      pde;
        pt_entry_t      pte;
+       boolean_t       is_ept;
+
+       is_ept = is_ept_pmap(pmap);
 
        mp_disable_preemption();
 
@@ -321,14 +341,14 @@ pmap_find_phys(pmap_t pmap, addr64_t va)
 
        pdep = pmap_pde(pmap, va);
 
-       if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
-               if (pde & INTEL_PTE_PS) {
+       if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
+               if (pde & PTE_PS) {
                        ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
                        ppn += (ppnum_t) ptenum(va);
                }
                else {
                        ptp = pmap_pte(pmap, va);
-                       if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
+                       if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
                                ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
                        }
                }
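[Reference, not part of the patch] The PTE_* accessors this change switches to come from osfmk/i386/pmap.h. Paraphrased sketches are below; the exact spellings may differ slightly in the release sources. For EPT, an entry is considered "present" when any of the read/write/execute permissions is granted, which is why a mask replaces the single legacy valid bit:

/* Paraphrased sketch of the pmap.h selector macros assumed by this file. */
#define PTE_VALID_MASK(is_ept)  ((is_ept) ? (INTEL_EPT_READ | INTEL_EPT_WRITE | INTEL_EPT_EX) : INTEL_PTE_VALID)
#define PTE_READ(is_ept)        ((is_ept) ? INTEL_EPT_READ  : INTEL_PTE_VALID)
#define PTE_WRITE(is_ept)       ((is_ept) ? INTEL_EPT_WRITE : INTEL_PTE_WRITE)
#define PTE_REF(is_ept)         ((is_ept) ? INTEL_EPT_REF   : INTEL_PTE_REF)
#define PTE_MOD(is_ept)         ((is_ept) ? INTEL_EPT_MOD   : INTEL_PTE_MOD)
#define PTE_PS                  INTEL_PTE_PS          /* page-size bit, shared layout */
#define PTE_WIRED               INTEL_PTE_WIRED       /* software bit, shared */
#define PTE_COMPRESSED          INTEL_PTE_COMPRESSED  /* software marker, shared */

The page-size, wired and compressed bits are software or architecturally shared bits, which is why calls such as (*pde & PTE_PS) take no is_ept argument.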
@@ -342,9 +362,13 @@ pfp_exit:
 /*
  * Update cache attributes for all extant managed mappings.
  * Assumes PV for this page is locked, and that the page
- * is managed.
+ * is managed. We assume that this physical page may be mapped in
+ * both EPT and normal Intel PTEs, so we convert the attributes
+ * to the corresponding format for each pmap.
+ *
+ * We assert that the passed set of attributes is a subset of the
+ * PHYS_CACHEABILITY_MASK.
  */
-
 void
 pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
        pv_rooted_entry_t       pv_h, pv_e;
@@ -352,8 +376,17 @@ pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
        vm_map_offset_t vaddr;
        pmap_t  pmap;
        pt_entry_t      *ptep;
+       boolean_t       is_ept;
+       unsigned        ept_attributes;
        
        assert(IS_MANAGED_PAGE(pn));
+       assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);
+
+       /* We don't support the PTA bit for EPT PTEs */
+       if (attributes & INTEL_PTE_NCACHE)
+               ept_attributes = INTEL_EPT_NCACHE;
+       else
+               ept_attributes = INTEL_EPT_WB;
 
        pv_h = pai_to_pvh(pn);
        /* TODO: translate the PHYS_* bits to PTE bits, while they're
@@ -374,12 +407,18 @@ pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
                        pmap = pv_e->pmap;
                        vaddr = pv_e->va;
                        ptep = pmap_pte(pmap, vaddr);
-               
+                       
                        if (0 == ptep)
                                panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
 
+                       is_ept = is_ept_pmap(pmap);
+
                        nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
-                       pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
+                       if (!is_ept) {
+                               pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
+                       } else {
+                               pmap_update_pte(ptep, INTEL_EPT_CACHE_MASK, ept_attributes);
+                       }
                        PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
                        pvh_e = nexth;
                } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
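[Reference, not part of the patch] pmap_update_pte(), used above for both the legacy and EPT branches, atomically clears one set of bits and sets another in a live PTE. A paraphrased sketch of its pmap_internal.h definition:

/* Paraphrased sketch: compare-and-exchange loop so concurrent hardware
 * ref/mod updates to the same PTE are not lost. */
static inline void
pmap_update_pte(pt_entry_t *mptep, uint64_t pclear_bits, uint64_t pset_bits)
{
	pt_entry_t npte, opte;
	do {
		opte = *mptep;
		npte = (opte & ~pclear_bits) | pset_bits;
	} while (!pmap_cmpx_pte(mptep, opte, npte));
}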
@@ -454,15 +493,18 @@ pmap_enter_options(
        /* 2MiB mappings are confined to x86_64 by VM */
        boolean_t               superpage = flags & VM_MEM_SUPERPAGE;
        vm_object_t             delpage_pm_obj = NULL;
-       int                     delpage_pde_index = 0;
+       uint64_t                delpage_pde_index = 0;
        pt_entry_t              old_pte;
        kern_return_t           kr_expand;
+       boolean_t               is_ept;
 
        pmap_intr_assert();
 
        if (pmap == PMAP_NULL)
                return KERN_INVALID_ARGUMENT;
 
+       is_ept = is_ept_pmap(pmap);
+
        /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
         * unused value for that scenario.
         */
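[Reference, not part of the patch] is_ept_pmap() distinguishes a guest (EPT) pmap from an ordinary one. A paraphrased sketch, assuming the pm_cr3/pm_eptp fields this release keeps in struct pmap:

/* Paraphrased sketch: an EPT pmap carries an EPT pointer instead of a CR3. */
static inline boolean_t
is_ept_pmap(pmap_t p)
{
	if (p->pm_cr3 != 0) {
		assert(p->pm_eptp == 0);
		return FALSE;
	}
	assert(p->pm_eptp != 0);
	return TRUE;
}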
@@ -531,14 +573,14 @@ Retry:
                return KERN_SUCCESS;
        }
 
-       if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
+       if (superpage && *pte && !(*pte & PTE_PS)) {
                /*
                 * There is still an empty page table mapped that
                 * was used for a previous base page mapping.
                 * Remember the PDE and the PDE index, so that we
                 * can free the page at the end of this function.
                 */
-               delpage_pde_index = (int)pdeidx(pmap, vaddr);
+               delpage_pde_index = pdeidx(pmap, vaddr);
                delpage_pm_obj = pmap->pm_obj;
                *pte = 0;
        }
@@ -548,7 +590,7 @@ Retry:
        old_pa_locked = FALSE;
 
        if (old_pa == 0 &&
-           (*pte & INTEL_PTE_COMPRESSED)) {
+           (*pte & PTE_COMPRESSED)) {
                /* one less "compressed" */
                OSAddAtomic64(-1, &pmap->stats.compressed);
                /* marker will be cleared below */
@@ -575,32 +617,58 @@ Retry:
         */
        if (old_pa == pa) {
                pt_entry_t old_attributes =
-                   *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);
+                   *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept));
 
                /*
                 *      May be changing its wired attribute or protection
                 */
 
-               template = pa_to_pte(pa) | INTEL_PTE_VALID;
-               template |= pmap_get_cache_attributes(pa_index(pa));
+               template =  pa_to_pte(pa);
+
+               /* ?: WORTH ASSERTING THAT AT LEAST ONE RWX (implicit valid) PASSED FOR EPT? */
+               if (!is_ept) {
+                       template |= INTEL_PTE_VALID;
+               } else {
+                       template |= INTEL_EPT_IPTA;
+               }
+
+               template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
 
-               if (VM_MEM_NOT_CACHEABLE ==
-                   (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
+               /*
+                * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
+                */
+               if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
+                   (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
                        if (!(flags & VM_MEM_GUARDED))
                                template |= INTEL_PTE_PTA;
                        template |= INTEL_PTE_NCACHE;
                }
-               if (pmap != kernel_pmap)
+               if (pmap != kernel_pmap && !is_ept)
                        template |= INTEL_PTE_USER;
+
+               if (prot & VM_PROT_READ)
+                       template |= PTE_READ(is_ept);
+
                if (prot & VM_PROT_WRITE) {
-                       template |= INTEL_PTE_WRITE;
+                       template |= PTE_WRITE(is_ept);
+                       if (is_ept && !pmap_ept_support_ad) {
+                               template |= PTE_MOD(is_ept);
+                               if (old_pa_locked) {
+                                       assert(IS_MANAGED_PAGE(pai));
+                                       pmap_phys_attributes[pai] |= PHYS_MODIFIED;
+                               }
+                       }
+               }
+               if (prot & VM_PROT_EXECUTE) {
+                       assert(set_NX == 0);
+                       template = pte_set_ex(template, is_ept);
                }
 
                if (set_NX)
-                       template |= INTEL_PTE_NX;
+                       template = pte_remove_ex(template, is_ept);
 
                if (wired) {
-                       template |= INTEL_PTE_WIRED;
+                       template |= PTE_WIRED;
                        if (!iswired(old_attributes))  {
                                OSAddAtomic(+1, &pmap->stats.wired_count);
                                pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
@@ -612,26 +680,36 @@ Retry:
                                pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
                        }
                }
+
                if (superpage)          /* this path can not be used */
-                       template |= INTEL_PTE_PS;       /* to change the page size! */
+                       template |= PTE_PS;     /* to change the page size! */
 
                if (old_attributes == template)
                        goto dont_update_pte;
 
                /* Determine delta, PV locked */
                need_tlbflush =
-                   ((old_attributes ^ template) != INTEL_PTE_WIRED);
+                   ((old_attributes ^ template) != PTE_WIRED);
                
-               if (need_tlbflush == TRUE && !(old_attributes & INTEL_PTE_WRITE)) {
-                       if ((old_attributes ^ template) == INTEL_PTE_WRITE)
+               if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
+                       if ((old_attributes ^ template) == PTE_WRITE(is_ept))
                                need_tlbflush = FALSE;
                }
 
+               /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
+               if (is_ept && !pmap_ept_support_ad) {
+                       template |= PTE_REF(is_ept);
+                       if (old_pa_locked) {
+                               assert(IS_MANAGED_PAGE(pai));
+                               pmap_phys_attributes[pai] |= PHYS_REFERENCED;
+                       }
+               }
+
                /* store modified PTE and preserve RC bits */
                pt_entry_t npte, opte;;
                do {
                        opte = *pte;
-                       npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD));
+                       npte = template | (opte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
                } while (!pmap_cmpx_pte(pte, opte, npte));
 dont_update_pte:
                if (old_pa_locked) {
@@ -663,12 +741,12 @@ dont_update_pte:
                 */
 
                /* invalidate the PTE */
-               pmap_update_pte(pte, INTEL_PTE_VALID, 0);
+               pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
                /* propagate invalidate everywhere */
                PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
                /* remember reference and change */
                old_pte = *pte;
-               oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
+               oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
                /* completely invalidate the PTE */
                pmap_store_pte(pte, 0);
 
@@ -696,7 +774,12 @@ dont_update_pte:
                                pmap_ledger_debit(pmap, task_ledgers.wired_mem,
                                    PAGE_SIZE);
                        }
-                       pmap_phys_attributes[pai] |= oattr;
+
+                       if (!is_ept) {
+                               pmap_phys_attributes[pai] |= oattr;
+                       } else {
+                               pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
+                       }
 
                        /*
                         *      Remove the mapping from the pvlist for
@@ -851,25 +934,49 @@ dont_update_pte:
         *      Build a template to speed up entering -
         *      only the pfn changes.
         */
-       template = pa_to_pte(pa) | INTEL_PTE_VALID;
+       template = pa_to_pte(pa);
+
+       if (!is_ept) {
+               template |= INTEL_PTE_VALID;
+       } else {
+               template |= INTEL_EPT_IPTA;
+       }
+
+
        /*
         * DRK: It may be worth asserting on cache attribute flags that diverge
         * from the existing physical page attributes.
         */
 
-       template |= pmap_get_cache_attributes(pa_index(pa));
-       
-       if (flags & VM_MEM_NOT_CACHEABLE) {
+       template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
+
+       /*
+        * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
+        */
+       if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
                if (!(flags & VM_MEM_GUARDED))
                        template |= INTEL_PTE_PTA;
                template |= INTEL_PTE_NCACHE;
        }
-       if (pmap != kernel_pmap)
+       if (pmap != kernel_pmap && !is_ept)
                template |= INTEL_PTE_USER;
-       if (prot & VM_PROT_WRITE)
-               template |= INTEL_PTE_WRITE;
+       if (prot & VM_PROT_READ)
+               template |= PTE_READ(is_ept);
+       if (prot & VM_PROT_WRITE) {
+               template |= PTE_WRITE(is_ept);
+               if (is_ept && !pmap_ept_support_ad) {
+                       template |= PTE_MOD(is_ept);
+                       if (IS_MANAGED_PAGE(pai))
+                               pmap_phys_attributes[pai] |= PHYS_MODIFIED;
+               }
+       }
+       if (prot & VM_PROT_EXECUTE) {
+               assert(set_NX == 0);
+               template = pte_set_ex(template, is_ept);
+       }
+
        if (set_NX)
-               template |= INTEL_PTE_NX;
+               template = pte_remove_ex(template, is_ept);
        if (wired) {
                template |= INTEL_PTE_WIRED;
                OSAddAtomic(+1,  & pmap->stats.wired_count);
@@ -877,6 +984,14 @@ dont_update_pte:
        }
        if (superpage)
                template |= INTEL_PTE_PS;
+
+       /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
+       if (is_ept && !pmap_ept_support_ad) {
+               template |= PTE_REF(is_ept);
+               if (IS_MANAGED_PAGE(pai))
+                       pmap_phys_attributes[pai] |= PHYS_REFERENCED;
+       }
+
        pmap_store_pte(pte, template);
 
        /*
@@ -906,11 +1021,11 @@ Done:
                vm_page_t m;
 
                vm_object_lock(delpage_pm_obj);
-               m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
+               m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
                if (m == VM_PAGE_NULL)
                    panic("pmap_enter: pte page not in object");
-               vm_object_unlock(delpage_pm_obj);
                VM_PAGE_FREE(m);
+               vm_object_unlock(delpage_pm_obj);
                OSAddAtomic(-1,  &inuse_ptepages_count);
                PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
        }
@@ -938,7 +1053,8 @@ pmap_remove_range(
        pt_entry_t              *spte,
        pt_entry_t              *epte)
 {
-       pmap_remove_range_options(pmap, start_vaddr, spte, epte, 0);
+       pmap_remove_range_options(pmap, start_vaddr, spte, epte,
+                                 PMAP_OPTIONS_REMOVE);
 }
 
 void
@@ -960,6 +1076,7 @@ pmap_remove_range_options(
        ppnum_t                 pai;
        pmap_paddr_t            pa;
        vm_map_offset_t         vaddr;
+       boolean_t               is_ept = is_ept_pmap(pmap);
 
        num_removed = 0;
        num_unwired = 0;
@@ -980,12 +1097,12 @@ pmap_remove_range_options(
                if (pa == 0) {
                        if (pmap != kernel_pmap &&
                            (options & PMAP_OPTIONS_REMOVE) &&
-                           (p & INTEL_PTE_COMPRESSED)) {
+                           (p & PTE_COMPRESSED)) {
                                /* one less "compressed" */
                                num_compressed++;
                                /* clear marker */
                                /* XXX probably does not need to be atomic! */
-                               pmap_update_pte(cpte, INTEL_PTE_COMPRESSED, 0);
+                               pmap_update_pte(cpte, PTE_COMPRESSED, 0);
                        }
                        continue;
                }
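[Reference, not part of the patch] A "compressed" marker is a software-only bit left in an otherwise empty, non-present PTE to record that the page's contents were handed to the VM compressor. The loops here open-code the test; a hypothetical helper capturing the same pattern:

/* Hypothetical sketch: a compressed marker only appears in an entry that no
 * longer maps a physical page, so pte_to_pa() returns 0 for it. */
static inline boolean_t
pte_is_compressed_marker(pt_entry_t p)
{
	return (pte_to_pa(p) == 0) && ((p & PTE_COMPRESSED) != 0);
}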
@@ -1006,11 +1123,11 @@ pmap_remove_range_options(
                        continue;
                }
 
-               if ((p & INTEL_PTE_VALID) == 0)
+               if ((p & PTE_VALID_MASK(is_ept)) == 0)
                        num_invalid++;
 
                /* invalidate the PTE */
-               pmap_update_pte(cpte, INTEL_PTE_VALID, 0);
+               pmap_update_pte(cpte, PTE_VALID_MASK(is_ept), 0);
        }
 
        if (num_found == 0) {
@@ -1138,7 +1255,7 @@ pmap_remove(
        addr64_t        s64,
        addr64_t        e64)
 {
-       pmap_remove_options(map, s64, e64, 0);
+       pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
 }
 
 void
@@ -1152,12 +1269,15 @@ pmap_remove_options(
        pt_entry_t     *spte, *epte;
        addr64_t        l64;
        uint64_t        deadline;
+       boolean_t       is_ept;
 
        pmap_intr_assert();
 
        if (map == PMAP_NULL || s64 == e64)
                return;
 
+       is_ept = is_ept_pmap(map);
+
        PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
                   map,
                   (uint32_t) (s64 >> 32), s64,
@@ -1206,8 +1326,8 @@ pmap_remove_options(
                        l64 = e64;
                pde = pmap_pde(map, s64);
 
-               if (pde && (*pde & INTEL_PTE_VALID)) {
-                       if (*pde & INTEL_PTE_PS) {
+               if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
+                       if (*pde & PTE_PS) {
                                /*
                                 * If we're removing a superpage, pmap_remove_range()
                                 * must work on level 2 instead of level 1; and we're
@@ -1228,6 +1348,14 @@ pmap_remove_options(
 
                if (s64 < e64 && rdtsc64() >= deadline) {
                        PMAP_UNLOCK(map)
+                           /* TODO: Rapid release/reacquisition can defeat
+                            * the "backoff" intent here; either consider a
+                            * fair spinlock, or a scheme whereby each lock
+                            * attempt marks the processor as within a spinlock
+                            * acquisition, and scan CPUs here to determine
+                            * if a backoff is necessary, to avoid sacrificing
+                            * performance in the common case.
+                            */
                        PMAP_LOCK(map)
                        deadline = rdtsc64() + max_preemption_latency_tsc;
                }
@@ -1274,6 +1402,7 @@ pmap_page_protect_options(
        pmap_t                  pmap;
        boolean_t               remove;
        pt_entry_t              new_pte_value;
+       boolean_t               is_ept;
 
        pmap_intr_assert();
        assert(pn != vm_page_fictitious_addr);
@@ -1323,7 +1452,15 @@ pmap_page_protect_options(
        do {
                vm_map_offset_t vaddr;
 
+               if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
+                   (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
+                       /* page was modified, so it will be compressed */
+                       options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+                       options |= PMAP_OPTIONS_COMPRESSOR;
+               }
+
                pmap = pv_e->pmap;
+               is_ept = is_ept_pmap(pmap);
                vaddr = pv_e->va;
                pte = pmap_pte(pmap, vaddr);
 
@@ -1351,12 +1488,8 @@ pmap_page_protect_options(
                        if (pmap != kernel_pmap &&
                            (options & PMAP_OPTIONS_COMPRESSOR) &&
                            IS_INTERNAL_PAGE(pai)) {
-                               /* adjust "reclaimed" stats */
-                               OSAddAtomic64(+1, &pmap->stats.compressed);
-                               PMAP_STATS_PEAK(pmap->stats.compressed);
-                               pmap->stats.compressed_lifetime++;
                                /* mark this PTE as having been "reclaimed" */
-                               new_pte_value = INTEL_PTE_COMPRESSED;
+                               new_pte_value = PTE_COMPRESSED;
                        } else {
                                new_pte_value = 0;
                        }
@@ -1372,13 +1505,40 @@ pmap_page_protect_options(
                                /*
                                 * Remove the mapping, collecting dirty bits.
                                 */
-                               pmap_update_pte(pte, INTEL_PTE_VALID, 0);
+                               pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
 
                                PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
-                               pmap_phys_attributes[pai] |=
-                                       *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+                               if ((options &
+                                    PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
+                                   ! (pmap_phys_attributes[pai] &
+                                      PHYS_MODIFIED) &&
+                                   (*pte & PHYS_MODIFIED)) {
+                                       /*
+                                        * Page is actually "modified" and
+                                        * will be compressed.  Start
+                                        * accounting for it as "compressed".
+                                        */
+                                       options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+                                       options |= PMAP_OPTIONS_COMPRESSOR;
+                                       new_pte_value = PTE_COMPRESSED;
+                               }
+                               if (!is_ept) {
+                                       pmap_phys_attributes[pai] |=
+                                               *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+                               } else {
+                                       pmap_phys_attributes[pai] |=
+                                               ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
+                               }
                                pmap_store_pte(pte, new_pte_value);
                        }
+
+                       if (new_pte_value == PTE_COMPRESSED) {
+                               /* one more "compressed" page */
+                               OSAddAtomic64(+1, &pmap->stats.compressed);
+                               PMAP_STATS_PEAK(pmap->stats.compressed);
+                               pmap->stats.compressed_lifetime++;
+                       }
+
 #if TESTING
                        if (pmap->stats.resident_count < 1)
                                panic("pmap_page_protect: resident_count");
@@ -1391,7 +1551,7 @@ pmap_page_protect_options(
                                 * This removal is only being done so we can send this page to
                                 * the compressor; therefore it mustn't affect total task footprint.
                                 */
-                               pmap_ledger_credit(pmap, task_ledgers.phys_compressed, PAGE_SIZE);
+                               pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
                        } else {
                                pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
                        }
@@ -1434,9 +1594,14 @@ pmap_page_protect_options(
                        /*
                         * Write-protect, after opportunistic refmod collect
                         */
-                       pmap_phys_attributes[pai] |=
-                           *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
-                       pmap_update_pte(pte, INTEL_PTE_WRITE, 0);
+                       if (!is_ept) {
+                               pmap_phys_attributes[pai] |=
+                                       *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+                       } else {
+                               pmap_phys_attributes[pai] |=
+                                       ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
+                       }
+                       pmap_update_pte(pte, PTE_WRITE(is_ept), 0);
 
                        if (options & PMAP_OPTIONS_NOFLUSH)
                                PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
@@ -1492,7 +1657,23 @@ phys_attribute_clear(
        int                     pai;
        pmap_t                  pmap;
        char                    attributes = 0;
-       
+       boolean_t               is_internal, is_reusable, is_ept;
+       int                     ept_bits_to_clear;
+       boolean_t               ept_keep_global_mod = FALSE;
+
+       if ((bits & PHYS_MODIFIED) &&
+           (options & PMAP_OPTIONS_NOFLUSH) &&
+           arg == NULL) {
+               panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
+                     "should not clear 'modified' without flushing TLBs\n",
+                     pn, bits, options, arg);
+       }
+
+       /* We only support converting MOD and REF bits for EPT PTEs in this function */
+       assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);
+
+       ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));
+
        pmap_intr_assert();
        assert(pn != vm_page_fictitious_addr);
        if (pn == vm_page_guard_addr)
@@ -1514,6 +1695,7 @@ phys_attribute_clear(
 
        LOCK_PVH(pai);
 
+
        /*
         * Walk down PV list, clearing all modify or reference bits.
         * We do not have to lock the pv_list because we have
@@ -1524,31 +1706,102 @@ phys_attribute_clear(
                 * There are some mappings.
                 */
 
+               is_internal = IS_INTERNAL_PAGE(pai);
+               is_reusable = IS_REUSABLE_PAGE(pai);
+
                pv_e = (pv_hashed_entry_t)pv_h;
 
                do {
                        vm_map_offset_t va;
+                       char pte_bits;
 
                        pmap = pv_e->pmap;
+                       is_ept = is_ept_pmap(pmap);
                        va = pv_e->va;
+                       pte_bits = 0;
+
+                       if (bits) {
+                               pte = pmap_pte(pmap, va);
+                               /* grab ref/mod bits from this PTE */
+                               pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
+                               /* propagate to page's global attributes */
+                               if (!is_ept) {
+                                       attributes |= pte_bits;
+                               } else {
+                                       attributes |= ept_refmod_to_physmap(pte_bits);
+                                       if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
+                                               ept_keep_global_mod = TRUE;
+                                       }
+                               }
+                               /* which bits to clear for this PTE? */
+                               if (!is_ept) {
+                                       pte_bits &= bits;
+                               } else {
+                                       pte_bits &= ept_bits_to_clear;
+                               }
+                       }
 
                         /*
                          * Clear modify and/or reference bits.
                          */
-                       pte = pmap_pte(pmap, va);
-                       attributes |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
-                       pmap_update_pte(pte, bits, 0);
-                       /* Ensure all processors using this translation
-                        * invalidate this TLB entry. The invalidation *must*
-                        * follow the PTE update, to ensure that the TLB
-                        * shadow of the 'D' bit (in particular) is
-                        * synchronized with the updated PTE.
-                        */
-                       if (options & PMAP_OPTIONS_NOFLUSH) {
-                               if (arg)
-                                       PMAP_UPDATE_TLBS_DELAYED(pmap, va, va + PAGE_SIZE, (pmap_flush_context *)arg);
-                       } else
-                               PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
+                       if (pte_bits) {
+                               pmap_update_pte(pte, bits, 0);
+
+                               /* Ensure all processors using this translation
+                                * invalidate this TLB entry. The invalidation
+                                * *must* follow the PTE update, to ensure that
+                                * the TLB shadow of the 'D' bit (in particular)
+                                * is synchronized with the updated PTE.
+                                */
+                               if (! (options & PMAP_OPTIONS_NOFLUSH)) {
+                                       /* flush TLBS now */
+                                       PMAP_UPDATE_TLBS(pmap,
+                                                        va,
+                                                        va + PAGE_SIZE);
+                               } else if (arg) {
+                                       /* delayed TLB flush: add "pmap" info */
+                                       PMAP_UPDATE_TLBS_DELAYED(
+                                               pmap,
+                                               va,
+                                               va + PAGE_SIZE,
+                                               (pmap_flush_context *)arg);
+                               } else {
+                                       /* no TLB flushing at all */
+                               }
+                       }
+
+                       /* update pmap "reusable" stats */
+                       if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
+                           is_reusable &&
+                           pmap != kernel_pmap) {
+                               /* one less "reusable" */
+                               assert(pmap->stats.reusable > 0);
+                               OSAddAtomic(-1, &pmap->stats.reusable);
+                               if (is_internal) {
+                                       /* one more "internal" */
+                                       OSAddAtomic(+1, &pmap->stats.internal);
+                                       PMAP_STATS_PEAK(pmap->stats.internal);
+                               } else {
+                                       /* one more "external" */
+                                       OSAddAtomic(+1, &pmap->stats.external);
+                                       PMAP_STATS_PEAK(pmap->stats.external);
+                               }
+                       } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
+                                  !is_reusable &&
+                                  pmap != kernel_pmap) {
+                               /* one more "reusable" */
+                               OSAddAtomic(+1, &pmap->stats.reusable);
+                               PMAP_STATS_PEAK(pmap->stats.reusable);
+                               if (is_internal) {
+                                       /* one less "internal" */
+                                       assert(pmap->stats.internal > 0);
+                                       OSAddAtomic(-1, &pmap->stats.internal);
+                               } else {
+                                       /* one less "external" */
+                                       assert(pmap->stats.external > 0);
+                                       OSAddAtomic(-1, &pmap->stats.external);
+                               }
+                       }
 
                        pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
 
@@ -1559,7 +1812,24 @@ phys_attribute_clear(
         */
 
        pmap_phys_attributes[pai] |= attributes;
-       pmap_phys_attributes[pai] &= (~bits);
+
+       if (ept_keep_global_mod) {
+               /*
+                * If the hardware doesn't support AD bits for EPT PTEs and someone is
+                * requesting that we clear the modified bit for a phys page, we need
+                * to ensure that there are no EPT mappings for the page with the
+                * modified bit set. If there are, we cannot clear the global modified bit.
+                */
+               bits &= ~PHYS_MODIFIED;
+       }
+       pmap_phys_attributes[pai] &= ~(bits);
+
+       /* update this page's "reusable" status */
+       if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
+               pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
+       } else if (options & PMAP_OPTIONS_SET_REUSABLE) {
+               pmap_phys_attributes[pai] |= PHYS_REUSABLE;
+       }
 
        UNLOCK_PVH(pai);
 
@@ -1581,9 +1851,11 @@ phys_attribute_test(
        int                     pai;
        pmap_t                  pmap;
        int                     attributes = 0;
+       boolean_t               is_ept;
 
        pmap_intr_assert();
        assert(pn != vm_page_fictitious_addr);
+       assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
        if (pn == vm_page_guard_addr)
                return 0;
 
@@ -1627,13 +1899,19 @@ phys_attribute_test(
                        vm_map_offset_t va;
 
                        pmap = pv_e->pmap;
+                       is_ept = is_ept_pmap(pmap);
                        va = pv_e->va;
                        /*
                         * pick up modify and/or reference bits from mapping
                         */
 
                        pte = pmap_pte(pmap, va);
-                       attributes |= (int)(*pte & bits);
+                       if (!is_ept) {
+                               attributes |= (int)(*pte & bits);
+                       } else {
+                               attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
+
+                       }
 
                        pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
 
@@ -1664,7 +1942,8 @@ pmap_change_wiring(
        PMAP_LOCK(map);
 
        if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
-               panic("pmap_change_wiring: pte missing");
+               panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
+                     map, vaddr, wired);
 
        if (wired && !iswired(*pte)) {
                /*
@@ -1672,7 +1951,7 @@ pmap_change_wiring(
                 */
                pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
                OSAddAtomic(+1,  &map->stats.wired_count);
-               pmap_update_pte(pte, 0, INTEL_PTE_WIRED);
+               pmap_update_pte(pte, 0, PTE_WIRED);
        }
        else if (!wired && iswired(*pte)) {
                /*
@@ -1681,7 +1960,7 @@ pmap_change_wiring(
                assert(map->stats.wired_count >= 1);
                OSAddAtomic(-1,  &map->stats.wired_count);
                pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
-               pmap_update_pte(pte, INTEL_PTE_WIRED, 0);
+               pmap_update_pte(pte, PTE_WIRED, 0);
        }
 
        PMAP_UNLOCK(map);
@@ -1742,185 +2021,39 @@ pmap_map_bd(
        return(virt);
 }
 
-void
-pmap_reusable(
+mach_vm_size_t
+pmap_query_resident(
        pmap_t          pmap,
        addr64_t        s64,
        addr64_t        e64,
-       boolean_t       reusable)
+       mach_vm_size_t  *compressed_bytes_p)
 {
        pt_entry_t     *pde;
        pt_entry_t     *spte, *epte;
        addr64_t        l64;
        uint64_t        deadline;
+       mach_vm_size_t  resident_bytes;
+       mach_vm_size_t  compressed_bytes;
+       boolean_t       is_ept;
 
        pmap_intr_assert();
 
-       if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
-               return;
-
-       PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_START,
-                  pmap,
-                  (uint32_t) (s64 >> 32), s64,
-                  (uint32_t) (e64 >> 32), e64);
-
-       PMAP_LOCK(pmap);
-
-       deadline = rdtsc64() + max_preemption_latency_tsc;
-
-       while (s64 < e64) {
-               l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
-               if (l64 > e64)
-                       l64 = e64;
-               pde = pmap_pde(pmap, s64);
-
-               if (pde && (*pde & INTEL_PTE_VALID)) {
-                       if (*pde & INTEL_PTE_PS) {
-                               /* superpage: not supported */
-                       } else {
-                               spte = pmap_pte(pmap,
-                                               (s64 & ~(pde_mapped_size - 1)));
-                               spte = &spte[ptenum(s64)];
-                               epte = &spte[intel_btop(l64 - s64)];
-                               pmap_reusable_range(pmap, s64, spte, epte,
-                                                   reusable);
-                       }
+       if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
+               if (compressed_bytes_p) {
+                       *compressed_bytes_p = 0;
                }
-               s64 = l64;
-
-               if (s64 < e64 && rdtsc64() >= deadline) {
-                       PMAP_UNLOCK(pmap);
-                       PMAP_LOCK(pmap);
-                       deadline = rdtsc64() + max_preemption_latency_tsc;
-               }
-       }
-
-       PMAP_UNLOCK(pmap);
-
-       PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_END,
-                  pmap, reusable, 0, 0, 0);
-}
-
-void
-pmap_reusable_range(
-       pmap_t                  pmap,
-       vm_map_offset_t         start_vaddr,
-       pt_entry_t              *spte,
-       pt_entry_t              *epte,
-       boolean_t               reusable)
-{
-       pt_entry_t              *cpte;
-       int                     num_external, num_internal, num_reusable;
-       ppnum_t                 pai;
-       pmap_paddr_t            pa;
-       vm_map_offset_t         vaddr;
-
-       num_external = 0;
-       num_internal = 0;
-       num_reusable = 0;
-
-       for (cpte = spte, vaddr = start_vaddr;
-            cpte < epte;
-            cpte++, vaddr += PAGE_SIZE_64) {
-
-               pa = pte_to_pa(*cpte);
-               if (pa == 0)
-                       continue;
-
-               pai = pa_index(pa);
-
-               LOCK_PVH(pai);
-
-               pa = pte_to_pa(*cpte);
-               if (pa == 0) {
-                       UNLOCK_PVH(pai);
-                       continue;
-               }
-               if (reusable) {
-                       /* we want to set "reusable" */
-                       if (IS_REUSABLE_PAGE(pai)) {
-                               /* already reusable: no change */
-                       } else {
-                               pmap_phys_attributes[pai] |= PHYS_REUSABLE;
-                               /* one more "reusable" */
-                               num_reusable++;
-                               if (IS_INTERNAL_PAGE(pai)) {
-                                       /* one less "internal" */
-                                       num_internal--;
-                               } else {
-                                       /* one less "external" */
-                                       num_external--;
-                               }
-                       }
-               } else {
-                       /* we want to clear "reusable" */
-                       if (IS_REUSABLE_PAGE(pai)) {
-                               pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
-                               /* one less "reusable" */
-                               num_reusable--;
-                               if (IS_INTERNAL_PAGE(pai)) {
-                                       /* one more "internal" */
-                                       num_internal++;
-                               } else {
-                                       /* one more "external" */
-                                       num_external++;
-                               }
-                       } else {
-                               /* already not reusable: no change */
-                       }
-               }
-
-               UNLOCK_PVH(pai);
-
-       } /* for loop */
-
-       /*
-        *      Update the counts
-        */
-       if (pmap != kernel_pmap) {
-               if (num_external) {
-                       OSAddAtomic(num_external, &pmap->stats.external);
-                       PMAP_STATS_PEAK(pmap->stats.external);
-               }
-               assert(pmap->stats.external >= 0);
-               if (num_internal) {
-                       OSAddAtomic(num_internal, &pmap->stats.internal);
-                       PMAP_STATS_PEAK(pmap->stats.internal);
-               }
-               assert(pmap->stats.internal >= 0);
-               if (num_reusable) {
-                       OSAddAtomic(num_reusable, &pmap->stats.reusable);
-                       PMAP_STATS_PEAK(pmap->stats.reusable);
-               }
-               assert(pmap->stats.reusable >= 0);
+               return 0;
        }
 
-       return;
-}
-
-unsigned int
-pmap_query_resident(
-       pmap_t          pmap,
-       addr64_t        s64,
-       addr64_t        e64)
-{
-       pt_entry_t     *pde;
-       pt_entry_t     *spte, *epte;
-       addr64_t        l64;
-       uint64_t        deadline;
-       unsigned int    result;
-
-       pmap_intr_assert();
-
-       if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
-               return 0;
+       is_ept = is_ept_pmap(pmap);
 
        PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
                   pmap,
                   (uint32_t) (s64 >> 32), s64,
                   (uint32_t) (e64 >> 32), e64);
 
-       result = 0;
+       resident_bytes = 0;
+       compressed_bytes = 0;
 
        PMAP_LOCK(pmap);
 
@@ -1932,8 +2065,8 @@ pmap_query_resident(
                        l64 = e64;
                pde = pmap_pde(pmap, s64);
 
-               if (pde && (*pde & INTEL_PTE_VALID)) {
-                       if (*pde & INTEL_PTE_PS) {
+               if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
+                       if (*pde & PTE_PS) {
                                /* superpage: not supported */
                        } else {
                                spte = pmap_pte(pmap,
@@ -1943,7 +2076,9 @@ pmap_query_resident(
 
                                for (; spte < epte; spte++) {
                                        if (pte_to_pa(*spte) != 0) {
-                                               result++;
+                                               resident_bytes += PAGE_SIZE;
+                                       } else if (*spte & PTE_COMPRESSED) {
+                                               compressed_bytes += PAGE_SIZE;
                                        }
                                }
 
@@ -1963,5 +2098,18 @@ pmap_query_resident(
        PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
                   pmap, 0, 0, 0, 0);
 
-       return result;
+       if (compressed_bytes_p) {
+               *compressed_bytes_p = compressed_bytes;
+       }
+       return resident_bytes;
+}
+
+#if MACH_ASSERT
+void
+pmap_set_process(
+       __unused pmap_t pmap,
+       __unused int pid,
+       __unused char *procname)
+{
 }
+#endif /* MACH_ASSERT */
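[Reference, not part of the patch] The reworked pmap_query_resident() reports byte counts rather than page counts and can optionally return the amount of compressed memory through an out parameter. A hypothetical caller (names assumed for illustration, not taken from the VM layer):

/* Hypothetical usage sketch. */
mach_vm_size_t resident, compressed;

resident = pmap_query_resident(map->pmap,
                               entry->vme_start,
                               entry->vme_end,
                               &compressed);
/* 'resident' counts bytes whose PTEs map a physical page;
 * 'compressed' counts bytes whose PTEs carry the compressed marker. */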