diff --git a/osfmk/i386/pmap_x86_common.c b/osfmk/i386/pmap_x86_common.c
index a8c3423b418f5d77e43b8caf9fa79d9f9b618317..b66630233d686a24a621d04f9c05173c34c10227 100644
--- a/osfmk/i386/pmap_x86_common.c
+++ b/osfmk/i386/pmap_x86_common.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * 
  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
+
+#include <mach_assert.h>
+
 #include <vm/pmap.h>
 #include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include <kern/ledger.h>
 #include <i386/pmap_internal.h>
 
-
 void           pmap_remove_range(
                        pmap_t          pmap,
                        vm_map_offset_t va,
                        pt_entry_t      *spte,
                        pt_entry_t      *epte);
 
-pv_rooted_entry_t      pv_head_table;          /* array of entries, one per
-                                                * page */
-thread_call_t          mapping_adjust_call;
-static thread_call_data_t mapping_adjust_call_data;
-uint32_t               mappingrecurse = 0;
+void           pmap_remove_range_options(
+                       pmap_t          pmap,
+                       vm_map_offset_t va,
+                       pt_entry_t      *spte,
+                       pt_entry_t      *epte,
+                       int             options);
+
+void           pmap_reusable_range(
+                       pmap_t          pmap,
+                       vm_map_offset_t va,
+                       pt_entry_t      *spte,
+                       pt_entry_t      *epte,
+                       boolean_t       reusable);
 
-pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[PMAP_PAGETABLE_CORRUPTION_MAX_LOG];
-uint32_t pmap_pagetable_corruption_incidents;
-uint64_t pmap_pagetable_corruption_last_abstime = (~(0ULL) >> 1);
-uint64_t pmap_pagetable_corruption_interval_abstime;
-thread_call_t  pmap_pagetable_corruption_log_call;
-static thread_call_data_t      pmap_pagetable_corruption_log_call_data;
-boolean_t pmap_pagetable_corruption_timeout = FALSE;
+uint32_t pmap_update_clear_pte_count;
 
 /*
  * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
@@ -89,6 +95,9 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
        unsigned int    i;
        uint64_t        num_pde;
 
+       assert(!is_ept_pmap(grand));
+       assert(!is_ept_pmap(subord));
+
        if ((size & (pmap_nesting_size_min-1)) ||
            (va_start & (pmap_nesting_size_min-1)) ||
            (nstart & (pmap_nesting_size_min-1)) ||
@@ -103,8 +112,8 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
                panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);
 
        PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
-           (int) grand, (int) subord,
-           (int) (va_start>>32), (int) va_start, 0);
+                  VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
+                  VM_KERNEL_ADDRHIDE(va_start));
 
        nvaddr = (vm_map_offset_t)nstart;
        num_pde = size >> PDESHIFT;
@@ -120,7 +129,7 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
 
                        while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
                                PMAP_UNLOCK(subord);
-                               pmap_expand_pdpt(subord, nvaddr);
+                               pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
                                PMAP_LOCK(subord);
                                npde = pmap64_pdpt(subord, nvaddr);
                        }
@@ -133,7 +142,7 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
 
                        while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
                                PMAP_UNLOCK(subord);
-                               pmap_expand(subord, nvaddr);
+                               pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
                                PMAP_LOCK(subord);
                                npde = pmap_pde(subord, nvaddr);
                        }
@@ -159,7 +168,7 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
                        pde = pmap64_pdpt(grand, vaddr);
                        if (0 == pde) {
                                PMAP_UNLOCK(grand);
-                               pmap_expand_pml4(grand, vaddr);
+                               pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
                                PMAP_LOCK(grand);
                                pde = pmap64_pdpt(grand, vaddr);
                        }
@@ -170,15 +179,14 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
                        i += (uint32_t) NPDEPG;
                }
                else {
-                       npde = pmap_pde(subord, nstart);
+                       npde = pmap_pde(subord, vaddr);
                        if (npde == 0)
-                               panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
+                               panic("pmap_nest: no npde, subord %p vaddr 0x%llx", subord, vaddr);
                        tpde = *npde;
-                       nstart += NBPDE;
                        pde = pmap_pde(grand, vaddr);
                        if ((0 == pde) && cpu_64bit) {
                                PMAP_UNLOCK(grand);
-                               pmap_expand_pdpt(grand, vaddr);
+                               pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
                                PMAP_LOCK(grand);
                                pde = pmap_pde(grand, vaddr);
                        }
@@ -193,7 +201,7 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
 
        PMAP_UNLOCK(grand);
 
-       PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
+       PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, KERN_SUCCESS);
 
        return KERN_SUCCESS;
 }
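
pmap_nest only accepts requests whose size and both start addresses are aligned
to the nesting granularity (pmap_nesting_size_min, one 2 MB PDE's reach on
x86_64), and it then installs one PDE per 2 MB chunk (num_pde = size >> PDESHIFT).
A minimal user-space sketch of that alignment test and chunk count, assuming the
2 MB granularity; the SKETCH_* names are invented for illustration, not kernel API.

/* sketch only: mirrors the alignment checks in pmap_nest() above */
#include <stdint.h>
#include <stdio.h>

#define SKETCH_NBPDE    (1ULL << 21)    /* 2 MB, the reach of one PDE */
#define SKETCH_PDESHIFT 21

static int nest_args_ok(uint64_t va_start, uint64_t nstart, uint64_t size)
{
        /* power-of-two alignment: all low bits must be zero */
        if ((size & (SKETCH_NBPDE - 1)) ||
            (va_start & (SKETCH_NBPDE - 1)) ||
            (nstart & (SKETCH_NBPDE - 1)))
                return 0;
        return size != 0;
}

int main(void)
{
        uint64_t va = 0x7f0000000000ULL;
        uint64_t size = 512ULL << 21;           /* 1 GB of nested mappings */

        if (nest_args_ok(va, va, size))
                printf("nesting %llu PDEs\n",
                       (unsigned long long)(size >> SKETCH_PDESHIFT));
        return 0;
}
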
@@ -208,7 +216,6 @@ kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t
  */
 
 kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
-                       
        pd_entry_t *pde;
        unsigned int i;
        uint64_t num_pde;
@@ -216,8 +223,7 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
        uint64_t npdpt = PMAP_INVALID_PDPTNUM;
 
        PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
-           (int) grand, 
-           (int) (vaddr>>32), (int) vaddr, 0, 0);
+                  VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
 
        if ((size & (pmap_nesting_size_min-1)) ||
            (vaddr & (pmap_nesting_size_min-1))) {
@@ -225,6 +231,8 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
                    grand, vaddr, size);
        }
 
+       assert(!is_ept_pmap(grand));
+
        /* align everything to PDE boundaries */
        va_start = vaddr & ~(NBPDE-1);
        va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
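
The two lines above round the unnest request outward to PDE boundaries: the start
rounds down and the end rounds up, so any partially covered 2 MB region is
unnested whole. A short worked example of that rounding with made-up addresses,
assuming NBPDE is 2 MB:

/* sketch only: the same round-down / round-up arithmetic */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        const uint64_t nbpde = 1ULL << 21;              /* 2 MB */
        uint64_t vaddr = 0x100345000ULL, size = 0x1000ULL;

        uint64_t start = vaddr & ~(nbpde - 1);                          /* 0x100200000 */
        uint64_t end   = (vaddr + size + nbpde - 1) & ~(nbpde - 1);     /* 0x100400000 */

        printf("unnest [0x%llx, 0x%llx)\n",
               (unsigned long long)start, (unsigned long long)end);
        return 0;
}
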
@@ -257,12 +265,21 @@ kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
        PMAP_UPDATE_TLBS(grand, va_start, va_end);
 
        PMAP_UNLOCK(grand);
-               
-       PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
+
+       PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, KERN_SUCCESS);
 
        return KERN_SUCCESS;
 }
 
+kern_return_t
+pmap_unnest_options(
+       pmap_t grand,
+       addr64_t vaddr,
+       __unused uint64_t size,
+       __unused unsigned int options) {
+       return pmap_unnest(grand, vaddr, size);
+}
+
 /* Invoked by the Mach VM to determine the platform specific unnest region */
 
 boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
@@ -306,6 +323,9 @@ pmap_find_phys(pmap_t pmap, addr64_t va)
        ppnum_t         ppn = 0;
        pd_entry_t      pde;
        pt_entry_t      pte;
+       boolean_t       is_ept;
+
+       is_ept = is_ept_pmap(pmap);
 
        mp_disable_preemption();
 
@@ -319,14 +339,14 @@ pmap_find_phys(pmap_t pmap, addr64_t va)
 
        pdep = pmap_pde(pmap, va);
 
-       if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
-               if (pde & INTEL_PTE_PS) {
+       if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
+               if (pde & PTE_PS) {
                        ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
                        ppn += (ppnum_t) ptenum(va);
                }
                else {
                        ptp = pmap_pte(pmap, va);
-                       if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
+                       if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
                                ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
                        }
                }
@@ -337,6 +357,86 @@ pfp_exit:
         return ppn;
 }
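
pmap_find_phys reads the PDE first; if the PS bit marks a 2 MB superpage, the
physical page number is the mapping's base frame plus the 4 KB page index of va
within that 2 MB region (ptenum(va)), otherwise it descends to the 4 KB PTE.
A small sketch of the superpage index arithmetic, assuming the usual x86-64
shifts (12 and 21); the helper below is an illustrative stand-in for
i386_btop()/ptenum(), not the kernel functions.

/* sketch only: page number covered by a 2 MB mapping */
#include <stdint.h>
#include <stdio.h>

static uint64_t superpage_ppn(uint64_t pde_pa, uint64_t va)
{
        uint64_t base = pde_pa >> 12;           /* physical base frame */
        uint64_t idx  = (va >> 12) & 0x1ff;     /* 512 4 KB pages per PDE */
        return base + idx;
}

int main(void)
{
        /* a 2 MB mapping whose physical base is 1 GB */
        printf("ppn = 0x%llx\n",
               (unsigned long long)superpage_ppn(0x40000000ULL, 0x7f0000123456ULL));
        return 0;
}
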
 
+/*
+ * Update cache attributes for all extant managed mappings.
+ * Assumes PV for this page is locked, and that the page
+ * is managed. We assume that this physical page may be mapped in
+ * both EPT and normal Intel PTEs, so we convert the attributes
+ * to the corresponding format for each pmap.
+ *
+ * We assert that the passed set of attributes is a subset of the
+ * PHYS_CACHEABILITY_MASK.
+ */
+void
+pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
+       pv_rooted_entry_t       pv_h, pv_e;
+       pv_hashed_entry_t       pvh_e, nexth;
+       vm_map_offset_t vaddr;
+       pmap_t  pmap;
+       pt_entry_t      *ptep;
+       boolean_t       is_ept;
+       unsigned        ept_attributes;
+       
+       assert(IS_MANAGED_PAGE(pn));
+       assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);
+
+       /* We don't support the PTA bit for EPT PTEs */
+       if (attributes & INTEL_PTE_NCACHE)
+               ept_attributes = INTEL_EPT_NCACHE;
+       else
+               ept_attributes = INTEL_EPT_WB;
+
+       pv_h = pai_to_pvh(pn);
+       /* TODO: translate the PHYS_* bits to PTE bits, while they're
+        * currently identical, they may not remain so
+        * Potential optimization (here and in page_protect),
+        * parallel shootdowns, check for redundant
+        * attribute modifications.
+        */
+       
+       /*
+        * Alter attributes on all mappings
+        */
+       if (pv_h->pmap != PMAP_NULL) {
+               pv_e = pv_h;
+               pvh_e = (pv_hashed_entry_t)pv_e;
+
+               do {
+                       pmap = pv_e->pmap;
+                       vaddr = PVE_VA(pv_e);
+                       ptep = pmap_pte(pmap, vaddr);
+                       
+                       if (0 == ptep)
+                               panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
+
+                       is_ept = is_ept_pmap(pmap);
+
+                       nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
+                       if (!is_ept) {
+                               pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
+                       } else {
+                               pmap_update_pte(ptep, INTEL_EPT_CACHE_MASK, ept_attributes);
+                       }
+                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
+                       pvh_e = nexth;
+               } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
+       }
+}
+
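
pmap_update_cache_attributes_locked walks the circular PV list rooted at pv_h,
capturing the next link before it touches the current entry and stopping when
the walk wraps back to the root. A stand-in user-space sketch of that traversal
pattern over a circular list; the pv_sketch type and field names are invented
for illustration.

/* sketch only: circular-list walk that grabs the next pointer first */
#include <stdio.h>

struct pv_sketch {
        struct pv_sketch *next;         /* last entry points back to the head */
        int mapping_id;
};

static void update_all(struct pv_sketch *head)
{
        struct pv_sketch *e = head;
        do {
                struct pv_sketch *nexth = e->next;      /* save link before updating */
                printf("updating mapping %d\n", e->mapping_id);
                e = nexth;
        } while (e != head);
}

int main(void)
{
        struct pv_sketch c = { NULL, 3 }, b = { &c, 2 }, a = { &b, 1 };
        c.next = &a;                    /* close the ring */
        update_all(&a);
        return 0;
}
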
+void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
+       assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
+
+       if (dofilter) {
+               CPU_CR3_MARK_INACTIVE();
+       } else {
+               CPU_CR3_MARK_ACTIVE();
+               mfence();
+               if (current_cpu_datap()->cpu_tlb_invalid)
+                       process_pmap_updates();
+       }
+}
+
+
 /*
  *     Insert the given physical page (p) at
  *     the specified virtual address (v) in the
@@ -349,18 +449,36 @@ pfp_exit:
  *     or lose information.  That is, this routine must actually
  *     insert this page into the given map NOW.
  */
-void
+
+kern_return_t
 pmap_enter(
-       register pmap_t         pmap,
+       pmap_t          pmap,
        vm_map_offset_t         vaddr,
        ppnum_t                 pn,
        vm_prot_t               prot,
+       vm_prot_t               fault_type,
        unsigned int            flags,
        boolean_t               wired)
+{
+       return pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
+}
+
+
+kern_return_t
+pmap_enter_options(
+       pmap_t          pmap,
+       vm_map_offset_t         vaddr,
+       ppnum_t                 pn,
+       vm_prot_t               prot,
+       __unused vm_prot_t      fault_type,
+       unsigned int            flags,
+       boolean_t               wired,
+       unsigned int            options,
+       void                    *arg)
 {
        pt_entry_t              *pte;
        pv_rooted_entry_t       pv_h;
-       int                     pai;
+       ppnum_t                 pai;
        pv_hashed_entry_t       pvh_e;
        pv_hashed_entry_t       pvh_new;
        pt_entry_t              template;
@@ -373,27 +491,42 @@ pmap_enter(
        /* 2MiB mappings are confined to x86_64 by VM */
        boolean_t               superpage = flags & VM_MEM_SUPERPAGE;
        vm_object_t             delpage_pm_obj = NULL;
-       int                     delpage_pde_index = 0;
+       uint64_t                delpage_pde_index = 0;
        pt_entry_t              old_pte;
+       kern_return_t           kr;
+       boolean_t               is_ept;
+       boolean_t               is_altacct;
+
+       kr = KERN_FAILURE;
 
        pmap_intr_assert();
-       assert(pn != vm_page_fictitious_addr);
 
        if (pmap == PMAP_NULL)
-               return;
+               return KERN_INVALID_ARGUMENT;
+
+       is_ept = is_ept_pmap(pmap);
+
+       /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
+        * unused value for that scenario.
+        */
+       assert(pn != vm_page_fictitious_addr);
+
        if (pn == vm_page_guard_addr)
-               return;
+               return KERN_INVALID_ARGUMENT;
 
        PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
-                  pmap,
-                  (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
-                  pn, prot);
+                  VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(vaddr), pn,
+                  prot);
 
        if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
                set_NX = FALSE;
        else
                set_NX = TRUE;
 
+       if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
+               set_NX = FALSE;
+       }
+
        /*
         *      Must allocate a new pvlist entry while we're unlocked;
         *      zalloc may cause pageout (which will lock the pmap system).
@@ -413,11 +546,13 @@ Retry:
         *      pmap is always expanded to include enough hardware
         *      pages to map one VM page.
         */
-        if(superpage) {
+        if (superpage) {
                while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
                        /* need room for another pde entry */
                        PMAP_UNLOCK(pmap);
-                       pmap_expand_pdpt(pmap, vaddr);
+                       kr = pmap_expand_pdpt(pmap, vaddr, options);
+                       if (kr != KERN_SUCCESS)
+                               goto done;
                        PMAP_LOCK(pmap);
                }
        } else {
@@ -427,28 +562,59 @@ Retry:
                         * going to grow pde level page(s)
                         */
                        PMAP_UNLOCK(pmap);
-                       pmap_expand(pmap, vaddr);
+                       kr = pmap_expand(pmap, vaddr, options);
+                       if (kr != KERN_SUCCESS)
+                               goto done;
                        PMAP_LOCK(pmap);
                }
        }
+       if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
+               PMAP_UNLOCK(pmap);
+               kr = KERN_SUCCESS;
+               goto done;
+       }
 
-       if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
+       if (superpage && *pte && !(*pte & PTE_PS)) {
                /*
                 * There is still an empty page table mapped that
                 * was used for a previous base page mapping.
                 * Remember the PDE and the PDE index, so that we
                 * can free the page at the end of this function.
                 */
-               delpage_pde_index = (int)pdeidx(pmap, vaddr);
+               delpage_pde_index = pdeidx(pmap, vaddr);
                delpage_pm_obj = pmap->pm_obj;
                *pte = 0;
        }
 
-
        old_pa = pte_to_pa(*pte);
        pai = pa_index(old_pa);
        old_pa_locked = FALSE;
 
+       if (old_pa == 0 &&
+           PTE_IS_COMPRESSED(*pte)) {
+               /*
+                * "pmap" should be locked at this point, so this should
+                * not race with another pmap_enter() or pmap_remove_range().
+                */
+               assert(pmap != kernel_pmap);
+
+               /* one less "compressed" */
+               OSAddAtomic64(-1, &pmap->stats.compressed);
+               pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
+                                 PAGE_SIZE);
+               if (*pte & PTE_COMPRESSED_ALT) {
+                       pmap_ledger_debit(
+                               pmap,
+                               task_ledgers.alternate_accounting_compressed,
+                               PAGE_SIZE);
+               } else {
+                       /* was part of the footprint */
+                       pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
+                                         PAGE_SIZE);
+               }
+               /* marker will be cleared below */
+       }
+
        /*
         * if we have a previous managed page, lock the pv entry now. after
         * we lock it, check to see if someone beat us to the lock and if so
@@ -469,50 +635,106 @@ Retry:
         *      at this address.
         */
        if (old_pa == pa) {
+               pt_entry_t old_attributes =
+                   *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept));
 
                /*
                 *      May be changing its wired attribute or protection
                 */
 
-               template = pa_to_pte(pa) | INTEL_PTE_VALID;
+               template =  pa_to_pte(pa);
+
+               /* ?: WORTH ASSERTING THAT AT LEAST ONE RWX (implicit valid) PASSED FOR EPT? */
+               if (!is_ept) {
+                       template |= INTEL_PTE_VALID;
+               } else {
+                       template |= INTEL_EPT_IPTA;
+               }
+
+               template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
 
-               if (VM_MEM_NOT_CACHEABLE ==
-                   (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
+               /*
+                * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
+                */
+               if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
+                   (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
                        if (!(flags & VM_MEM_GUARDED))
                                template |= INTEL_PTE_PTA;
                        template |= INTEL_PTE_NCACHE;
                }
-               if (pmap != kernel_pmap)
+               if (pmap != kernel_pmap && !is_ept)
                        template |= INTEL_PTE_USER;
-               if (prot & VM_PROT_WRITE)
-                       template |= INTEL_PTE_WRITE;
+
+               if (prot & VM_PROT_READ)
+                       template |= PTE_READ(is_ept);
+
+               if (prot & VM_PROT_WRITE) {
+                       template |= PTE_WRITE(is_ept);
+                       if (is_ept && !pmap_ept_support_ad) {
+                               template |= PTE_MOD(is_ept);
+                               if (old_pa_locked) {
+                                       assert(IS_MANAGED_PAGE(pai));
+                                       pmap_phys_attributes[pai] |= PHYS_MODIFIED;
+                               }
+                       }
+               }
+               if (prot & VM_PROT_EXECUTE) {
+                       assert(set_NX == 0);
+                       template = pte_set_ex(template, is_ept);
+               }
 
                if (set_NX)
-                       template |= INTEL_PTE_NX;
+                       template = pte_remove_ex(template, is_ept);
 
                if (wired) {
-                       template |= INTEL_PTE_WIRED;
-                       if (!iswired(*pte))
-                               OSAddAtomic(+1,
-                                       &pmap->stats.wired_count);
+                       template |= PTE_WIRED;
+                       if (!iswired(old_attributes))  {
+                               OSAddAtomic(+1, &pmap->stats.wired_count);
+                               pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
+                       }
                } else {
-                       if (iswired(*pte)) {
+                       if (iswired(old_attributes)) {
                                assert(pmap->stats.wired_count >= 1);
-                               OSAddAtomic(-1,
-                                       &pmap->stats.wired_count);
+                               OSAddAtomic(-1, &pmap->stats.wired_count);
+                               pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
                        }
                }
+
                if (superpage)          /* this path can not be used */
-                       template |= INTEL_PTE_PS;       /* to change the page size! */
+                       template |= PTE_PS;     /* to change the page size! */
+
+               if (old_attributes == template)
+                       goto dont_update_pte;
+
+               /* Determine delta, PV locked */
+               need_tlbflush =
+                   ((old_attributes ^ template) != PTE_WIRED);
+
+               if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
+                       if ((old_attributes ^ template) == PTE_WRITE(is_ept))
+                               need_tlbflush = FALSE;
+               }
+
+               /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
+               if (is_ept && !pmap_ept_support_ad) {
+                       template |= PTE_REF(is_ept);
+                       if (old_pa_locked) {
+                               assert(IS_MANAGED_PAGE(pai));
+                               pmap_phys_attributes[pai] |= PHYS_REFERENCED;
+                       }
+               }
 
                /* store modified PTE and preserve RC bits */
-               pmap_update_pte(pte, *pte,
-                       template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
+               pt_entry_t npte, opte;
+               do {
+                       opte = *pte;
+                       npte = template | (opte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
+               } while (!pmap_cmpx_pte(pte, opte, npte));
+dont_update_pte:
                if (old_pa_locked) {
                        UNLOCK_PVH(pai);
                        old_pa_locked = FALSE;
                }
-               need_tlbflush = TRUE;
                goto Done;
        }
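
When only the attributes of an existing mapping change, the new PTE is rebuilt
from the template plus whatever referenced/modified bits the hardware has set in
the meantime, and it is published with a compare-and-exchange that retries on
contention. A user-space analogue of that retry loop using C11 atomics;
pmap_cmpx_pte is a kernel primitive, so the bit values and names below are
illustrative only.

/* sketch only: update a 64-bit PTE word while preserving R/M bits */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define SK_REF  (1ULL << 5)     /* accessed bit, as in an x86 PTE */
#define SK_MOD  (1ULL << 6)     /* dirty bit */

static void update_pte_preserving_rc(_Atomic uint64_t *pte, uint64_t template)
{
        uint64_t opte, npte;
        do {
                opte = atomic_load(pte);
                npte = template | (opte & (SK_REF | SK_MOD));   /* keep HW-set R/M */
        } while (!atomic_compare_exchange_weak(pte, &opte, npte));
}

int main(void)
{
        _Atomic uint64_t pte = 0x80000000000ULL | SK_MOD;       /* frame + dirty */
        update_pte_preserving_rc(&pte, 0x90000000000ULL | 0x3); /* valid|write */
        printf("pte = 0x%llx\n", (unsigned long long)atomic_load(&pte));
        return 0;
}
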
 
@@ -530,6 +752,7 @@ Retry:
         */
 
        if (old_pa != (pmap_paddr_t) 0) {
+               boolean_t       was_altacct = FALSE;
 
                /*
                 *      Don't do anything to pages outside valid memory here.
@@ -538,42 +761,82 @@ Retry:
                 */
 
                /* invalidate the PTE */
-               pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
+               pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
                /* propagate invalidate everywhere */
                PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
                /* remember reference and change */
                old_pte = *pte;
-               oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
+               oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
                /* completely invalidate the PTE */
                pmap_store_pte(pte, 0);
 
                if (IS_MANAGED_PAGE(pai)) {
-#if TESTING
-                       if (pmap->stats.resident_count < 1)
-                               panic("pmap_enter: resident_count");
-#endif
-                       assert(pmap->stats.resident_count >= 1);
-                       OSAddAtomic(-1,
-                               &pmap->stats.resident_count);
-
-                       if (iswired(*pte)) {
-#if TESTING
-                               if (pmap->stats.wired_count < 1)
-                                       panic("pmap_enter: wired_count");
-#endif
-                               assert(pmap->stats.wired_count >= 1);
-                               OSAddAtomic(-1,
-                                       &pmap->stats.wired_count);
-                       }
-                       pmap_phys_attributes[pai] |= oattr;
-
                        /*
                         *      Remove the mapping from the pvlist for
                         *      this physical page.
                         *      We'll end up with either a rooted pv or a
                         *      hashed pv
                         */
-                       pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);
+                       pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte, &was_altacct);
+               }
+
+               if (IS_MANAGED_PAGE(pai)) {
+                       pmap_assert(old_pa_locked == TRUE);
+                       pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
+                       assert(pmap->stats.resident_count >= 1);
+                       OSAddAtomic(-1, &pmap->stats.resident_count);
+                       if (pmap != kernel_pmap) {
+                               /* update pmap stats */
+                               if (IS_REUSABLE_PAGE(pai)) {
+                                       PMAP_STATS_ASSERTF(
+                                               (pmap->stats.reusable > 0,
+                                                "reusable %d",
+                                                pmap->stats.reusable));
+                                       OSAddAtomic(-1, &pmap->stats.reusable);
+                               } else if (IS_INTERNAL_PAGE(pai)) {
+                                       PMAP_STATS_ASSERTF(
+                                               (pmap->stats.internal > 0,
+                                                "internal %d",
+                                                pmap->stats.internal));
+                                       OSAddAtomic(-1, &pmap->stats.internal);
+                               } else {
+                                       PMAP_STATS_ASSERTF(
+                                               (pmap->stats.external > 0,
+                                                "external %d",
+                                                pmap->stats.external));
+                                       OSAddAtomic(-1, &pmap->stats.external);
+                               }
+
+                               /* update ledgers */
+                               if (was_altacct) {
+                                       assert(IS_INTERNAL_PAGE(pai));
+                                       pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
+                                       pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
+                               } else if (IS_REUSABLE_PAGE(pai)) {
+                                       assert(!was_altacct);
+                                       assert(IS_INTERNAL_PAGE(pai));
+                                       /* was already not in phys_footprint */
+                               } else if (IS_INTERNAL_PAGE(pai)) {
+                                       assert(!was_altacct);
+                                       assert(!IS_REUSABLE_PAGE(pai));
+                                       pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
+                                       pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
+                               } else {
+                                       /* not an internal page */
+                               }
+                       }
+                       if (iswired(*pte)) {
+                               assert(pmap->stats.wired_count >= 1);
+                               OSAddAtomic(-1, &pmap->stats.wired_count);
+                               pmap_ledger_debit(pmap, task_ledgers.wired_mem,
+                                   PAGE_SIZE);
+                       }
+
+                       if (!is_ept) {
+                               pmap_phys_attributes[pai] |= oattr;
+                       } else {
+                               pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
+                       }
 
                } else {
 
@@ -582,10 +845,16 @@ Retry:
                         *      Do removal part of accounting.
                         */
 
+                       if (pmap != kernel_pmap) {
+#if 00
+                               assert(pmap->stats.device > 0);
+                               OSAddAtomic(-1, &pmap->stats.device);
+#endif
+                       }
                        if (iswired(*pte)) {
                                assert(pmap->stats.wired_count >= 1);
-                               OSAddAtomic(-1,
-                                       &pmap->stats.wired_count);
+                               OSAddAtomic(-1, &pmap->stats.wired_count);
+                               pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
                        }
                }
        }
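
The ledger updates above follow a single rule: a removed anonymous (internal)
page is debited from phys_footprint only if it was neither reusable nor
alternate-accounted, because those two classes were never charged to the
footprint when they were entered. A compact restatement of that decision, with
invented flag names; only the branch structure mirrors the kernel logic.

/* sketch only: which ledgers a removed mapping is debited from */
#include <stdio.h>

enum { SK_INTERNAL = 1, SK_REUSABLE = 2, SK_ALTACCT = 4 };

static void debit_for_removed_page(int flags)
{
        if (flags & SK_ALTACCT) {
                puts("debit internal + alternate_accounting (not in footprint)");
        } else if (flags & SK_REUSABLE) {
                puts("no internal/footprint debit (was never in footprint)");
        } else if (flags & SK_INTERNAL) {
                puts("debit internal + phys_footprint");
        } else {
                puts("external page: no internal ledgers involved");
        }
}

int main(void)
{
        debit_for_removed_page(SK_INTERNAL);
        debit_for_removed_page(SK_INTERNAL | SK_REUSABLE);
        return 0;
}
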
@@ -613,9 +882,28 @@ Retry:
                        /*
                         *      No mappings yet, use rooted pv
                         */
-                       pv_h->va = vaddr;
+                       pv_h->va_and_flags = vaddr;
                        pv_h->pmap = pmap;
                        queue_init(&pv_h->qlink);
+
+                       if (options & PMAP_OPTIONS_INTERNAL) {
+                               pmap_phys_attributes[pai] |= PHYS_INTERNAL;
+                       } else {
+                               pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
+                       }
+                       if (options & PMAP_OPTIONS_REUSABLE) {
+                               pmap_phys_attributes[pai] |= PHYS_REUSABLE;
+                       } else {
+                               pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
+                       }
+                       if ((options & PMAP_OPTIONS_ALT_ACCT) &&
+                           IS_INTERNAL_PAGE(pai)) {
+                               pv_h->va_and_flags |= PVE_IS_ALTACCT;
+                               is_altacct = TRUE;
+                       } else {
+                               pv_h->va_and_flags &= ~PVE_IS_ALTACCT;
+                               is_altacct = FALSE;
+                       }
                } else {
                        /*
                         *      Add new pv_hashed_entry after header.
@@ -624,7 +912,7 @@ Retry:
                                pvh_e = pvh_new;
                                pvh_new = PV_HASHED_ENTRY_NULL;
                        } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
-                               PV_HASHED_ALLOC(pvh_e);
+                               PV_HASHED_ALLOC(&pvh_e);
                                if (PV_HASHED_ENTRY_NULL == pvh_e) {
                                        /*
                                         * the pv list is empty. if we are on
@@ -636,22 +924,31 @@ Retry:
                                         * us.
                                         */
                                        if (kernel_pmap == pmap) {
-                                               PV_HASHED_KERN_ALLOC(pvh_e);
+                                               PV_HASHED_KERN_ALLOC(&pvh_e);
                                        } else {
                                                UNLOCK_PVH(pai);
                                                PMAP_UNLOCK(pmap);
+                                               pmap_pv_throttle(pmap);
                                                pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
                                                goto Retry;
                                        }
                                }
                        }
-                       
+
                        if (PV_HASHED_ENTRY_NULL == pvh_e)
                                panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
 
-                       pvh_e->va = vaddr;
+                       pvh_e->va_and_flags = vaddr;
                        pvh_e->pmap = pmap;
                        pvh_e->ppn = pn;
+                       if ((options & PMAP_OPTIONS_ALT_ACCT) &&
+                           IS_INTERNAL_PAGE(pai)) {
+                               pvh_e->va_and_flags |= PVE_IS_ALTACCT;
+                               is_altacct = TRUE;
+                       } else {
+                               pvh_e->va_and_flags &= ~PVE_IS_ALTACCT;
+                               is_altacct = FALSE;
+                       }
                        pv_hash_add(pvh_e, pv_h);
 
                        /*
@@ -664,15 +961,57 @@ Retry:
                 * only count the mapping
                 * for 'managed memory'
                 */
-               OSAddAtomic(+1,  & pmap->stats.resident_count);
+               pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
+               OSAddAtomic(+1,  &pmap->stats.resident_count);
                if (pmap->stats.resident_count > pmap->stats.resident_max) {
                        pmap->stats.resident_max = pmap->stats.resident_count;
                }
+               if (pmap != kernel_pmap) {
+                       /* update pmap stats */
+                       if (IS_REUSABLE_PAGE(pai)) {
+                               OSAddAtomic(+1, &pmap->stats.reusable);
+                               PMAP_STATS_PEAK(pmap->stats.reusable);
+                       } else if (IS_INTERNAL_PAGE(pai)) {
+                               OSAddAtomic(+1, &pmap->stats.internal);
+                               PMAP_STATS_PEAK(pmap->stats.internal);
+                       } else {
+                               OSAddAtomic(+1, &pmap->stats.external);
+                               PMAP_STATS_PEAK(pmap->stats.external);
+                       }
+
+                       /* update ledgers */
+                       if (is_altacct) {
+                               /* internal but also alternate accounting */
+                               assert(IS_INTERNAL_PAGE(pai));
+                               pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
+                               pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
+                               /* alternate accounting, so not in footprint */
+                       } else if (IS_REUSABLE_PAGE(pai)) {
+                               assert(!is_altacct);
+                               assert(IS_INTERNAL_PAGE(pai));
+                               /* internal but reusable: not in footprint */
+                       } else if (IS_INTERNAL_PAGE(pai)) {
+                               assert(!is_altacct);
+                               assert(!IS_REUSABLE_PAGE(pai));
+                               /* internal: add to footprint */
+                               pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
+                               pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
+                       } else {
+                               /* not internal: not in footprint */
+                       }
+               }
        } else if (last_managed_page == 0) {
                /* Account for early mappings created before "managed pages"
                 * are determined. Consider consulting the available DRAM map.
                 */
+               pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
                OSAddAtomic(+1,  &pmap->stats.resident_count);
+               if (pmap != kernel_pmap) {
+#if 00
+                       OSAddAtomic(+1, &pmap->stats.device);
+                       PMAP_STATS_PEAK(pmap->stats.device);
+#endif
+               }
        }
        /*
         * Step 3) Enter the mapping.
@@ -680,25 +1019,64 @@ Retry:
         *      Build a template to speed up entering -
         *      only the pfn changes.
         */
-       template = pa_to_pte(pa) | INTEL_PTE_VALID;
+       template = pa_to_pte(pa);
+
+       if (!is_ept) {
+               template |= INTEL_PTE_VALID;
+       } else {
+               template |= INTEL_EPT_IPTA;
+       }
+
+
+       /*
+        * DRK: It may be worth asserting on cache attribute flags that diverge
+        * from the existing physical page attributes.
+        */
 
-       if (flags & VM_MEM_NOT_CACHEABLE) {
+       template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
+
+       /*
+        * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
+        */
+       if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
                if (!(flags & VM_MEM_GUARDED))
                        template |= INTEL_PTE_PTA;
                template |= INTEL_PTE_NCACHE;
        }
-       if (pmap != kernel_pmap)
+       if (pmap != kernel_pmap && !is_ept)
                template |= INTEL_PTE_USER;
-       if (prot & VM_PROT_WRITE)
-               template |= INTEL_PTE_WRITE;
+       if (prot & VM_PROT_READ)
+               template |= PTE_READ(is_ept);
+       if (prot & VM_PROT_WRITE) {
+               template |= PTE_WRITE(is_ept);
+               if (is_ept && !pmap_ept_support_ad) {
+                       template |= PTE_MOD(is_ept);
+                       if (IS_MANAGED_PAGE(pai))
+                               pmap_phys_attributes[pai] |= PHYS_MODIFIED;
+               }
+       }
+       if (prot & VM_PROT_EXECUTE) {
+               assert(set_NX == 0);
+               template = pte_set_ex(template, is_ept);
+       }
+
        if (set_NX)
-               template |= INTEL_PTE_NX;
+               template = pte_remove_ex(template, is_ept);
        if (wired) {
                template |= INTEL_PTE_WIRED;
                OSAddAtomic(+1,  & pmap->stats.wired_count);
+               pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
        }
        if (superpage)
                template |= INTEL_PTE_PS;
+
+       /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
+       if (is_ept && !pmap_ept_support_ad) {
+               template |= PTE_REF(is_ept);
+               if (IS_MANAGED_PAGE(pai))
+                       pmap_phys_attributes[pai] |= PHYS_REFERENCED;
+       }
+
        pmap_store_pte(pte, template);
 
        /*
@@ -710,9 +1088,12 @@ Retry:
                UNLOCK_PVH(pai);
        }
 Done:
-       if (need_tlbflush == TRUE)
-               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-
+       if (need_tlbflush == TRUE) {
+               if (options & PMAP_OPTIONS_NOFLUSH)
+                       PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
+               else
+                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
+       }
        if (pvh_e != PV_HASHED_ENTRY_NULL) {
                PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
        }
@@ -725,15 +1106,19 @@ Done:
                vm_page_t m;
 
                vm_object_lock(delpage_pm_obj);
-               m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
+               m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
                if (m == VM_PAGE_NULL)
                    panic("pmap_enter: pte page not in object");
                VM_PAGE_FREE(m);
-               OSAddAtomic(-1,  &inuse_ptepages_count);
                vm_object_unlock(delpage_pm_obj);
+               OSAddAtomic(-1,  &inuse_ptepages_count);
+               PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
        }
 
-       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
+       kr = KERN_SUCCESS;
+done:
+       PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
+       return kr;
 }
 
 /*
@@ -754,6 +1139,18 @@ pmap_remove_range(
        vm_map_offset_t         start_vaddr,
        pt_entry_t              *spte,
        pt_entry_t              *epte)
+{
+       pmap_remove_range_options(pmap, start_vaddr, spte, epte,
+                                 PMAP_OPTIONS_REMOVE);
+}
+
+void
+pmap_remove_range_options(
+       pmap_t                  pmap,
+       vm_map_offset_t         start_vaddr,
+       pt_entry_t              *spte,
+       pt_entry_t              *epte,
+       int                     options)
 {
        pt_entry_t              *cpte;
        pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
@@ -761,27 +1158,28 @@ pmap_remove_range(
        pv_hashed_entry_t       pvh_e;
        int                     pvh_cnt = 0;
        int                     num_removed, num_unwired, num_found, num_invalid;
-       int                     pai;
+       int                     stats_external, stats_internal, stats_reusable;
+       uint64_t                stats_compressed;
+       int                     ledgers_internal, ledgers_alt_internal;
+       uint64_t                ledgers_compressed, ledgers_alt_compressed;
+       ppnum_t                 pai;
        pmap_paddr_t            pa;
        vm_map_offset_t         vaddr;
+       boolean_t               is_ept = is_ept_pmap(pmap);
+       boolean_t               was_altacct;
 
        num_removed = 0;
        num_unwired = 0;
        num_found   = 0;
        num_invalid = 0;
-#if    defined(__i386__)
-       if (pmap != kernel_pmap &&
-           pmap->pm_task_map == TASK_MAP_32BIT &&
-           start_vaddr >= HIGH_MEM_BASE) {
-               /*
-                * The range is in the "high_shared_pde" which is shared
-                * between the kernel and all 32-bit tasks.  It holds
-                * the 32-bit commpage but also the trampolines, GDT, etc...
-                * so we can't let user tasks remove anything from it.
-                */
-               return;
-       }
-#endif
+       stats_external = 0;
+       stats_internal = 0;
+       stats_reusable = 0;
+       stats_compressed = 0;
+       ledgers_internal = 0;
+       ledgers_compressed = 0;
+       ledgers_alt_internal = 0;
+       ledgers_alt_compressed = 0;
        /* invalidate the PTEs first to "freeze" them */
        for (cpte = spte, vaddr = start_vaddr;
             cpte < epte;
@@ -789,8 +1187,23 @@ pmap_remove_range(
                pt_entry_t p = *cpte;
 
                pa = pte_to_pa(p);
-               if (pa == 0)
+               if (pa == 0) {
+                       if ((options & PMAP_OPTIONS_REMOVE) &&
+                           (PTE_IS_COMPRESSED(p))) {
+                               assert(pmap != kernel_pmap);
+                               /* one less "compressed"... */
+                               stats_compressed++;
+                               ledgers_compressed++;
+                               if (p & PTE_COMPRESSED_ALT) {
+                                       /* ... but it used to be "ALTACCT" */
+                                       ledgers_alt_compressed++;
+                               }
+                               /* clear marker(s) */
+                               /* XXX probably does not need to be atomic! */
+                               pmap_update_pte(cpte, INTEL_PTE_COMPRESSED_MASK, 0);
+                       }
                        continue;
+               }
                num_found++;
 
                if (iswired(p))
@@ -807,11 +1220,11 @@ pmap_remove_range(
                        continue;
                }
 
-               if ((p & INTEL_PTE_VALID) == 0)
+               if ((p & PTE_VALID_MASK(is_ept)) == 0)
                        num_invalid++;
 
-               /* invalidate the PTE */ 
-               pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
+               /* invalidate the PTE */
+               pmap_update_pte(cpte, PTE_VALID_MASK(is_ept), 0);
        }
 
        if (num_found == 0) {
@@ -828,8 +1241,27 @@ pmap_remove_range(
             cpte++, vaddr += PAGE_SIZE_64) {
 
                pa = pte_to_pa(*cpte);
-               if (pa == 0)
+               if (pa == 0) {
+               check_pte_for_compressed_marker:
+                       /*
+                        * This PTE could have been replaced with a
+                        * "compressed" marker after our first "freeze"
+                        * loop above, so check again.
+                        */
+                       if ((options & PMAP_OPTIONS_REMOVE) &&
+                           (PTE_IS_COMPRESSED(*cpte))) {
+                               assert(pmap != kernel_pmap);
+                               /* one less "compressed"... */
+                               stats_compressed++;
+                               ledgers_compressed++;
+                               if (*cpte & PTE_COMPRESSED_ALT) {
+                                       /* ... but it used to be "ALTACCT" */
+                                       ledgers_alt_compressed++;
+                               }
+                               pmap_store_pte(cpte, 0);
+                       }
                        continue;
+               }
 
                pai = pa_index(pa);
 
@@ -838,22 +1270,54 @@ pmap_remove_range(
                pa = pte_to_pa(*cpte);
                if (pa == 0) {
                        UNLOCK_PVH(pai);
-                       continue;
+                       goto check_pte_for_compressed_marker;
                }
+
+               /*
+                * Remove the mapping from the pvlist for this physical page.
+                */
+               pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte, &was_altacct);
+
                num_removed++;
+               /* update pmap stats */
+               if (IS_REUSABLE_PAGE(pai)) {
+                       stats_reusable++;
+               } else if (IS_INTERNAL_PAGE(pai)) {
+                       stats_internal++;
+               } else {
+                       stats_external++;
+               }
+               /* update ledgers */
+               if (was_altacct) {
+                       /* internal and alternate accounting */
+                       assert(IS_INTERNAL_PAGE(pai));
+                       ledgers_internal++;
+                       ledgers_alt_internal++;
+               } else if (IS_REUSABLE_PAGE(pai)) {
+                       /* internal but reusable */
+                       assert(!was_altacct);
+                       assert(IS_INTERNAL_PAGE(pai));
+               } else if (IS_INTERNAL_PAGE(pai)) {
+                       /* internal */
+                       assert(!was_altacct);
+                       assert(!IS_REUSABLE_PAGE(pai));
+                       ledgers_internal++;
+               } else {
+                       /* not internal */
+               }
 
                /*
                 * Get the modify and reference bits, then
                 * nuke the entry in the page table
                 */
                /* remember reference and change */
-               pmap_phys_attributes[pai] |=
-                       (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
-
-               /*
-                * Remove the mapping from the pvlist for this physical page.
-                */
-               pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);
+               if (!is_ept) {
+                       pmap_phys_attributes[pai] |=
+                               *cpte & (PHYS_MODIFIED | PHYS_REFERENCED);
+               } else {
+                       pmap_phys_attributes[pai] |=
+                               ept_refmod_to_physmap((*cpte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
+               }
 
                /* completely invalidate the PTE */
                pmap_store_pte(cpte, 0);
@@ -882,15 +1346,75 @@ update_counts:
        if (pmap->stats.resident_count < num_removed)
                panic("pmap_remove_range: resident_count");
 #endif
-       assert(pmap->stats.resident_count >= num_removed);
+       pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
+       PMAP_STATS_ASSERTF((pmap->stats.resident_count >= num_removed,
+                           "pmap=%p num_removed=%d stats.resident_count=%d",
+                           pmap, num_removed, pmap->stats.resident_count));
        OSAddAtomic(-num_removed,  &pmap->stats.resident_count);
 
+       if (pmap != kernel_pmap) {
+               PMAP_STATS_ASSERTF((pmap->stats.external >= stats_external,
+                                   "pmap=%p stats_external=%d stats.external=%d",
+                                   pmap, stats_external, pmap->stats.external));
+               PMAP_STATS_ASSERTF((pmap->stats.internal >= stats_internal,
+                                   "pmap=%p stats_internal=%d stats.internal=%d",
+                                   pmap, stats_internal, pmap->stats.internal));
+               PMAP_STATS_ASSERTF((pmap->stats.reusable >= stats_reusable,
+                                   "pmap=%p stats_reusable=%d stats.reusable=%d",
+                                   pmap, stats_reusable, pmap->stats.reusable));
+               PMAP_STATS_ASSERTF((pmap->stats.compressed >= stats_compressed,
+                                   "pmap=%p stats_compressed=%lld, stats.compressed=%lld",
+                                   pmap, stats_compressed, pmap->stats.compressed));
+
+               /* update pmap stats */
+               if (stats_external) {
+                       OSAddAtomic(-stats_external, &pmap->stats.external);
+               }
+               if (stats_internal) {
+                       OSAddAtomic(-stats_internal, &pmap->stats.internal);
+               }
+               if (stats_reusable)
+                       OSAddAtomic(-stats_reusable, &pmap->stats.reusable);
+               if (stats_compressed)
+                       OSAddAtomic64(-stats_compressed, &pmap->stats.compressed);
+               /* update ledgers */
+               if (ledgers_internal) {
+                       pmap_ledger_debit(pmap,
+                                         task_ledgers.internal,
+                                         machine_ptob(ledgers_internal));
+               }
+               if (ledgers_compressed) {
+                       pmap_ledger_debit(pmap,
+                                         task_ledgers.internal_compressed,
+                                         machine_ptob(ledgers_compressed));
+               }
+               if (ledgers_alt_internal) {
+                       pmap_ledger_debit(pmap,
+                                         task_ledgers.alternate_accounting,
+                                         machine_ptob(ledgers_alt_internal));
+               }
+               if (ledgers_alt_compressed) {
+                       pmap_ledger_debit(pmap,
+                                         task_ledgers.alternate_accounting_compressed,
+                                         machine_ptob(ledgers_alt_compressed));
+               }
+               pmap_ledger_debit(pmap,
+                                 task_ledgers.phys_footprint,
+                                 machine_ptob((ledgers_internal -
+                                               ledgers_alt_internal) +
+                                              (ledgers_compressed -
+                                               ledgers_alt_compressed)));
+       }
+
 #if TESTING
        if (pmap->stats.wired_count < num_unwired)
                panic("pmap_remove_range: wired_count");
 #endif
-       assert(pmap->stats.wired_count >= num_unwired);
+       PMAP_STATS_ASSERTF((pmap->stats.wired_count >= num_unwired,
+                           "pmap=%p num_unwired=%d stats.wired_count=%d",
+                           pmap, num_unwired, pmap->stats.wired_count));
        OSAddAtomic(-num_unwired,  &pmap->stats.wired_count);
+       pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
 
        return;
 }
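
The closing adjustment in pmap_remove_range_options debits phys_footprint by
exactly the pages that were charged to it: resident internal pages minus their
alternate-accounted subset, plus compressed pages minus their alternate-accounted
subset. A tiny worked example of that arithmetic with made-up counts:

/* sketch only: the phys_footprint debit computed above */
#include <stdio.h>

int main(void)
{
        long internal = 120, alt_internal = 20;         /* resident anonymous pages */
        long compressed = 30, alt_compressed = 5;       /* compressed-marker pages  */
        long page_size = 4096;

        long footprint_pages = (internal - alt_internal) +
                               (compressed - alt_compressed);

        printf("debit phys_footprint by %ld bytes\n", footprint_pages * page_size);
        return 0;
}
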
@@ -908,22 +1432,33 @@ pmap_remove(
        pmap_t          map,
        addr64_t        s64,
        addr64_t        e64)
+{
+       pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
+}
+
+void
+pmap_remove_options(
+       pmap_t          map,
+       addr64_t        s64,
+       addr64_t        e64,
+       int             options)
 {
        pt_entry_t     *pde;
        pt_entry_t     *spte, *epte;
        addr64_t        l64;
        uint64_t        deadline;
+       boolean_t       is_ept;
 
        pmap_intr_assert();
 
        if (map == PMAP_NULL || s64 == e64)
                return;
 
-       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
-                  map,
-                  (uint32_t) (s64 >> 32), s64,
-                  (uint32_t) (e64 >> 32), e64);
+       is_ept = is_ept_pmap(map);
 
+       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
+                  VM_KERNEL_ADDRHIDE(map), VM_KERNEL_ADDRHIDE(s64),
+                  VM_KERNEL_ADDRHIDE(e64));
 
        PMAP_LOCK(map);
 
@@ -967,8 +1502,8 @@ pmap_remove(
                        l64 = e64;
                pde = pmap_pde(map, s64);
 
-               if (pde && (*pde & INTEL_PTE_VALID)) {
-                       if (*pde & INTEL_PTE_PS) {
+               if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
+                       if (*pde & PTE_PS) {
                                /*
                                 * If we're removing a superpage, pmap_remove_range()
                                 * must work on level 2 instead of level 1; and we're
@@ -982,12 +1517,21 @@ pmap_remove(
                                spte = &spte[ptenum(s64)];
                                epte = &spte[intel_btop(l64 - s64)];
                        }
-                       pmap_remove_range(map, s64, spte, epte);
+                       pmap_remove_range_options(map, s64, spte, epte,
+                                                 options);
                }
                s64 = l64;
 
                if (s64 < e64 && rdtsc64() >= deadline) {
                        PMAP_UNLOCK(map)
+                           /* TODO: Rapid release/reacquisition can defeat
+                            * the "backoff" intent here; either consider a
+                            * fair spinlock, or a scheme whereby each lock
+                            * attempt marks the processor as within a spinlock
+                            * acquisition, and scan CPUs here to determine
+                            * if a backoff is necessary, to avoid sacrificing
+                            * performance in the common case.
+                            */
                        PMAP_LOCK(map)
                        deadline = rdtsc64() + max_preemption_latency_tsc;
                }
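
pmap_remove_options bounds how long it holds the pmap lock: it works through the
range in chunks, and once rdtsc64() passes a precomputed deadline it drops and
immediately retakes the lock so pending preemption can occur, then rearms the
deadline (the TODO above notes the limits of this backoff). A user-space sketch
of that deadline-bounded loop, with clock_gettime() standing in for rdtsc64()
and a pthread mutex for the pmap lock; the hold-time budget is invented.

/* sketch only: chunked work under a lock with a hold-time deadline */
#include <pthread.h>
#include <stdint.h>
#include <time.h>

static uint64_t now_ns(void)
{
        struct timespec ts;
        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

static void remove_range_chunked(pthread_mutex_t *lock, int nchunks)
{
        const uint64_t max_hold_ns = 50000;     /* illustrative budget */
        uint64_t deadline;

        pthread_mutex_lock(lock);
        deadline = now_ns() + max_hold_ns;
        for (int i = 0; i < nchunks; i++) {
                /* ... remove one chunk's worth of mappings here ... */
                if (i + 1 < nchunks && now_ns() >= deadline) {
                        pthread_mutex_unlock(lock);     /* let waiters run */
                        pthread_mutex_lock(lock);
                        deadline = now_ns() + max_hold_ns;
                }
        }
        pthread_mutex_unlock(lock);
}

int main(void)
{
        pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
        remove_range_chunked(&lock, 8);
        return 0;
}
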
@@ -995,22 +1539,31 @@ pmap_remove(
 
        PMAP_UNLOCK(map);
 
-       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
-                  map, 0, 0, 0, 0);
+       PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
+
+}
 
+void
+pmap_page_protect(
+        ppnum_t         pn,
+       vm_prot_t       prot)
+{
+       pmap_page_protect_options(pn, prot, 0, NULL);
 }
 
 /*
- *     Routine:        pmap_page_protect
+ *     Routine:        pmap_page_protect_options
  *
  *     Function:
  *             Lower the permission for all mappings to a given
  *             page.
  */
 void
-pmap_page_protect(
+pmap_page_protect_options(
         ppnum_t         pn,
-       vm_prot_t       prot)
+       vm_prot_t       prot,
+       unsigned int    options,
+       void            *arg)
 {
        pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
        pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
@@ -1023,6 +1576,8 @@ pmap_page_protect(
        int                     pai;
        pmap_t                  pmap;
        boolean_t               remove;
+       pt_entry_t              new_pte_value;
+       boolean_t               is_ept;
 
        pmap_intr_assert();
        assert(pn != vm_page_fictitious_addr);
@@ -1037,8 +1592,8 @@ pmap_page_protect(
                 */
                return;
        }
-       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
-                  pn, prot, 0, 0, 0);
+
+       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, pn, prot);
 
        /*
         * Determine the new protection.
@@ -1072,14 +1627,21 @@ pmap_page_protect(
        do {
                vm_map_offset_t vaddr;
 
+               if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
+                   (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
+                       /* page was modified, so it will be compressed */
+                       options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+                       options |= PMAP_OPTIONS_COMPRESSOR;
+               }
+
                pmap = pv_e->pmap;
-               vaddr = pv_e->va;
+               is_ept = is_ept_pmap(pmap);
+               vaddr = PVE_VA(pv_e);
                pte = pmap_pte(pmap, vaddr);
 
-#if    DEBUG
-               if (pa_index(pte_to_pa(*pte)) != pn)
-                       panic("pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
-#endif
+               pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
+                   "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
+
                if (0 == pte) {
                        panic("pmap_page_protect() "
                                "pmap=%p pn=0x%x vaddr=0x%llx\n",
@@ -1089,24 +1651,153 @@ pmap_page_protect(
 
                /*
                 * Remove the mapping if new protection is NONE
-                * or if write-protecting a kernel mapping.
                 */
-               if (remove || pmap == kernel_pmap) {
-                       /*
-                        * Remove the mapping, collecting dirty bits.
-                        */
-                       pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID);
-                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
-                       pmap_phys_attributes[pai] |=
-                               *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
-                       pmap_store_pte(pte, 0);
+               if (remove) {
 
-#if TESTING
-                       if (pmap->stats.resident_count < 1)
-                               panic("pmap_page_protect: resident_count");
-#endif
-                       assert(pmap->stats.resident_count >= 1);
-                       OSAddAtomic(-1,  &pmap->stats.resident_count);
+                       /* Remove per-pmap wired count */
+                       if (iswired(*pte)) {
+                               OSAddAtomic(-1, &pmap->stats.wired_count);
+                               pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
+                       }
+
+                       if (pmap != kernel_pmap &&
+                           (options & PMAP_OPTIONS_COMPRESSOR) &&
+                           IS_INTERNAL_PAGE(pai)) {
+                               assert(!PTE_IS_COMPRESSED(*pte));
+                               /* mark this PTE as having been "compressed" */
+                               new_pte_value = PTE_COMPRESSED;
+                               if (IS_ALTACCT_PAGE(pai, pv_e)) {
+                                       new_pte_value |= PTE_COMPRESSED_ALT;
+                               }
+                       } else {
+                               new_pte_value = 0;
+                       }
+
+                       if (options & PMAP_OPTIONS_NOREFMOD) {
+                               pmap_store_pte(pte, new_pte_value);
+
+                               if (options & PMAP_OPTIONS_NOFLUSH)
+                                       PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
+                               else
+                                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
+                       } else {
+                               /*
+                                * Remove the mapping, collecting dirty bits.
+                                */
+                               pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
+
+                               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
+                               if (!is_ept) {
+                                       pmap_phys_attributes[pai] |=
+                                               *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+                               } else {
+                                       pmap_phys_attributes[pai] |=
+                                               ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
+                               }
+                               if ((options &
+                                    PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
+                                   IS_INTERNAL_PAGE(pai) &&
+                                   (pmap_phys_attributes[pai] &
+                                    PHYS_MODIFIED)) {
+                                       /*
+                                        * Page is actually "modified" and
+                                        * will be compressed.  Start
+                                        * accounting for it as "compressed".
+                                        */
+                                       assert(!(options & PMAP_OPTIONS_COMPRESSOR));
+                                       options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
+                                       options |= PMAP_OPTIONS_COMPRESSOR;
+                                       assert(new_pte_value == 0);
+                                       if (pmap != kernel_pmap) {
+                                               new_pte_value = PTE_COMPRESSED;
+                                               if (IS_ALTACCT_PAGE(pai, pv_e)) {
+                                                       new_pte_value |= PTE_COMPRESSED_ALT;
+                                               }
+                                       }
+                               }
+                               pmap_store_pte(pte, new_pte_value);
+                       }
+
+#if TESTING
+                       if (pmap->stats.resident_count < 1)
+                               panic("pmap_page_protect: resident_count");
+#endif
+                       pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
+                       assert(pmap->stats.resident_count >= 1);
+                       OSAddAtomic(-1,  &pmap->stats.resident_count);
+
+                       /*
+                        * We only ever compress internal pages.
+                        */
+                       if (options & PMAP_OPTIONS_COMPRESSOR) {
+                               assert(IS_INTERNAL_PAGE(pai));
+                       }
+                       if (pmap != kernel_pmap) {
+                               /* update pmap stats */
+                               if (IS_REUSABLE_PAGE(pai)) {
+                                       assert(pmap->stats.reusable > 0);
+                                       OSAddAtomic(-1, &pmap->stats.reusable);
+                               } else if (IS_INTERNAL_PAGE(pai)) {
+                                       assert(pmap->stats.internal > 0);
+                                       OSAddAtomic(-1, &pmap->stats.internal);
+                               } else {
+                                       assert(pmap->stats.external > 0);
+                                       OSAddAtomic(-1, &pmap->stats.external);
+                               }
+                               if ((options & PMAP_OPTIONS_COMPRESSOR) &&
+                                   IS_INTERNAL_PAGE(pai)) {
+                                       /* adjust "compressed" stats */
+                                       OSAddAtomic64(+1, &pmap->stats.compressed);
+                                       PMAP_STATS_PEAK(pmap->stats.compressed);
+                                       pmap->stats.compressed_lifetime++;
+                               }
+
+                               /* update ledgers */
+                               if (IS_ALTACCT_PAGE(pai, pv_e)) {
+                                       assert(IS_INTERNAL_PAGE(pai));
+                                       pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
+                                       pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
+                                       if (options & PMAP_OPTIONS_COMPRESSOR) {
+                                               pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
+                                               pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
+                                       }
+                               } else if (IS_REUSABLE_PAGE(pai)) {
+                                       assert(!IS_ALTACCT_PAGE(pai, pv_e));
+                                       assert(IS_INTERNAL_PAGE(pai));
+                                       if (options & PMAP_OPTIONS_COMPRESSOR) {
+                                               pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
+                                               /* was not in footprint, but is now */
+                                               pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
+                                       }
+                               } else if (IS_INTERNAL_PAGE(pai)) {
+                                       assert(!IS_ALTACCT_PAGE(pai, pv_e));
+                                       assert(!IS_REUSABLE_PAGE(pai));
+                                       pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
+                                       /*
+                                        * Update all stats related to physical
+                                        * footprint, which only deals with
+                                        * internal pages.
+                                        */
+                                       if (options & PMAP_OPTIONS_COMPRESSOR) {
+                                               /*
+                                                * This removal is only being
+                                                * done so we can send this page
+                                                * to the compressor;  therefore
+                                                * it mustn't affect total task
+                                                * footprint.
+                                                */
+                                               pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
+                                       } else {
+                                               /*
+                                                * This internal page isn't
+                                                * going to the compressor,
+                                                * so adjust stats to keep 
+                                                * phys_footprint up to date.
+                                                */
+                                               pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
+                                       }
+                               }
+                       }
 
                        /*
                         * Deal with the pv_rooted_entry.
@@ -1117,8 +1808,6 @@ pmap_page_protect(
                                 * Fix up head later.
                                 */
                                pv_h->pmap = PMAP_NULL;
-
-                               pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT;
                        } else {
                                /*
                                 * Delete this entry.
@@ -1133,10 +1822,21 @@ pmap_page_protect(
                        }
                } else {
                        /*
-                        * Write-protect.
+                        * Write-protect, after an opportunistic refmod collection.
                         */
-                       pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE);
-                       PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
+                       if (!is_ept) {
+                               pmap_phys_attributes[pai] |=
+                                       *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
+                       } else {
+                               pmap_phys_attributes[pai] |=
+                                       ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
+                       }
+                       pmap_update_pte(pte, PTE_WRITE(is_ept), 0);
+
+                       if (options & PMAP_OPTIONS_NOFLUSH)
+                               PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
+                       else
+                               PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
                }
                pvh_e = nexth;
        } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
@@ -1151,7 +1851,7 @@ pmap_page_protect(
                if (pvh_e != (pv_hashed_entry_t) pv_h) {
                        pv_hash_remove(pvh_e);
                        pv_h->pmap = pvh_e->pmap;
-                       pv_h->va = pvh_e->va;
+                       pv_h->va_and_flags = pvh_e->va_and_flags;
                        pvh_e->qlink.next = (queue_entry_t) pvh_eh;
                        pvh_eh = pvh_e;
 
@@ -1166,184 +1866,629 @@ pmap_page_protect(
 done:
        UNLOCK_PVH(pai);
 
-       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
-                  0, 0, 0, 0, 0);
+       PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
 }
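+
+/*
+ * Illustrative sketch only, not part of this change: a caller lowering the
+ * protection on many pages can defer the per-page TLB shootdowns with
+ * PMAP_OPTIONS_NOFLUSH and issue one batched flush at the end.  This assumes
+ * the pmap_flush_context interface declared in vm/pmap.h; the helper and
+ * parameter names below are hypothetical.
+ */
+static void __unused
+example_page_protect_batch(ppnum_t *pages, unsigned int cnt)
+{
+       pmap_flush_context      pfc;
+       unsigned int            i;
+
+       pmap_flush_context_init(&pfc);
+       for (i = 0; i < cnt; i++) {
+               /* collect the affected pmaps/ranges instead of flushing now */
+               pmap_page_protect_options(pages[i], VM_PROT_READ,
+                                         PMAP_OPTIONS_NOFLUSH, &pfc);
+       }
+       /* one batched TLB shootdown for everything collected above */
+       pmap_flush(&pfc);
+}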
 
-__private_extern__ void
-pmap_pagetable_corruption_msg_log(int (*log_func)(const char * fmt, ...)__printflike(1,2)) {
-       if (pmap_pagetable_corruption_incidents > 0) {
-               int i, e = MIN(pmap_pagetable_corruption_incidents, PMAP_PAGETABLE_CORRUPTION_MAX_LOG);
-               (*log_func)("%u pagetable corruption incident(s) detected, timeout: %u\n", pmap_pagetable_corruption_incidents, pmap_pagetable_corruption_timeout);
-               for (i = 0; i < e; i++) {
-                       (*log_func)("Incident 0x%x, reason: 0x%x, action: 0x%x, time: 0x%llx\n", pmap_pagetable_corruption_records[i].incident,  pmap_pagetable_corruption_records[i].reason, pmap_pagetable_corruption_records[i].action, pmap_pagetable_corruption_records[i].abstime);
-               }
-       }
-}
 
+/*
+ *     Clear specified attribute bits.
+ */
 void
-mapping_free_prime(void)
+phys_attribute_clear(
+       ppnum_t         pn,
+       int             bits,
+       unsigned int    options,
+       void            *arg)
 {
-       int                     i;
-       pv_hashed_entry_t       pvh_e;
-       pv_hashed_entry_t       pvh_eh;
-       pv_hashed_entry_t       pvh_et;
-       int                     pv_cnt;
+       pv_rooted_entry_t       pv_h;
+       pv_hashed_entry_t       pv_e;
+       pt_entry_t              *pte = NULL;
+       int                     pai;
+       pmap_t                  pmap;
+       char                    attributes = 0;
+       boolean_t               is_internal, is_reusable, is_altacct, is_ept;
+       int                     ept_bits_to_clear;
+       boolean_t               ept_keep_global_mod = FALSE;
+
+       if ((bits & PHYS_MODIFIED) &&
+           (options & PMAP_OPTIONS_NOFLUSH) &&
+           arg == NULL) {
+               panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
+                     "should not clear 'modified' without flushing TLBs\n",
+                     pn, bits, options, arg);
+       }
+
+       /* We only support converting MOD and REF bits for EPT PTEs in this function */
+       assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);
 
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
-               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+       ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));
 
-               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-               pvh_eh = pvh_e;
+       pmap_intr_assert();
+       assert(pn != vm_page_fictitious_addr);
+       if (pn == vm_page_guard_addr)
+               return;
+
+       pai = ppn_to_pai(pn);
 
-               if (pvh_et == PV_HASHED_ENTRY_NULL)
-                       pvh_et = pvh_e;
-               pv_cnt++;
+       if (!IS_MANAGED_PAGE(pai)) {
+               /*
+                *      Not a managed page.
+                */
+               return;
        }
-       PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
 
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
-               pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+       PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
+
+       pv_h = pai_to_pvh(pai);
+
+       LOCK_PVH(pai);
+
+
+       /*
+        * Walk down the PV list, clearing all modify or reference bits.
+        * We do not have to lock the pv_list because we hold the
+        * per-PV-head lock (LOCK_PVH) for this page.
+        */
+       if (pv_h->pmap != PMAP_NULL) {
+               /*
+                * There are some mappings.
+                */
+
+               is_internal = IS_INTERNAL_PAGE(pai);
+               is_reusable = IS_REUSABLE_PAGE(pai);
+
+               pv_e = (pv_hashed_entry_t)pv_h;
+
+               do {
+                       vm_map_offset_t va;
+                       char pte_bits;
+
+                       pmap = pv_e->pmap;
+                       is_ept = is_ept_pmap(pmap);
+                       is_altacct = IS_ALTACCT_PAGE(pai, pv_e);
+                       va = PVE_VA(pv_e);
+                       pte_bits = 0;
+
+                       if (bits) {
+                               pte = pmap_pte(pmap, va);
+                               /* grab ref/mod bits from this PTE */
+                               pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
+                               /* propagate to page's global attributes */
+                               if (!is_ept) {
+                                       attributes |= pte_bits;
+                               } else {
+                                       attributes |= ept_refmod_to_physmap(pte_bits);
+                                       if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
+                                               ept_keep_global_mod = TRUE;
+                                       }
+                               }
+                               /* which bits to clear for this PTE? */
+                               if (!is_ept) {
+                                       pte_bits &= bits;
+                               } else {
+                                       pte_bits &= ept_bits_to_clear;
+                               }
+                       }
+
+                        /*
+                         * Clear modify and/or reference bits.
+                         */
+                       if (pte_bits) {
+                               pmap_update_pte(pte, bits, 0);
+
+                               /* Ensure all processors using this translation
+                                * invalidate this TLB entry. The invalidation
+                                * *must* follow the PTE update, to ensure that
+                                * the TLB shadow of the 'D' bit (in particular)
+                                * is synchronized with the updated PTE.
+                                */
+                               if (! (options & PMAP_OPTIONS_NOFLUSH)) {
+                                       /* flush TLBS now */
+                                       PMAP_UPDATE_TLBS(pmap,
+                                                        va,
+                                                        va + PAGE_SIZE);
+                               } else if (arg) {
+                                       /* delayed TLB flush: add "pmap" info */
+                                       PMAP_UPDATE_TLBS_DELAYED(
+                                               pmap,
+                                               va,
+                                               va + PAGE_SIZE,
+                                               (pmap_flush_context *)arg);
+                               } else {
+                                       /* no TLB flushing at all */
+                               }
+                       }
+
+                       /* update pmap "reusable" stats */
+                       if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
+                           is_reusable &&
+                           pmap != kernel_pmap) {
+                               /* one less "reusable" */
+                               assert(pmap->stats.reusable > 0);
+                               OSAddAtomic(-1, &pmap->stats.reusable);
+                               if (is_internal) {
+                                       /* one more "internal" */
+                                       OSAddAtomic(+1, &pmap->stats.internal);
+                                       PMAP_STATS_PEAK(pmap->stats.internal);
+                                       assert(pmap->stats.internal > 0);
+                                       if (is_altacct) {
+                                               /* no impact on ledgers */
+                                       } else {
+                                               pmap_ledger_credit(pmap,
+                                                                  task_ledgers.internal,
+                                                                  PAGE_SIZE);
+                                               pmap_ledger_credit(
+                                                       pmap,
+                                                       task_ledgers.phys_footprint,
+                                                       PAGE_SIZE);
+                                       }
+                               } else {
+                                       /* one more "external" */
+                                       OSAddAtomic(+1, &pmap->stats.external);
+                                       PMAP_STATS_PEAK(pmap->stats.external);
+                                       assert(pmap->stats.external > 0);
+                               }
+                       } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
+                                  !is_reusable &&
+                                  pmap != kernel_pmap) {
+                               /* one more "reusable" */
+                               OSAddAtomic(+1, &pmap->stats.reusable);
+                               PMAP_STATS_PEAK(pmap->stats.reusable);
+                               assert(pmap->stats.reusable > 0);
+                               if (is_internal) {
+                                       /* one less "internal" */
+                                       assert(pmap->stats.internal > 0);
+                                       OSAddAtomic(-1, &pmap->stats.internal);
+                                       if (is_altacct) {
+                                               /* no impact on footprint */
+                                       } else {
+                                               pmap_ledger_debit(pmap,
+                                                                 task_ledgers.internal,
+                                                                 PAGE_SIZE);
+                                               pmap_ledger_debit(
+                                                       pmap,
+                                                       task_ledgers.phys_footprint,
+                                                       PAGE_SIZE);
+                                       }
+                               } else {
+                                       /* one less "external" */
+                                       assert(pmap->stats.external > 0);
+                                       OSAddAtomic(-1, &pmap->stats.external);
+                               }
+                       }
 
-               pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-               pvh_eh = pvh_e;
+                       pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
 
-               if (pvh_et == PV_HASHED_ENTRY_NULL)
-                       pvh_et = pvh_e;
-               pv_cnt++;
+               } while (pv_e != (pv_hashed_entry_t)pv_h);
        }
-       PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+       /* Opportunistic refmod collection; effectively annulled below
+        * if both REF and MOD are being cleared, since those bits are
+        * then removed from the global attributes again.
+        */
 
-}
+       pmap_phys_attributes[pai] |= attributes;
 
-static inline void
-pmap_pagetable_corruption_log_setup(void) {
-       if (pmap_pagetable_corruption_log_call == NULL) {
-               nanotime_to_absolutetime(PMAP_PAGETABLE_CORRUPTION_INTERVAL, 0, &pmap_pagetable_corruption_interval_abstime);
-               thread_call_setup(&pmap_pagetable_corruption_log_call_data,
-                   (thread_call_func_t) pmap_pagetable_corruption_msg_log,
-                   (thread_call_param_t) &printf);
-               pmap_pagetable_corruption_log_call = &pmap_pagetable_corruption_log_call_data;
+       if (ept_keep_global_mod) {
+               /*
+                * If the hardware doesn't support AD bits for EPT PTEs and someone is
+                * requesting that we clear the modified bit for a phys page, we need
+                * to ensure that there are no EPT mappings for the page with the
+                * modified bit set. If there are, we cannot clear the global modified bit.
+                */
+               bits &= ~PHYS_MODIFIED;
+       }
+       pmap_phys_attributes[pai] &= ~(bits);
+
+       /* update this page's "reusable" status */
+       if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
+               pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
+       } else if (options & PMAP_OPTIONS_SET_REUSABLE) {
+               pmap_phys_attributes[pai] |= PHYS_REUSABLE;
        }
+
+       UNLOCK_PVH(pai);
+
+       PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
 }
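+
+/*
+ * Illustrative sketch only: the pmap_clear_modify()/pmap_clear_reference()
+ * style wrappers reduce to a phys_attribute_clear() call with no options and
+ * no flush context, as assumed in this hypothetical helper.
+ */
+static void __unused
+example_clear_modify(ppnum_t pn)
+{
+       /* clear the modified bit in every mapping of pn and in the global attributes */
+       phys_attribute_clear(pn, PHYS_MODIFIED, 0, NULL);
+}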
 
-void
-mapping_adjust(void)
+/*
+ *     Check specified attribute bits.
+ */
+int
+phys_attribute_test(
+       ppnum_t         pn,
+       int             bits)
 {
-       pv_hashed_entry_t       pvh_e;
-       pv_hashed_entry_t       pvh_eh;
-       pv_hashed_entry_t       pvh_et;
-       int                     pv_cnt;
-       int                     i;
-
-       if (mapping_adjust_call == NULL) {
-               thread_call_setup(&mapping_adjust_call_data,
-                                 (thread_call_func_t) mapping_adjust,
-                                 (thread_call_param_t) NULL);
-               mapping_adjust_call = &mapping_adjust_call_data;
+       pv_rooted_entry_t       pv_h;
+       pv_hashed_entry_t       pv_e;
+       pt_entry_t              *pte;
+       int                     pai;
+       pmap_t                  pmap;
+       int                     attributes = 0;
+       boolean_t               is_ept;
+
+       pmap_intr_assert();
+       assert(pn != vm_page_fictitious_addr);
+       assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
+       if (pn == vm_page_guard_addr)
+               return 0;
+
+       pai = ppn_to_pai(pn);
+
+       if (!IS_MANAGED_PAGE(pai)) {
+               /*
+                *      Not a managed page.
+                */
+               return 0;
        }
 
-       pmap_pagetable_corruption_log_setup();
+       /*
+        * Fast check: if the requested bits have already been collected,
+        * there is no need to take any locks.  If they are not all set,
+        * recheck after taking the lock, in case they were pulled in
+        * while we were waiting for it.
+        */
+       if ((pmap_phys_attributes[pai] & bits) == bits)
+               return bits;
+
+       pv_h = pai_to_pvh(pai);
 
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
-               for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
-                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+       LOCK_PVH(pai);
 
-                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                       pvh_eh = pvh_e;
+       attributes = pmap_phys_attributes[pai] & bits;
 
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                               pvh_et = pvh_e;
-                       pv_cnt++;
-               }
-               PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+
+       /*
+        * Walk down PV list, checking the mappings until we
+        * reach the end or we've found the desired attributes.
+        */
+       if (attributes != bits &&
+           pv_h->pmap != PMAP_NULL) {
+               /*
+                * There are some mappings.
+                */
+               pv_e = (pv_hashed_entry_t)pv_h;
+               do {
+                       vm_map_offset_t va;
+
+                       pmap = pv_e->pmap;
+                       is_ept = is_ept_pmap(pmap);
+                       va = PVE_VA(pv_e);
+                       /*
+                        * pick up modify and/or reference bits from mapping
+                        */
+
+                       pte = pmap_pte(pmap, va);
+                       if (!is_ept) {
+                               attributes |= (int)(*pte & bits);
+                       } else {
+                               attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
+
+                       }
+
+                       pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
+
+               } while ((attributes != bits) &&
+                        (pv_e != (pv_hashed_entry_t)pv_h));
        }
+       pmap_phys_attributes[pai] |= attributes;
 
-       pv_cnt = 0;
-       pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
-       if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
-               for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
-                       pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
+       UNLOCK_PVH(pai);
+       return (attributes);
+}
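+
+/*
+ * Illustrative sketch only: a pmap_is_referenced()-style query reduces to a
+ * phys_attribute_test() call; the helper name is hypothetical.
+ */
+static boolean_t __unused
+example_is_referenced(ppnum_t pn)
+{
+       return (phys_attribute_test(pn, PHYS_REFERENCED) & PHYS_REFERENCED) ? TRUE : FALSE;
+}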
 
-                       pvh_e->qlink.next = (queue_entry_t)pvh_eh;
-                       pvh_eh = pvh_e;
+/*
+ *     Routine:        pmap_change_wiring
+ *     Function:       Change the wiring attribute for a map/virtual-address
+ *                     pair.
+ *     In/out conditions:
+ *                     The mapping must already exist in the pmap.
+ */
+void
+pmap_change_wiring(
+       pmap_t          map,
+       vm_map_offset_t vaddr,
+       boolean_t       wired)
+{
+       pt_entry_t      *pte;
 
-                       if (pvh_et == PV_HASHED_ENTRY_NULL)
-                               pvh_et = pvh_e;
-                       pv_cnt++;
-               }
-               PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
+       PMAP_LOCK(map);
+
+       if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
+               panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
+                     map, vaddr, wired);
+
+       if (wired && !iswired(*pte)) {
+               /*
+                * wiring down mapping
+                */
+               pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
+               OSAddAtomic(+1,  &map->stats.wired_count);
+               pmap_update_pte(pte, 0, PTE_WIRED);
+       }
+       else if (!wired && iswired(*pte)) {
+               /*
+                * unwiring mapping
+                */
+               assert(map->stats.wired_count >= 1);
+               OSAddAtomic(-1,  &map->stats.wired_count);
+               pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
+               pmap_update_pte(pte, PTE_WIRED, 0);
        }
-       mappingrecurse = 0;
+
+       PMAP_UNLOCK(map);
 }
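+
+/*
+ * Illustrative sketch only (hypothetical helper): wiring and then unwiring
+ * the same mapping leaves stats.wired_count and the wired_mem ledger where
+ * they started, since the credit and debit above are symmetric.
+ */
+static void __unused
+example_wire_unwire(pmap_t map, vm_map_offset_t va)
+{
+       pmap_change_wiring(map, va, TRUE);      /* +1 wired_count, credit wired_mem */
+       pmap_change_wiring(map, va, FALSE);     /* -1 wired_count, debit wired_mem */
+}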
 
+/*
+ *     "Backdoor" direct map routine for early mappings.
+ *     Useful for mapping memory outside the managed physical
+ *     range (e.g., device memory).
+ *     Sets the A, D and NC bits if requested.
+ */
 
-boolean_t
-pmap_is_noencrypt(ppnum_t pn)
+vm_offset_t
+pmap_map_bd(
+       vm_offset_t     virt,
+       vm_map_offset_t start_addr,
+       vm_map_offset_t end_addr,
+       vm_prot_t       prot,
+       unsigned int    flags)
 {
-       int             pai;
+       pt_entry_t      template;
+       pt_entry_t      *ptep;
 
-       pai = ppn_to_pai(pn);
+       vm_offset_t     base = virt;
+       boolean_t       doflush = FALSE;
+
+       template = pa_to_pte(start_addr)
+               | INTEL_PTE_REF
+               | INTEL_PTE_MOD
+               | INTEL_PTE_WIRED
+               | INTEL_PTE_VALID;
+
+       if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
+               template |= INTEL_PTE_NCACHE;
+               if (!(flags & (VM_MEM_GUARDED)))
+                       template |= INTEL_PTE_PTA;
+       }
 
-       if (!IS_MANAGED_PAGE(pai))
-               return (TRUE);
+       if ((prot & VM_PROT_EXECUTE) == 0)
+               template |= INTEL_PTE_NX;
 
-       if (pmap_phys_attributes[pai] & PHYS_NOENCRYPT)
-               return (TRUE);
+       if (prot & VM_PROT_WRITE)
+               template |= INTEL_PTE_WRITE;
 
-       return (FALSE);
+       while (start_addr < end_addr) {
+               ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
+               if (ptep == PT_ENTRY_NULL) {
+                       panic("pmap_map_bd: Invalid kernel address");
+               }
+               if (pte_to_pa(*ptep)) {
+                       doflush = TRUE;
+               }
+               pmap_store_pte(ptep, template);
+               pte_increment_pa(template);
+               virt += PAGE_SIZE;
+               start_addr += PAGE_SIZE;
+       }
+       if (doflush) {
+               flush_tlb_raw();
+               PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
+       }
+       return(virt);
 }
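+
+/*
+ * Illustrative sketch only (hypothetical addresses): direct-mapping a device
+ * aperture uncached during early bring-up with pmap_map_bd().
+ */
+static vm_offset_t __unused
+example_map_device(vm_offset_t vstart, vm_map_offset_t dev_base, vm_map_offset_t dev_end)
+{
+       /* returns the first virtual address past the new mapping */
+       return pmap_map_bd(vstart, dev_base, dev_end,
+                          VM_PROT_READ | VM_PROT_WRITE, VM_MEM_NOT_CACHEABLE);
+}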
 
+/* Create a virtual alias beginning at 'ava' of the specified kernel virtual
+ * range. The aliased pagetable range is expanded if
+ * PMAP_EXPAND_OPTIONS_ALIASMAP is specified. Performs no synchronization,
+ * assumes caller has stabilized the source and destination ranges. Currently
+ * used to populate sections of the trampoline "doublemap" at CPU startup.
+ */
 
 void
-pmap_set_noencrypt(ppnum_t pn)
+pmap_alias(
+       vm_offset_t     ava,
+       vm_map_offset_t start_addr,
+       vm_map_offset_t end_addr,
+       vm_prot_t       prot,
+       unsigned int    eoptions)
 {
-       int             pai;
+       pt_entry_t      prot_template, template;
+       pt_entry_t      *aptep, *sptep;
 
-       pai = ppn_to_pai(pn);
+       prot_template =  INTEL_PTE_REF | INTEL_PTE_MOD | INTEL_PTE_WIRED | INTEL_PTE_VALID;
+       if ((prot & VM_PROT_EXECUTE) == 0)
+               prot_template |= INTEL_PTE_NX;
 
-       if (IS_MANAGED_PAGE(pai)) {
-               LOCK_PVH(pai);
+       if (prot & VM_PROT_WRITE)
+               prot_template |= INTEL_PTE_WRITE;
+       assert(((start_addr | end_addr) & PAGE_MASK) == 0);
+       while (start_addr < end_addr) {
+               aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
+               if (aptep == PT_ENTRY_NULL) {
+                       if (eoptions & PMAP_EXPAND_OPTIONS_ALIASMAP) {
+                               pmap_expand(kernel_pmap, ava, PMAP_EXPAND_OPTIONS_ALIASMAP);
+                               aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
+                       } else {
+                               panic("pmap_alias: Invalid alias address");
+                       }
+               }
+               /* The aliased range should not have any active mappings */
+               assert(pte_to_pa(*aptep) == 0);
 
-               pmap_phys_attributes[pai] |= PHYS_NOENCRYPT;
+               sptep = pmap_pte(kernel_pmap, start_addr);
+               assert(sptep != PT_ENTRY_NULL && (pte_to_pa(*sptep) != 0));
+               template = pa_to_pte(pte_to_pa(*sptep)) | prot_template;
+               pmap_store_pte(aptep, template);
 
-               UNLOCK_PVH(pai);
+               ava += PAGE_SIZE;
+               start_addr += PAGE_SIZE;
        }
 }
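+
+/*
+ * Illustrative sketch only (hypothetical addresses): aliasing one page of an
+ * existing kernel mapping at a new virtual address, expanding the alias
+ * pagetables on demand.
+ */
+static void __unused
+example_alias_page(vm_offset_t alias_va, vm_offset_t kernel_va)
+{
+       pmap_alias(alias_va, kernel_va, kernel_va + PAGE_SIZE,
+                  VM_PROT_READ, PMAP_EXPAND_OPTIONS_ALIASMAP);
+}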
 
-
-void
-pmap_clear_noencrypt(ppnum_t pn)
+mach_vm_size_t
+pmap_query_resident(
+       pmap_t          pmap,
+       addr64_t        s64,
+       addr64_t        e64,
+       mach_vm_size_t  *compressed_bytes_p)
 {
-       int             pai;
+       pt_entry_t     *pde;
+       pt_entry_t     *spte, *epte;
+       addr64_t        l64;
+       uint64_t        deadline;
+       mach_vm_size_t  resident_bytes;
+       mach_vm_size_t  compressed_bytes;
+       boolean_t       is_ept;
 
-       pai = ppn_to_pai(pn);
+       pmap_intr_assert();
 
-       if (IS_MANAGED_PAGE(pai)) {
-               LOCK_PVH(pai);
+       if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
+               if (compressed_bytes_p) {
+                       *compressed_bytes_p = 0;
+               }
+               return 0;
+       }
 
-               pmap_phys_attributes[pai] &= ~PHYS_NOENCRYPT;
+       is_ept = is_ept_pmap(pmap);
 
-               UNLOCK_PVH(pai);
+       PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
+                  VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(s64),
+                  VM_KERNEL_ADDRHIDE(e64));
+
+       resident_bytes = 0;
+       compressed_bytes = 0;
+
+       PMAP_LOCK(pmap);
+
+       deadline = rdtsc64() + max_preemption_latency_tsc;
+
+       while (s64 < e64) {
+               l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
+               if (l64 > e64)
+                       l64 = e64;
+               pde = pmap_pde(pmap, s64);
+
+               if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
+                       if (*pde & PTE_PS) {
+                               /* superpage: not supported */
+                       } else {
+                               spte = pmap_pte(pmap,
+                                               (s64 & ~(pde_mapped_size - 1)));
+                               spte = &spte[ptenum(s64)];
+                               epte = &spte[intel_btop(l64 - s64)];
+
+                               for (; spte < epte; spte++) {
+                                       if (pte_to_pa(*spte) != 0) {
+                                               resident_bytes += PAGE_SIZE;
+                                       } else if (*spte & PTE_COMPRESSED) {
+                                               compressed_bytes += PAGE_SIZE;
+                                       }
+                               }
+
+                       }
+               }
+               s64 = l64;
+
+               if (s64 < e64 && rdtsc64() >= deadline) {
+                       PMAP_UNLOCK(pmap);
+                       PMAP_LOCK(pmap);
+                       deadline = rdtsc64() + max_preemption_latency_tsc;
+               }
+       }
+
+       PMAP_UNLOCK(pmap);
+
+       PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
+                  resident_bytes);
+
+       if (compressed_bytes_p) {
+               *compressed_bytes_p = compressed_bytes;
        }
+       return resident_bytes;
 }
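+
+/*
+ * Illustrative sketch only (hypothetical caller): summing the resident and
+ * compressed contribution of a VA range in a task pmap.
+ */
+static mach_vm_size_t __unused
+example_range_footprint(pmap_t task_pmap, addr64_t start, addr64_t end)
+{
+       mach_vm_size_t  compressed = 0;
+       mach_vm_size_t  resident;
+
+       resident = pmap_query_resident(task_pmap, start, end, &compressed);
+       /* resident pages plus compressed pages approximate this range's footprint */
+       return resident + compressed;
+}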
 
-void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
-       assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
+kern_return_t
+pmap_query_page_info(
+       pmap_t          pmap,
+       vm_map_offset_t va,
+       int             *disp_p)
+{
+       int             disp;
+       boolean_t       is_ept;
+       pmap_paddr_t    pa;
+       ppnum_t         pai;
+       pd_entry_t      *pde;
+       pt_entry_t      *pte;
 
-       if (dofilter) {
-               CPU_CR3_MARK_INACTIVE();
+       pmap_intr_assert();
+       if (pmap == PMAP_NULL || pmap == kernel_pmap) {
+               *disp_p = 0;
+               return KERN_INVALID_ARGUMENT;
+       }
+
+       disp = 0;
+       is_ept = is_ept_pmap(pmap);
+
+       PMAP_LOCK(pmap);
+
+       pde = pmap_pde(pmap, va);
+       if (!pde ||
+           !(*pde & PTE_VALID_MASK(is_ept)) ||
+           (*pde & PTE_PS)) {
+               goto done;
+       }
+
+       pte = pmap_pte(pmap, va);
+       if (pte == PT_ENTRY_NULL) {
+               goto done;
+       }
+
+       pa = pte_to_pa(*pte);
+       if (pa == 0) {
+               if (PTE_IS_COMPRESSED(*pte)) {
+                       disp |= PMAP_QUERY_PAGE_COMPRESSED;
+                       if (*pte & PTE_COMPRESSED_ALT) {
+                               disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
+                       }
+               }
        } else {
-               CPU_CR3_MARK_ACTIVE();
-               __asm__ volatile("mfence");
-               if (current_cpu_datap()->cpu_tlb_invalid)
-                       process_pmap_updates();
+               disp |= PMAP_QUERY_PAGE_PRESENT;
+               pai = pa_index(pa);
+               if (!IS_MANAGED_PAGE(pai)) {
+               } else if (pmap_pv_is_altacct(pmap, va, pai)) {
+                       assert(IS_INTERNAL_PAGE(pai));
+                       disp |= PMAP_QUERY_PAGE_INTERNAL;
+                       disp |= PMAP_QUERY_PAGE_ALTACCT;
+               } else if (IS_REUSABLE_PAGE(pai)) {
+                       disp |= PMAP_QUERY_PAGE_REUSABLE;
+               } else if (IS_INTERNAL_PAGE(pai)) {
+                       disp |= PMAP_QUERY_PAGE_INTERNAL;
+               }
        }
+
+done:
+       PMAP_UNLOCK(pmap);
+       *disp_p = disp;
+       return KERN_SUCCESS;
+}
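+
+/*
+ * Illustrative sketch only (hypothetical helper): decoding the disposition
+ * bits returned by pmap_query_page_info() for a single address.
+ */
+static boolean_t __unused
+example_page_is_compressed(pmap_t pmap, vm_map_offset_t va)
+{
+       int     disp = 0;
+
+       if (pmap_query_page_info(pmap, va, &disp) != KERN_SUCCESS) {
+               return FALSE;
+       }
+       return (disp & PMAP_QUERY_PAGE_COMPRESSED) ? TRUE : FALSE;
+}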
+
+void pmap_set_jit_entitled(__unused pmap_t pmap)
+{
+       /* The x86 pmap layer does not care if a map has a JIT entry. */
+       return;
+}
+
+bool pmap_has_prot_policy(__unused vm_prot_t prot)
+{
+       /*
+        * The x86 pmap layer does not apply any policy to any protection
+        * types.
+        */
+       return FALSE;
+}
+
+void pmap_release_pages_fast(void)
+{
+       return;
 }