apple/xnu.git blobdiff: osfmk/i386/pmap.h (xnu-3248.40.184)

diff --git a/osfmk/i386/pmap.h b/osfmk/i386/pmap.h
index a168562c90c89fb766386611fcc58c1ff3089262..939e47174605d98a377a3a33963f02104a7b63e4 100644
--- a/osfmk/i386/pmap.h
+++ b/osfmk/i386/pmap.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -70,7 +70,6 @@
 
 #ifndef        ASSEMBLER
 
-#include <platforms.h>
 
 #include <mach/kern_return.h>
 #include <mach/machine/vm_types.h>
@@ -79,7 +78,7 @@
 #include <mach/machine/vm_param.h>
 #include <kern/kern_types.h>
 #include <kern/thread.h>
-#include <kern/lock.h>
+#include <kern/simple_lock.h>
 #include <mach/branch_predicates.h>
 
 #include <i386/mp.h>
 
 #endif /* ASSEMBLER */
 
-#define NPGPTD          4
-#define PDESHIFT        21
-#define PTEMASK         0x1ff
-#define PTEINDX         3
+#define NPGPTD          4ULL
+#define PDESHIFT        21ULL
+#define PTEMASK         0x1ffULL
+#define PTEINDX         3ULL
 
-#define PTESHIFT        12
+#define PTESHIFT        12ULL
 
 
-#define INITPT_SEG_BASE  0x100000
-#define INITGDT_SEG_BASE 0x106000
-#define SLEEP_SEG_BASE   0x107000
-
 #ifdef __x86_64__
 #define LOW_4GB_MASK   ((vm_offset_t)0x00000000FFFFFFFFUL)
 #endif
 #define NBPTD           (NPGPTD << PAGE_SHIFT)
 #define NPDEPTD         (NBPTD / (sizeof (pd_entry_t)))
 #define NPDEPG          (PAGE_SIZE/(sizeof (pd_entry_t)))
-#define NBPDE           (1 << PDESHIFT)
+#define NBPDE           (1ULL << PDESHIFT)
 #define PDEMASK         (NBPDE - 1)
 
 #define PTE_PER_PAGE   512 /* number of PTE's per page on any level */
@@ -153,7 +148,7 @@ typedef uint64_t        pdpt_entry_t;
 #define NPDPTPG         (PAGE_SIZE/(sizeof (pdpt_entry_t)))
 #define PDPTSHIFT       30
 #define PDPTPGSHIFT     9
-#define NBPDPT          (1 << PDPTSHIFT)
+#define NBPDPT          (1ULL << PDPTSHIFT)
 #define PDPTMASK        (NBPDPT-1)
 #define PDPT_ENTRY_NULL ((pdpt_entry_t *) 0)
 
@@ -161,7 +156,7 @@ typedef uint64_t        pd_entry_t;
 #define NPDPG           (PAGE_SIZE/(sizeof (pd_entry_t)))
 #define PDSHIFT         21
 #define PDPGSHIFT       9
-#define NBPD            (1 << PDSHIFT)
+#define NBPD            (1ULL << PDSHIFT)
 #define PDMASK          (NBPD-1)
 #define PD_ENTRY_NULL   ((pd_entry_t *) 0)
 
@@ -169,7 +164,7 @@ typedef uint64_t        pt_entry_t;
 #define NPTPG           (PAGE_SIZE/(sizeof (pt_entry_t)))
 #define PTSHIFT         12
 #define PTPGSHIFT       9
-#define NBPT            (1 << PTSHIFT)
+#define NBPT            (1ULL << PTSHIFT)
 #define PTMASK          (NBPT-1)
 #define PT_ENTRY_NULL  ((pt_entry_t *) 0)
 
@@ -206,110 +201,27 @@ typedef uint64_t  pmap_paddr_t;
 static inline void
 pmap_store_pte(pt_entry_t *entryp, pt_entry_t value)
 {
-#ifdef __i386__
-       /*
-        * Load the new value into %ecx:%ebx
-        * Load the old value into %edx:%eax
-        * Compare-exchange-8bytes at address entryp (loaded in %edi)
-        * If the compare succeeds, the new value will have been stored.
-        * Otherwise, the old value changed and reloaded, so try again.
-        */
-       __asm__ volatile(
-               "       movl    (%0), %%eax     \n\t"
-               "       movl    4(%0), %%edx    \n\t"
-               "1:                             \n\t"
-               "       cmpxchg8b (%0)          \n\t"
-               "       jnz 1b"
-               :
-               : "D" (entryp),
-                 "b" ((uint32_t)value),
-                 "c" ((uint32_t)(value >> 32))
-               : "eax", "edx", "memory");
-#else
        /*
         * In the 32-bit kernel a compare-and-exchange loop was
         * required to provide atomicity. For K64, life is easier:
         */
        *entryp = value;
-#endif
 }
 
-/*
- * Atomic 64-bit compare and exchange of a page table entry.
- */
-static inline boolean_t
-pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
-{
-       boolean_t               ret;
-
-#ifdef __i386__
-       /*
-        * Load the old value into %edx:%eax
-        * Load the new value into %ecx:%ebx
-        * Compare-exchange-8bytes at address entryp (loaded in %edi)
-        * If the compare succeeds, the new value is stored, return TRUE.
-        * Otherwise, no swap is made, return FALSE.
-        */
-       asm volatile(
-               "       lock; cmpxchg8b (%1)    \n\t"
-               "       setz    %%al            \n\t"
-               "       movzbl  %%al,%0"
-               : "=a" (ret)
-               : "D" (entryp),
-                 "a" ((uint32_t)old),
-                 "d" ((uint32_t)(old >> 32)),
-                 "b" ((uint32_t)new),
-                 "c" ((uint32_t)(new >> 32))
-               : "memory");
-#else
-       /*
-        * Load the old value into %rax
-        * Load the new value into another register
-        * Compare-exchange-quad at address entryp
-        * If the compare succeeds, the new value is stored, return TRUE.
-        * Otherwise, no swap is made, return FALSE.
-        */
-       asm volatile(
-               "       lock; cmpxchgq %2,(%3)  \n\t"
-               "       setz    %%al            \n\t"
-               "       movzbl  %%al,%0"
-               : "=a" (ret)
-               : "a" (old),
-                 "r" (new),
-                 "r" (entryp)
-               : "memory");
-#endif
-       return ret;
-}
-
-#define pmap_update_pte(entryp, old, new) \
-       while (!pmap_cmpx_pte((entryp), (old), (new)))
-
-
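
/*
 * Editor's sketch (not part of the diff): the compare-and-exchange helper
 * deleted above does not vanish from the tree -- a helper of the same shape
 * survives in osfmk/i386/pmap_internal.h. On x86_64 the same effect can be
 * had with a compiler builtin instead of inline assembly; the helper name
 * below is illustrative only:
 */
static inline boolean_t
pmap_cmpx_pte_sketch(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
{
        /* Atomically store 'new' iff *entryp still holds 'old'. */
        return __sync_bool_compare_and_swap(entryp, old, new) ? TRUE : FALSE;
}
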
 /* in 64 bit spaces, the number of each type of page in the page tables */
 #define NPML4PGS        (1ULL * (PAGE_SIZE/(sizeof (pml4_entry_t))))
 #define NPDPTPGS        (NPML4PGS * (PAGE_SIZE/(sizeof (pdpt_entry_t))))
 #define NPDEPGS         (NPDPTPGS * (PAGE_SIZE/(sizeof (pd_entry_t))))
 #define NPTEPGS         (NPDEPGS * (PAGE_SIZE/(sizeof (pt_entry_t))))
 
-#ifdef __i386__
-/*
- * The 64-bit kernel is remapped in uber-space which is at the base
- * the highest 4th-level directory (KERNEL_UBER_PML4_INDEX). That is,
- * 512GB from the top of virtual space (or zero).
- */
-#define KERNEL_UBER_PML4_INDEX 511
-#define KERNEL_UBER_BASE       (0ULL - NBPML4)
-#define KERNEL_UBER_BASE_HI32  ((uint32_t)(KERNEL_UBER_BASE >> 32))
-#else
-#define KERNEL_PML4_INDEX      511
+#define KERNEL_PML4_INDEX              511
 #define KERNEL_KEXTS_INDEX     510     /* Home of KEXTs - the basement */
-#define KERNEL_PHYSMAP_INDEX   509     /* virtual to physical map */ 
+#define KERNEL_PHYSMAP_PML4_INDEX      509     /* virtual to physical map */ 
 #define KERNEL_BASE            (0ULL - NBPML4)
 #define KERNEL_BASEMENT                (KERNEL_BASE - NBPML4)
-#endif
 
 #define        VM_WIMG_COPYBACK        VM_MEM_COHERENT
+#define        VM_WIMG_COPYBACKLW      VM_WIMG_COPYBACK
 #define        VM_WIMG_DEFAULT         VM_MEM_COHERENT
 /* ?? intel ?? */
 #define VM_WIMG_IO             (VM_MEM_COHERENT |      \
@@ -317,14 +229,10 @@ pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
 #define VM_WIMG_WTHRU          (VM_MEM_WRITE_THROUGH | VM_MEM_COHERENT | VM_MEM_GUARDED)
 /* write combining mode, aka store gather */
 #define VM_WIMG_WCOMB          (VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT) 
-
+#define        VM_WIMG_INNERWBACK      VM_MEM_COHERENT
 /*
  * Pte related macros
  */
-#ifdef __i386__
-#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDESHIFT)|((pti)<<PTESHIFT)))
-#define VADDR64(pmi, pdi, pti) ((vm_offset_t)(((pmi)<<PLM4SHIFT))((pdi)<<PDESHIFT)|((pti)<<PTESHIFT))
-#else
 #define KVADDR(pmi, pdpi, pdi, pti)              \
         ((vm_offset_t)                   \
                ((uint64_t) -1    << 47)        | \
@@ -332,7 +240,6 @@ pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
                ((uint64_t)(pdpi) << PDPTSHIFT) | \
                ((uint64_t)(pdi)  << PDESHIFT)  | \
                ((uint64_t)(pti)  << PTESHIFT))
-#endif
 
 /*
  * Size of Kernel address space.  This is the number of page table pages
@@ -351,53 +258,6 @@ pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
 #endif
 
 
-#ifdef __i386__
-enum high_cpu_types {
-  HIGH_CPU_ISS0,
-  HIGH_CPU_ISS1,
-  HIGH_CPU_DESC,
-  HIGH_CPU_LDT_BEGIN,
-  HIGH_CPU_LDT_END = HIGH_CPU_LDT_BEGIN + (LDTSZ / 512) - 1,
-  HIGH_CPU_END
-};
-
-enum  high_fixed_addresses {
-  HIGH_FIXED_TRAMPS,  /* must be first */
-  HIGH_FIXED_TRAMPS_END,
-  HIGH_FIXED_GDT,
-  HIGH_FIXED_IDT,
-  HIGH_FIXED_LDT_BEGIN,
-  HIGH_FIXED_LDT_END = HIGH_FIXED_LDT_BEGIN + (LDTSZ / 512) - 1,
-  HIGH_FIXED_KTSS,
-  HIGH_FIXED_DFTSS,
-  HIGH_FIXED_DBTSS,
-  HIGH_FIXED_CPUS_BEGIN,
-  HIGH_FIXED_CPUS_END = HIGH_FIXED_CPUS_BEGIN + (HIGH_CPU_END * MAX_CPUS) - 1,
-};
-
-
-/* XXX64  below PTDI values need cleanup */
-/*
- * The *PTDI values control the layout of virtual memory
- *
- */
-#define        KPTDI           (0x000)/* start of kernel virtual pde's */
-#define        PTDPTDI         (0x7F4) /* ptd entry that points to ptd! */
-#define        APTDPTDI        (0x7F8) /* alt ptd entry that points to APTD */
-#define        UMAXPTDI        (0x7F8) /* ptd entry for user space end */
-#define        UMAXPTEOFF      (NPTEPG)        /* pte entry for user space end */
-
-#define KERNBASE       VADDR(KPTDI,0)
-
-/*
- *     Convert address offset to directory address
- *     containing the page table pointer - legacy
- */
-/*#define pmap_pde(m,v) (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT]))*/
-
-#define HIGH_MEM_BASE  ((uint32_t)( -NBPDE) )  /* shared gdt etc seg addr */ /* XXX64 ?? */
-#define pmap_index_to_virt(x)  (HIGH_MEM_BASE | ((unsigned)(x) << PAGE_SHIFT))
-#endif
 
 /*
  *     Convert address offset to page descriptor index
@@ -426,19 +286,19 @@ enum  high_fixed_addresses {
  *     without using the bit fields).
  */
 
-#define INTEL_PTE_VALID                0x00000001
-#define INTEL_PTE_WRITE                0x00000002
-#define INTEL_PTE_RW           0x00000002
-#define INTEL_PTE_USER         0x00000004
-#define INTEL_PTE_WTHRU                0x00000008
-#define INTEL_PTE_NCACHE       0x00000010
-#define INTEL_PTE_REF          0x00000020
-#define INTEL_PTE_MOD          0x00000040
-#define INTEL_PTE_PS           0x00000080
-#define INTEL_PTE_PTA          0x00000080
-#define INTEL_PTE_GLOBAL       0x00000100
-#define INTEL_PTE_WIRED                0x00000200
-#define INTEL_PDPTE_NESTED     0x00000400
+#define INTEL_PTE_VALID                0x00000001ULL
+#define INTEL_PTE_WRITE                0x00000002ULL
+#define INTEL_PTE_RW           0x00000002ULL
+#define INTEL_PTE_USER         0x00000004ULL
+#define INTEL_PTE_WTHRU                0x00000008ULL
+#define INTEL_PTE_NCACHE       0x00000010ULL
+#define INTEL_PTE_REF          0x00000020ULL
+#define INTEL_PTE_MOD          0x00000040ULL
+#define INTEL_PTE_PS           0x00000080ULL
+#define INTEL_PTE_PTA          0x00000080ULL
+#define INTEL_PTE_GLOBAL       0x00000100ULL
+#define INTEL_PTE_WIRED                0x00000400ULL
+#define INTEL_PDPTE_NESTED     0x00000800ULL
 #define INTEL_PTE_PFN          PG_FRAME
 
 #define INTEL_PTE_NX           (1ULL << 63)
@@ -447,22 +307,120 @@ enum  high_fixed_addresses {
 /* This is conservative, but suffices */
 #define INTEL_PTE_RSVD         ((1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54))
 
+#define INTEL_COMPRESSED       (1ULL << 62) /* marker, for invalid PTE only -- ignored by hardware for both regular/EPT entries */
+
 #define        pa_to_pte(a)            ((a) & INTEL_PTE_PFN) /* XXX */
 #define        pte_to_pa(p)            ((p) & INTEL_PTE_PFN) /* XXX */
 #define        pte_increment_pa(p)     ((p) += INTEL_OFFMASK+1)
 
 #define pte_kernel_rw(p)          ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_RW))
 #define pte_kernel_ro(p)          ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID))
-#define pte_user_rw(p)            ((pt_entry)t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER|INTEL_PTE_RW))
+#define pte_user_rw(p)            ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER|INTEL_PTE_RW))
 #define pte_user_ro(p)            ((pt_entry_t)(pa_to_pte(p) | INTEL_PTE_VALID|INTEL_PTE_USER))
 
+#define PMAP_INVEPT_SINGLE_CONTEXT     1
+
+
+#define INTEL_EPTP_AD          0x00000040ULL
+
+#define INTEL_EPT_READ         0x00000001ULL
+#define INTEL_EPT_WRITE        0x00000002ULL
+#define INTEL_EPT_EX           0x00000004ULL
+#define INTEL_EPT_IPTA         0x00000040ULL
+#define INTEL_EPT_PS           0x00000080ULL
+#define INTEL_EPT_REF          0x00000100ULL
+#define INTEL_EPT_MOD          0x00000200ULL
+
+#define INTEL_EPT_CACHE_MASK   0x00000038ULL
+#define INTEL_EPT_NCACHE       0x00000000ULL
+#define INTEL_EPT_WC           0x00000008ULL
+#define INTEL_EPT_WTHRU        0x00000020ULL
+#define INTEL_EPT_WP           0x00000028ULL
+#define INTEL_EPT_WB           0x00000030ULL
+
+/*
+ * Routines to filter correct bits depending on the pmap type
+ */
+
+static inline pt_entry_t
+pte_remove_ex(pt_entry_t pte, boolean_t is_ept)
+{
+       if (__probable(!is_ept)) {
+               return (pte | INTEL_PTE_NX);
+       }
+
+       return (pte & (~INTEL_EPT_EX));
+}
+
+static inline pt_entry_t
+pte_set_ex(pt_entry_t pte, boolean_t is_ept)
+{
+       if (__probable(!is_ept)) {
+               return (pte & (~INTEL_PTE_NX));
+       }
+
+       return (pte | INTEL_EPT_EX);
+}
+
+static inline pt_entry_t
+physmap_refmod_to_ept(pt_entry_t physmap_pte)
+{
+       pt_entry_t ept_pte = 0;
+
+       if (physmap_pte & INTEL_PTE_MOD) {
+               ept_pte |= INTEL_EPT_MOD;
+       }
+
+       if (physmap_pte & INTEL_PTE_REF) {
+               ept_pte |= INTEL_EPT_REF;
+       }
+
+       return ept_pte;
+}
+
+static inline pt_entry_t
+ept_refmod_to_physmap(pt_entry_t ept_pte)
+{
+       pt_entry_t physmap_pte = 0;
+
+       assert((ept_pte & ~(INTEL_EPT_REF | INTEL_EPT_MOD)) == 0);
+
+       if (ept_pte & INTEL_EPT_REF) {
+               physmap_pte |= INTEL_PTE_REF;
+       }
+
+       if (ept_pte & INTEL_EPT_MOD) {
+               physmap_pte |= INTEL_PTE_MOD;
+       }
+
+       return physmap_pte;
+}
+
+/*
+ * Note: Not all Intel processors support the EPT accessed and dirty bits.
+ *      During pmap_init() we check the VMX capability for the current hardware
+ *      and update this variable accordingly.
+ */
+extern boolean_t pmap_ept_support_ad;
+
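
/*
 * Editor's sketch (not part of the diff): the capability probe described
 * above reads a VMX capability MSR. Per the Intel SDM, bit 21 of
 * IA32_VMX_EPT_VPID_CAP (MSR 0x48C) reports support for the EPT
 * accessed/dirty flags; rdmsr64() is the existing xnu accessor, while the
 * probe function itself is illustrative:
 */
static inline boolean_t
ept_ad_supported_sketch(void)
{
        /* IA32_VMX_EPT_VPID_CAP, bit 21: EPT A/D flags supported */
        return (rdmsr64(0x48C) & (1ULL << 21)) ? TRUE : FALSE;
}
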
+#define PTE_VALID_MASK(is_ept) ((is_ept) ? (INTEL_EPT_READ | INTEL_EPT_WRITE | INTEL_EPT_EX) : INTEL_PTE_VALID)
+#define PTE_READ(is_ept)       ((is_ept) ? INTEL_EPT_READ : INTEL_PTE_VALID)
+#define PTE_WRITE(is_ept)      ((is_ept) ? INTEL_EPT_WRITE : INTEL_PTE_WRITE)
+#define PTE_PS                 INTEL_PTE_PS
+#define PTE_COMPRESSED         INTEL_COMPRESSED
+#define PTE_NCACHE(is_ept)     ((is_ept) ? INTEL_EPT_NCACHE : INTEL_PTE_NCACHE)
+#define PTE_WTHRU(is_ept)      ((is_ept) ? INTEL_EPT_WTHRU : INTEL_PTE_WTHRU)
+#define PTE_REF(is_ept)        ((is_ept) ? INTEL_EPT_REF : INTEL_PTE_REF)
+#define PTE_MOD(is_ept)        ((is_ept) ? INTEL_EPT_MOD : INTEL_PTE_MOD)
+#define PTE_WIRED              INTEL_PTE_WIRED
+
+
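/*
 * Editor's sketch (not part of the diff): the is_ept-parameterized macros
 * above let a single code path build entries for either pmap flavor. A
 * hypothetical helper, illustrative only:
 */
static inline pt_entry_t
pte_template_sketch(pmap_paddr_t pa, boolean_t is_ept)
{
        pt_entry_t pte = pa_to_pte(pa);

        /* Valid and writable, in the flavor-appropriate encoding. */
        pte |= PTE_READ(is_ept) | PTE_WRITE(is_ept);

        /* Strip execute permission the flavor-appropriate way. */
        return pte_remove_ex(pte, is_ept);
}
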
 #define PMAP_DEFAULT_CACHE     0
 #define PMAP_INHIBIT_CACHE     1
 #define PMAP_GUARDED_CACHE     2
 #define PMAP_ACTIVATE_CACHE    4
 #define PMAP_NO_GUARD_CACHE    8
 
-
 #ifndef        ASSEMBLER
 
 #include <sys/queue.h>
@@ -472,55 +430,55 @@ enum  high_fixed_addresses {
  * and directories.
  */
 
-#ifdef __i386__
-extern pt_entry_t      PTmap[], APTmap[], Upte;
-extern pd_entry_t      PTD[], APTD[], PTDpde[], APTDpde[], Upde;
-extern pmap_paddr_t    lo_kernel_cr3;
-extern pdpt_entry_t    *IdlePDPT64;
-#else
 extern pt_entry_t      *PTmap;
-#endif
+extern pdpt_entry_t    *IdlePDPT;
+extern pml4_entry_t    *IdlePML4;
 extern boolean_t       no_shared_cr3;
 extern addr64_t                kernel64_cr3;
 extern pd_entry_t      *IdlePTD;       /* physical addr of "Idle" state PTD */
-extern pdpt_entry_t    IdlePDPT[];
-extern pml4_entry_t    IdlePML4[];
 
 extern uint64_t                pmap_pv_hashlist_walks;
 extern uint64_t                pmap_pv_hashlist_cnts;
 extern uint32_t                pmap_pv_hashlist_max;
 extern uint32_t                pmap_kernel_text_ps;
 
-#ifdef __i386__
-/*
- * ** i386 **
- * virtual address to page table entry and
- * to physical address. Likewise for alternate address space.
- * Note: these work recursively, thus vtopte of a pte will give
- * the corresponding pde that in turn maps it.
- */
 
-#define        vtopte(va)      (PTmap + i386_btop((vm_offset_t)va))
-#endif
 
 #ifdef __x86_64__
 #define ID_MAP_VTOP(x) ((void *)(((uint64_t)(x)) & LOW_4GB_MASK))
 
-#define PHYSMAP_BASE   KVADDR(KERNEL_PHYSMAP_INDEX,0,0,0)
+extern uint64_t physmap_base, physmap_max;
+
 #define NPHYSMAP (MAX(K64_MAXMEM/GB + 4, 4))
-#define PHYSMAP_PTOV(x)        ((void *)(((uint64_t)(x)) + PHYSMAP_BASE))
 
 static inline boolean_t physmap_enclosed(addr64_t a) {
        return (a < (NPHYSMAP * GB));
 }
-#endif
 
-typedef        volatile long   cpu_set;        /* set of CPUs - must be <= 32 */
-                                       /* changed by other processors */
-struct md_page {
-  int pv_list_count;
-  TAILQ_HEAD(,pv_entry)  pv_list;
-};
+static inline void * PHYSMAP_PTOV_check(void *paddr) {
+       uint64_t pvaddr = (uint64_t)paddr + physmap_base;
+
+       if (__improbable(pvaddr >= physmap_max))
+               panic("PHYSMAP_PTOV bounds exceeded, 0x%qx, 0x%qx, 0x%qx",
+                     pvaddr, physmap_base, physmap_max);
+
+       return (void *)pvaddr;
+}
+
+#define PHYSMAP_PTOV(x)        (PHYSMAP_PTOV_check((void*) (x)))
+
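/*
 * Editor's note (illustrative usage): PHYSMAP_PTOV() maps a physical
 * address to a pointer through the kernel's physical aperture, with the
 * bounds check above -- e.g. walking from a page directory entry to the
 * KVA of the page table it references:
 *
 *      pt_entry_t *ptep = (pt_entry_t *)PHYSMAP_PTOV(pte_to_pa(pde));
 */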
+/*
+ * For KASLR, we alias the master processor's IDT and GDT at fixed
+ * virtual addresses to defeat SIDT/SGDT address leakage.
+ * Non-boot processors' GDTs are aliased likewise (skipping over LOWGLOBAL_ALIAS).
+ * The low global vector page is also mapped at a fixed alias.
+ */
+#define MASTER_IDT_ALIAS       (VM_MIN_KERNEL_ADDRESS + 0x0000)
+#define MASTER_GDT_ALIAS       (VM_MIN_KERNEL_ADDRESS + 0x1000)
+#define LOWGLOBAL_ALIAS                (VM_MIN_KERNEL_ADDRESS + 0x2000)
+#define CPU_GDT_ALIAS(_cpu)    (LOWGLOBAL_ALIAS + (0x1000*(_cpu)))
+
+#endif /*__x86_64__ */
 
 #include <vm/vm_page.h>
 
@@ -533,26 +491,38 @@ struct md_page {
 struct pmap {
        decl_simple_lock_data(,lock)    /* lock on map */
        pmap_paddr_t    pm_cr3;         /* physical addr */
+       pmap_paddr_t    pm_eptp;        /* EPTP */
        boolean_t       pm_shared;
         pd_entry_t      *dirbase;        /* page directory pointer */
-#ifdef __i386__
-       pmap_paddr_t    pdirbase;        /* phys. address of dirbase */
-       vm_offset_t     pm_hold;        /* true pdpt zalloc addr */
-#endif
         vm_object_t     pm_obj;         /* object to hold pde's */
         task_map_t      pm_task_map;
         pdpt_entry_t    *pm_pdpt;       /* KVA of 3rd level page */
        pml4_entry_t    *pm_pml4;       /* KVA of top level */
        vm_object_t     pm_obj_pdpt;    /* holds pdpt pages */
        vm_object_t     pm_obj_pml4;    /* holds pml4 pages */
-#define        PMAP_PCID_MAX_CPUS      (48)    /* Must be a multiple of 8 */
+#define        PMAP_PCID_MAX_CPUS      MAX_CPUS        /* Must be a multiple of 8 */
        pcid_t          pmap_pcid_cpus[PMAP_PCID_MAX_CPUS];
        volatile uint8_t pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS];
        struct pmap_statistics  stats;  /* map statistics */
        int             ref_count;      /* reference count */
         int            nx_enabled;
+       ledger_t        ledger;         /* ledger tracking phys mappings */
 };
 
+static inline boolean_t
+is_ept_pmap(pmap_t p)
+{
+       if (__probable(p->pm_cr3 != 0)) {
+               assert(p->pm_eptp == 0);
+               return FALSE;
+       }
+
+       assert(p->pm_eptp != 0);
+
+       return TRUE;
+}
+
+void hv_ept_pmap_create(void **ept_pmap, void **eptp);
 
 #if NCOPY_WINDOWS > 0
 #define PMAP_PDPT_FIRST_WINDOW 0
@@ -584,9 +554,10 @@ extern void         pmap_put_mapwindow(mapwindow_t *map);
 #endif
 
 typedef struct pmap_memory_regions {
-       ppnum_t base;
-       ppnum_t end;
-       ppnum_t alloc;
+       ppnum_t base;           /* first page of this region */
+       ppnum_t alloc_up;       /* pages below this one have been "stolen" */
+       ppnum_t alloc_down;     /* pages above this one have been "stolen" */
+       ppnum_t end;            /* last page of this region */
        uint32_t type;
        uint64_t attribute;
 } pmap_memory_region_t;
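
/*
 * Editor's sketch (not part of the diff): how the two cursors above are
 * meant to move. Early-boot allocations "steal" physical pages from either
 * end of a region; the region is exhausted once the cursors cross.
 * Hypothetical helper, illustrative only:
 */
static inline boolean_t
region_steal_lowest_sketch(pmap_memory_region_t *r, ppnum_t *pn)
{
        if (r->alloc_up > r->alloc_down)
                return FALSE;           /* cursors crossed: nothing left */
        *pn = r->alloc_up++;            /* take the lowest unstolen page */
        return TRUE;
}
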
@@ -600,11 +571,10 @@ extern pmap_memory_region_t pmap_memory_regions[];
 #include <i386/pmap_pcid.h>
 
 static inline void
-set_dirbase(pmap_t tpmap, __unused thread_t thread) {
-       int ccpu = cpu_number();
+set_dirbase(pmap_t tpmap, __unused thread_t thread, int my_cpu) {
+       int ccpu = my_cpu;
        cpu_datap(ccpu)->cpu_task_cr3 = tpmap->pm_cr3;
        cpu_datap(ccpu)->cpu_task_map = tpmap->pm_task_map;
-#ifndef __i386__
        /*
         * Switch cr3 if necessary
         * - unless running with no_shared_cr3 debugging mode
@@ -622,7 +592,6 @@ set_dirbase(pmap_t tpmap, __unused thread_t thread) {
                if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3)
                        set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
        }
-#endif
 }
 
 /*
@@ -639,9 +608,10 @@ extern void                pmap_update_interrupt(void);
 extern addr64_t                (kvtophys)(
                                vm_offset_t     addr);
 
-extern void            pmap_expand(
+extern kern_return_t   pmap_expand(
                                pmap_t          pmap,
-                               vm_map_offset_t addr);
+                               vm_map_offset_t addr,
+                               unsigned int options);
 #if    !defined(__x86_64__)
 extern pt_entry_t      *pmap_pte(
                                struct pmap     *pmap,
@@ -685,21 +655,10 @@ extern int                pmap_list_resident_pages(
                                vm_offset_t     *listp,
                                int             space);
 extern void            x86_filter_TLB_coherency_interrupts(boolean_t);
-#ifdef __i386__
-extern void             pmap_commpage32_init(
-                                          vm_offset_t kernel,
-                                          vm_offset_t user,
-                                          int count);
-extern void             pmap_commpage64_init(
-                                          vm_offset_t  kernel,
-                                          vm_map_offset_t user,
-                                          int count);
-
-#endif
 /*
  * Get cache attributes (as pagetable bits) for the specified phys page
  */
-extern unsigned        pmap_get_cache_attributes(ppnum_t);
+extern unsigned        pmap_get_cache_attributes(ppnum_t, boolean_t is_ept);
 #if NCOPY_WINDOWS > 0
 extern struct cpu_pmap *pmap_cpu_alloc(
                                boolean_t       is_boot_cpu);
@@ -722,16 +681,6 @@ extern ppnum_t          pmap_find_phys(pmap_t map, addr64_t va);
 
 extern void pmap_cpu_init(void);
 extern void pmap_disable_NX(pmap_t pmap);
-#ifdef __i386__
-extern void pmap_set_4GB_pagezero(pmap_t pmap);
-extern void pmap_clear_4GB_pagezero(pmap_t pmap);
-extern void pmap_load_kernel_cr3(void);
-extern vm_offset_t pmap_cpu_high_map_vaddr(int, enum high_cpu_types);
-extern vm_offset_t pmap_high_map_vaddr(enum high_cpu_types);
-extern vm_offset_t pmap_high_map(pt_entry_t, enum high_cpu_types);
-extern vm_offset_t pmap_cpu_high_shared_remap(int, enum high_cpu_types, vm_offset_t, int);
-extern vm_offset_t pmap_high_shared_remap(enum high_fixed_addresses, vm_offset_t, int);
-#endif
 
 extern void pt_fake_zone_init(int);
 extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *, 
@@ -746,73 +695,28 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
 #include <kern/spl.h>
 
                                  
-#define PMAP_ACTIVATE_MAP(map, thread) {                               \
+#define PMAP_ACTIVATE_MAP(map, thread, my_cpu) {                               \
        register pmap_t         tpmap;                                  \
                                                                         \
         tpmap = vm_map_pmap(map);                                      \
-        set_dirbase(tpmap, thread);                                    \
+        set_dirbase(tpmap, thread, my_cpu);                                    \
 }
 
-#ifdef __i386__
-#define PMAP_DEACTIVATE_MAP(map, thread)                               \
-       if (vm_map_pmap(map)->pm_task_map == TASK_MAP_64BIT_SHARED)     \
-               pmap_load_kernel_cr3();
-#elif defined(__x86_64__)
-#define PMAP_DEACTIVATE_MAP(map, thread)                               \
-       pmap_assert(pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple(map->pmap, cpu_number()) == (get_cr3_raw() & 0xFFF)) : TRUE);
+#if   defined(__x86_64__)
+#define PMAP_DEACTIVATE_MAP(map, thread, ccpu)                         \
+       pmap_assert(pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple(map->pmap, ccpu) == (get_cr3_raw() & 0xFFF)) : TRUE);
 #else
 #define PMAP_DEACTIVATE_MAP(map, thread)
 #endif
 
-#if   defined(__i386__)
-
-#define        PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) {                   \
-       spl_t           spl;                                            \
-       pt_entry_t      *kpdp;                                          \
-       pt_entry_t      *updp;                                          \
-        int            i;                                              \
-        int            need_flush;                                     \
-                                                                        \
-        need_flush = 0;                                                        \
-        spl = splhigh();                                               \
-       if ((old_th->map != new_th->map) || (new_th->task != old_th->task)) {   \
-               PMAP_DEACTIVATE_MAP(old_th->map, old_th);               \
-               PMAP_ACTIVATE_MAP(new_th->map, new_th);                 \
-       }                                                               \
-        kpdp = current_cpu_datap()->cpu_copywindow_pdp;                        \
-        for (i = 0; i < NCOPY_WINDOWS; i++) {                          \
-                if (new_th->machine.copy_window[i].user_base != (user_addr_t)-1) {     \
-                       updp = pmap_pde(new_th->map->pmap,              \
-                              new_th->machine.copy_window[i].user_base);\
-                        pmap_store_pte(kpdp, updp ? *updp : 0);                \
-                }                                                      \
-                kpdp++;                                                        \
-        }                                                              \
-       splx(spl);                                                      \
-        if (new_th->machine.copyio_state == WINDOWS_OPENED)            \
-                need_flush = 1;                                                \
-        else                                                           \
-                new_th->machine.copyio_state = WINDOWS_DIRTY;          \
-        if (new_th->machine.physwindow_pte) {                          \
-         pmap_store_pte((current_cpu_datap()->cpu_physwindow_ptep),    \
-                              new_th->machine.physwindow_pte);         \
-                if (need_flush == 0)                                   \
-                        invlpg((uintptr_t)current_cpu_datap()->cpu_physwindow_base);\
-        }                                                              \
-        if (need_flush)                                                        \
-                flush_tlb();                                           \
-}
-
-#else /* __x86_64__ */
 #define        PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) {                   \
                                                                         \
        pmap_assert(ml_get_interrupts_enabled() == FALSE);              \
        if (old_th->map != new_th->map) {                               \
-               PMAP_DEACTIVATE_MAP(old_th->map, old_th);               \
-               PMAP_ACTIVATE_MAP(new_th->map, new_th);                 \
+               PMAP_DEACTIVATE_MAP(old_th->map, old_th, my_cpu);               \
+               PMAP_ACTIVATE_MAP(new_th->map, new_th, my_cpu);         \
        }                                                               \
 }
-#endif /* __i386__ */
 
 #if NCOPY_WINDOWS > 0
 #define        PMAP_SWITCH_USER(th, new_map, my_cpu) {                         \
@@ -830,9 +734,9 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
        spl_t           spl;                                            \
                                                                        \
        spl = splhigh();                                                \
-       PMAP_DEACTIVATE_MAP(th->map, th);                               \
+       PMAP_DEACTIVATE_MAP(th->map, th, my_cpu);                               \
        th->map = new_map;                                              \
-       PMAP_ACTIVATE_MAP(th->map, th);                                 \
+       PMAP_ACTIVATE_MAP(th->map, th, my_cpu);                         \
        splx(spl);                                                      \
 }
 #endif
@@ -866,30 +770,11 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
  *     but will queue the update request for when the cpu
  *     becomes active.
  */
-#if   defined(__x86_64__)
-#define MARK_CPU_IDLE(my_cpu)  {                                       \
-       assert(ml_get_interrupts_enabled() == FALSE);                   \
-       CPU_CR3_MARK_INACTIVE();                                        \
-       __asm__ volatile("mfence");                                     \
-}
-#else /* __i386__ native */
 #define MARK_CPU_IDLE(my_cpu)  {                                       \
        assert(ml_get_interrupts_enabled() == FALSE);                   \
-       /*                                                              \
-        *      Mark this cpu idle, and remove it from the active set,  \
-        *      since it is not actively using any pmap.  Signal_cpus   \
-        *      will notice that it is idle, and avoid signaling it,    \
-        *      but will queue the update request for when the cpu      \
-        *      becomes active.                                         \
-        */                                                             \
-       if (!cpu_mode_is64bit() || no_shared_cr3)                       \
-               process_pmap_updates();                                 \
-       else                                                            \
-               pmap_load_kernel_cr3();                                 \
        CPU_CR3_MARK_INACTIVE();                                        \
-       __asm__ volatile("mfence");                                     \
+       mfence();                                                                       \
 }
-#endif /* __i386__ */
 
 #define MARK_CPU_ACTIVE(my_cpu) {                                      \
        assert(ml_get_interrupts_enabled() == FALSE);                   \
@@ -904,7 +789,7 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
         *      interrupt if this happens.                              \
         */                                                             \
        CPU_CR3_MARK_ACTIVE();                                          \
-       __asm__ volatile("mfence");                                     \
+       mfence();                                                                       \
                                                                        \
        if (current_cpu_datap()->cpu_tlb_invalid)                       \
            process_pmap_updates();                                     \
@@ -917,6 +802,7 @@ extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__pr
         (((vm_offset_t) (VA)) <= vm_max_kernel_address))
 
 
+#define pmap_compressed(pmap)          ((pmap)->stats.compressed)
 #define pmap_resident_count(pmap)      ((pmap)->stats.resident_count)
 #define pmap_resident_max(pmap)                ((pmap)->stats.resident_max)
 #define        pmap_copy(dst_pmap,src_pmap,dst_addr,len,src_addr)
@@ -932,6 +818,8 @@ extern boolean_t pmap_is_empty(pmap_t               pmap,
 
 #define MACHINE_BOOTSTRAPPTD   1       /* Static bootstrap page-tables */
 
+kern_return_t
+pmap_permissions_verify(pmap_t, vm_map_t, vm_offset_t, vm_offset_t);
 
 #endif /* ASSEMBLER */