#include <kern/kern_types.h>
#include <kern/thread.h>
#include <kern/lock.h>
+#include <mach/branch_predicates.h>
#include <i386/mp.h>
#include <i386/proc_reg.h>
+#include <i386/pal_routines.h>
+
/*
* Define the generic in terms of the specific
*/
#endif /* ASSEMBLER */
-#define NPGPTD 4
-#define PDESHIFT 21
-#define PTEMASK 0x1ff
-#define PTEINDX 3
-
-#define PTESHIFT 12
+#define NPGPTD 4ULL
+#define PDESHIFT 21ULL
+#define PTEMASK 0x1ffULL
+#define PTEINDX 3ULL
+#define PTESHIFT 12ULL
+#ifdef __i386__
#define INITPT_SEG_BASE 0x100000
-#define INITGDT_SEG_BASE 0x106000
-#define SLEEP_SEG_BASE 0x107000
+#endif
#ifdef __x86_64__
#define LOW_4GB_MASK ((vm_offset_t)0x00000000FFFFFFFFUL)
#define NBPTD (NPGPTD << PAGE_SHIFT)
#define NPDEPTD (NBPTD / (sizeof (pd_entry_t)))
#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
-#define NBPDE (1 << PDESHIFT)
+#define NBPDE (1ULL << PDESHIFT)
#define PDEMASK (NBPDE - 1)
#define PTE_PER_PAGE 512 /* number of PTE's per page on any level */
#define NPDPTPG (PAGE_SIZE/(sizeof (pdpt_entry_t)))
#define PDPTSHIFT 30
#define PDPTPGSHIFT 9
-#define NBPDPT (1 << PDPTSHIFT)
+#define NBPDPT (1ULL << PDPTSHIFT)
#define PDPTMASK (NBPDPT-1)
#define PDPT_ENTRY_NULL ((pdpt_entry_t *) 0)
#define NPDPG (PAGE_SIZE/(sizeof (pd_entry_t)))
#define PDSHIFT 21
#define PDPGSHIFT 9
-#define NBPD (1 << PDSHIFT)
+#define NBPD (1ULL << PDSHIFT)
#define PDMASK (NBPD-1)
#define PD_ENTRY_NULL ((pd_entry_t *) 0)
#define NPTPG (PAGE_SIZE/(sizeof (pt_entry_t)))
#define PTSHIFT 12
#define PTPGSHIFT 9
-#define NBPT (1 << PTSHIFT)
+#define NBPT (1ULL << PTSHIFT)
#define PTMASK (NBPT-1)
#define PT_ENTRY_NULL ((pt_entry_t *) 0)
typedef uint64_t pmap_paddr_t;
+#if DEBUG	/* DEBUG builds switch the pmap assertion macros on */
+#define PMAP_ASSERT 1
+#endif
+#if PMAP_ASSERT
+#define pmap_assert(ex) ((ex) ? (void)0 : Assert(__FILE__, __LINE__, # ex)) /* on a false ex, calls Assert() with file, line and the stringified expression */
+
+#define pmap_assert2(ex, fmt, args...) /* as pmap_assert, plus a caller-supplied format string and arguments */ \
+ do { \
+ if (!(ex)) { \
+ kprintf("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0), ##args); /* emit the message via kprintf first ... */ \
+ panic("Assertion %s failed (%s:%d, caller %p) " fmt , #ex, __FILE__, __LINE__, __builtin_return_address(0), ##args); /* ... then panic with the same text */ \
+ } \
+ } while(0)
+#else	/* !PMAP_ASSERT: both macros expand to nothing, so ex is never evaluated */
+#define pmap_assert(ex)
+#define pmap_assert2(ex, fmt, args...)
+#endif
+
/* superpages */
#ifdef __x86_64__
#define SUPERPAGE_NBASEPAGES 512
#endif
}
-/*
- * Atomic 64-bit compare and exchange of a page table entry.
- */
-static inline boolean_t
-pmap_cmpx_pte(pt_entry_t *entryp, pt_entry_t old, pt_entry_t new)
-{
- boolean_t ret;
-
-#ifdef __i386__
- /*
- * Load the old value into %edx:%eax
- * Load the new value into %ecx:%ebx
- * Compare-exchange-8bytes at address entryp (loaded in %edi)
- * If the compare succeeds, the new value is stored, return TRUE.
- * Otherwise, no swap is made, return FALSE.
- */
- asm volatile(
- " lock; cmpxchg8b (%1) \n\t"
- " setz %%al \n\t"
- " movzbl %%al,%0"
- : "=a" (ret)
- : "D" (entryp),
- "a" ((uint32_t)old),
- "d" ((uint32_t)(old >> 32)),
- "b" ((uint32_t)new),
- "c" ((uint32_t)(new >> 32))
- : "memory");
-#else
- /*
- * Load the old value into %rax
- * Load the new value into another register
- * Compare-exchange-quad at address entryp
- * If the compare succeeds, the new value is stored, return TRUE.
- * Otherwise, no swap is made, return FALSE.
- */
- asm volatile(
- " lock; cmpxchgq %2,(%3) \n\t"
- " setz %%al \n\t"
- " movzbl %%al,%0"
- : "=a" (ret)
- : "a" (old),
- "r" (new),
- "r" (entryp)
- : "memory");
-#endif
- return ret;
-}
-
-#define pmap_update_pte(entryp, old, new) \
- while (!pmap_cmpx_pte((entryp), (old), (new)))
-
-
/* in 64 bit spaces, the number of each type of page in the page tables */
#define NPML4PGS (1ULL * (PAGE_SIZE/(sizeof (pml4_entry_t))))
#define NPDPTPGS (NPML4PGS * (PAGE_SIZE/(sizeof (pdpt_entry_t))))
#define KERNEL_UBER_BASE (0ULL - NBPML4)
#define KERNEL_UBER_BASE_HI32 ((uint32_t)(KERNEL_UBER_BASE >> 32))
#else
-#define KERNEL_PML4_INDEX 511
+#define KERNEL_PML4_INDEX 511
#define KERNEL_KEXTS_INDEX 510 /* Home of KEXTs - the basement */
-#define KERNEL_PHYSMAP_INDEX 509 /* virtual to physical map */
+#define KERNEL_PHYSMAP_PML4_INDEX 509 /* virtual to physical map */
#define KERNEL_BASE (0ULL - NBPML4)
#define KERNEL_BASEMENT (KERNEL_BASE - NBPML4)
#endif
#define VM_WIMG_COPYBACK VM_MEM_COHERENT
+#define VM_WIMG_COPYBACKLW VM_WIMG_COPYBACK
#define VM_WIMG_DEFAULT VM_MEM_COHERENT
/* ?? intel ?? */
#define VM_WIMG_IO (VM_MEM_COHERENT | \
#define VM_WIMG_WTHRU (VM_MEM_WRITE_THROUGH | VM_MEM_COHERENT | VM_MEM_GUARDED)
/* write combining mode, aka store gather */
#define VM_WIMG_WCOMB (VM_MEM_NOT_CACHEABLE | VM_MEM_COHERENT)
-
+#define VM_WIMG_INNERWBACK VM_MEM_COHERENT
/*
* Pte related macros
*/
#define pdenum(pmap, a) (((vm_offset_t)(a) >> PDESHIFT) & PDEMASK)
#define PMAP_INVALID_PDPTNUM (~0ULL)
-#ifdef __i386__
#define pdeidx(pmap, a) (((a) >> PDSHIFT) & ((1ULL<<(48 - PDSHIFT)) -1))
#define pdptidx(pmap, a) (((a) >> PDPTSHIFT) & ((1ULL<<(48 - PDPTSHIFT)) -1))
#define pml4idx(pmap, a) (((a) >> PML4SHIFT) & ((1ULL<<(48 - PML4SHIFT)) -1))
-#else
-#define VAMASK ((1ULL<<48)-1)
-#define pml4idx(pmap, a) ((((a) & VAMASK) >> PML4SHIFT) & \
- ((1ULL<<(48 - PML4SHIFT))-1))
-#define pdptidx(pmap, a) ((((a) & PML4MASK) >> PDPTSHIFT) & \
- ((1ULL<<(48 - PDPTSHIFT))-1))
-#define pdeidx(pmap, a) ((((a) & PML4MASK) >> PDSHIFT) & \
- ((1ULL<<(48 - PDSHIFT)) - 1))
-#endif
+
/*
* Convert page descriptor index to user virtual address
* without using the bit fields).
*/
-#define INTEL_PTE_VALID 0x00000001
-#define INTEL_PTE_WRITE 0x00000002
-#define INTEL_PTE_RW 0x00000002
-#define INTEL_PTE_USER 0x00000004
-#define INTEL_PTE_WTHRU 0x00000008
-#define INTEL_PTE_NCACHE 0x00000010
-#define INTEL_PTE_REF 0x00000020
-#define INTEL_PTE_MOD 0x00000040
-#define INTEL_PTE_PS 0x00000080
-#define INTEL_PTE_PTA 0x00000080
-#define INTEL_PTE_GLOBAL 0x00000100
-#define INTEL_PTE_WIRED 0x00000200
-#define INTEL_PDPTE_NESTED 0x00000400
+#define INTEL_PTE_VALID 0x00000001ULL
+#define INTEL_PTE_WRITE 0x00000002ULL
+#define INTEL_PTE_RW 0x00000002ULL
+#define INTEL_PTE_USER 0x00000004ULL
+#define INTEL_PTE_WTHRU 0x00000008ULL
+#define INTEL_PTE_NCACHE 0x00000010ULL
+#define INTEL_PTE_REF 0x00000020ULL
+#define INTEL_PTE_MOD 0x00000040ULL
+#define INTEL_PTE_PS 0x00000080ULL
+#define INTEL_PTE_PTA 0x00000080ULL
+#define INTEL_PTE_GLOBAL 0x00000100ULL
+#define INTEL_PTE_WIRED 0x00000200ULL
+#define INTEL_PDPTE_NESTED 0x00000400ULL
#define INTEL_PTE_PFN PG_FRAME
#define INTEL_PTE_NX (1ULL << 63)
#define INTEL_PTE_INVALID 0
+/* This is conservative, but suffices */
+#define INTEL_PTE_RSVD ((1ULL << 10) | (1ULL << 11) | (0x1FFULL << 54))
#define pa_to_pte(a) ((a) & INTEL_PTE_PFN) /* XXX */
#define pte_to_pa(p) ((p) & INTEL_PTE_PFN) /* XXX */
extern pd_entry_t PTD[], APTD[], PTDpde[], APTDpde[], Upde;
extern pmap_paddr_t lo_kernel_cr3;
extern pdpt_entry_t *IdlePDPT64;
+extern pdpt_entry_t IdlePDPT[];
+extern pml4_entry_t IdlePML4[];
#else
extern pt_entry_t *PTmap;
+extern pdpt_entry_t *IdlePDPT;
+extern pml4_entry_t *IdlePML4;
#endif
extern boolean_t no_shared_cr3;
extern addr64_t kernel64_cr3;
extern pd_entry_t *IdlePTD; /* physical addr of "Idle" state PTD */
-extern pdpt_entry_t IdlePDPT[];
-extern pml4_entry_t IdlePML4[];
extern uint64_t pmap_pv_hashlist_walks;
extern uint64_t pmap_pv_hashlist_cnts;
#define vtopte(va) (PTmap + i386_btop((vm_offset_t)va))
#endif
+
#ifdef __x86_64__
#define ID_MAP_VTOP(x) ((void *)(((uint64_t)(x)) & LOW_4GB_MASK))
-#define PHYSMAP_BASE KVADDR(KERNEL_PHYSMAP_INDEX,0,0,0)
-#define PHYSMAP_PTOV(x) ((void *)(((uint64_t)(x)) + PHYSMAP_BASE))
-#endif
+extern uint64_t physmap_base, physmap_max;
+
+#define NPHYSMAP (MAX(K64_MAXMEM/GB + 4, 4))
+
+/*
+ * Report whether the address `a` lies below NPHYSMAP gigabytes.
+ */
+static inline boolean_t physmap_enclosed(addr64_t a) {
+	const boolean_t below_limit = (a < (NPHYSMAP * GB));
+
+	return below_limit;
+}
+
+/*
+ * Translate a physical address into its physmap virtual alias by adding
+ * physmap_base, and panic if the result falls outside
+ * [physmap_base, physmap_max).
+ *
+ * The addition is done in unsigned 64-bit arithmetic and can wrap for a
+ * large enough paddr, producing a small value that would pass an
+ * upper-bound-only test.  The lower-bound comparison catches that case.
+ *
+ * Returns the translated address as a pointer.
+ */
+static inline void * PHYSMAP_PTOV_check(void *paddr) {
+	uint64_t pvaddr = (uint64_t)paddr + physmap_base;
+
+	if (__improbable(pvaddr < physmap_base || pvaddr >= physmap_max))
+		panic("PHYSMAP_PTOV bounds exceeded, 0x%qx, 0x%qx, 0x%qx",
+		      pvaddr, physmap_base, physmap_max);
+
+	return (void *)pvaddr;
+}
+
+#define PHYSMAP_PTOV(x) (PHYSMAP_PTOV_check((void*) (x)))
+
+/*
+ * For KASLR, we alias the master processor's IDT and GDT at fixed
+ * virtual addresses to defeat SIDT/SGDT address leakage.
+ */
+#define MASTER_IDT_ALIAS (VM_MIN_KERNEL_ADDRESS + 0x0000)
+#define MASTER_GDT_ALIAS (VM_MIN_KERNEL_ADDRESS + 0x1000)
+
+/*
+ * The low global vector page is mapped at a fixed alias also.
+ */
+#define LOWGLOBAL_ALIAS (VM_MIN_KERNEL_ADDRESS + 0x2000)
+
+#endif /*__x86_64__ */
typedef volatile long cpu_set; /* set of CPUs - must be <= 32 */
/* changed by other processors */
-struct md_page {
- int pv_list_count;
- TAILQ_HEAD(,pv_entry) pv_list;
-};
-
#include <vm/vm_page.h>
/*
*/
struct pmap {
+ decl_simple_lock_data(,lock) /* lock on map */
+ pmap_paddr_t pm_cr3; /* physical addr */
+ boolean_t pm_shared;
pd_entry_t *dirbase; /* page directory pointer */
#ifdef __i386__
pmap_paddr_t pdirbase; /* phys. address of dirbase */
+ vm_offset_t pm_hold; /* true pdpt zalloc addr */
#endif
vm_object_t pm_obj; /* object to hold pde's */
- int ref_count; /* reference count */
- int nx_enabled;
task_map_t pm_task_map;
- decl_simple_lock_data(,lock) /* lock on map */
- struct pmap_statistics stats; /* map statistics */
-#ifdef __i386__
- vm_offset_t pm_hold; /* true pdpt zalloc addr */
-#endif
- pmap_paddr_t pm_cr3; /* physical addr */
pdpt_entry_t *pm_pdpt; /* KVA of 3rd level page */
pml4_entry_t *pm_pml4; /* KVA of top level */
vm_object_t pm_obj_pdpt; /* holds pdpt pages */
vm_object_t pm_obj_pml4; /* holds pml4 pages */
- vm_object_t pm_obj_top; /* holds single top level page */
- boolean_t pm_shared;
+#define PMAP_PCID_MAX_CPUS (48) /* Must be a multiple of 8 */
+ pcid_t pmap_pcid_cpus[PMAP_PCID_MAX_CPUS]; /* PMAP_PCID_MAX_CPUS pcid_t slots; presumably one per cpu number - TODO confirm */
+ volatile uint8_t pmap_pcid_coherency_vector[PMAP_PCID_MAX_CPUS]; /* one volatile byte per slot, same length as pmap_pcid_cpus */
+ struct pmap_statistics stats; /* map statistics */
+ int ref_count; /* reference count */
+ int nx_enabled;
+ ledger_t ledger; /* ledger tracking phys mappings */
};
#endif
typedef struct pmap_memory_regions {
- ppnum_t base;
- ppnum_t end;
- ppnum_t alloc;
- uint32_t type;
+ ppnum_t base;
+ ppnum_t end;
+ ppnum_t alloc;
+ uint32_t type;
+ uint64_t attribute;
} pmap_memory_region_t;
extern unsigned pmap_memory_region_count;
#define PMAP_MEMORY_REGIONS_SIZE 128
extern pmap_memory_region_t pmap_memory_regions[];
+#include <i386/pmap_pcid.h>
static inline void
set_dirbase(pmap_t tpmap, __unused thread_t thread) {
- current_cpu_datap()->cpu_task_cr3 = tpmap->pm_cr3;
- current_cpu_datap()->cpu_task_map = tpmap->pm_task_map;
+ int ccpu = cpu_number(); /* fetch the cpu number once; every per-cpu access below reuses it */
+ cpu_datap(ccpu)->cpu_task_cr3 = tpmap->pm_cr3; /* record tpmap's cr3 in this cpu's data */
+ cpu_datap(ccpu)->cpu_task_map = tpmap->pm_task_map; /* record tpmap's task map type alongside it */
#ifndef __i386__
/*
* Switch cr3 if necessary
* - unless running with no_shared_cr3 debugging mode
* and we're not on the kernel's cr3 (after pre-empted copyio)
*/
- if (!no_shared_cr3) {
- if (get_cr3() != tpmap->pm_cr3)
- set_cr3(tpmap->pm_cr3);
+ if (__probable(!no_shared_cr3)) {
+ if (get_cr3_base() != tpmap->pm_cr3) { /* nothing to do when get_cr3_base() already equals tpmap's cr3 */
+ if (pmap_pcid_ncpus) { /* non-zero pmap_pcid_ncpus selects the pmap_pcid_activate() path */
+ pmap_pcid_activate(tpmap, ccpu);
+ }
+ else
+ set_cr3_raw(tpmap->pm_cr3); /* otherwise hand tpmap's cr3 straight to set_cr3_raw() */
+ }
} else {
- if (get_cr3() != current_cpu_datap()->cpu_kernel_cr3)
- set_cr3(current_cpu_datap()->cpu_kernel_cr3);
+ if (get_cr3_base() != cpu_datap(ccpu)->cpu_kernel_cr3) /* no_shared_cr3 set: compare against this cpu's cpu_kernel_cr3 instead */
+ set_cr3_raw(cpu_datap(ccpu)->cpu_kernel_cr3);
}
#endif
}
extern addr64_t (kvtophys)(
vm_offset_t addr);
-extern void pmap_expand(
+extern kern_return_t pmap_expand(
pmap_t pmap,
- vm_map_offset_t addr);
-
+ vm_map_offset_t addr,
+ unsigned int options);
+#if !defined(__x86_64__)
extern pt_entry_t *pmap_pte(
struct pmap *pmap,
vm_map_offset_t addr);
extern pdpt_entry_t *pmap64_pdpt(
struct pmap *pmap,
vm_map_offset_t addr);
-
+#endif
extern vm_offset_t pmap_map(
vm_offset_t virt,
vm_map_offset_t start,
struct pmap *pmap,
vm_offset_t *listp,
int space);
-
+extern void x86_filter_TLB_coherency_interrupts(boolean_t);
#ifdef __i386__
extern void pmap_commpage32_init(
vm_offset_t kernel,
int count);
#endif
-
+/*
+ * Get cache attributes (as pagetable bits) for the specified phys page
+ */
+extern unsigned pmap_get_cache_attributes(ppnum_t);
#if NCOPY_WINDOWS > 0
extern struct cpu_pmap *pmap_cpu_alloc(
boolean_t is_boot_cpu);
extern vm_offset_t pmap_high_shared_remap(enum high_fixed_addresses, vm_offset_t, int);
#endif
-extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *, int *, int *);
-
-
+extern void pt_fake_zone_init(int);
+extern void pt_fake_zone_info(int *, vm_size_t *, vm_size_t *, vm_size_t *, vm_size_t *,
+ uint64_t *, int *, int *, int *);
+extern void pmap_pagetable_corruption_msg_log(int (*)(const char * fmt, ...)__printflike(1,2));
/*
* Macros for speed.
#define PMAP_DEACTIVATE_MAP(map, thread) \
if (vm_map_pmap(map)->pm_task_map == TASK_MAP_64BIT_SHARED) \
pmap_load_kernel_cr3();
+#elif defined(__x86_64__)
+/*
+ * x86_64: deactivation only performs a sanity check.  When
+ * pmap_pcid_ncpus is non-zero, assert that the value returned by
+ * pcid_for_pmap_cpu_tuple() for this map's pmap and the current cpu
+ * equals the low 12 bits of the raw cr3.  `thread` is unused.
+ *
+ * `map` is parenthesized so that any pointer-valued expression may be
+ * passed.  The trailing semicolon is retained for existing call sites.
+ */
+#define PMAP_DEACTIVATE_MAP(map, thread) \
+ pmap_assert(pmap_pcid_ncpus ? (pcid_for_pmap_cpu_tuple((map)->pmap, cpu_number()) == (get_cr3_raw() & 0xFFF)) : TRUE);
#else
-#define PMAP_DEACTIVATE_MAP(map, my_cpu)
+#define PMAP_DEACTIVATE_MAP(map, thread)
#endif
#if defined(__i386__)
#else /* __x86_64__ */
#define PMAP_SWITCH_CONTEXT(old_th, new_th, my_cpu) { \
- spl_t spl; \
\
- spl = splhigh(); \
+ pmap_assert(ml_get_interrupts_enabled() == FALSE); \
if (old_th->map != new_th->map) { \
PMAP_DEACTIVATE_MAP(old_th->map, old_th); \
PMAP_ACTIVATE_MAP(new_th->map, new_th); \
} \
- splx(spl); \
}
#endif /* __i386__ */
-#ifdef __i386__
+#if NCOPY_WINDOWS > 0
#define PMAP_SWITCH_USER(th, new_map, my_cpu) { \
spl_t spl; \
\
th->map = new_map; \
PMAP_ACTIVATE_MAP(th->map, th); \
splx(spl); \
- inval_copy_windows(th); \
+ inval_copy_windows(th); \
}
#else
#define PMAP_SWITCH_USER(th, new_map, my_cpu) { \
* Marking the current cpu's cr3 inactive is achieved by setting its lsb.
* Marking the current cpu's cr3 active once more involves clearing this bit.
* Note that valid page tables are page-aligned and so the bottom 12 bits
- * are noramlly zero.
+ * are normally zero, modulo PCID.
* We can only mark the current cpu active/inactive but we can test any cpu.
*/
#define CPU_CR3_MARK_INACTIVE() \
*/
#if defined(__x86_64__)
#define MARK_CPU_IDLE(my_cpu) { \
- int s = splhigh(); \
+ assert(ml_get_interrupts_enabled() == FALSE); \
CPU_CR3_MARK_INACTIVE(); \
__asm__ volatile("mfence"); \
- splx(s); \
}
#else /* __i386__ native */
#define MARK_CPU_IDLE(my_cpu) { \
+ assert(ml_get_interrupts_enabled() == FALSE); \
/* \
* Mark this cpu idle, and remove it from the active set, \
* since it is not actively using any pmap. Signal_cpus \
* but will queue the update request for when the cpu \
* becomes active. \
*/ \
- int s = splhigh(); \
if (!cpu_mode_is64bit() || no_shared_cr3) \
process_pmap_updates(); \
else \
pmap_load_kernel_cr3(); \
CPU_CR3_MARK_INACTIVE(); \
__asm__ volatile("mfence"); \
- splx(s); \
}
#endif /* __i386__ */
#define MARK_CPU_ACTIVE(my_cpu) { \
- \
- int s = splhigh(); \
+ assert(ml_get_interrupts_enabled() == FALSE); \
/* \
* If a kernel_pmap update was requested while this cpu \
* was idle, process it as if we got the interrupt. \
\
if (current_cpu_datap()->cpu_tlb_invalid) \
process_pmap_updates(); \
- splx(s); \
}
#define PMAP_CONTEXT(pmap, thread)
#define pmap_attribute_cache_sync(addr,size,attr,value) \
(KERN_INVALID_ADDRESS)
-#define MACHINE_PMAP_IS_EMPTY 1
+#define MACHINE_PMAP_IS_EMPTY 1
extern boolean_t pmap_is_empty(pmap_t pmap,
vm_map_offset_t start,
vm_map_offset_t end);
+#define MACHINE_BOOTSTRAPPTD 1 /* Static bootstrap page-tables */
+
+kern_return_t
+pmap_permissions_verify(pmap_t, vm_map_t, vm_offset_t, vm_offset_t);
#endif /* ASSEMBLER */