*/
#include <string.h>
-#include <norma_vm.h>
#include <mach_kdb.h>
#include <mach_ldebug.h>
#include <i386/mp_desc.h>
-/* #define DEBUGINTERRUPTS 1 uncomment to ensure pmap callers have interrupts enabled */
-#ifdef DEBUGINTERRUPTS
-#define pmap_intr_assert() { \
- if (processor_avail_count > 1 && !ml_get_interrupts_enabled()) \
- panic("pmap interrupt assert %s, %d",__FILE__, __LINE__); \
-}
-#else
-#define pmap_intr_assert()
-#endif
#ifdef IWANTTODEBUG
#undef DEBUG
* Forward declarations for internal functions.
*/
-void pmap_remove_range(
- pmap_t pmap,
- vm_map_offset_t va,
- pt_entry_t *spte,
- pt_entry_t *epte);
void phys_attribute_clear(
ppnum_t phys,
const boolean_t cpu_64bit = TRUE; /* Mais oui! */
-/*
- * when spinning through pmap_remove
- * ensure that we don't spend too much
- * time with preemption disabled.
- * I'm setting the current threshold
- * to 20us
- */
-#define MAX_PREEMPTION_LATENCY_NS 20000
-
uint64_t max_preemption_latency_tsc = 0;
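/*
 * Editorial sketch (not part of the original source): the nanosecond
 * threshold above has to be converted into TSC ticks once so that
 * pmap_remove() below can compare rdtsc64() against a precomputed deadline.
 * Assuming the usual i386 timer helpers (tmrCvt() with the ns-to-TSC factor
 * tscFCvtn2t), the bootstrap code would do something like:
 *
 *	max_preemption_latency_tsc =
 *		tmrCvt((uint64_t)MAX_PREEMPTION_LATENCY_NS, tscFCvtn2t);
 */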
-
-/*
- * Private data structures.
- */
-
-/*
- * For each vm_page_t, there is a list of all currently
- * valid virtual mappings of that page. An entry is
- * a pv_rooted_entry_t; the list is the pv_table.
- *
- * N.B. with the new combo rooted/hashed scheme it is
- * only possible to remove individual non-rooted entries
- * if they are found via the hashed chains as there is no
- * way to unlink the singly linked hashed entries if navigated to
- * via the queue list off the rooted entries. Think of it as
- * hash/walk/pull, keeping track of the prev pointer while walking
- * the singly linked hash list. All of this is to save memory and
- * keep both types of pv_entries as small as possible.
- */
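/*
 * Editorial sketch (not in the original source) of the "hash/walk/pull"
 * removal described above; pmap_pvh_unlink() below is the real
 * implementation. Hash [pmap, va] to a bucket, walk the singly linked
 * nexth chain while keeping a pointer to the previous link, then splice
 * the entry out:
 *
 *	pprevh = pvhash(pvhashidx(pvh->pmap, pvh->va));
 *	for (curh = *pprevh; curh != PV_HASHED_ENTRY_NULL; curh = curh->nexth) {
 *		if (curh == pvh)
 *			break;
 *		pprevh = &curh->nexth;
 *	}
 *	*pprevh = pvh->nexth;
 */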
-
-/*
-
-PV HASHING Changes - JK 1/2007
-
-Pve's establish physical to virtual mappings. These are used for aliasing of a
-physical page to (potentially many) virtual addresses within pmaps. In the
-previous implementation the structure of the pv_entries (each 16 bytes in size) was
-
-typedef struct pv_entry {
- struct pv_entry *next;
- pmap_t pmap;
- vm_map_offset_t va;
-} *pv_entry_t;
-
-An initial array of these is created at boot time, one per physical page of
-memory, indexed by the physical page number. Additionally, a pool of entries
-is created from a pv_zone to be used as needed by pmap_enter() when it is
-creating new mappings. Originally, we kept this pool around because the code
-in pmap_enter() was unable to block if it needed an entry and none were
-available - we'd panic. Some time ago I restructured the pmap_enter() code
-so that for user pmaps it can block while zalloc'ing a pv structure and restart,
-removing a panic from the code (in the case of the kernel pmap we cannot block,
-so we still panic; hence we keep a separate hot pool for use only on kernel pmaps).
-The pool has not been removed since there is a large performance gain keeping
-freed pv's around for reuse and not suffering the overhead of zalloc for every
-new pv we need.
-
-As pmap_enter() created new mappings it linked the new pve's for them off the
-fixed pv array for that ppn (off the next pointer). These pve's are accessed
-for several operations, one of them being address space teardown. In that case,
-we basically do this
-
- for (every page/pte in the space) {
- calc pve_ptr from the ppn in the pte
- for (every pv in the list for the ppn) {
- if (this pv is for this pmap/vaddr) {
- do housekeeping
- unlink/free the pv
- }
- }
- }
-
-The problem arose when we were running, say, 8000 (or even 2000) apache or
-other processes and one or all of them terminated. The list hanging off each
-pv array entry could have thousands of entries. We were continuously and
-linearly searching each of these lists as we stepped through the address
-space we were tearing down. Because of the locks we held, the likely cache
-miss for each node, and the interrupt disabling needed for MP safety, the
-system became completely unresponsive for many seconds while we did this.
-
-Realizing that pve's are accessed in two distinct ways (linearly running the
-list by ppn for operations like pmap_page_protect and finding and
-modifying/removing a single pve as part of pmap_enter processing) has led to
-modifying the pve structures and databases.
-
-There are now two types of pve structures. A "rooted" structure which is
-basically the original structure accessed in an array by ppn, and a "hashed"
-structure accessed on a hash list via a hash of [pmap, vaddr]. These have been
-designed with the two goals of minimizing wired memory and making the lookup of
-a ppn faster. Since the vast majority of pages in the system are not aliased,
-and hence are represented by a single pv entry, I've kept the rooted entry size
-as small as possible because there is one of these dedicated to every physical
-page of memory. The hashed pve's are larger due to the addition of the hash
-link and the ppn entry needed for matching while running the hash list to find
-the entry we are looking for. This way, only systems that have lots of
-aliasing (like 2000+ httpd procs) will pay the extra memory price. Both
-structures have the same first three fields allowing some simplification in
-the code.
-
-They have these shapes
-
-typedef struct pv_rooted_entry {
- queue_head_t qlink;
- vm_map_offset_t va;
- pmap_t pmap;
-} *pv_rooted_entry_t;
-
-
-typedef struct pv_hashed_entry {
- queue_head_t qlink;
- vm_map_offset_t va;
- pmap_t pmap;
- ppnum_t ppn;
- struct pv_hashed_entry *nexth;
-} *pv_hashed_entry_t;
-
-The main flow difference is that the code is now aware of the rooted entry and
-the hashed entries. Code that runs the pv list still starts with the rooted
-entry and then continues down the qlink onto the hashed entries. Code that is
-looking up a specific pv entry first checks the rooted entry and then hashes
-and runs the hash list for the match. The hash list lengths are much smaller
-than the original pv lists that contained all aliases for the specific ppn.
-
-*/
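/*
 * Editorial sketch (not in the original source) of the two access patterns
 * described above, using the macros and helpers defined below:
 *
 * Running every mapping of a physical page (pmap_page_protect() style):
 *	pv_h = pai_to_pvh(ppn_to_pai(ppn));
 *	walk pv_h->qlink, treating each element after the rooted head as a
 *	pv_hashed_entry_t;
 *
 * Finding one specific mapping (pmap_enter()/pmap_pv_remove() style):
 *	pvhash_idx = pvhashidx(pmap, vaddr);
 *	for (pvh_e = *pvhash(pvhash_idx); pvh_e != PV_HASHED_ENTRY_NULL;
 *	     pvh_e = pvh_e->nexth)
 *		if (pvh_e->pmap == pmap && pvh_e->va == vaddr &&
 *		    pvh_e->ppn == ppn)
 *			break;
 */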
-
-typedef struct pv_rooted_entry {
- /* first three entries must match pv_hashed_entry_t */
- queue_head_t qlink;
- vm_map_offset_t va; /* virtual address for mapping */
- pmap_t pmap; /* pmap where mapping lies */
-} *pv_rooted_entry_t;
-
-#define PV_ROOTED_ENTRY_NULL ((pv_rooted_entry_t) 0)
-
-pv_rooted_entry_t pv_head_table; /* array of entries, one per page */
-
-typedef struct pv_hashed_entry {
- /* first three entries must match pv_rooted_entry_t */
- queue_head_t qlink;
- vm_map_offset_t va;
- pmap_t pmap;
- ppnum_t ppn;
- struct pv_hashed_entry *nexth;
-} *pv_hashed_entry_t;
-
-#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0)
-
-#define NPVHASH 4095 /* MUST BE 2^N - 1 */
pv_hashed_entry_t *pv_hash_table; /* hash lists */
uint32_t npvhash = 0;
-//#define PV_DEBUG 1 /* uncomment to enable some PV debugging code */
-#ifdef PV_DEBUG
-#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized");
-#else
-#define CHK_NPVHASH()
-#endif
-
pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL;
pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL;
decl_simple_lock_data(,pv_hashed_free_list_lock)
int pv_hashed_free_count = 0;
int pv_hashed_kern_free_count = 0;
-#define PV_HASHED_LOW_WATER_MARK 5000
-#define PV_HASHED_KERN_LOW_WATER_MARK 100
-#define PV_HASHED_ALLOC_CHUNK 2000
-#define PV_HASHED_KERN_ALLOC_CHUNK 50
-thread_call_t mapping_adjust_call;
-static thread_call_data_t mapping_adjust_call_data;
-uint32_t mappingrecurse = 0;
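/*
 * Editorial note (not in the original source): the low-water marks and
 * chunk sizes above drive the replenishment path. When an allocation in
 * PV_HASHED_ALLOC()/PV_HASHED_KERN_ALLOC() below drops a free list under
 * its low-water mark, a one-shot thread call is armed (mappingrecurse is
 * the re-entry guard) and mapping_adjust() later refills the list from
 * pv_hashed_list_zone in *_ALLOC_CHUNK-sized batches:
 *
 *	if (hw_compare_and_store(0, 1, (u_int *)&mappingrecurse))
 *		thread_call_enter(mapping_adjust_call);
 */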
-
-#define PV_HASHED_ALLOC(pvh_e) { \
- simple_lock(&pv_hashed_free_list_lock); \
- if ((pvh_e = pv_hashed_free_list) != 0) { \
- pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \
- pv_hashed_free_count--; \
- if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) \
- if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
- thread_call_enter(mapping_adjust_call); \
- } \
- simple_unlock(&pv_hashed_free_list_lock); \
-}
-
-#define PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) { \
- simple_lock(&pv_hashed_free_list_lock); \
- pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list; \
- pv_hashed_free_list = pvh_eh; \
- pv_hashed_free_count += pv_cnt; \
- simple_unlock(&pv_hashed_free_list_lock); \
-}
-
-#define PV_HASHED_KERN_ALLOC(pvh_e) { \
- simple_lock(&pv_hashed_kern_free_list_lock); \
- if ((pvh_e = pv_hashed_kern_free_list) != 0) { \
- pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \
- pv_hashed_kern_free_count--; \
- if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK)\
- if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \
- thread_call_enter(mapping_adjust_call); \
- } \
- simple_unlock(&pv_hashed_kern_free_list_lock); \
-}
-#define PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) { \
- simple_lock(&pv_hashed_kern_free_list_lock); \
- pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list; \
- pv_hashed_kern_free_list = pvh_eh; \
- pv_hashed_kern_free_count += pv_cnt; \
- simple_unlock(&pv_hashed_kern_free_list_lock); \
-}
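/*
 * Editorial sketch (not in the original source): callers such as
 * pmap_remove_range() below batch entries destined for the free list by
 * chaining them through qlink.next, remembering the head and the tail,
 * and then return the whole chain under a single lock acquisition:
 *
 *	pvh_e->qlink.next = (queue_entry_t) pvh_eh;
 *	pvh_eh = pvh_e;
 *	if (pvh_et == PV_HASHED_ENTRY_NULL)
 *		pvh_et = pvh_e;
 *	pvh_cnt++;
 *	...
 *	if (pvh_eh != PV_HASHED_ENTRY_NULL)
 *		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
 */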
zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry structures */
*/
char *pv_lock_table; /* pointer to array of bits */
-#define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+
char *pv_hash_lock_table;
-#define pv_hash_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE)
+
/*
* First and last physical addresses that we maintain any information
static struct vm_object kpdptobj_object_store;
/*
- * Index into pv_head table, its lock bits, and the modify/reference and managed bits
- */
-
-#define pa_index(pa) (i386_btop(pa))
-#define ppn_to_pai(ppn) ((int)ppn)
-
-#define pai_to_pvh(pai) (&pv_head_table[pai])
-#define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table)
-#define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table)
-
-static inline uint32_t
-pvhashidx(pmap_t pmap, vm_offset_t va)
-{
- return ((uint32_t)(uint64_t)pmap ^
- ((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) &
- npvhash;
-}
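/*
 * Editorial note (not in the original source): because npvhash is of the
 * form 2^N - 1 (NPVHASH == 4095), the final "& npvhash" above is a cheap
 * equivalent of "% (npvhash + 1)"; the bucket index is simply the low 12
 * bits of (pmap ^ (va >> PAGE_SHIFT)).
 */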
-#define pvhash(idx) (&pv_hash_table[idx])
-
-#define lock_hash_hash(hash) bit_lock(hash, (void *)pv_hash_lock_table)
-#define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table)
-
-/*
- * Array of physical page attribites for managed pages.
+ * Array of physical page attributes for managed pages.
* One byte per physical page.
*/
char *pmap_phys_attributes;
unsigned int last_managed_page = 0;
-#define IS_MANAGED_PAGE(x) \
- ((unsigned int)(x) <= last_managed_page && \
- (pmap_phys_attributes[x] & PHYS_MANAGED))
-
-/*
- * Physical page attributes. Copy bits from PTE definition.
- */
-#define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */
-#define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */
-#define PHYS_MANAGED INTEL_PTE_VALID /* page is managed */
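/*
 * Editorial note (not in the original source): PHYS_MODIFIED and
 * PHYS_REFERENCED reuse the hardware dirty/accessed PTE bit values, and
 * both fit in the low byte, which is what lets callers below fold the PTE
 * bits straight into the attribute byte, e.g. in pmap_remove_range():
 *
 *	pmap_phys_attributes[pai] |=
 *	    (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
 */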
-
-/*
- * Amount of virtual memory mapped by one
- * page-directory entry.
- */
-#define PDE_MAPPED_SIZE (pdetova(1))
uint64_t pde_mapped_size = PDE_MAPPED_SIZE;
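/*
 * Editorial note (not in the original source): with 64-bit/PAE page tables
 * (512 8-byte PTEs per page-table page, 4KB pages) PDE_MAPPED_SIZE works
 * out to 2MB. pmap_remove() below uses it to clamp each pass to one pde's
 * worth of address space before checking its preemption deadline:
 *
 *	l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
 *	if (l64 > e64)
 *		l64 = e64;
 */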
-/*
- * Locking and TLB invalidation
- */
-
-/*
- * Locking Protocols: (changed 2/2007 JK)
- *
- * There are two structures in the pmap module that need locking:
- * the pmaps themselves, and the per-page pv_lists (which are locked
- * by locking the pv_lock_table entry that corresponds to the pv_head
- * for the list in question.) Most routines want to lock a pmap and
- * then do operations in it that require pv_list locking -- however
- * pmap_remove_all and pmap_copy_on_write operate on a physical page
- * basis and want to do the locking in the reverse order, i.e. lock
- * a pv_list and then go through all the pmaps referenced by that list.
- *
- * The system wide pmap lock has been removed. Now, paths take a lock
- * on the pmap before changing its 'shape' and the reverse order lockers
- * (coming in by phys ppn) take a lock on the corresponding pv and then
- * retest to be sure nothing changed during the window before they locked
- * and can then run up/down the pv lists holding the list lock. This also
- * lets the pmap layer run (nearly completely) with interrupts enabled, unlike
- * previously.
- */
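/*
 * Editorial sketch (not in the original source) of the reverse-order
 * retest described above, as done by the physically indexed paths below
 * (pmap_remove_range(), pmap_enter()): after the pv lock is taken, the PTE
 * is re-read to make sure the mapping did not change while waiting:
 *
 *	LOCK_PVH(pai);
 *	pa = pte_to_pa(*cpte);
 *	if (pa == 0) {
 *		UNLOCK_PVH(pai);
 *		continue;
 *	}
 */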
-
-/*
- * PV locking
- */
-
-#define LOCK_PVH(index) { \
- mp_disable_preemption(); \
- lock_pvh_pai(index); \
-}
-
-#define UNLOCK_PVH(index) { \
- unlock_pvh_pai(index); \
- mp_enable_preemption(); \
-}
-/*
- * PV hash locking
- */
-
-#define LOCK_PV_HASH(hash) lock_hash_hash(hash)
-#define UNLOCK_PV_HASH(hash) unlock_hash_hash(hash)
-
unsigned pmap_memory_region_count;
unsigned pmap_memory_region_current;
struct zone *pmap_zone; /* zone of pmap structures */
-int pmap_debug = 0; /* flag for debugging prints */
-
unsigned int inuse_ptepages_count = 0;
addr64_t kernel64_cr3;
pt_entry_t *DMAP1, *DMAP2;
caddr_t DADDR1;
caddr_t DADDR2;
-
-/*
- * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain.
- * properly deals with the anchor.
- * must be called with the hash locked, does not unlock it
- */
-
-static inline void
-pmap_pvh_unlink(pv_hashed_entry_t pvh)
-{
- pv_hashed_entry_t curh;
- pv_hashed_entry_t *pprevh;
- int pvhash_idx;
-
- CHK_NPVHASH();
- pvhash_idx = pvhashidx(pvh->pmap, pvh->va);
-
- pprevh = pvhash(pvhash_idx);
-
-#if PV_DEBUG
- if (NULL == *pprevh)
- panic("pvh_unlink null anchor"); /* JK DEBUG */
-#endif
- curh = *pprevh;
-
- while (PV_HASHED_ENTRY_NULL != curh) {
- if (pvh == curh)
- break;
- pprevh = &curh->nexth;
- curh = curh->nexth;
- }
- if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh");
- *pprevh = pvh->nexth;
- return;
-}
-
-static inline void
-pv_hash_add(pv_hashed_entry_t pvh_e,
- pv_rooted_entry_t pv_h)
-{
- pv_hashed_entry_t *hashp;
- int pvhash_idx;
-
- CHK_NPVHASH();
- pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
- LOCK_PV_HASH(pvhash_idx);
- insque(&pvh_e->qlink, &pv_h->qlink);
- hashp = pvhash(pvhash_idx);
-#if PV_DEBUG
- if (NULL==hashp)
- panic("pv_hash_add(%p) null hash bucket", pvh_e);
-#endif
- pvh_e->nexth = *hashp;
- *hashp = pvh_e;
- UNLOCK_PV_HASH(pvhash_idx);
-}
-
-static inline void
-pv_hash_remove(pv_hashed_entry_t pvh_e)
-{
- int pvhash_idx;
-
- CHK_NPVHASH();
- pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va);
- LOCK_PV_HASH(pvhash_idx);
- remque(&pvh_e->qlink);
- pmap_pvh_unlink(pvh_e);
- UNLOCK_PV_HASH(pvhash_idx);
-}
-
-/*
- * Remove pv list entry.
- * Called with pv_head_table entry locked.
- * Returns pv entry to be freed (or NULL).
- */
-static inline pv_hashed_entry_t
-pmap_pv_remove(pmap_t pmap,
- vm_map_offset_t vaddr,
- ppnum_t ppn)
-{
- pv_hashed_entry_t pvh_e;
- pv_rooted_entry_t pv_h;
- pv_hashed_entry_t *pprevh;
- int pvhash_idx;
- uint32_t pv_cnt;
-
- pvh_e = PV_HASHED_ENTRY_NULL;
- pv_h = pai_to_pvh(ppn_to_pai(ppn));
- if (pv_h->pmap == PMAP_NULL)
- panic("pmap_pv_remove(%p,%llu,%u): null pv_list!",
- pmap, vaddr, ppn);
-
- if (pv_h->va == vaddr && pv_h->pmap == pmap) {
- /*
- * Header is the pv_rooted_entry.
- * We can't free that. If there is a queued
- * entry after this one we remove that
- * from the ppn queue, we remove it from the hash chain
- * and copy it to the rooted entry. Then free it instead.
- */
- pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
- if (pv_h != (pv_rooted_entry_t) pvh_e) {
- /*
- * Entry queued to root, remove this from hash
- * and install as new root.
- */
- CHK_NPVHASH();
- pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va);
- LOCK_PV_HASH(pvhash_idx);
- remque(&pvh_e->qlink);
- pprevh = pvhash(pvhash_idx);
- if (PV_HASHED_ENTRY_NULL == *pprevh) {
- panic("pmap_pv_remove(%p,%llu,%u): "
- "empty hash, removing rooted",
- pmap, vaddr, ppn);
- }
- pmap_pvh_unlink(pvh_e);
- UNLOCK_PV_HASH(pvhash_idx);
- pv_h->pmap = pvh_e->pmap;
- pv_h->va = pvh_e->va; /* dispose of pvh_e */
- } else {
- /* none queued after rooted */
- pv_h->pmap = PMAP_NULL;
- pvh_e = PV_HASHED_ENTRY_NULL;
- }
- } else {
- /*
- * not removing rooted pv. find it on hash chain, remove from
- * ppn queue and hash chain and free it
- */
- CHK_NPVHASH();
- pvhash_idx = pvhashidx(pmap, vaddr);
- LOCK_PV_HASH(pvhash_idx);
- pprevh = pvhash(pvhash_idx);
- if (PV_HASHED_ENTRY_NULL == *pprevh) {
- panic("pmap_pv_remove(%p,%llu,%u): empty hash",
- pmap, vaddr, ppn);
- }
- pvh_e = *pprevh;
- pmap_pv_hashlist_walks++;
- pv_cnt = 0;
- while (PV_HASHED_ENTRY_NULL != pvh_e) {
- pv_cnt++;
- if (pvh_e->pmap == pmap &&
- pvh_e->va == vaddr &&
- pvh_e->ppn == ppn)
- break;
- pprevh = &pvh_e->nexth;
- pvh_e = pvh_e->nexth;
- }
- if (PV_HASHED_ENTRY_NULL == pvh_e)
- panic("pmap_pv_remove(%p,%llu,%u): pv not on hash",
- pmap, vaddr, ppn);
- pmap_pv_hashlist_cnts += pv_cnt;
- if (pmap_pv_hashlist_max < pv_cnt)
- pmap_pv_hashlist_max = pv_cnt;
- *pprevh = pvh_e->nexth;
- remque(&pvh_e->qlink);
- UNLOCK_PV_HASH(pvhash_idx);
- }
-
- return pvh_e;
-}
-
/*
* for legacy, returns the address of the pde entry.
* for 64 bit, causes the pdpt page containing the pde entry to be mapped,
}
}
-/*
- * Remove a range of hardware page-table entries.
- * The entries given are the first (inclusive)
- * and last (exclusive) entries for the VM pages.
- * The virtual address is the va for the first pte.
- *
- * The pmap must be locked.
- * If the pmap is not the kernel pmap, the range must lie
- * entirely within one pte-page. This is NOT checked.
- * Assumes that the pte-page exists.
- */
-
-void
-pmap_remove_range(
- pmap_t pmap,
- vm_map_offset_t start_vaddr,
- pt_entry_t *spte,
- pt_entry_t *epte)
-{
- pt_entry_t *cpte;
- pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
- pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
- pv_hashed_entry_t pvh_e;
- int pvh_cnt = 0;
- int num_removed, num_unwired, num_found;
- int pai;
- pmap_paddr_t pa;
- vm_map_offset_t vaddr;
-
- num_removed = 0;
- num_unwired = 0;
- num_found = 0;
-
- /* invalidate the PTEs first to "freeze" them */
- for (cpte = spte, vaddr = start_vaddr;
- cpte < epte;
- cpte++, vaddr += PAGE_SIZE_64) {
-
- pa = pte_to_pa(*cpte);
- if (pa == 0)
- continue;
- num_found++;
-
- if (iswired(*cpte))
- num_unwired++;
-
- pai = pa_index(pa);
-
- if (!IS_MANAGED_PAGE(pai)) {
- /*
- * Outside range of managed physical memory.
- * Just remove the mappings.
- */
- pmap_store_pte(cpte, 0);
- continue;
- }
-
- /* invalidate the PTE */
- pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
- }
-
- if (num_found == 0) {
- /* nothing was changed: we're done */
- goto update_counts;
- }
-
- /* propagate the invalidates to other CPUs */
-
- PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
-
- for (cpte = spte, vaddr = start_vaddr;
- cpte < epte;
- cpte++, vaddr += PAGE_SIZE_64) {
-
- pa = pte_to_pa(*cpte);
- if (pa == 0)
- continue;
-
- pai = pa_index(pa);
-
- LOCK_PVH(pai);
-
- pa = pte_to_pa(*cpte);
- if (pa == 0) {
- UNLOCK_PVH(pai);
- continue;
- }
- num_removed++;
-
- /*
- * Get the modify and reference bits, then
- * nuke the entry in the page table
- */
- /* remember reference and change */
- pmap_phys_attributes[pai] |=
- (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
- /* completely invalidate the PTE */
- pmap_store_pte(cpte, 0);
-
- /*
- * Remove the mapping from the pvlist for this physical page.
- */
- pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t) pai);
-
- UNLOCK_PVH(pai);
-
- if (pvh_e != PV_HASHED_ENTRY_NULL) {
- pvh_e->qlink.next = (queue_entry_t) pvh_eh;
- pvh_eh = pvh_e;
-
- if (pvh_et == PV_HASHED_ENTRY_NULL) {
- pvh_et = pvh_e;
- }
- pvh_cnt++;
- }
- } /* for loop */
-
- if (pvh_eh != PV_HASHED_ENTRY_NULL) {
- PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
- }
-update_counts:
- /*
- * Update the counts
- */
-#if TESTING
- if (pmap->stats.resident_count < num_removed)
- panic("pmap_remove_range: resident_count");
-#endif
- assert(pmap->stats.resident_count >= num_removed);
- OSAddAtomic(-num_removed, &pmap->stats.resident_count);
-
-#if TESTING
- if (pmap->stats.wired_count < num_unwired)
- panic("pmap_remove_range: wired_count");
-#endif
- assert(pmap->stats.wired_count >= num_unwired);
- OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
-
- return;
-}
-
/*
* Remove phys addr if mapped in specified map
*
}
-/*
- * Remove the given range of addresses
- * from the specified map.
- *
- * It is assumed that the start and end are properly
- * rounded to the hardware page size.
- */
-void
-pmap_remove(
- pmap_t map,
- addr64_t s64,
- addr64_t e64)
-{
- pt_entry_t *pde;
- pt_entry_t *spte, *epte;
- addr64_t l64;
- uint64_t deadline;
-
- pmap_intr_assert();
-
- if (map == PMAP_NULL || s64 == e64)
- return;
-
- PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
- map,
- (uint32_t) (s64 >> 32), s64,
- (uint32_t) (e64 >> 32), e64);
-
-
- PMAP_LOCK(map);
-
-#if 0
- /*
- * Check that address range in the kernel does not overlap the stacks.
- * We initialize local static min/max variables once to avoid making
- * 2 function calls for every remove. Note also that these functions
- * both return 0 before kernel stacks have been initialized, and hence
- * the panic is not triggered in this case.
- */
- if (map == kernel_pmap) {
- static vm_offset_t kernel_stack_min = 0;
- static vm_offset_t kernel_stack_max = 0;
-
- if (kernel_stack_min == 0) {
- kernel_stack_min = min_valid_stack_address();
- kernel_stack_max = max_valid_stack_address();
- }
- if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
- (kernel_stack_min < e64 && e64 <= kernel_stack_max))
- panic("pmap_remove() attempted in kernel stack");
- }
-#else
-
- /*
- * The values of kernel_stack_min and kernel_stack_max are no longer
- * relevant now that we allocate kernel stacks in the kernel map,
- * so the old code above no longer applies. If we wanted to check that
- * we weren't removing a mapping of a page in a kernel stack we'd
- * mark the PTE with an unused bit and check that here.
- */
-
-#endif
-
- deadline = rdtsc64() + max_preemption_latency_tsc;
-
- while (s64 < e64) {
- l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
- if (l64 > e64)
- l64 = e64;
- pde = pmap_pde(map, s64);
-
- if (pde && (*pde & INTEL_PTE_VALID)) {
- if (*pde & INTEL_PTE_PS) {
- /*
- * If we're removing a superpage, pmap_remove_range()
- * must work on level 2 instead of level 1; and we're
- * only passing a single level 2 entry instead of a
- * level 1 range.
- */
- spte = pde;
- epte = spte+1; /* excluded */
- } else {
- spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
- spte = &spte[ptenum(s64)];
- epte = &spte[intel_btop(l64 - s64)];
- }
- pmap_remove_range(map, s64, spte, epte);
- }
- s64 = l64;
- pde++;
-
- if (s64 < e64 && rdtsc64() >= deadline) {
- PMAP_UNLOCK(map)
- PMAP_LOCK(map)
- deadline = rdtsc64() + max_preemption_latency_tsc;
- }
- }
-
- PMAP_UNLOCK(map);
-
- PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
- map, 0, 0, 0, 0);
-
-}
-
-/*
- * Routine: pmap_page_protect
- *
- * Function:
- * Lower the permission for all mappings to a given
- * page.
- */
-void
-pmap_page_protect(
- ppnum_t pn,
- vm_prot_t prot)
-{
- pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
- pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
- pv_hashed_entry_t nexth;
- int pvh_cnt = 0;
- pv_rooted_entry_t pv_h;
- pv_rooted_entry_t pv_e;
- pv_hashed_entry_t pvh_e;
- pt_entry_t *pte;
- int pai;
- pmap_t pmap;
- boolean_t remove;
-
- pmap_intr_assert();
- assert(pn != vm_page_fictitious_addr);
- if (pn == vm_page_guard_addr)
- return;
-
- pai = ppn_to_pai(pn);
-
- if (!IS_MANAGED_PAGE(pai)) {
- /*
- * Not a managed page.
- */
- return;
- }
- PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
- pn, prot, 0, 0, 0);
-
- /*
- * Determine the new protection.
- */
- switch (prot) {
- case VM_PROT_READ:
- case VM_PROT_READ | VM_PROT_EXECUTE:
- remove = FALSE;
- break;
- case VM_PROT_ALL:
- return; /* nothing to do */
- default:
- remove = TRUE;
- break;
- }
-
- pv_h = pai_to_pvh(pai);
-
- LOCK_PVH(pai);
-
-
- /*
- * Walk down PV list, if any, changing or removing all mappings.
- */
- if (pv_h->pmap == PMAP_NULL)
- goto done;
-
- pv_e = pv_h;
- pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */
-
- do {
- vm_map_offset_t vaddr;
-
- pmap = pv_e->pmap;
- vaddr = pv_e->va;
- pte = pmap_pte(pmap, vaddr);
- if (0 == pte) {
- panic("pmap_page_protect() "
- "pmap=%p pn=0x%x vaddr=0x%llx\n",
- pmap, pn, vaddr);
- }
- nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
-
- /*
- * Remove the mapping if new protection is NONE
- * or if write-protecting a kernel mapping.
- */
- if (remove || pmap == kernel_pmap) {
- /*
- * Remove the mapping, collecting dirty bits.
- */
- pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID);
- PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
- pmap_phys_attributes[pai] |=
- *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
- pmap_store_pte(pte, 0);
-
-#if TESTING
- if (pmap->stats.resident_count < 1)
- panic("pmap_page_protect: resident_count");
-#endif
- assert(pmap->stats.resident_count >= 1);
- OSAddAtomic(-1, &pmap->stats.resident_count);
-
- /*
- * Deal with the pv_rooted_entry.
- */
-
- if (pv_e == pv_h) {
- /*
- * Fix up head later.
- */
- pv_h->pmap = PMAP_NULL;
- } else {
- /*
- * Delete this entry.
- */
- pv_hash_remove(pvh_e);
- pvh_e->qlink.next = (queue_entry_t) pvh_eh;
- pvh_eh = pvh_e;
-
- if (pvh_et == PV_HASHED_ENTRY_NULL)
- pvh_et = pvh_e;
- pvh_cnt++;
- }
- } else {
- /*
- * Write-protect.
- */
- pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE);
- PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
- }
- pvh_e = nexth;
- } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
-
-
- /*
- * If pv_head mapping was removed, fix it up.
- */
- if (pv_h->pmap == PMAP_NULL) {
- pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
-
- if (pvh_e != (pv_hashed_entry_t) pv_h) {
- pv_hash_remove(pvh_e);
- pv_h->pmap = pvh_e->pmap;
- pv_h->va = pvh_e->va;
- pvh_e->qlink.next = (queue_entry_t) pvh_eh;
- pvh_eh = pvh_e;
-
- if (pvh_et == PV_HASHED_ENTRY_NULL)
- pvh_et = pvh_e;
- pvh_cnt++;
- }
- }
- if (pvh_eh != PV_HASHED_ENTRY_NULL) {
- PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
- }
-done:
- UNLOCK_PVH(pai);
-
- PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
- 0, 0, 0, 0, 0);
-}
-
/*
* Routine:
}
}
-
-/*
- * Insert the given physical page (p) at
- * the specified virtual address (v) in the
- * target physical map with the protection requested.
- *
- * If specified, the page will be wired down, meaning
- * that the related pte cannot be reclaimed.
- *
- * NB: This is the only routine which MAY NOT lazy-evaluate
- * or lose information. That is, this routine must actually
- * insert this page into the given map NOW.
- */
-void
-pmap_enter(
- register pmap_t pmap,
- vm_map_offset_t vaddr,
- ppnum_t pn,
- vm_prot_t prot,
- unsigned int flags,
- boolean_t wired)
-{
- pt_entry_t *pte;
- pv_rooted_entry_t pv_h;
- int pai;
- pv_hashed_entry_t pvh_e;
- pv_hashed_entry_t pvh_new;
- pt_entry_t template;
- pmap_paddr_t old_pa;
- pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
- boolean_t need_tlbflush = FALSE;
- boolean_t set_NX;
- char oattr;
- boolean_t old_pa_locked;
- boolean_t superpage = flags & VM_MEM_SUPERPAGE;
- vm_object_t delpage_pm_obj = NULL;
- int delpage_pde_index = 0;
-
-
- pmap_intr_assert();
- assert(pn != vm_page_fictitious_addr);
- if (pmap_debug)
- kprintf("pmap_enter(%p,%llu,%u)\n", pmap, vaddr, pn);
- if (pmap == PMAP_NULL)
- return;
- if (pn == vm_page_guard_addr)
- return;
-
- PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
- pmap,
- (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
- pn, prot);
-
- if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
- set_NX = FALSE;
- else
- set_NX = TRUE;
-
- /*
- * Must allocate a new pvlist entry while we're unlocked;
- * zalloc may cause pageout (which will lock the pmap system).
- * If we determine we need a pvlist entry, we will unlock
- * and allocate one. Then we will retry, throwing away
- * the allocated entry later (if we no longer need it).
- */
-
- pvh_new = PV_HASHED_ENTRY_NULL;
-Retry:
- pvh_e = PV_HASHED_ENTRY_NULL;
-
- PMAP_LOCK(pmap);
-
- /*
- * Expand pmap to include this pte. Assume that
- * pmap is always expanded to include enough hardware
- * pages to map one VM page.
- */
- if(superpage) {
- while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
- /* need room for another pde entry */
- PMAP_UNLOCK(pmap);
- pmap_expand_pdpt(pmap, vaddr);
- PMAP_LOCK(pmap);
- }
- } else {
- while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
- /*
- * Must unlock to expand the pmap
- * going to grow pde level page(s)
- */
- PMAP_UNLOCK(pmap);
- pmap_expand(pmap, vaddr);
- PMAP_LOCK(pmap);
- }
- }
-
- if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
- /*
- * There is still an empty page table mapped that
- * was used for a previous base page mapping.
- * Remember the PDE and the PDE index, so that we
- * can free the page at the end of this function.
- */
- delpage_pde_index = (int)pdeidx(pmap, vaddr);
- delpage_pm_obj = pmap->pm_obj;
- *pte = 0;
- }
-
- old_pa = pte_to_pa(*pte);
- pai = pa_index(old_pa);
- old_pa_locked = FALSE;
-
- /*
- * if we have a previous managed page, lock the pv entry now. after
- * we lock it, check to see if someone beat us to the lock and if so
- * drop the lock
- */
- if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
- LOCK_PVH(pai);
- old_pa_locked = TRUE;
- old_pa = pte_to_pa(*pte);
- if (0 == old_pa) {
- UNLOCK_PVH(pai); /* another path beat us to it */
- old_pa_locked = FALSE;
- }
- }
-
- /*
- * Special case if the incoming physical page is already mapped
- * at this address.
- */
- if (old_pa == pa) {
-
- /*
- * May be changing its wired attribute or protection
- */
-
- template = pa_to_pte(pa) | INTEL_PTE_VALID;
-
- if (VM_MEM_NOT_CACHEABLE ==
- (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
- if (!(flags & VM_MEM_GUARDED))
- template |= INTEL_PTE_PTA;
- template |= INTEL_PTE_NCACHE;
- }
- if (pmap != kernel_pmap)
- template |= INTEL_PTE_USER;
- if (prot & VM_PROT_WRITE)
- template |= INTEL_PTE_WRITE;
-
- if (set_NX)
- template |= INTEL_PTE_NX;
-
- if (wired) {
- template |= INTEL_PTE_WIRED;
- if (!iswired(*pte))
- OSAddAtomic(+1,
- &pmap->stats.wired_count);
- } else {
- if (iswired(*pte)) {
- assert(pmap->stats.wired_count >= 1);
- OSAddAtomic(-1,
- &pmap->stats.wired_count);
- }
- }
- if (superpage) /* this path can not be used */
- template |= INTEL_PTE_PS; /* to change the page size! */
-
- /* store modified PTE and preserve RC bits */
- pmap_update_pte(pte, *pte,
- template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
- if (old_pa_locked) {
- UNLOCK_PVH(pai);
- old_pa_locked = FALSE;
- }
- need_tlbflush = TRUE;
- goto Done;
- }
-
- /*
- * Outline of code from here:
- * 1) If va was mapped, update TLBs, remove the mapping
- * and remove old pvlist entry.
- * 2) Add pvlist entry for new mapping
- * 3) Enter new mapping.
- *
- * If the old physical page is not managed step 1) is skipped
- * (except for updating the TLBs), and the mapping is
- * overwritten at step 3). If the new physical page is not
- * managed, step 2) is skipped.
- */
-
- if (old_pa != (pmap_paddr_t) 0) {
-
- /*
- * Don't do anything to pages outside valid memory here.
- * Instead convince the code that enters a new mapping
- * to overwrite the old one.
- */
-
- /* invalidate the PTE */
- pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
- /* propagate invalidate everywhere */
- PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
- /* remember reference and change */
- oattr = (char) (*pte & (PHYS_MODIFIED | PHYS_REFERENCED));
- /* completely invalidate the PTE */
- pmap_store_pte(pte, 0);
-
- if (IS_MANAGED_PAGE(pai)) {
-#if TESTING
- if (pmap->stats.resident_count < 1)
- panic("pmap_enter: resident_count");
-#endif
- assert(pmap->stats.resident_count >= 1);
- OSAddAtomic(-1,
- &pmap->stats.resident_count);
-
- if (iswired(*pte)) {
-#if TESTING
- if (pmap->stats.wired_count < 1)
- panic("pmap_enter: wired_count");
-#endif
- assert(pmap->stats.wired_count >= 1);
- OSAddAtomic(-1,
- &pmap->stats.wired_count);
- }
- pmap_phys_attributes[pai] |= oattr;
-
- /*
- * Remove the mapping from the pvlist for
- * this physical page.
- * We'll end up with either a rooted pv or a
- * hashed pv
- */
- pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t) pai);
-
- } else {
-
- /*
- * old_pa is not managed.
- * Do removal part of accounting.
- */
-
- if (iswired(*pte)) {
- assert(pmap->stats.wired_count >= 1);
- OSAddAtomic(-1,
- &pmap->stats.wired_count);
- }
- }
- }
-
- /*
- * if we had a previously managed paged locked, unlock it now
- */
- if (old_pa_locked) {
- UNLOCK_PVH(pai);
- old_pa_locked = FALSE;
- }
-
- pai = pa_index(pa); /* now working with new incoming phys page */
- if (IS_MANAGED_PAGE(pai)) {
-
- /*
- * Step 2) Enter the mapping in the PV list for this
- * physical page.
- */
- pv_h = pai_to_pvh(pai);
-
- LOCK_PVH(pai);
-
- if (pv_h->pmap == PMAP_NULL) {
- /*
- * No mappings yet, use rooted pv
- */
- pv_h->va = vaddr;
- pv_h->pmap = pmap;
- queue_init(&pv_h->qlink);
- } else {
- /*
- * Add new pv_hashed_entry after header.
- */
- if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
- pvh_e = pvh_new;
- pvh_new = PV_HASHED_ENTRY_NULL;
- } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
- PV_HASHED_ALLOC(pvh_e);
- if (PV_HASHED_ENTRY_NULL == pvh_e) {
- /*
- * the pv free list is empty. if we are on
- * the kernel pmap we'll use one of
- * the special private kernel pv_e's,
- * else, we need to unlock
- * everything, zalloc a pv_e, and
- * restart bringing in the pv_e with
- * us.
- */
- if (kernel_pmap == pmap) {
- PV_HASHED_KERN_ALLOC(pvh_e);
- } else {
- UNLOCK_PVH(pai);
- PMAP_UNLOCK(pmap);
- pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
- goto Retry;
- }
- }
- }
- if (PV_HASHED_ENTRY_NULL == pvh_e)
- panic("pvh_e exhaustion");
-
- pvh_e->va = vaddr;
- pvh_e->pmap = pmap;
- pvh_e->ppn = pn;
- pv_hash_add(pvh_e, pv_h);
-
- /*
- * Remember that we used the pvlist entry.
- */
- pvh_e = PV_HASHED_ENTRY_NULL;
- }
-
- /*
- * only count the mapping
- * for 'managed memory'
- */
- OSAddAtomic(+1, & pmap->stats.resident_count);
- if (pmap->stats.resident_count > pmap->stats.resident_max) {
- pmap->stats.resident_max = pmap->stats.resident_count;
- }
- }
- /*
- * Step 3) Enter the mapping.
- *
- * Build a template to speed up entering -
- * only the pfn changes.
- */
- template = pa_to_pte(pa) | INTEL_PTE_VALID;
-
- if (flags & VM_MEM_NOT_CACHEABLE) {
- if (!(flags & VM_MEM_GUARDED))
- template |= INTEL_PTE_PTA;
- template |= INTEL_PTE_NCACHE;
- }
- if (pmap != kernel_pmap)
- template |= INTEL_PTE_USER;
- if (prot & VM_PROT_WRITE)
- template |= INTEL_PTE_WRITE;
- if (set_NX)
- template |= INTEL_PTE_NX;
- if (wired) {
- template |= INTEL_PTE_WIRED;
- OSAddAtomic(+1, & pmap->stats.wired_count);
- }
- if (superpage)
- template |= INTEL_PTE_PS;
- pmap_store_pte(pte, template);
-
- /*
- * if this was a managed page we delayed unlocking the pv until here
- * to prevent pmap_page_protect et al from finding it until the pte
- * has been stored
- */
- if (IS_MANAGED_PAGE(pai)) {
- UNLOCK_PVH(pai);
- }
-Done:
- if (need_tlbflush == TRUE)
- PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
-
- if (pvh_e != PV_HASHED_ENTRY_NULL) {
- PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
- }
- if (pvh_new != PV_HASHED_ENTRY_NULL) {
- PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
- }
- PMAP_UNLOCK(pmap);
-
- if (delpage_pm_obj) {
- vm_page_t m;
-
- vm_object_lock(delpage_pm_obj);
- m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
- if (m == VM_PAGE_NULL)
- panic("pmap_enter: pte page not in object");
- VM_PAGE_FREE(m);
- OSAddAtomic(-1, &inuse_ptepages_count);
- vm_object_unlock(delpage_pm_obj);
- }
-
- PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
-}
-
/*
* Routine: pmap_change_wiring
* Function: Change the wiring attribute for a map/virtual-address
return TRUE;
}
-void
-mapping_free_prime(void)
-{
- int i;
- pv_hashed_entry_t pvh_e;
- pv_hashed_entry_t pvh_eh;
- pv_hashed_entry_t pvh_et;
- int pv_cnt;
-
- pv_cnt = 0;
- pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
- for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
- pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
- pvh_e->qlink.next = (queue_entry_t)pvh_eh;
- pvh_eh = pvh_e;
-
- if (pvh_et == PV_HASHED_ENTRY_NULL)
- pvh_et = pvh_e;
- pv_cnt++;
- }
- PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-
- pv_cnt = 0;
- pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
- for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
- pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
- pvh_e->qlink.next = (queue_entry_t)pvh_eh;
- pvh_eh = pvh_e;
-
- if (pvh_et == PV_HASHED_ENTRY_NULL)
- pvh_et = pvh_e;
- pv_cnt++;
- }
- PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
-
-}
-
-void
-mapping_adjust(void)
-{
- pv_hashed_entry_t pvh_e;
- pv_hashed_entry_t pvh_eh;
- pv_hashed_entry_t pvh_et;
- int pv_cnt;
- int i;
-
- if (mapping_adjust_call == NULL) {
- thread_call_setup(&mapping_adjust_call_data,
- (thread_call_func_t) mapping_adjust,
- (thread_call_param_t) NULL);
- mapping_adjust_call = &mapping_adjust_call_data;
- }
-
- pv_cnt = 0;
- pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
- if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
- for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
- pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
- pvh_e->qlink.next = (queue_entry_t)pvh_eh;
- pvh_eh = pvh_e;
-
- if (pvh_et == PV_HASHED_ENTRY_NULL)
- pvh_et = pvh_e;
- pv_cnt++;
- }
- PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
- }
-
- pv_cnt = 0;
- pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
- if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
- for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
- pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
-
- pvh_e->qlink.next = (queue_entry_t)pvh_eh;
- pvh_eh = pvh_e;
-
- if (pvh_et == PV_HASHED_ENTRY_NULL)
- pvh_et = pvh_e;
- pv_cnt++;
- }
- PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
- }
- mappingrecurse = 0;
-}
-
-
void
pmap_switch(pmap_t tpmap)
{