X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/7e4a7d3939db04e70062ae6c7bf24b8c8b2f5a7c..b7266188b87f3620ec3f9f717e57194a7dd989fe:/osfmk/x86_64/pmap.c diff --git a/osfmk/x86_64/pmap.c b/osfmk/x86_64/pmap.c index 13c439a96..e53843224 100644 --- a/osfmk/x86_64/pmap.c +++ b/osfmk/x86_64/pmap.c @@ -90,7 +90,6 @@ */ #include -#include #include #include @@ -147,15 +146,6 @@ #include -/* #define DEBUGINTERRUPTS 1 uncomment to ensure pmap callers have interrupts enabled */ -#ifdef DEBUGINTERRUPTS -#define pmap_intr_assert() { \ - if (processor_avail_count > 1 && !ml_get_interrupts_enabled()) \ - panic("pmap interrupt assert %s, %d",__FILE__, __LINE__); \ -} -#else -#define pmap_intr_assert() -#endif #ifdef IWANTTODEBUG #undef DEBUG @@ -178,11 +168,6 @@ boolean_t no_shared_cr3 = DEBUG; /* TRUE for DEBUG by default */ * Forward declarations for internal functions. */ -void pmap_remove_range( - pmap_t pmap, - vm_map_offset_t va, - pt_entry_t *spte, - pt_entry_t *epte); void phys_attribute_clear( ppnum_t phys, @@ -209,166 +194,12 @@ int allow_stack_exec = 0; /* No apps may execute from the stack by default */ const boolean_t cpu_64bit = TRUE; /* Mais oui! */ -/* - * when spinning through pmap_remove - * ensure that we don't spend too much - * time with preemption disabled. - * I'm setting the current threshold - * to 20us - */ -#define MAX_PREEMPTION_LATENCY_NS 20000 - uint64_t max_preemption_latency_tsc = 0; - -/* - * Private data structures. - */ - -/* - * For each vm_page_t, there is a list of all currently - * valid virtual mappings of that page. An entry is - * a pv_rooted_entry_t; the list is the pv_table. - * - * N.B. with the new combo rooted/hashed scheme it is - * only possibly to remove individual non-rooted entries - * if they are found via the hashed chains as there is no - * way to unlink the singly linked hashed entries if navigated to - * via the queue list off the rooted entries. Think of it as - * hash/walk/pull, keeping track of the prev pointer while walking - * the singly linked hash list. All of this is to save memory and - * keep both types of pv_entries as small as possible. - */ - -/* - -PV HASHING Changes - JK 1/2007 - -Pve's establish physical to virtual mappings. These are used for aliasing of a -physical page to (potentially many) virtual addresses within pmaps. In the -previous implementation the structure of the pv_entries (each 16 bytes in size) was - -typedef struct pv_entry { - struct pv_entry_t next; - pmap_t pmap; - vm_map_offset_t va; -} *pv_entry_t; - -An initial array of these is created at boot time, one per physical page of -memory, indexed by the physical page number. Additionally, a pool of entries -is created from a pv_zone to be used as needed by pmap_enter() when it is -creating new mappings. Originally, we kept this pool around because the code -in pmap_enter() was unable to block if it needed an entry and none were -available - we'd panic. Some time ago I restructured the pmap_enter() code -so that for user pmaps it can block while zalloc'ing a pv structure and restart, -removing a panic from the code (in the case of the kernel pmap we cannot block -and still panic, so, we keep a separate hot pool for use only on kernel pmaps). -The pool has not been removed since there is a large performance gain keeping -freed pv's around for reuse and not suffering the overhead of zalloc for every -new pv we need. - -As pmap_enter() created new mappings it linked the new pve's for them off the -fixed pv array for that ppn (off the next pointer). 
These pve's are accessed -for several operations, one of them being address space teardown. In that case, -we basically do this - - for (every page/pte in the space) { - calc pve_ptr from the ppn in the pte - for (every pv in the list for the ppn) { - if (this pv is for this pmap/vaddr) { - do housekeeping - unlink/free the pv - } - } - } - -The problem arose when we were running, say 8000 (or even 2000) apache or -other processes and one or all terminate. The list hanging off each pv array -entry could have thousands of entries. We were continuously linearly searching -each of these lists as we stepped through the address space we were tearing -down. Because of the locks we hold, likely taking a cache miss for each node, -and interrupt disabling for MP issues the system became completely unresponsive -for many seconds while we did this. - -Realizing that pve's are accessed in two distinct ways (linearly running the -list by ppn for operations like pmap_page_protect and finding and -modifying/removing a single pve as part of pmap_enter processing) has led to -modifying the pve structures and databases. - -There are now two types of pve structures. A "rooted" structure which is -basically the original structure accessed in an array by ppn, and a ''hashed'' -structure accessed on a hash list via a hash of [pmap, vaddr]. These have been -designed with the two goals of minimizing wired memory and making the lookup of -a ppn faster. Since a vast majority of pages in the system are not aliased -and hence represented by a single pv entry I've kept the rooted entry size as -small as possible because there is one of these dedicated for every physical -page of memory. The hashed pve's are larger due to the addition of the hash -link and the ppn entry needed for matching while running the hash list to find -the entry we are looking for. This way, only systems that have lots of -aliasing (like 2000+ httpd procs) will pay the extra memory price. Both -structures have the same first three fields allowing some simplification in -the code. - -They have these shapes - -typedef struct pv_rooted_entry { - queue_head_t qlink; - vm_map_offset_t va; - pmap_t pmap; -} *pv_rooted_entry_t; - - -typedef struct pv_hashed_entry { - queue_head_t qlink; - vm_map_offset_t va; - pmap_t pmap; - ppnum_t ppn; - struct pv_hashed_entry *nexth; -} *pv_hashed_entry_t; - -The main flow difference is that the code is now aware of the rooted entry and -the hashed entries. Code that runs the pv list still starts with the rooted -entry and then continues down the qlink onto the hashed entries. Code that is -looking up a specific pv entry first checks the rooted entry and then hashes -and runs the hash list for the match. The hash list lengths are much smaller -than the original pv lists that contained all aliases for the specific ppn. 
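-
-Roughly, a lookup of one mapping (pmap, vaddr) of a given ppn now goes like
-this (pseudo-code only, locking omitted; pai_to_pvh(), pvhashidx(),
-pv_hash_table and PV_HASHED_ENTRY_NULL are all defined below):
-
-	pv_h = pai_to_pvh(ppn);
-	if (pv_h->pmap == pmap && pv_h->va == vaddr)
-		hit on the rooted entry
-	else
-		pvh_e = pv_hash_table[pvhashidx(pmap, vaddr)];
-		while (pvh_e != PV_HASHED_ENTRY_NULL) {
-			if (pvh_e->pmap == pmap && pvh_e->va == vaddr &&
-			    pvh_e->ppn == ppn)
-				hit on the hashed chain
-			pvh_e = pvh_e->nexth;
-		}
-
-Removing a single mapping follows the same pattern, plus the splice out of
-the hash chain (the hash/walk/pull above); pmap_pvh_unlink() below does that
-splice.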
- -*/ - -typedef struct pv_rooted_entry { - /* first three entries must match pv_hashed_entry_t */ - queue_head_t qlink; - vm_map_offset_t va; /* virtual address for mapping */ - pmap_t pmap; /* pmap where mapping lies */ -} *pv_rooted_entry_t; - -#define PV_ROOTED_ENTRY_NULL ((pv_rooted_entry_t) 0) - -pv_rooted_entry_t pv_head_table; /* array of entries, one per page */ - -typedef struct pv_hashed_entry { - /* first three entries must match pv_rooted_entry_t */ - queue_head_t qlink; - vm_map_offset_t va; - pmap_t pmap; - ppnum_t ppn; - struct pv_hashed_entry *nexth; -} *pv_hashed_entry_t; - -#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0) - -#define NPVHASH 4095 /* MUST BE 2^N - 1 */ pv_hashed_entry_t *pv_hash_table; /* hash lists */ uint32_t npvhash = 0; -//#define PV_DEBUG 1 /* uncomment to enable some PV debugging code */ -#ifdef PV_DEBUG -#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized"); -#else -#define CHK_NPVHASH(x) -#endif - pv_hashed_entry_t pv_hashed_free_list = PV_HASHED_ENTRY_NULL; pv_hashed_entry_t pv_hashed_kern_free_list = PV_HASHED_ENTRY_NULL; decl_simple_lock_data(,pv_hashed_free_list_lock) @@ -377,53 +208,7 @@ decl_simple_lock_data(,pv_hash_table_lock) int pv_hashed_free_count = 0; int pv_hashed_kern_free_count = 0; -#define PV_HASHED_LOW_WATER_MARK 5000 -#define PV_HASHED_KERN_LOW_WATER_MARK 100 -#define PV_HASHED_ALLOC_CHUNK 2000 -#define PV_HASHED_KERN_ALLOC_CHUNK 50 -thread_call_t mapping_adjust_call; -static thread_call_data_t mapping_adjust_call_data; -uint32_t mappingrecurse = 0; - -#define PV_HASHED_ALLOC(pvh_e) { \ - simple_lock(&pv_hashed_free_list_lock); \ - if ((pvh_e = pv_hashed_free_list) != 0) { \ - pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \ - pv_hashed_free_count--; \ - if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) \ - if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \ - thread_call_enter(mapping_adjust_call); \ - } \ - simple_unlock(&pv_hashed_free_list_lock); \ -} - -#define PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) { \ - simple_lock(&pv_hashed_free_list_lock); \ - pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list; \ - pv_hashed_free_list = pvh_eh; \ - pv_hashed_free_count += pv_cnt; \ - simple_unlock(&pv_hashed_free_list_lock); \ -} - -#define PV_HASHED_KERN_ALLOC(pvh_e) { \ - simple_lock(&pv_hashed_kern_free_list_lock); \ - if ((pvh_e = pv_hashed_kern_free_list) != 0) { \ - pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \ - pv_hashed_kern_free_count--; \ - if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK)\ - if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \ - thread_call_enter(mapping_adjust_call); \ - } \ - simple_unlock(&pv_hashed_kern_free_list_lock); \ -} -#define PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) { \ - simple_lock(&pv_hashed_kern_free_list_lock); \ - pvh_et->qlink.next = (queue_entry_t)pv_hashed_kern_free_list; \ - pv_hashed_kern_free_list = pvh_eh; \ - pv_hashed_kern_free_count += pv_cnt; \ - simple_unlock(&pv_hashed_kern_free_list_lock); \ -} zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry structures */ @@ -436,10 +221,10 @@ static zone_t pdpt_zone; */ char *pv_lock_table; /* pointer to array of bits */ -#define pv_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE) + char *pv_hash_lock_table; -#define pv_hash_lock_table_size(n) (((n)+BYTE_SIZE-1)/BYTE_SIZE) + /* * First and last physical addresses that we maintain any information @@ -453,97 +238,13 @@ static struct vm_object kpml4obj_object_store; static struct vm_object 
kpdptobj_object_store; /* - * Index into pv_head table, its lock bits, and the modify/reference and managed bits - */ - -#define pa_index(pa) (i386_btop(pa)) -#define ppn_to_pai(ppn) ((int)ppn) - -#define pai_to_pvh(pai) (&pv_head_table[pai]) -#define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table) -#define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table) - -static inline uint32_t -pvhashidx(pmap_t pmap, vm_offset_t va) -{ - return ((uint32_t)(uint64_t)pmap ^ - ((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) & - npvhash; -} -#define pvhash(idx) (&pv_hash_table[idx]) - -#define lock_hash_hash(hash) bit_lock(hash, (void *)pv_hash_lock_table) -#define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table) - -/* - * Array of physical page attribites for managed pages. + * Array of physical page attributes for managed pages. * One byte per physical page. */ char *pmap_phys_attributes; unsigned int last_managed_page = 0; -#define IS_MANAGED_PAGE(x) \ - ((unsigned int)(x) <= last_managed_page && \ - (pmap_phys_attributes[x] & PHYS_MANAGED)) - -/* - * Physical page attributes. Copy bits from PTE definition. - */ -#define PHYS_MODIFIED INTEL_PTE_MOD /* page modified */ -#define PHYS_REFERENCED INTEL_PTE_REF /* page referenced */ -#define PHYS_MANAGED INTEL_PTE_VALID /* page is managed */ - -/* - * Amount of virtual memory mapped by one - * page-directory entry. - */ -#define PDE_MAPPED_SIZE (pdetova(1)) uint64_t pde_mapped_size = PDE_MAPPED_SIZE; -/* - * Locking and TLB invalidation - */ - -/* - * Locking Protocols: (changed 2/2007 JK) - * - * There are two structures in the pmap module that need locking: - * the pmaps themselves, and the per-page pv_lists (which are locked - * by locking the pv_lock_table entry that corresponds to the pv_head - * for the list in question.) Most routines want to lock a pmap and - * then do operations in it that require pv_list locking -- however - * pmap_remove_all and pmap_copy_on_write operate on a physical page - * basis and want to do the locking in the reverse order, i.e. lock - * a pv_list and then go through all the pmaps referenced by that list. - * - * The system wide pmap lock has been removed. Now, paths take a lock - * on the pmap before changing its 'shape' and the reverse order lockers - * (coming in by phys ppn) take a lock on the corresponding pv and then - * retest to be sure nothing changed during the window before they locked - * and can then run up/down the pv lists holding the list lock. This also - * lets the pmap layer run (nearly completely) interrupt enabled, unlike - * previously. - */ - -/* - * PV locking - */ - -#define LOCK_PVH(index) { \ - mp_disable_preemption(); \ - lock_pvh_pai(index); \ -} - -#define UNLOCK_PVH(index) { \ - unlock_pvh_pai(index); \ - mp_enable_preemption(); \ -} -/* - * PV hash locking - */ - -#define LOCK_PV_HASH(hash) lock_hash_hash(hash) -#define UNLOCK_PV_HASH(hash) unlock_hash_hash(hash) - unsigned pmap_memory_region_count; unsigned pmap_memory_region_current; @@ -562,8 +263,6 @@ pd_entry_t commpage64_pde; struct zone *pmap_zone; /* zone of pmap structures */ -int pmap_debug = 0; /* flag for debugging prints */ - unsigned int inuse_ptepages_count = 0; addr64_t kernel64_cr3; @@ -585,170 +284,6 @@ static int nkpt; pt_entry_t *DMAP1, *DMAP2; caddr_t DADDR1; caddr_t DADDR2; - -/* - * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain. - * properly deals with the anchor. 
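- * (this is the hash/walk/pull step described in the scheme above: walk the
- * singly linked chain remembering the previous next pointer, starting at
- * the bucket anchor, so the entry can be spliced out)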
- * must be called with the hash locked, does not unlock it - */ - -static inline void -pmap_pvh_unlink(pv_hashed_entry_t pvh) -{ - pv_hashed_entry_t curh; - pv_hashed_entry_t *pprevh; - int pvhash_idx; - - CHK_NPVHASH(); - pvhash_idx = pvhashidx(pvh->pmap, pvh->va); - - pprevh = pvhash(pvhash_idx); - -#if PV_DEBUG - if (NULL == *pprevh) - panic("pvh_unlink null anchor"); /* JK DEBUG */ -#endif - curh = *pprevh; - - while (PV_HASHED_ENTRY_NULL != curh) { - if (pvh == curh) - break; - pprevh = &curh->nexth; - curh = curh->nexth; - } - if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh"); - *pprevh = pvh->nexth; - return; -} - -static inline void -pv_hash_add(pv_hashed_entry_t pvh_e, - pv_rooted_entry_t pv_h) -{ - pv_hashed_entry_t *hashp; - int pvhash_idx; - - CHK_NPVHASH(); - pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va); - LOCK_PV_HASH(pvhash_idx); - insque(&pvh_e->qlink, &pv_h->qlink); - hashp = pvhash(pvhash_idx); -#if PV_DEBUG - if (NULL==hashp) - panic("pv_hash_add(%p) null hash bucket", pvh_e); -#endif - pvh_e->nexth = *hashp; - *hashp = pvh_e; - UNLOCK_PV_HASH(pvhash_idx); -} - -static inline void -pv_hash_remove(pv_hashed_entry_t pvh_e) -{ - int pvhash_idx; - - CHK_NPVHASH(); - pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va); - LOCK_PV_HASH(pvhash_idx); - remque(&pvh_e->qlink); - pmap_pvh_unlink(pvh_e); - UNLOCK_PV_HASH(pvhash_idx); -} - -/* - * Remove pv list entry. - * Called with pv_head_table entry locked. - * Returns pv entry to be freed (or NULL). - */ -static inline pv_hashed_entry_t -pmap_pv_remove(pmap_t pmap, - vm_map_offset_t vaddr, - ppnum_t ppn) -{ - pv_hashed_entry_t pvh_e; - pv_rooted_entry_t pv_h; - pv_hashed_entry_t *pprevh; - int pvhash_idx; - uint32_t pv_cnt; - - pvh_e = PV_HASHED_ENTRY_NULL; - pv_h = pai_to_pvh(ppn_to_pai(ppn)); - if (pv_h->pmap == PMAP_NULL) - panic("pmap_pv_remove(%p,%llu,%u): null pv_list!", - pmap, vaddr, ppn); - - if (pv_h->va == vaddr && pv_h->pmap == pmap) { - /* - * Header is the pv_rooted_entry. - * We can't free that. If there is a queued - * entry after this one we remove that - * from the ppn queue, we remove it from the hash chain - * and copy it to the rooted entry. Then free it instead. - */ - pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink); - if (pv_h != (pv_rooted_entry_t) pvh_e) { - /* - * Entry queued to root, remove this from hash - * and install as nem root. - */ - CHK_NPVHASH(); - pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va); - LOCK_PV_HASH(pvhash_idx); - remque(&pvh_e->qlink); - pprevh = pvhash(pvhash_idx); - if (PV_HASHED_ENTRY_NULL == *pprevh) { - panic("pmap_pv_remove(%p,%llu,%u): " - "empty hash, removing rooted", - pmap, vaddr, ppn); - } - pmap_pvh_unlink(pvh_e); - UNLOCK_PV_HASH(pvhash_idx); - pv_h->pmap = pvh_e->pmap; - pv_h->va = pvh_e->va; /* dispose of pvh_e */ - } else { - /* none queued after rooted */ - pv_h->pmap = PMAP_NULL; - pvh_e = PV_HASHED_ENTRY_NULL; - } - } else { - /* - * not removing rooted pv. 
find it on hash chain, remove from - * ppn queue and hash chain and free it - */ - CHK_NPVHASH(); - pvhash_idx = pvhashidx(pmap, vaddr); - LOCK_PV_HASH(pvhash_idx); - pprevh = pvhash(pvhash_idx); - if (PV_HASHED_ENTRY_NULL == *pprevh) { - panic("pmap_pv_remove(%p,%llu,%u): empty hash", - pmap, vaddr, ppn); - } - pvh_e = *pprevh; - pmap_pv_hashlist_walks++; - pv_cnt = 0; - while (PV_HASHED_ENTRY_NULL != pvh_e) { - pv_cnt++; - if (pvh_e->pmap == pmap && - pvh_e->va == vaddr && - pvh_e->ppn == ppn) - break; - pprevh = &pvh_e->nexth; - pvh_e = pvh_e->nexth; - } - if (PV_HASHED_ENTRY_NULL == pvh_e) - panic("pmap_pv_remove(%p,%llu,%u): pv not on hash", - pmap, vaddr, ppn); - pmap_pv_hashlist_cnts += pv_cnt; - if (pmap_pv_hashlist_max < pv_cnt) - pmap_pv_hashlist_max = pv_cnt; - *pprevh = pvh_e->nexth; - remque(&pvh_e->qlink); - UNLOCK_PV_HASH(pvhash_idx); - } - - return pvh_e; -} - /* * for legacy, returns the address of the pde entry. * for 64 bit, causes the pdpt page containing the pde entry to be mapped, @@ -1463,147 +998,6 @@ pmap_reference(pmap_t p) } } -/* - * Remove a range of hardware page-table entries. - * The entries given are the first (inclusive) - * and last (exclusive) entries for the VM pages. - * The virtual address is the va for the first pte. - * - * The pmap must be locked. - * If the pmap is not the kernel pmap, the range must lie - * entirely within one pte-page. This is NOT checked. - * Assumes that the pte-page exists. - */ - -void -pmap_remove_range( - pmap_t pmap, - vm_map_offset_t start_vaddr, - pt_entry_t *spte, - pt_entry_t *epte) -{ - pt_entry_t *cpte; - pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL; - pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL; - pv_hashed_entry_t pvh_e; - int pvh_cnt = 0; - int num_removed, num_unwired, num_found; - int pai; - pmap_paddr_t pa; - vm_map_offset_t vaddr; - - num_removed = 0; - num_unwired = 0; - num_found = 0; - - /* invalidate the PTEs first to "freeze" them */ - for (cpte = spte, vaddr = start_vaddr; - cpte < epte; - cpte++, vaddr += PAGE_SIZE_64) { - - pa = pte_to_pa(*cpte); - if (pa == 0) - continue; - num_found++; - - if (iswired(*cpte)) - num_unwired++; - - pai = pa_index(pa); - - if (!IS_MANAGED_PAGE(pai)) { - /* - * Outside range of managed physical memory. - * Just remove the mappings. - */ - pmap_store_pte(cpte, 0); - continue; - } - - /* invalidate the PTE */ - pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID)); - } - - if (num_found == 0) { - /* nothing was changed: we're done */ - goto update_counts; - } - - /* propagate the invalidates to other CPUs */ - - PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr); - - for (cpte = spte, vaddr = start_vaddr; - cpte < epte; - cpte++, vaddr += PAGE_SIZE_64) { - - pa = pte_to_pa(*cpte); - if (pa == 0) - continue; - - pai = pa_index(pa); - - LOCK_PVH(pai); - - pa = pte_to_pa(*cpte); - if (pa == 0) { - UNLOCK_PVH(pai); - continue; - } - num_removed++; - - /* - * Get the modify and reference bits, then - * nuke the entry in the page table - */ - /* remember reference and change */ - pmap_phys_attributes[pai] |= - (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED)); - /* completely invalidate the PTE */ - pmap_store_pte(cpte, 0); - - /* - * Remove the mapping from the pvlist for this physical page. 
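-		 * pmap_pv_remove() hands back the hashed entry it unlinked
-		 * (or NULL if only the rooted entry was involved); entries
-		 * are chained locally here and returned to the free list in
-		 * one batch after the loop.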
- */ - pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t) pai); - - UNLOCK_PVH(pai); - - if (pvh_e != PV_HASHED_ENTRY_NULL) { - pvh_e->qlink.next = (queue_entry_t) pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) { - pvh_et = pvh_e; - } - pvh_cnt++; - } - } /* for loop */ - - if (pvh_eh != PV_HASHED_ENTRY_NULL) { - PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt); - } -update_counts: - /* - * Update the counts - */ -#if TESTING - if (pmap->stats.resident_count < num_removed) - panic("pmap_remove_range: resident_count"); -#endif - assert(pmap->stats.resident_count >= num_removed); - OSAddAtomic(-num_removed, &pmap->stats.resident_count); - -#if TESTING - if (pmap->stats.wired_count < num_unwired) - panic("pmap_remove_range: wired_count"); -#endif - assert(pmap->stats.wired_count >= num_unwired); - OSAddAtomic(-num_unwired, &pmap->stats.wired_count); - - return; -} - /* * Remove phys addr if mapped in specified map * @@ -1618,274 +1012,6 @@ pmap_remove_some_phys( } -/* - * Remove the given range of addresses - * from the specified map. - * - * It is assumed that the start and end are properly - * rounded to the hardware page size. - */ -void -pmap_remove( - pmap_t map, - addr64_t s64, - addr64_t e64) -{ - pt_entry_t *pde; - pt_entry_t *spte, *epte; - addr64_t l64; - uint64_t deadline; - - pmap_intr_assert(); - - if (map == PMAP_NULL || s64 == e64) - return; - - PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START, - map, - (uint32_t) (s64 >> 32), s64, - (uint32_t) (e64 >> 32), e64); - - - PMAP_LOCK(map); - -#if 0 - /* - * Check that address range in the kernel does not overlap the stacks. - * We initialize local static min/max variables once to avoid making - * 2 function calls for every remove. Note also that these functions - * both return 0 before kernel stacks have been initialized, and hence - * the panic is not triggered in this case. - */ - if (map == kernel_pmap) { - static vm_offset_t kernel_stack_min = 0; - static vm_offset_t kernel_stack_max = 0; - - if (kernel_stack_min == 0) { - kernel_stack_min = min_valid_stack_address(); - kernel_stack_max = max_valid_stack_address(); - } - if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) || - (kernel_stack_min < e64 && e64 <= kernel_stack_max)) - panic("pmap_remove() attempted in kernel stack"); - } -#else - - /* - * The values of kernel_stack_min and kernel_stack_max are no longer - * relevant now that we allocate kernel stacks in the kernel map, - * so the old code above no longer applies. If we wanted to check that - * we weren't removing a mapping of a page in a kernel stack we'd - * mark the PTE with an unused bit and check that here. - */ - -#endif - - deadline = rdtsc64() + max_preemption_latency_tsc; - - while (s64 < e64) { - l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1); - if (l64 > e64) - l64 = e64; - pde = pmap_pde(map, s64); - - if (pde && (*pde & INTEL_PTE_VALID)) { - if (*pde & INTEL_PTE_PS) { - /* - * If we're removing a superpage, pmap_remove_range() - * must work on level 2 instead of level 1; and we're - * only passing a single level 2 entry instead of a - * level 1 range. 
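-				 * (epte is exclusive, so spte+1 below hands
-				 * pmap_remove_range() exactly that one PDE)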
- */ - spte = pde; - epte = spte+1; /* excluded */ - } else { - spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1))); - spte = &spte[ptenum(s64)]; - epte = &spte[intel_btop(l64 - s64)]; - } - pmap_remove_range(map, s64, spte, epte); - } - s64 = l64; - pde++; - - if (s64 < e64 && rdtsc64() >= deadline) { - PMAP_UNLOCK(map) - PMAP_LOCK(map) - deadline = rdtsc64() + max_preemption_latency_tsc; - } - } - - PMAP_UNLOCK(map); - - PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END, - map, 0, 0, 0, 0); - -} - -/* - * Routine: pmap_page_protect - * - * Function: - * Lower the permission for all mappings to a given - * page. - */ -void -pmap_page_protect( - ppnum_t pn, - vm_prot_t prot) -{ - pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL; - pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL; - pv_hashed_entry_t nexth; - int pvh_cnt = 0; - pv_rooted_entry_t pv_h; - pv_rooted_entry_t pv_e; - pv_hashed_entry_t pvh_e; - pt_entry_t *pte; - int pai; - pmap_t pmap; - boolean_t remove; - - pmap_intr_assert(); - assert(pn != vm_page_fictitious_addr); - if (pn == vm_page_guard_addr) - return; - - pai = ppn_to_pai(pn); - - if (!IS_MANAGED_PAGE(pai)) { - /* - * Not a managed page. - */ - return; - } - PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, - pn, prot, 0, 0, 0); - - /* - * Determine the new protection. - */ - switch (prot) { - case VM_PROT_READ: - case VM_PROT_READ | VM_PROT_EXECUTE: - remove = FALSE; - break; - case VM_PROT_ALL: - return; /* nothing to do */ - default: - remove = TRUE; - break; - } - - pv_h = pai_to_pvh(pai); - - LOCK_PVH(pai); - - - /* - * Walk down PV list, if any, changing or removing all mappings. - */ - if (pv_h->pmap == PMAP_NULL) - goto done; - - pv_e = pv_h; - pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */ - - do { - vm_map_offset_t vaddr; - - pmap = pv_e->pmap; - vaddr = pv_e->va; - pte = pmap_pte(pmap, vaddr); - if (0 == pte) { - panic("pmap_page_protect() " - "pmap=%p pn=0x%x vaddr=0x%llx\n", - pmap, pn, vaddr); - } - nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink); - - /* - * Remove the mapping if new protection is NONE - * or if write-protecting a kernel mapping. - */ - if (remove || pmap == kernel_pmap) { - /* - * Remove the mapping, collecting dirty bits. - */ - pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID); - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE); - pmap_phys_attributes[pai] |= - *pte & (PHYS_MODIFIED|PHYS_REFERENCED); - pmap_store_pte(pte, 0); - -#if TESTING - if (pmap->stats.resident_count < 1) - panic("pmap_page_protect: resident_count"); -#endif - assert(pmap->stats.resident_count >= 1); - OSAddAtomic(-1, &pmap->stats.resident_count); - - /* - * Deal with the pv_rooted_entry. - */ - - if (pv_e == pv_h) { - /* - * Fix up head later. - */ - pv_h->pmap = PMAP_NULL; - } else { - /* - * Delete this entry. - */ - pv_hash_remove(pvh_e); - pvh_e->qlink.next = (queue_entry_t) pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) - pvh_et = pvh_e; - pvh_cnt++; - } - } else { - /* - * Write-protect. - */ - pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE); - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE); - } - pvh_e = nexth; - } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h); - - - /* - * If pv_head mapping was removed, fix it up. 
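-	 * if any hashed entries remain, the first one is pulled off its
-	 * hash chain and promoted into the rooted slot; otherwise the root
-	 * simply stays marked PMAP_NULL.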
- */ - if (pv_h->pmap == PMAP_NULL) { - pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink); - - if (pvh_e != (pv_hashed_entry_t) pv_h) { - pv_hash_remove(pvh_e); - pv_h->pmap = pvh_e->pmap; - pv_h->va = pvh_e->va; - pvh_e->qlink.next = (queue_entry_t) pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) - pvh_et = pvh_e; - pvh_cnt++; - } - } - if (pvh_eh != PV_HASHED_ENTRY_NULL) { - PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt); - } -done: - UNLOCK_PVH(pai); - - PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END, - 0, 0, 0, 0, 0); -} - /* * Routine: @@ -2019,398 +1145,6 @@ pmap_map_block( } } - -/* - * Insert the given physical page (p) at - * the specified virtual address (v) in the - * target physical map with the protection requested. - * - * If specified, the page will be wired down, meaning - * that the related pte cannot be reclaimed. - * - * NB: This is the only routine which MAY NOT lazy-evaluate - * or lose information. That is, this routine must actually - * insert this page into the given map NOW. - */ -void -pmap_enter( - register pmap_t pmap, - vm_map_offset_t vaddr, - ppnum_t pn, - vm_prot_t prot, - unsigned int flags, - boolean_t wired) -{ - pt_entry_t *pte; - pv_rooted_entry_t pv_h; - int pai; - pv_hashed_entry_t pvh_e; - pv_hashed_entry_t pvh_new; - pt_entry_t template; - pmap_paddr_t old_pa; - pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn); - boolean_t need_tlbflush = FALSE; - boolean_t set_NX; - char oattr; - boolean_t old_pa_locked; - boolean_t superpage = flags & VM_MEM_SUPERPAGE; - vm_object_t delpage_pm_obj = NULL; - int delpage_pde_index = 0; - - - pmap_intr_assert(); - assert(pn != vm_page_fictitious_addr); - if (pmap_debug) - kprintf("pmap_enter(%p,%llu,%u)\n", pmap, vaddr, pn); - if (pmap == PMAP_NULL) - return; - if (pn == vm_page_guard_addr) - return; - - PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START, - pmap, - (uint32_t) (vaddr >> 32), (uint32_t) vaddr, - pn, prot); - - if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled) - set_NX = FALSE; - else - set_NX = TRUE; - - /* - * Must allocate a new pvlist entry while we're unlocked; - * zalloc may cause pageout (which will lock the pmap system). - * If we determine we need a pvlist entry, we will unlock - * and allocate one. Then we will retry, throughing away - * the allocated entry later (if we no longer need it). - */ - - pvh_new = PV_HASHED_ENTRY_NULL; -Retry: - pvh_e = PV_HASHED_ENTRY_NULL; - - PMAP_LOCK(pmap); - - /* - * Expand pmap to include this pte. Assume that - * pmap is always expanded to include enough hardware - * pages to map one VM page. - */ - if(superpage) { - while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) { - /* need room for another pde entry */ - PMAP_UNLOCK(pmap); - pmap_expand_pdpt(pmap, vaddr); - PMAP_LOCK(pmap); - } - } else { - while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) { - /* - * Must unlock to expand the pmap - * going to grow pde level page(s) - */ - PMAP_UNLOCK(pmap); - pmap_expand(pmap, vaddr); - PMAP_LOCK(pmap); - } - } - - if (superpage && *pte && !(*pte & INTEL_PTE_PS)) { - /* - * There is still an empty page table mapped that - * was used for a previous base page mapping. - * Remember the PDE and the PDE index, so that we - * can free the page at the end of this function. - */ - delpage_pde_index = (int)pdeidx(pmap, vaddr); - delpage_pm_obj = pmap->pm_obj; - *pte = 0; - } - - old_pa = pte_to_pa(*pte); - pai = pa_index(old_pa); - old_pa_locked = FALSE; - - /* - * if we have a previous managed page, lock the pv entry now. 
after - * we lock it, check to see if someone beat us to the lock and if so - * drop the lock - */ - if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) { - LOCK_PVH(pai); - old_pa_locked = TRUE; - old_pa = pte_to_pa(*pte); - if (0 == old_pa) { - UNLOCK_PVH(pai); /* another path beat us to it */ - old_pa_locked = FALSE; - } - } - - /* - * Special case if the incoming physical page is already mapped - * at this address. - */ - if (old_pa == pa) { - - /* - * May be changing its wired attribute or protection - */ - - template = pa_to_pte(pa) | INTEL_PTE_VALID; - - if (VM_MEM_NOT_CACHEABLE == - (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) { - if (!(flags & VM_MEM_GUARDED)) - template |= INTEL_PTE_PTA; - template |= INTEL_PTE_NCACHE; - } - if (pmap != kernel_pmap) - template |= INTEL_PTE_USER; - if (prot & VM_PROT_WRITE) - template |= INTEL_PTE_WRITE; - - if (set_NX) - template |= INTEL_PTE_NX; - - if (wired) { - template |= INTEL_PTE_WIRED; - if (!iswired(*pte)) - OSAddAtomic(+1, - &pmap->stats.wired_count); - } else { - if (iswired(*pte)) { - assert(pmap->stats.wired_count >= 1); - OSAddAtomic(-1, - &pmap->stats.wired_count); - } - } - if (superpage) /* this path can not be used */ - template |= INTEL_PTE_PS; /* to change the page size! */ - - /* store modified PTE and preserve RC bits */ - pmap_update_pte(pte, *pte, - template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD))); - if (old_pa_locked) { - UNLOCK_PVH(pai); - old_pa_locked = FALSE; - } - need_tlbflush = TRUE; - goto Done; - } - - /* - * Outline of code from here: - * 1) If va was mapped, update TLBs, remove the mapping - * and remove old pvlist entry. - * 2) Add pvlist entry for new mapping - * 3) Enter new mapping. - * - * If the old physical page is not managed step 1) is skipped - * (except for updating the TLBs), and the mapping is - * overwritten at step 3). If the new physical page is not - * managed, step 2) is skipped. - */ - - if (old_pa != (pmap_paddr_t) 0) { - - /* - * Don't do anything to pages outside valid memory here. - * Instead convince the code that enters a new mapping - * to overwrite the old one. - */ - - /* invalidate the PTE */ - pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID)); - /* propagate invalidate everywhere */ - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); - /* remember reference and change */ - oattr = (char) (*pte & (PHYS_MODIFIED | PHYS_REFERENCED)); - /* completely invalidate the PTE */ - pmap_store_pte(pte, 0); - - if (IS_MANAGED_PAGE(pai)) { -#if TESTING - if (pmap->stats.resident_count < 1) - panic("pmap_enter: resident_count"); -#endif - assert(pmap->stats.resident_count >= 1); - OSAddAtomic(-1, - &pmap->stats.resident_count); - - if (iswired(*pte)) { -#if TESTING - if (pmap->stats.wired_count < 1) - panic("pmap_enter: wired_count"); -#endif - assert(pmap->stats.wired_count >= 1); - OSAddAtomic(-1, - &pmap->stats.wired_count); - } - pmap_phys_attributes[pai] |= oattr; - - /* - * Remove the mapping from the pvlist for - * this physical page. - * We'll end up with either a rooted pv or a - * hashed pv - */ - pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t) pai); - - } else { - - /* - * old_pa is not managed. - * Do removal part of accounting. 
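-			 * (unmanaged pages have no pv entry and were never
-			 * added to resident_count, so only the wired count
-			 * may need adjusting here)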
- */ - - if (iswired(*pte)) { - assert(pmap->stats.wired_count >= 1); - OSAddAtomic(-1, - &pmap->stats.wired_count); - } - } - } - - /* - * if we had a previously managed paged locked, unlock it now - */ - if (old_pa_locked) { - UNLOCK_PVH(pai); - old_pa_locked = FALSE; - } - - pai = pa_index(pa); /* now working with new incoming phys page */ - if (IS_MANAGED_PAGE(pai)) { - - /* - * Step 2) Enter the mapping in the PV list for this - * physical page. - */ - pv_h = pai_to_pvh(pai); - - LOCK_PVH(pai); - - if (pv_h->pmap == PMAP_NULL) { - /* - * No mappings yet, use rooted pv - */ - pv_h->va = vaddr; - pv_h->pmap = pmap; - queue_init(&pv_h->qlink); - } else { - /* - * Add new pv_hashed_entry after header. - */ - if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) { - pvh_e = pvh_new; - pvh_new = PV_HASHED_ENTRY_NULL; - } else if (PV_HASHED_ENTRY_NULL == pvh_e) { - PV_HASHED_ALLOC(pvh_e); - if (PV_HASHED_ENTRY_NULL == pvh_e) { - /* - * the pv list is empty. if we are on - * the kernel pmap we'll use one of - * the special private kernel pv_e's, - * else, we need to unlock - * everything, zalloc a pv_e, and - * restart bringing in the pv_e with - * us. - */ - if (kernel_pmap == pmap) { - PV_HASHED_KERN_ALLOC(pvh_e); - } else { - UNLOCK_PVH(pai); - PMAP_UNLOCK(pmap); - pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone); - goto Retry; - } - } - } - if (PV_HASHED_ENTRY_NULL == pvh_e) - panic("pvh_e exhaustion"); - - pvh_e->va = vaddr; - pvh_e->pmap = pmap; - pvh_e->ppn = pn; - pv_hash_add(pvh_e, pv_h); - - /* - * Remember that we used the pvlist entry. - */ - pvh_e = PV_HASHED_ENTRY_NULL; - } - - /* - * only count the mapping - * for 'managed memory' - */ - OSAddAtomic(+1, & pmap->stats.resident_count); - if (pmap->stats.resident_count > pmap->stats.resident_max) { - pmap->stats.resident_max = pmap->stats.resident_count; - } - } - /* - * Step 3) Enter the mapping. - * - * Build a template to speed up entering - - * only the pfn changes. 
- */ - template = pa_to_pte(pa) | INTEL_PTE_VALID; - - if (flags & VM_MEM_NOT_CACHEABLE) { - if (!(flags & VM_MEM_GUARDED)) - template |= INTEL_PTE_PTA; - template |= INTEL_PTE_NCACHE; - } - if (pmap != kernel_pmap) - template |= INTEL_PTE_USER; - if (prot & VM_PROT_WRITE) - template |= INTEL_PTE_WRITE; - if (set_NX) - template |= INTEL_PTE_NX; - if (wired) { - template |= INTEL_PTE_WIRED; - OSAddAtomic(+1, & pmap->stats.wired_count); - } - if (superpage) - template |= INTEL_PTE_PS; - pmap_store_pte(pte, template); - - /* - * if this was a managed page we delayed unlocking the pv until here - * to prevent pmap_page_protect et al from finding it until the pte - * has been stored - */ - if (IS_MANAGED_PAGE(pai)) { - UNLOCK_PVH(pai); - } -Done: - if (need_tlbflush == TRUE) - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); - - if (pvh_e != PV_HASHED_ENTRY_NULL) { - PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1); - } - if (pvh_new != PV_HASHED_ENTRY_NULL) { - PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1); - } - PMAP_UNLOCK(pmap); - - if (delpage_pm_obj) { - vm_page_t m; - - vm_object_lock(delpage_pm_obj); - m = vm_page_lookup(delpage_pm_obj, delpage_pde_index); - if (m == VM_PAGE_NULL) - panic("pmap_enter: pte page not in object"); - VM_PAGE_FREE(m); - OSAddAtomic(-1, &inuse_ptepages_count); - vm_object_unlock(delpage_pm_obj); - } - - PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0); -} - /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address @@ -3341,96 +2075,6 @@ phys_page_exists(ppnum_t pn) return TRUE; } -void -mapping_free_prime(void) -{ - int i; - pv_hashed_entry_t pvh_e; - pv_hashed_entry_t pvh_eh; - pv_hashed_entry_t pvh_et; - int pv_cnt; - - pv_cnt = 0; - pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL; - for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) { - pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone); - - pvh_e->qlink.next = (queue_entry_t)pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) - pvh_et = pvh_e; - pv_cnt++; - } - PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt); - - pv_cnt = 0; - pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL; - for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) { - pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone); - - pvh_e->qlink.next = (queue_entry_t)pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) - pvh_et = pvh_e; - pv_cnt++; - } - PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt); - -} - -void -mapping_adjust(void) -{ - pv_hashed_entry_t pvh_e; - pv_hashed_entry_t pvh_eh; - pv_hashed_entry_t pvh_et; - int pv_cnt; - int i; - - if (mapping_adjust_call == NULL) { - thread_call_setup(&mapping_adjust_call_data, - (thread_call_func_t) mapping_adjust, - (thread_call_param_t) NULL); - mapping_adjust_call = &mapping_adjust_call_data; - } - - pv_cnt = 0; - pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL; - if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) { - for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) { - pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone); - - pvh_e->qlink.next = (queue_entry_t)pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) - pvh_et = pvh_e; - pv_cnt++; - } - PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt); - } - - pv_cnt = 0; - pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL; - if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) { - for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) { - pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone); - - pvh_e->qlink.next = (queue_entry_t)pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) 
- pvh_et = pvh_e; - pv_cnt++; - } - PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt); - } - mappingrecurse = 0; -} - - void pmap_switch(pmap_t tpmap) {