X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/935ed37a5c468c8a1c07408573c08b8b7ef80e8b..b7266188b87f3620ec3f9f717e57194a7dd989fe:/osfmk/i386/pmap.c diff --git a/osfmk/i386/pmap.c b/osfmk/i386/pmap.c index b83947193..e7135803a 100644 --- a/osfmk/i386/pmap.c +++ b/osfmk/i386/pmap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2007 Apple Inc. All rights reserved. + * Copyright (c) 2000-2009 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -89,7 +89,6 @@ */ #include -#include #include #include @@ -124,13 +123,14 @@ #include #include #include -#include #include #include #include #include #include #include +#include +#include #if MACH_KDB #include @@ -143,8 +143,8 @@ #include #include +#include -#include /* #define DEBUGINTERRUPTS 1 uncomment to ensure pmap callers have interrupts enabled */ #ifdef DEBUGINTERRUPTS @@ -160,29 +160,11 @@ #include #endif /* IWANTTODEBUG */ -//#define PMAP_TRACES 1 -#ifdef PMAP_TRACES -boolean_t pmap_trace = FALSE; -#define PMAP_TRACE(x,a,b,c,d,e) \ - if (pmap_trace) { \ - KERNEL_DEBUG_CONSTANT(x,a,b,c,d,e); \ - } -#else -#define PMAP_TRACE(x,a,b,c,d,e) KERNEL_DEBUG(x,a,b,c,d,e) -#endif /* PMAP_TRACES */ - /* * Forward declarations for internal functions. */ -void pmap_expand_pml4( - pmap_t map, - vm_map_offset_t v); - -void pmap_expand_pdpt( - pmap_t map, - vm_map_offset_t v); -void pmap_remove_range( +void pmap_remove_range( pmap_t pmap, vm_map_offset_t va, pt_entry_t *spte, @@ -203,11 +185,6 @@ void phys_attribute_set( void pmap_set_reference( ppnum_t pn); -void pmap_movepage( - unsigned long from, - unsigned long to, - vm_size_t size); - boolean_t phys_page_exists( ppnum_t pn); @@ -218,8 +195,6 @@ void dump_4GB_pdpt(pmap_t p); void dump_4GB_pdpt_thread(thread_t tp); #endif -#define iswired(pte) ((pte) & INTEL_PTE_WIRED) - int nx_enabled = 1; /* enable no-execute protection */ #ifdef CONFIG_EMBEDDED int allow_data_exec = 0; /* no exec from data, embedded is hardcore like that */ @@ -228,7 +203,8 @@ int allow_data_exec = VM_ABI_32; /* 32-bit apps may execute data by default, 64 #endif int allow_stack_exec = 0; /* No apps may execute from the stack by default */ -int cpu_64bit = 0; +boolean_t cpu_64bit = FALSE; +boolean_t pmap_trace = FALSE; /* * when spinning through pmap_remove @@ -242,143 +218,10 @@ int cpu_64bit = 0; uint64_t max_preemption_latency_tsc = 0; -/* - * Private data structures. - */ - -/* - * For each vm_page_t, there is a list of all currently - * valid virtual mappings of that page. An entry is - * a pv_rooted_entry_t; the list is the pv_table. - * - * N.B. with the new combo rooted/hashed scheme it is - * only possibly to remove individual non-rooted entries - * if they are found via the hashed chains as there is no - * way to unlink the singly linked hashed entries if navigated to - * via the queue list off the rooted entries. Think of it as - * hash/walk/pull, keeping track of the prev pointer while walking - * the singly linked hash list. All of this is to save memory and - * keep both types of pv_entries as small as possible. - */ - -/* - -PV HASHING Changes - JK 1/2007 - -Pve's establish physical to virtual mappings. These are used for aliasing of a -physical page to (potentially many) virtual addresses within pmaps. 
In the previous -implementation the structure of the pv_entries (each 16 bytes in size) was - -typedef struct pv_entry { - struct pv_entry_t next; - pmap_t pmap; - vm_map_offset_t va; -} *pv_entry_t; - -An initial array of these is created at boot time, one per physical page of memory, -indexed by the physical page number. Additionally, a pool of entries is created from a -pv_zone to be used as needed by pmap_enter() when it is creating new mappings. -Originally, we kept this pool around because the code in pmap_enter() was unable to -block if it needed an entry and none were available - we'd panic. Some time ago I -restructured the pmap_enter() code so that for user pmaps it can block while zalloc'ing -a pv structure and restart, removing a panic from the code (in the case of the kernel -pmap we cannot block and still panic, so, we keep a separate hot pool for use only on -kernel pmaps). The pool has not been removed since there is a large performance gain -keeping freed pv's around for reuse and not suffering the overhead of zalloc for every new pv we need. - -As pmap_enter() created new mappings it linked the new pve's for them off the fixed -pv array for that ppn (off the next pointer). These pve's are accessed for several -operations, one of them being address space teardown. In that case, we basically do this - - for (every page/pte in the space) { - calc pve_ptr from the ppn in the pte - for (every pv in the list for the ppn) { - if (this pv is for this pmap/vaddr) { - do housekeeping - unlink/free the pv - } - } - } - -The problem arose when we were running, say 8000 (or even 2000) apache or other processes -and one or all terminate. The list hanging off each pv array entry could have thousands of -entries. We were continuously linearly searching each of these lists as we stepped through -the address space we were tearing down. Because of the locks we hold, likely taking a cache -miss for each node, and interrupt disabling for MP issues the system became completely -unresponsive for many seconds while we did this. - -Realizing that pve's are accessed in two distinct ways (linearly running the list by ppn -for operations like pmap_page_protect and finding and modifying/removing a single pve as -part of pmap_enter processing) has led to modifying the pve structures and databases. - -There are now two types of pve structures. A "rooted" structure which is basically the -original structure accessed in an array by ppn, and a ''hashed'' structure accessed on a -hash list via a hash of [pmap, vaddr]. These have been designed with the two goals of -minimizing wired memory and making the lookup of a ppn faster. Since a vast majority of -pages in the system are not aliased and hence represented by a single pv entry I've kept -the rooted entry size as small as possible because there is one of these dedicated for -every physical page of memory. The hashed pve's are larger due to the addition of the hash -link and the ppn entry needed for matching while running the hash list to find the entry we -are looking for. This way, only systems that have lots of aliasing (like 2000+ httpd procs) -will pay the extra memory price. Both structures have the same first three fields allowing -some simplification in the code. 
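To make the lookup flow described above concrete, here is a minimal sketch, not taken from the diff itself, of how a mapping is found under the combined scheme: the per-page rooted entry is checked first, and only an aliased page falls back to hashing [pmap, vaddr] and walking the short chain. The helper name pv_find_entry_sketch is hypothetical; pv_head_table, pv_hash_table, npvhash, PV_HASHED_ENTRY_NULL, the hash expression and the entry shapes (shown just below) are the ones this file already declares.

static pv_hashed_entry_t
pv_find_entry_sketch(pmap_t pmap, vm_map_offset_t va, ppnum_t ppn)
{
	pv_rooted_entry_t	pv_h = &pv_head_table[ppn];	/* rooted entry, one per physical page */
	pv_hashed_entry_t	pvh_e;
	uint32_t		idx;

	if (pv_h->pmap == PMAP_NULL)
		return PV_HASHED_ENTRY_NULL;		/* page is not mapped anywhere */

	if (pv_h->pmap == pmap && pv_h->va == va)
		return (pv_hashed_entry_t) pv_h;	/* unaliased page: the rooted entry matches;
							 * the first three fields of both types line up */

	/* aliased page: hash [pmap, vaddr] and walk the hash chain for this bucket */
	idx = ((uint32_t) pmap ^ (uint32_t) (va >> PAGE_SHIFT)) & npvhash;
	for (pvh_e = pv_hash_table[idx]; pvh_e != PV_HASHED_ENTRY_NULL; pvh_e = pvh_e->nexth) {
		if (pvh_e->pmap == pmap && pvh_e->va == va && pvh_e->ppn == ppn)
			return pvh_e;
	}
	return PV_HASHED_ENTRY_NULL;
}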
- -They have these shapes - -typedef struct pv_rooted_entry { - queue_head_t qlink; - vm_map_offset_t va; - pmap_t pmap; -} *pv_rooted_entry_t; - - -typedef struct pv_hashed_entry { - queue_head_t qlink; - vm_map_offset_t va; - pmap_t pmap; - ppnum_t ppn; - struct pv_hashed_entry *nexth; -} *pv_hashed_entry_t; - -The main flow difference is that the code is now aware of the rooted entry and the hashed -entries. Code that runs the pv list still starts with the rooted entry and then continues -down the qlink onto the hashed entries. Code that is looking up a specific pv entry first -checks the rooted entry and then hashes and runs the hash list for the match. The hash list -lengths are much smaller than the original pv lists that contained all aliases for the specific ppn. - -*/ - -typedef struct pv_rooted_entry { /* first three entries must match pv_hashed_entry_t */ - queue_head_t qlink; - vm_map_offset_t va; /* virtual address for mapping */ - pmap_t pmap; /* pmap where mapping lies */ -} *pv_rooted_entry_t; - -#define PV_ROOTED_ENTRY_NULL ((pv_rooted_entry_t) 0) - -pv_rooted_entry_t pv_head_table; /* array of entries, one per page */ - -typedef struct pv_hashed_entry { /* first three entries must match pv_rooted_entry_t */ - queue_head_t qlink; - vm_map_offset_t va; - pmap_t pmap; - ppnum_t ppn; - struct pv_hashed_entry *nexth; -} *pv_hashed_entry_t; - -#define PV_HASHED_ENTRY_NULL ((pv_hashed_entry_t)0) - -#define NPVHASH 4095 /* MUST BE 2^N - 1 */ pv_hashed_entry_t *pv_hash_table; /* hash lists */ uint32_t npvhash = 0; -/* #define PV_DEBUG 1 uncomment to enable some PV debugging code */ -#ifdef PV_DEBUG -#define CHK_NPVHASH() if(0 == npvhash) panic("npvhash uninitialized"); -#else -#define CHK_NPVHASH() -#endif /* * pv_list entries are kept on a list that can only be accessed @@ -396,53 +239,6 @@ int pv_free_count = 0; int pv_hashed_free_count = 0; int pv_kern_free_count = 0; int pv_hashed_kern_free_count = 0; -#define PV_HASHED_LOW_WATER_MARK 5000 -#define PV_HASHED_KERN_LOW_WATER_MARK 100 -#define PV_HASHED_ALLOC_CHUNK 2000 -#define PV_HASHED_KERN_ALLOC_CHUNK 50 -thread_call_t mapping_adjust_call; -static thread_call_data_t mapping_adjust_call_data; -uint32_t mappingrecurse = 0; - -#define PV_HASHED_ALLOC(pvh_e) { \ - simple_lock(&pv_hashed_free_list_lock); \ - if ((pvh_e = pv_hashed_free_list) != 0) { \ - pv_hashed_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \ - pv_hashed_free_count--; \ - if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) \ - if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \ - thread_call_enter(mapping_adjust_call); \ - } \ - simple_unlock(&pv_hashed_free_list_lock); \ -} - -#define PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt) { \ - simple_lock(&pv_hashed_free_list_lock); \ - pvh_et->qlink.next = (queue_entry_t)pv_hashed_free_list; \ - pv_hashed_free_list = pvh_eh; \ - pv_hashed_free_count += pv_cnt; \ - simple_unlock(&pv_hashed_free_list_lock); \ -} - -#define PV_HASHED_KERN_ALLOC(pvh_e) { \ - simple_lock(&pv_hashed_kern_free_list_lock); \ - if ((pvh_e = pv_hashed_kern_free_list) != 0) { \ - pv_hashed_kern_free_list = (pv_hashed_entry_t)pvh_e->qlink.next; \ - pv_hashed_kern_free_count--; \ - if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) \ - if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \ - thread_call_enter(mapping_adjust_call); \ - } \ - simple_unlock(&pv_hashed_kern_free_list_lock); \ -} - -#define PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt) { \ - simple_lock(&pv_hashed_kern_free_list_lock); \ - pvh_et->qlink.next = 
(queue_entry_t)pv_hashed_kern_free_list; \ - pv_hashed_kern_free_list = pvh_eh; \ - pv_hashed_kern_free_count += pv_cnt; \ - simple_unlock(&pv_hashed_kern_free_list_lock); \ -} zone_t pv_hashed_list_zone; /* zone of pv_hashed_entry structures */ @@ -470,23 +266,6 @@ boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? */ static struct vm_object kptobj_object_store; static vm_object_t kptobj; -/* - * Index into pv_head table, its lock bits, and the modify/reference and managed bits - */ - -#define pa_index(pa) (i386_btop(pa)) -#define ppn_to_pai(ppn) ((int)ppn) - -#define pai_to_pvh(pai) (&pv_head_table[pai]) -#define lock_pvh_pai(pai) bit_lock(pai, (void *)pv_lock_table) -#define unlock_pvh_pai(pai) bit_unlock(pai, (void *)pv_lock_table) - -#define pvhashidx(pmap, va) (((uint32_t)pmap ^ ((uint32_t)((uint64_t)va >> PAGE_SHIFT) & 0xFFFFFFFF)) & npvhash) -#define pvhash(idx) (&pv_hash_table[idx]) - -#define lock_hash_hash(hash) bit_lock(hash, (void *)pv_hash_lock_table) -#define unlock_hash_hash(hash) bit_unlock(hash, (void *)pv_hash_lock_table) - /* * Array of physical page attribites for managed pages. * One byte per physical page. @@ -533,17 +312,6 @@ uint64_t pde_mapped_size; * previously. */ -/* - * pmap locking - */ - -#define PMAP_LOCK(pmap) { \ - simple_lock(&(pmap)->lock); \ -} - -#define PMAP_UNLOCK(pmap) { \ - simple_unlock(&(pmap)->lock); \ -} /* * PV locking @@ -587,15 +355,8 @@ extern int max_lock_loops; #define LOOP_CHECK(msg, pmap) #endif /* USLOCK_DEBUG */ - -static void pmap_flush_tlbs(pmap_t pmap); - -#define PMAP_UPDATE_TLBS(pmap, s, e) \ - pmap_flush_tlbs(pmap) - - -#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */ - +unsigned pmap_memory_region_count; +unsigned pmap_memory_region_current; pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE]; @@ -619,6 +380,7 @@ unsigned int inuse_ptepages_count = 0; addr64_t kernel64_cr3; boolean_t no_shared_cr3 = FALSE; /* -no_shared_cr3 boot arg */ + /* * Pmap cache. Cache is threaded through ref_count field of pmap. * Max will eventually be constant -- variable for experimentation. @@ -632,49 +394,10 @@ decl_simple_lock_data(,pmap_cache_lock) extern char end; static int nkpt; -extern uint32_t lowGlo; pt_entry_t *DMAP1, *DMAP2; caddr_t DADDR1; caddr_t DADDR2; - -static inline -void pmap_pvh_unlink(pv_hashed_entry_t pv); - -/* - * unlinks the pv_hashed_entry_t pvh from the singly linked hash chain. - * properly deals with the anchor. - * must be called with the hash locked, does not unlock it - */ - -static inline -void pmap_pvh_unlink(pv_hashed_entry_t pvh) -{ - pv_hashed_entry_t curh; - pv_hashed_entry_t *pprevh; - int pvhash_idx; - - CHK_NPVHASH(); - pvhash_idx = pvhashidx(pvh->pmap, pvh->va); - - pprevh = pvhash(pvhash_idx); - -#if PV_DEBUG - if (NULL == *pprevh) panic("pvh_unlink null anchor"); /* JK DEBUG */ -#endif - curh = *pprevh; - - while (PV_HASHED_ENTRY_NULL != curh) { - if (pvh == curh) - break; - pprevh = &curh->nexth; - curh = curh->nexth; - } - if (PV_HASHED_ENTRY_NULL == curh) panic("pmap_pvh_unlink no pvh"); - *pprevh = pvh->nexth; - return; -} - /* * for legacy, returns the address of the pde entry. 
* for 64 bit, causes the pdpt page containing the pde entry to be mapped, @@ -832,6 +555,8 @@ pmap_pte(pmap_t pmap, vm_map_offset_t vaddr) pde = pmap_pde(pmap,vaddr); if (pde && ((*pde & INTEL_PTE_VALID))) { + if (*pde & INTEL_PTE_PS) + return pde; if (pmap == kernel_pmap) return (vtopte(vaddr)); /* compat kernel still has pte's mapped */ #if TESTING @@ -906,7 +631,7 @@ pmap_map_bd( unsigned int flags) { pt_entry_t template; - pt_entry_t *pte; + pt_entry_t *pte; spl_t spl; template = pa_to_pte(start_addr) @@ -924,6 +649,7 @@ pmap_map_bd( if (prot & VM_PROT_WRITE) template |= INTEL_PTE_WRITE; + while (start_addr < end_addr) { spl = splhigh(); pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt); @@ -935,18 +661,16 @@ pmap_map_bd( pte_increment_pa(template); virt += PAGE_SIZE; start_addr += PAGE_SIZE; - } + } + flush_tlb(); return(virt); } -extern char *first_avail; -extern vm_offset_t virtual_avail, virtual_end; -extern pmap_paddr_t avail_start, avail_end; -extern vm_offset_t etext; -extern void *sectHIBB; -extern int sectSizeHIB; +extern char *first_avail; +extern vm_offset_t virtual_avail, virtual_end; +extern pmap_paddr_t avail_start, avail_end; void pmap_cpu_init(void) @@ -1022,13 +746,13 @@ pmap_init_high_shared(void) { vm_offset_t haddr; - struct __gdt_desc_struct gdt_desc = {0,0,0}; - struct __idt_desc_struct idt_desc = {0,0,0}; spl_t s; #if MACH_KDB struct i386_tss *ttss; #endif + cpu_desc_index_t * cdi = &cpu_data_master.cpu_desc_index; + kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n", HIGH_MEM_BASE,pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN)); s = splhigh(); @@ -1041,46 +765,48 @@ pmap_init_high_shared(void) haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS, (vm_offset_t) &hi_remap_text, 3); kprintf("tramp: 0x%x, ",haddr); - printf("hi mem tramps at 0x%x\n",haddr); /* map gdt up high and update ptr for reload */ haddr = pmap_high_shared_remap(HIGH_FIXED_GDT, (vm_offset_t) master_gdt, 1); - __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory"); - gdt_desc.address = haddr; + cdi->cdi_gdt.ptr = (void *)haddr; kprintf("GDT: 0x%x, ",haddr); /* map ldt up high */ haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN, (vm_offset_t) master_ldt, HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1); + cdi->cdi_ldt = (struct fake_descriptor *)haddr; kprintf("LDT: 0x%x, ",haddr); /* put new ldt addr into gdt */ - master_gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern; - master_gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(KERNEL_LDT)], 1); - master_gdt[sel_idx(USER_LDT)] = ldt_desc_pattern; - master_gdt[sel_idx(USER_LDT)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(USER_LDT)], 1); + struct fake_descriptor temp_fake_desc; + temp_fake_desc = ldt_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) haddr; + fix_desc(&temp_fake_desc, 1); + + *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_LDT)] = temp_fake_desc; + *(struct fake_descriptor *) &master_gdt[sel_idx(USER_LDT)] = temp_fake_desc; /* map idt up high */ haddr = pmap_high_shared_remap(HIGH_FIXED_IDT, (vm_offset_t) master_idt, 1); - __asm__ __volatile__("sidt %0" : "=m" (idt_desc)); - idt_desc.address = haddr; + cdi->cdi_idt.ptr = (void *)haddr; kprintf("IDT: 0x%x, ", haddr); /* remap ktss up high and put new high addr into gdt */ haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS, (vm_offset_t) &master_ktss, 1); - master_gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern; - master_gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(KERNEL_TSS)], 1); + + 
temp_fake_desc = tss_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) haddr; + fix_desc(&temp_fake_desc, 1); + *(struct fake_descriptor *) &master_gdt[sel_idx(KERNEL_TSS)] = temp_fake_desc; kprintf("KTSS: 0x%x, ",haddr); #if MACH_KDB /* remap dbtss up high and put new high addr into gdt */ haddr = pmap_high_shared_remap(HIGH_FIXED_DBTSS, (vm_offset_t) &master_dbtss, 1); - master_gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern; - master_gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(DEBUG_TSS)], 1); + temp_fake_desc = tss_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) haddr; + fix_desc(&temp_fake_desc, 1); + *(struct fake_descriptor *)&master_gdt[sel_idx(DEBUG_TSS)] = temp_fake_desc; ttss = (struct i386_tss *)haddr; kprintf("DBTSS: 0x%x, ",haddr); #endif /* MACH_KDB */ @@ -1088,24 +814,22 @@ pmap_init_high_shared(void) /* remap dftss up high and put new high addr into gdt */ haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS, (vm_offset_t) &master_dftss, 1); - master_gdt[sel_idx(DF_TSS)] = tss_desc_pattern; - master_gdt[sel_idx(DF_TSS)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(DF_TSS)], 1); + temp_fake_desc = tss_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) haddr; + fix_desc(&temp_fake_desc, 1); + *(struct fake_descriptor *) &master_gdt[sel_idx(DF_TSS)] = temp_fake_desc; kprintf("DFTSS: 0x%x\n",haddr); /* remap mctss up high and put new high addr into gdt */ haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS, (vm_offset_t) &master_mctss, 1); - master_gdt[sel_idx(MC_TSS)] = tss_desc_pattern; - master_gdt[sel_idx(MC_TSS)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(MC_TSS)], 1); + temp_fake_desc = tss_desc_pattern; + temp_fake_desc.offset = (vm_offset_t) haddr; + fix_desc(&temp_fake_desc, 1); + *(struct fake_descriptor *) &master_gdt[sel_idx(MC_TSS)] = temp_fake_desc; kprintf("MCTSS: 0x%x\n",haddr); - __asm__ __volatile__("lgdt %0": "=m" (gdt_desc)); - __asm__ __volatile__("lidt %0": "=m" (idt_desc)); - kprintf("gdt/idt reloaded, "); - set_tr(KERNEL_TSS); - kprintf("tr reset to KERNEL_TSS\n"); + cpu_desc_load(&cpu_data_master); } @@ -1113,19 +837,6 @@ pmap_init_high_shared(void) * Bootstrap the system enough to run with virtual memory. * Map the kernel's code and data, and allocate the system page table. * Called with mapping OFF. Page_size must already be set. 
- * - * Parameters: - * load_start: PA where kernel was loaded - * avail_start PA of first available physical page - - * after kernel page tables - * avail_end PA of last available physical page - * virtual_avail VA of first available page - - * after kernel page tables - * virtual_end VA of last available page - - * end of kernel address space - * - * &start_text start of kernel text - * &etext end of kernel text */ void @@ -1136,7 +847,6 @@ pmap_bootstrap( vm_offset_t va; pt_entry_t *pte; int i; - int wpkernel, boot_arg; pdpt_entry_t *pdpt; spl_t s; @@ -1160,11 +870,12 @@ pmap_bootstrap( kernel_pmap->pm_pdpt = pdpt; kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT); + va = (vm_offset_t)kernel_pmap->dirbase; /* setup self referential mapping(s) */ for (i = 0; i< NPGPTD; i++, pdpt++) { pmap_paddr_t pa; - pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i)); + pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i))); pmap_store_pte( (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i), (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF | @@ -1186,7 +897,7 @@ pmap_bootstrap( splx(s); nkpt = NKPT; - inuse_ptepages_count += NKPT; + OSAddAtomic(NKPT, &inuse_ptepages_count); virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail; virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS); @@ -1215,7 +926,7 @@ pmap_bootstrap( virtual_avail = va; - if (PE_parse_boot_arg("npvhash", &npvhash)) { + if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) { if (0 != ((npvhash+1) & npvhash)) { kprintf("invalid hash %d, must be ((2^N)-1), using default %d\n",npvhash,NPVHASH); npvhash = NPVHASH; @@ -1225,42 +936,6 @@ pmap_bootstrap( } printf("npvhash=%d\n",npvhash); - wpkernel = 1; - if (PE_parse_boot_arg("wpkernel", &boot_arg)) { - if (boot_arg == 0) - wpkernel = 0; - } - - s = splhigh(); - - /* Remap kernel text readonly unless the "wpkernel" boot-arg is present - * and set to 0. 
- */ - if (wpkernel) - { - vm_offset_t myva; - pt_entry_t *ptep; - - for (myva = i386_round_page(MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) { - if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB)) - continue; - ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva); - if (ptep) - pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW); - } - } - - /* no matter what, kernel page zero is not accessible */ - pte = pmap_pte(kernel_pmap, 0); - pmap_store_pte(pte, INTEL_PTE_INVALID); - - /* map lowmem global page into fixed addr 0x2000 */ - if (0 == (pte = pmap_pte(kernel_pmap,0x2000))) panic("lowmem pte"); - assert(0 == ((vm_offset_t) &lowGlo & PAGE_MASK)); /* make sure it is defined on page boundary */ - pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)|INTEL_PTE_VALID|INTEL_PTE_REF|INTEL_PTE_MOD|INTEL_PTE_WIRED|INTEL_PTE_RW); - splx(s); - flush_tlb(); - simple_lock_init(&kernel_pmap->lock, 0); simple_lock_init(&pv_hashed_free_list_lock, 0); simple_lock_init(&pv_hashed_kern_free_list_lock, 0); @@ -1271,7 +946,7 @@ pmap_bootstrap( pde_mapped_size = PDE_MAPPED_SIZE; if (cpu_64bit) { - pdpt_entry_t *ppdpt = (pdpt_entry_t *)IdlePDPT; + pdpt_entry_t *ppdpt = IdlePDPT; pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64; pdpt_entry_t *ppml4 = (pdpt_entry_t *)IdlePML4; int istate = ml_set_interrupts_enabled(FALSE); @@ -1304,7 +979,7 @@ pmap_bootstrap( kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3; /* Re-initialize descriptors and prepare to switch modes */ - cpu_desc_init64(&cpu_data_master, TRUE); + cpu_desc_init64(&cpu_data_master); current_cpu_datap()->cpu_is64bit = TRUE; current_cpu_datap()->cpu_active_cr3 = kernel64_cr3; @@ -1313,8 +988,11 @@ pmap_bootstrap( ml_set_interrupts_enabled(istate); } - /* Set 64-bit mode if required. */ + /* Sets 64-bit mode if required. */ cpu_mode_init(&cpu_data_master); + /* Update in-kernel CPUID information if we're now in 64-bit mode */ + if (IA32e) + cpuid_set_info(); kernel_pmap->pm_hold = (vm_offset_t)kernel_pmap->pm_pml4; @@ -1331,12 +1009,12 @@ pmap_bootstrap( * By default for 64-bit users loaded at 4GB, share kernel mapping. * But this may be overridden by the -no_shared_cr3 boot-arg. 
*/ - if (PE_parse_boot_arg("-no_shared_cr3", &no_shared_cr3)) { + if (PE_parse_boot_argn("-no_shared_cr3", &no_shared_cr3, sizeof (no_shared_cr3))) { kprintf("Shared kernel address space disabled\n"); } #ifdef PMAP_TRACES - if (PE_parse_boot_arg("-pmap_trace", &pmap_trace)) { + if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) { kprintf("Kernel traces for pmap operations enabled\n"); } #endif /* PMAP_TRACES */ @@ -1375,7 +1053,7 @@ pmap_init(void) * so we cover all memory */ - npages = i386_btop(avail_end); + npages = (long)i386_btop(avail_end); s = (vm_size_t) (sizeof(struct pv_rooted_entry) * npages + (sizeof (struct pv_hashed_entry_t *) * (npvhash+1)) + pv_lock_table_size(npages) @@ -1383,7 +1061,9 @@ pmap_init(void) + npages); s = round_page(s); - if (kmem_alloc_wired(kernel_map, &addr, s) != KERN_SUCCESS) + if (kernel_memory_allocate(kernel_map, &addr, s, 0, + KMA_KOBJECT | KMA_PERMANENT) + != KERN_SUCCESS) panic("pmap_init"); memset((char *)addr, 0, s); @@ -1414,10 +1094,11 @@ pmap_init(void) ppnum_t last_pn; pmap_memory_region_t *pmptr = pmap_memory_regions; - last_pn = i386_btop(avail_end); + last_pn = (ppnum_t)i386_btop(avail_end); for (i = 0; i < pmap_memory_region_count; i++, pmptr++) { if (pmptr->type == kEfiConventionalMemory) { + for (pn = pmptr->base; pn <= pmptr->end; pn++) { if (pn < last_pn) { pmap_phys_attributes[pn] |= PHYS_MANAGED; @@ -1473,18 +1154,6 @@ pmap_init(void) } -void -x86_lowmem_free(void) -{ - /* free lowmem pages back to the vm system. we had to defer doing this - until the vm system was fully up. - the actual pages that are released are determined by which - pages the memory sizing code puts into the region table */ - - ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base), - (vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base)); -} - #define managed_page(x) ( (unsigned int)x <= last_managed_page && (pmap_phys_attributes[x] & PHYS_MANAGED) ) @@ -1518,8 +1187,8 @@ pmap_verify_free( boolean_t pmap_is_empty( pmap_t pmap, - vm_map_offset_t vstart, - vm_map_offset_t vend) + vm_map_offset_t va_start, + vm_map_offset_t va_end) { vm_map_offset_t offset; ppnum_t phys_page; @@ -1527,8 +1196,20 @@ pmap_is_empty( if (pmap == PMAP_NULL) { return TRUE; } - for (offset = vstart; - offset < vend; + + /* + * Check the resident page count + * - if it's zero, the pmap is completely empty. + * This short-circuit test prevents a virtual address scan which is + * painfully slow for 64-bit spaces. + * This assumes the count is correct + * .. the debug kernel ought to be checking perhaps by page table walk. 
+ */ + if (pmap->stats.resident_count == 0) + return TRUE; + + for (offset = va_start; + offset < va_end; offset += PAGE_SIZE_64) { phys_page = pmap_find_phys(pmap, offset); if (phys_page) { @@ -1548,7 +1229,7 @@ pmap_is_empty( } kprintf("pmap_is_empty(%p,0x%llx,0x%llx): " "page %d at 0x%llx\n", - pmap, vstart, vend, phys_page, offset); + pmap, va_start, va_end, phys_page, offset); return FALSE; } } @@ -1617,8 +1298,8 @@ pmap_create( /* legacy 32 bit setup */ /* in the legacy case the pdpt layer is hardwired to 4 entries and each * entry covers 1GB of addr space */ - if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD)) - panic("pmap_create kmem_alloc_wired"); + if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD)) + panic("pmap_create kmem_alloc_kobject"); p->pm_hold = (vm_offset_t)zalloc(pdpt_zone); if ((vm_offset_t)NULL == p->pm_hold) { panic("pdpt zalloc"); @@ -1633,10 +1314,10 @@ pmap_create( va = (vm_offset_t)p->dirbase; p->pdirbase = kvtophys(va); - template = cpu_64bit ? INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF : INTEL_PTE_VALID; + template = INTEL_PTE_VALID; for (i = 0; i< NPGPTD; i++, pdpt++ ) { pmap_paddr_t pa; - pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i)); + pa = (pmap_paddr_t) kvtophys((vm_offset_t)(va + i386_ptob(i))); pmap_store_pte(pdpt, pa | template); } @@ -1649,15 +1330,13 @@ pmap_create( /* 64 bit setup */ /* alloc the pml4 page in kernel vm */ - if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE)) - panic("pmap_create kmem_alloc_wired pml4"); + if (KERN_SUCCESS != kmem_alloc_kobject(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE)) + panic("pmap_create kmem_alloc_kobject pml4"); memset((char *)p->pm_hold, 0, PAGE_SIZE); p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold); - vm_page_lock_queues(); - inuse_ptepages_count++; - vm_page_unlock_queues(); + OSAddAtomic(1, &inuse_ptepages_count); /* allocate the vm_objs to hold the pdpt, pde and pte pages */ @@ -1673,7 +1352,7 @@ pmap_create( /* uber space points to uber mapped kernel */ s = splhigh(); pml4p = pmap64_pml4(p, 0ULL); - pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX),*kernel_pmap->pm_pml4); + pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX), *kernel_pmap->pm_pml4); if (!is_64bit) { @@ -1852,9 +1531,7 @@ pmap_destroy( * pmap structure. */ if (!cpu_64bit) { - vm_page_lock_queues(); - inuse_ptepages_count -= p->pm_obj->resident_page_count; - vm_page_unlock_queues(); + OSAddAtomic(-p->pm_obj->resident_page_count, &inuse_ptepages_count); kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD); zfree(pdpt_zone, (void *)p->pm_hold); @@ -1877,9 +1554,7 @@ pmap_destroy( inuse_ptepages += p->pm_obj->resident_page_count; vm_object_deallocate(p->pm_obj); - vm_page_lock_queues(); - inuse_ptepages_count -= inuse_ptepages; - vm_page_unlock_queues(); + OSAddAtomic(-inuse_ptepages, &inuse_ptepages_count); } zfree(pmap_zone, p); @@ -1904,231 +1579,6 @@ pmap_reference( } } -/* - * Remove a range of hardware page-table entries. - * The entries given are the first (inclusive) - * and last (exclusive) entries for the VM pages. - * The virtual address is the va for the first pte. - * - * The pmap must be locked. - * If the pmap is not the kernel pmap, the range must lie - * entirely within one pte-page. This is NOT checked. - * Assumes that the pte-page exists. 
- */ - -void -pmap_remove_range( - pmap_t pmap, - vm_map_offset_t start_vaddr, - pt_entry_t *spte, - pt_entry_t *epte) -{ - register pt_entry_t *cpte; - pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL; - pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL; - pv_hashed_entry_t pvh_e; - int pvh_cnt = 0; - int num_removed, num_unwired, num_found; - int pai; - pmap_paddr_t pa; - vm_map_offset_t vaddr; - int pvhash_idx; - uint32_t pv_cnt; - - num_removed = 0; - num_unwired = 0; - num_found = 0; - - if (pmap != kernel_pmap && - pmap->pm_task_map == TASK_MAP_32BIT && - start_vaddr >= HIGH_MEM_BASE) { - /* - * The range is in the "high_shared_pde" which is shared - * between the kernel and all 32-bit tasks. It holds - * the 32-bit commpage but also the trampolines, GDT, etc... - * so we can't let user tasks remove anything from it. - */ - return; - } - - /* invalidate the PTEs first to "freeze" them */ - for (cpte = spte, vaddr = start_vaddr; - cpte < epte; - cpte++, vaddr += PAGE_SIZE_64) { - - pa = pte_to_pa(*cpte); - if (pa == 0) - continue; - num_found++; - - if (iswired(*cpte)) - num_unwired++; - - pai = pa_index(pa); - - if (!managed_page(pai)) { - /* - * Outside range of managed physical memory. - * Just remove the mappings. - */ - pmap_store_pte(cpte, 0); - continue; - } - - /* invalidate the PTE */ - pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID)); - } - - if (num_found == 0) { - /* nothing was changed: we're done */ - goto update_counts; - } - - /* propagate the invalidates to other CPUs */ - - PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr); - - for (cpte = spte, vaddr = start_vaddr; - cpte < epte; - cpte++, vaddr += PAGE_SIZE_64) { - - pa = pte_to_pa(*cpte); - if (pa == 0) - continue; - - pai = pa_index(pa); - - LOCK_PVH(pai); - - pa = pte_to_pa(*cpte); - if (pa == 0) { - UNLOCK_PVH(pai); - continue; - } - - num_removed++; - - /* - * Get the modify and reference bits, then - * nuke the entry in the page table - */ - /* remember reference and change */ - pmap_phys_attributes[pai] |= - (char)(*cpte & (PHYS_MODIFIED | PHYS_REFERENCED)); - /* completely invalidate the PTE */ - pmap_store_pte(cpte, 0); - - /* - * Remove the mapping from the pvlist for - * this physical page. - */ - { - pv_rooted_entry_t pv_h; - pv_hashed_entry_t *pprevh; - ppnum_t ppn = (ppnum_t)pai; - - pv_h = pai_to_pvh(pai); - pvh_e = PV_HASHED_ENTRY_NULL; - if (pv_h->pmap == PMAP_NULL) - panic("pmap_remove_range: null pv_list!"); - - if (pv_h->va == vaddr && pv_h->pmap == pmap) { /* rooted or not */ - /* - * Header is the pv_rooted_entry. We can't free that. If there is a queued - * entry after this one we remove that - * from the ppn queue, we remove it from the hash chain - * and copy it to the rooted entry. Then free it instead. - */ - - pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink); - if (pv_h != (pv_rooted_entry_t)pvh_e) { /* any queued after rooted? */ - CHK_NPVHASH(); - pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va); - LOCK_PV_HASH(pvhash_idx); - remque(&pvh_e->qlink); - { - pprevh = pvhash(pvhash_idx); - if (PV_HASHED_ENTRY_NULL == *pprevh) { - panic("pmap_remove_range empty hash removing rooted pv"); - } - } - pmap_pvh_unlink(pvh_e); - UNLOCK_PV_HASH(pvhash_idx); - pv_h->pmap = pvh_e->pmap; - pv_h->va = pvh_e->va; /* dispose of pvh_e */ - } else { /* none queued after rooted */ - pv_h->pmap = PMAP_NULL; - pvh_e = PV_HASHED_ENTRY_NULL; - } /* any queued after rooted */ - - } else { /* rooted or not */ - /* not removing rooted pv. 
find it on hash chain, remove from ppn queue and - * hash chain and free it */ - CHK_NPVHASH(); - pvhash_idx = pvhashidx(pmap,vaddr); - LOCK_PV_HASH(pvhash_idx); - pprevh = pvhash(pvhash_idx); - if (PV_HASHED_ENTRY_NULL == *pprevh) { - panic("pmap_remove_range empty hash removing hashed pv"); - } - pvh_e = *pprevh; - pmap_pv_hashlist_walks++; - pv_cnt = 0; - while (PV_HASHED_ENTRY_NULL != pvh_e) { - pv_cnt++; - if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == ppn) break; - pprevh = &pvh_e->nexth; - pvh_e = pvh_e->nexth; - } - pmap_pv_hashlist_cnts += pv_cnt; - if (pmap_pv_hashlist_max < pv_cnt) pmap_pv_hashlist_max = pv_cnt; - if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pmap_remove_range pv not on hash"); - *pprevh = pvh_e->nexth; - remque(&pvh_e->qlink); - UNLOCK_PV_HASH(pvhash_idx); - - } /* rooted or not */ - - UNLOCK_PVH(pai); - - if (pvh_e != PV_HASHED_ENTRY_NULL) { - pvh_e->qlink.next = (queue_entry_t)pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) { - pvh_et = pvh_e; - } - - pvh_cnt++; - } - - } /* removing mappings for this phy page */ - } /* for loop */ - - if (pvh_eh != PV_HASHED_ENTRY_NULL) { - PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt); - } - -update_counts: - /* - * Update the counts - */ -#if TESTING - if (pmap->stats.resident_count < num_removed) - panic("pmap_remove_range: resident_count"); -#endif - assert(pmap->stats.resident_count >= num_removed); - OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count); - -#if TESTING - if (pmap->stats.wired_count < num_unwired) - panic("pmap_remove_range: wired_count"); -#endif - assert(pmap->stats.wired_count >= num_unwired); - OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count); - - return; -} /* * Remove phys addr if mapped in specified map @@ -2137,297 +1587,13 @@ update_counts: void pmap_remove_some_phys( __unused pmap_t map, - __unused ppnum_t pn) -{ - -/* Implement to support working set code */ - -} - -/* - * Remove the given range of addresses - * from the specified map. - * - * It is assumed that the start and end are properly - * rounded to the hardware page size. - */ - - -void -pmap_remove( - pmap_t map, - addr64_t s64, - addr64_t e64) -{ - pt_entry_t *pde; - pt_entry_t *spte, *epte; - addr64_t l64; - addr64_t orig_s64; - uint64_t deadline; - - pmap_intr_assert(); - - if (map == PMAP_NULL || s64 == e64) - return; - - PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START, - (int) map, - (int) (s64>>32), (int) s64, - (int) (e64>>32), (int) e64); - - PMAP_LOCK(map); - -#if 0 - /* - * Check that address range in the kernel does not overlap the stacks. - * We initialize local static min/max variables once to avoid making - * 2 function calls for every remove. Note also that these functions - * both return 0 before kernel stacks have been initialized, and hence - * the panic is not triggered in this case. - */ - if (map == kernel_pmap) { - static vm_offset_t kernel_stack_min = 0; - static vm_offset_t kernel_stack_max = 0; - - if (kernel_stack_min == 0) { - kernel_stack_min = min_valid_stack_address(); - kernel_stack_max = max_valid_stack_address(); - } - if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) || - (kernel_stack_min < e64 && e64 <= kernel_stack_max)) - panic("pmap_remove() attempted in kernel stack"); - } -#else - - /* - * The values of kernel_stack_min and kernel_stack_max are no longer - * relevant now that we allocate kernel stacks anywhere in the kernel map, - * so the old code above no longer applies. 
If we wanted to check that - * we weren't removing a mapping of a page in a kernel stack we'd have to - * mark the PTE with an unused bit and check that here. - */ - -#endif - - deadline = rdtsc64() + max_preemption_latency_tsc; - - orig_s64 = s64; - - while (s64 < e64) { - - l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size-1); - if (l64 > e64) - l64 = e64; - pde = pmap_pde(map, s64); - - if (pde && (*pde & INTEL_PTE_VALID)) { - spte = (pt_entry_t *)pmap_pte(map, (s64 & ~(pde_mapped_size-1))); - spte = &spte[ptenum(s64)]; - epte = &spte[intel_btop(l64-s64)]; - - pmap_remove_range(map, s64, spte, epte); - } - s64 = l64; - pde++; - - if (s64 < e64 && rdtsc64() >= deadline) { - PMAP_UNLOCK(map) - PMAP_LOCK(map) - - deadline = rdtsc64() + max_preemption_latency_tsc; - } - - } - - PMAP_UNLOCK(map); - - PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END, - (int) map, 0, 0, 0, 0); - -} - -/* - * Routine: pmap_page_protect - * - * Function: - * Lower the permission for all mappings to a given - * page. - */ -void -pmap_page_protect( - ppnum_t pn, - vm_prot_t prot) -{ - pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL; - pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL; - pv_hashed_entry_t nexth; - int pvh_cnt = 0; - pv_rooted_entry_t pv_h; - pv_rooted_entry_t pv_e; - pv_hashed_entry_t pvh_e; - pt_entry_t *pte; - int pai; - register pmap_t pmap; - boolean_t remove; - int pvhash_idx; - - pmap_intr_assert(); - assert(pn != vm_page_fictitious_addr); - if (pn == vm_page_guard_addr) - return; - - pai = ppn_to_pai(pn); - - if (!managed_page(pai)) { - /* - * Not a managed page. - */ - return; - } - - PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, - (int) pn, (int) prot, 0, 0, 0); - - /* - * Determine the new protection. - */ - switch (prot) { - case VM_PROT_READ: - case VM_PROT_READ|VM_PROT_EXECUTE: - remove = FALSE; - break; - case VM_PROT_ALL: - return; /* nothing to do */ - default: - remove = TRUE; - break; - } - - pv_h = pai_to_pvh(pai); - - LOCK_PVH(pai); - - /* - * Walk down PV list, changing or removing all mappings. - */ - if (pv_h->pmap != PMAP_NULL) { - - pv_e = pv_h; - pvh_e = (pv_hashed_entry_t)pv_e; /* cheat */ - - do { - register vm_map_offset_t vaddr; - pmap = pv_e->pmap; - - vaddr = pv_e->va; - pte = pmap_pte(pmap, vaddr); - - if (0 == pte) { - kprintf("pmap_page_protect pmap %p pn 0x%x vaddr 0x%llx\n",pmap, pn, vaddr); - panic("pmap_page_protect"); - } - - nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink); /* if there is one */ - - /* - * Remove the mapping if new protection is NONE - * or if write-protecting a kernel mapping. - */ - if (remove || pmap == kernel_pmap) { - /* - * Remove the mapping, collecting any modify bits. - */ - pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID)); - - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); - - pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED); - - pmap_store_pte(pte, 0); - -#if TESTING - if (pmap->stats.resident_count < 1) - panic("pmap_page_protect: resident_count"); -#endif - assert(pmap->stats.resident_count >= 1); - OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count); - - /* - * Deal with the pv_rooted_entry. - */ - - if (pv_e == pv_h) { - /* - * Fix up head later. - */ - pv_h->pmap = PMAP_NULL; - } - else { - /* - * Delete this entry. 
- */ - CHK_NPVHASH(); - pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va); - LOCK_PV_HASH(pvhash_idx); - remque(&pvh_e->qlink); - pmap_pvh_unlink(pvh_e); - UNLOCK_PV_HASH(pvhash_idx); - - pvh_e->qlink.next = (queue_entry_t)pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) - pvh_et = pvh_e; - pvh_cnt++; - } - } else { - /* - * Write-protect. - */ - pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WRITE)); - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); - } - - pvh_e = nexth; - } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h); - - /* - * If pv_head mapping was removed, fix it up. - */ - - if (pv_h->pmap == PMAP_NULL) { - pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink); - - if (pvh_e != (pv_hashed_entry_t)pv_h) { - CHK_NPVHASH(); - pvhash_idx = pvhashidx(pvh_e->pmap,pvh_e->va); - LOCK_PV_HASH(pvhash_idx); - remque(&pvh_e->qlink); - pmap_pvh_unlink(pvh_e); - UNLOCK_PV_HASH(pvhash_idx); - pv_h->pmap = pvh_e->pmap; - pv_h->va = pvh_e->va; - pvh_e->qlink.next = (queue_entry_t)pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) - pvh_et = pvh_e; - pvh_cnt++; - } - } - } - if (pvh_eh != PV_HASHED_ENTRY_NULL) { - PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt); - } - - UNLOCK_PVH(pai); + __unused ppnum_t pn) +{ - PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END, - 0, 0, 0, 0, 0); +/* Implement to support working set code */ } - /* * Routine: * pmap_disconnect @@ -2548,429 +1714,6 @@ pmap_map_block( } -/* - * Insert the given physical page (p) at - * the specified virtual address (v) in the - * target physical map with the protection requested. - * - * If specified, the page will be wired down, meaning - * that the related pte cannot be reclaimed. - * - * NB: This is the only routine which MAY NOT lazy-evaluate - * or lose information. That is, this routine must actually - * insert this page into the given map NOW. - */ -void -pmap_enter( - register pmap_t pmap, - vm_map_offset_t vaddr, - ppnum_t pn, - vm_prot_t prot, - unsigned int flags, - boolean_t wired) -{ - register pt_entry_t *pte; - register pv_rooted_entry_t pv_h; - register int pai; - pv_hashed_entry_t pvh_e; - pv_hashed_entry_t pvh_new; - pv_hashed_entry_t *hashp; - pt_entry_t template; - pmap_paddr_t old_pa; - pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn); - boolean_t need_tlbflush = FALSE; - boolean_t set_NX; - char oattr; - int pvhash_idx; - uint32_t pv_cnt; - boolean_t old_pa_locked; - - pmap_intr_assert(); - assert(pn != vm_page_fictitious_addr); - if (pmap_debug) - printf("pmap(%qx, %x)\n", vaddr, pn); - if (pmap == PMAP_NULL) - return; - if (pn == vm_page_guard_addr) - return; - - PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START, - (int) pmap, - (int) (vaddr>>32), (int) vaddr, - (int) pn, prot); - - if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled ) - set_NX = FALSE; - else - set_NX = TRUE; - - /* - * Must allocate a new pvlist entry while we're unlocked; - * zalloc may cause pageout (which will lock the pmap system). - * If we determine we need a pvlist entry, we will unlock - * and allocate one. Then we will retry, throughing away - * the allocated entry later (if we no longer need it). - */ - - pvh_new = PV_HASHED_ENTRY_NULL; -Retry: - pvh_e = PV_HASHED_ENTRY_NULL; - - PMAP_LOCK(pmap); - - /* - * Expand pmap to include this pte. Assume that - * pmap is always expanded to include enough hardware - * pages to map one VM page. - */ - - while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) { - /* - * Must unlock to expand the pmap. 
- */ - PMAP_UNLOCK(pmap); - pmap_expand(pmap, vaddr); /* going to grow pde level page(s) */ - PMAP_LOCK(pmap); - } - - old_pa = pte_to_pa(*pte); - pai = pa_index(old_pa); - old_pa_locked = FALSE; - - /* - * if we have a previous managed page, lock the pv entry now. after - * we lock it, check to see if someone beat us to the lock and if so - * drop the lock - */ - - if ((0 != old_pa) && managed_page(pai)) { - LOCK_PVH(pai); - old_pa_locked = TRUE; - old_pa = pte_to_pa(*pte); - if (0 == old_pa) { - UNLOCK_PVH(pai); /* some other path beat us to it */ - old_pa_locked = FALSE; - } - } - - - /* - * Special case if the incoming physical page is already mapped - * at this address. - */ - if (old_pa == pa) { - - /* - * May be changing its wired attribute or protection - */ - - template = pa_to_pte(pa) | INTEL_PTE_VALID; - - if(VM_MEM_NOT_CACHEABLE == (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) { - if(!(flags & VM_MEM_GUARDED)) - template |= INTEL_PTE_PTA; - template |= INTEL_PTE_NCACHE; - } - - if (pmap != kernel_pmap) - template |= INTEL_PTE_USER; - if (prot & VM_PROT_WRITE) - template |= INTEL_PTE_WRITE; - - if (set_NX == TRUE) - template |= INTEL_PTE_NX; - - if (wired) { - template |= INTEL_PTE_WIRED; - if (!iswired(*pte)) - OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count); - } - else { - if (iswired(*pte)) { - assert(pmap->stats.wired_count >= 1); - OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count); - } - } - - /* store modified PTE and preserve RC bits */ - pmap_update_pte(pte, *pte, template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD))); - if (old_pa_locked) { - UNLOCK_PVH(pai); - old_pa_locked = FALSE; - } - need_tlbflush = TRUE; - goto Done; - } - - /* - * Outline of code from here: - * 1) If va was mapped, update TLBs, remove the mapping - * and remove old pvlist entry. - * 2) Add pvlist entry for new mapping - * 3) Enter new mapping. - * - * If the old physical page is not managed step 1) is skipped - * (except for updating the TLBs), and the mapping is - * overwritten at step 3). If the new physical page is not - * managed, step 2) is skipped. - */ - - if (old_pa != (pmap_paddr_t) 0) { - - /* - * Don't do anything to pages outside valid memory here. - * Instead convince the code that enters a new mapping - * to overwrite the old one. - */ - - /* invalidate the PTE */ - pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID)); - /* propagate invalidate everywhere */ - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); - /* remember reference and change */ - oattr = (char)(*pte & (PHYS_MODIFIED | PHYS_REFERENCED)); - /* completely invalidate the PTE */ - pmap_store_pte(pte, 0); - - if (managed_page(pai)) { - -#if TESTING - if (pmap->stats.resident_count < 1) - panic("pmap_enter: resident_count"); -#endif - assert(pmap->stats.resident_count >= 1); - OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count); - - if (iswired(*pte)) { - -#if TESTING - if (pmap->stats.wired_count < 1) - panic("pmap_enter: wired_count"); -#endif - assert(pmap->stats.wired_count >= 1); - OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count); - } - - pmap_phys_attributes[pai] |= oattr; - /* - * Remove the mapping from the pvlist for - * this physical page. - * We'll end up with either a rooted pv or a - * hashed pv - */ - { - - pv_h = pai_to_pvh(pai); - - if (pv_h->pmap == PMAP_NULL) { - panic("pmap_enter: null pv_list!"); - } - - if (pv_h->va == vaddr && pv_h->pmap == pmap) { - /* - * Header is the pv_rooted_entry. 
- * If there is a next one, copy it to the - * header and free the next one (we cannot - * free the header) - */ - pvh_e = (pv_hashed_entry_t)queue_next(&pv_h->qlink); - if (pvh_e != (pv_hashed_entry_t)pv_h) { - pvhash_idx = pvhashidx(pvh_e->pmap, pvh_e->va); - LOCK_PV_HASH(pvhash_idx); - remque(&pvh_e->qlink); - pmap_pvh_unlink(pvh_e); - UNLOCK_PV_HASH(pvhash_idx); - pv_h->pmap = pvh_e->pmap; - pv_h->va = pvh_e->va; - } - else { - pv_h->pmap = PMAP_NULL; - pvh_e = PV_HASHED_ENTRY_NULL; - } - } - else { - pv_hashed_entry_t *pprevh; - ppnum_t old_ppn; - /* wasn't the rooted pv - hash, find it, and unlink it */ - old_ppn = (ppnum_t)pa_index(old_pa); - CHK_NPVHASH(); - pvhash_idx = pvhashidx(pmap,vaddr); - LOCK_PV_HASH(pvhash_idx); - pprevh = pvhash(pvhash_idx); -#if PV_DEBUG - if (NULL==pprevh)panic("pmap enter 1"); -#endif - pvh_e = *pprevh; - pmap_pv_hashlist_walks++; - pv_cnt = 0; - while (PV_HASHED_ENTRY_NULL != pvh_e) { - pv_cnt++; - if (pvh_e->pmap == pmap && pvh_e->va == vaddr && pvh_e->ppn == old_ppn) break; - pprevh = &pvh_e->nexth; - pvh_e = pvh_e->nexth; - } - pmap_pv_hashlist_cnts += pv_cnt; - if (pmap_pv_hashlist_max < pv_cnt) pmap_pv_hashlist_max = pv_cnt; - if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pmap_enter: pv not in hash list"); - if(NULL==pprevh)panic("pmap enter 2"); - *pprevh = pvh_e->nexth; - remque(&pvh_e->qlink); - UNLOCK_PV_HASH(pvhash_idx); - } - } - } - else { - - /* - * old_pa is not managed. - * Do removal part of accounting. - */ - - if (iswired(*pte)) { - assert(pmap->stats.wired_count >= 1); - OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count); - } - } - } - - /* - * if we had a previously managed paged locked, unlock it now - */ - - if (old_pa_locked) { - UNLOCK_PVH(pai); - old_pa_locked = FALSE; - } - - pai = pa_index(pa); /* now working with new incoming phys page */ - if (managed_page(pai)) { - - /* - * Step 2) Enter the mapping in the PV list for this - * physical page. - */ - pv_h = pai_to_pvh(pai); - - LOCK_PVH(pai); - - if (pv_h->pmap == PMAP_NULL) { - /* - * No mappings yet, use rooted pv - */ - pv_h->va = vaddr; - pv_h->pmap = pmap; - queue_init(&pv_h->qlink); - } - else { - /* - * Add new pv_hashed_entry after header. - */ - if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) { - pvh_e = pvh_new; - pvh_new = PV_HASHED_ENTRY_NULL; /* show we used it */ - } else if (PV_HASHED_ENTRY_NULL == pvh_e) { - PV_HASHED_ALLOC(pvh_e); - if (PV_HASHED_ENTRY_NULL == pvh_e) { - /* the pv list is empty. - * if we are on the kernel pmap we'll use one of the special private - * kernel pv_e's, else, we need to unlock everything, zalloc a pv_e, - * and restart bringing in the pv_e with us. - */ - if (kernel_pmap == pmap) { - PV_HASHED_KERN_ALLOC(pvh_e); - } else { - UNLOCK_PVH(pai); - PMAP_UNLOCK(pmap); - pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone); - goto Retry; - } - } - } - - if (PV_HASHED_ENTRY_NULL == pvh_e) panic("pvh_e exhaustion"); - pvh_e->va = vaddr; - pvh_e->pmap = pmap; - pvh_e->ppn = pn; - CHK_NPVHASH(); - pvhash_idx = pvhashidx(pmap,vaddr); - LOCK_PV_HASH(pvhash_idx); - insque(&pvh_e->qlink, &pv_h->qlink); - hashp = pvhash(pvhash_idx); -#if PV_DEBUG - if(NULL==hashp)panic("pmap_enter 4"); -#endif - pvh_e->nexth = *hashp; - *hashp = pvh_e; - UNLOCK_PV_HASH(pvhash_idx); - - /* - * Remember that we used the pvlist entry. 
- */ - pvh_e = PV_HASHED_ENTRY_NULL; - } - - /* - * only count the mapping - * for 'managed memory' - */ - OSAddAtomic(+1, (SInt32 *) &pmap->stats.resident_count); - if (pmap->stats.resident_count > pmap->stats.resident_max) { - pmap->stats.resident_max = pmap->stats.resident_count; - } - } - - /* - * Step 3) Enter the mapping. - * - * Build a template to speed up entering - - * only the pfn changes. - */ - template = pa_to_pte(pa) | INTEL_PTE_VALID; - - if (flags & VM_MEM_NOT_CACHEABLE) { - if(!(flags & VM_MEM_GUARDED)) - template |= INTEL_PTE_PTA; - template |= INTEL_PTE_NCACHE; - } - - if (pmap != kernel_pmap) - template |= INTEL_PTE_USER; - if (prot & VM_PROT_WRITE) - template |= INTEL_PTE_WRITE; - - if (set_NX == TRUE) - template |= INTEL_PTE_NX; - - if (wired) { - template |= INTEL_PTE_WIRED; - OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count); - } - pmap_store_pte(pte, template); - - /* if this was a managed page we delayed unlocking the pv until here - * to prevent pmap_page_protect et al from finding it until the pte - * has been stored */ - - if (managed_page(pai)) { - UNLOCK_PVH(pai); - } - -Done: - if (need_tlbflush == TRUE) - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); - - if (pvh_e != PV_HASHED_ENTRY_NULL) { - PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1); - } - - if (pvh_new != PV_HASHED_ENTRY_NULL) { - PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1); - } - - PMAP_UNLOCK(pmap); - PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0); -} - /* * Routine: pmap_change_wiring * Function: Change the wiring attribute for a map/virtual-address @@ -2999,7 +1742,7 @@ pmap_change_wiring( /* * wiring down mapping */ - OSAddAtomic(+1, (SInt32 *) &map->stats.wired_count); + OSAddAtomic(+1, &map->stats.wired_count); pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED)); } else if (!wired && iswired(*pte)) { @@ -3007,31 +1750,13 @@ pmap_change_wiring( * unwiring mapping */ assert(map->stats.wired_count >= 1); - OSAddAtomic(-1, (SInt32 *) &map->stats.wired_count); + OSAddAtomic(-1, &map->stats.wired_count); pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED)); } PMAP_UNLOCK(map); } -ppnum_t -pmap_find_phys(pmap_t pmap, addr64_t va) -{ - pt_entry_t *ptp; - ppnum_t ppn; - - mp_disable_preemption(); - - ptp = pmap_pte(pmap, va); - if (PT_ENTRY_NULL == ptp) { - ppn = 0; - } else { - ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp)); - } - mp_enable_preemption(); - - return ppn; -} /* * Routine: pmap_extract @@ -3055,7 +1780,7 @@ pmap_extract( ppn = pmap_find_phys(pmap, vaddr); if (ppn) { - paddr = ((vm_offset_t)i386_ptob(ppn)) | (vaddr & INTEL_OFFMASK); + paddr = ((vm_offset_t)i386_ptob(ppn)) | ((vm_offset_t)vaddr & INTEL_OFFMASK); } return (paddr); } @@ -3098,11 +1823,12 @@ pmap_expand_pml4( */ pmap_zero_page(pn); - vm_page_lock_queues(); + vm_page_lockspin_queues(); vm_page_wire(m); - inuse_ptepages_count++; vm_page_unlock_queues(); + OSAddAtomic(1, &inuse_ptepages_count); + /* Take the oject lock (mutex) before the PMAP_LOCK (spinlock) */ vm_object_lock(map->pm_obj_pml4); @@ -3114,11 +1840,9 @@ pmap_expand_pml4( PMAP_UNLOCK(map); vm_object_unlock(map->pm_obj_pml4); - vm_page_lock_queues(); - vm_page_free(m); - inuse_ptepages_count--; - vm_page_unlock_queues(); + VM_PAGE_FREE(m); + OSAddAtomic(-1, &inuse_ptepages_count); return; } @@ -3188,11 +1912,12 @@ pmap_expand_pdpt( */ pmap_zero_page(pn); - vm_page_lock_queues(); + vm_page_lockspin_queues(); vm_page_wire(m); - inuse_ptepages_count++; vm_page_unlock_queues(); + OSAddAtomic(1, &inuse_ptepages_count); + /* Take the oject lock (mutex) 
before the PMAP_LOCK (spinlock) */ vm_object_lock(map->pm_obj_pdpt); @@ -3204,11 +1929,9 @@ pmap_expand_pdpt( PMAP_UNLOCK(map); vm_object_unlock(map->pm_obj_pdpt); - vm_page_lock_queues(); - vm_page_free(m); - inuse_ptepages_count--; - vm_page_unlock_queues(); + VM_PAGE_FREE(m); + OSAddAtomic(-1, &inuse_ptepages_count); return; } @@ -3300,11 +2023,12 @@ pmap_expand( */ pmap_zero_page(pn); - vm_page_lock_queues(); + vm_page_lockspin_queues(); vm_page_wire(m); - inuse_ptepages_count++; vm_page_unlock_queues(); + OSAddAtomic(1, &inuse_ptepages_count); + /* Take the oject lock (mutex) before the PMAP_LOCK (spinlock) */ vm_object_lock(map->pm_obj); @@ -3317,11 +2041,9 @@ pmap_expand( PMAP_UNLOCK(map); vm_object_unlock(map->pm_obj); - vm_page_lock_queues(); - vm_page_free(m); - inuse_ptepages_count--; - vm_page_unlock_queues(); + VM_PAGE_FREE(m); + OSAddAtomic(-1, &inuse_ptepages_count); return; } @@ -3470,10 +2192,9 @@ pmap_collect( if (m == VM_PAGE_NULL) panic("pmap_collect: pte page not in object"); - vm_page_lock_queues(); - vm_page_free(m); - inuse_ptepages_count--; - vm_page_unlock_queues(); + VM_PAGE_FREE(m); + + OSAddAtomic(-1, &inuse_ptepages_count); vm_object_unlock(p->pm_obj); } @@ -3555,6 +2276,7 @@ phys_attribute_clear( return; } + PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, (int) pn, bits, 0, 0, 0); @@ -3581,12 +2303,6 @@ phys_attribute_clear( vm_map_offset_t va; va = pv_e->va; - /* - * first make sure any processor actively - * using this pmap, flushes its TLB state - */ - - PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); /* * Clear modify and/or reference bits. @@ -3594,7 +2310,13 @@ phys_attribute_clear( pte = pmap_pte(pmap, va); pmap_update_pte(pte, *pte, (*pte & ~bits)); - + /* Ensure all processors using this translation + * invalidate this TLB entry. The invalidation *must* follow + * the PTE update, to ensure that the TLB shadow of the + * 'D' bit (in particular) is synchronized with the + * updated PTE. 
+ */ + PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); } pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink); @@ -3655,6 +2377,7 @@ phys_attribute_test( attributes = pmap_phys_attributes[pai] & bits; + /* * Walk down PV list, checking the mappings until we * reach the end or we've found the attributes we've asked for @@ -3683,9 +2406,8 @@ phys_attribute_test( /* * pick up modify and/or reference bits from this mapping */ - pte = pmap_pte(pmap, va); - attributes |= *pte & bits; + attributes |= (int)(*pte & bits); } @@ -4029,95 +2751,6 @@ phys_page_exists( return TRUE; } -void -mapping_free_prime(void) -{ - int i; - pv_hashed_entry_t pvh_e; - pv_hashed_entry_t pvh_eh; - pv_hashed_entry_t pvh_et; - int pv_cnt; - - pv_cnt = 0; - pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL; - for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) { - pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone); - - pvh_e->qlink.next = (queue_entry_t)pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) - pvh_et = pvh_e; - pv_cnt++; - } - PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt); - - pv_cnt = 0; - pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL; - for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) { - pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone); - - pvh_e->qlink.next = (queue_entry_t)pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) - pvh_et = pvh_e; - pv_cnt++; - } - PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt); - -} - -void -mapping_adjust(void) -{ - pv_hashed_entry_t pvh_e; - pv_hashed_entry_t pvh_eh; - pv_hashed_entry_t pvh_et; - int pv_cnt; - int i; - - if (mapping_adjust_call == NULL) { - thread_call_setup(&mapping_adjust_call_data, - (thread_call_func_t) mapping_adjust, - (thread_call_param_t) NULL); - mapping_adjust_call = &mapping_adjust_call_data; - } - - pv_cnt = 0; - pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL; - if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) { - for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) { - pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone); - - pvh_e->qlink.next = (queue_entry_t)pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) - pvh_et = pvh_e; - pv_cnt++; - } - PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt); - } - - pv_cnt = 0; - pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL; - if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) { - for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) { - pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone); - - pvh_e->qlink.next = (queue_entry_t)pvh_eh; - pvh_eh = pvh_e; - - if (pvh_et == PV_HASHED_ENTRY_NULL) - pvh_et = pvh_e; - pv_cnt++; - } - PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt); - } - mappingrecurse = 0; -} - void pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt) { @@ -4268,176 +2901,14 @@ pmap_put_mapwindow(mapwindow_t *mp) pmap_store_pte(mp->prv_CMAP, 0); } - -/* - * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time, - * on a NBPDE boundary. - */ -uint64_t pmap_nesting_size_min = NBPDE; -uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE; /* no limit, really... */ - -/* - * kern_return_t pmap_nest(grand, subord, vstart, size) - * - * grand = the pmap that we will nest subord into - * subord = the pmap that goes into the grand - * vstart = start of range in pmap to be inserted - * nstart = start of range in pmap nested pmap - * size = Size of nest area (up to 16TB) - * - * Inserts a pmap into another. This is used to implement shared segments. - * - * on x86 this is very limited right now. must be exactly 1 segment. 
- * - * Note that we depend upon higher level VM locks to insure that things don't change while - * we are doing this. For example, VM should not be doing any pmap enters while it is nesting - * or do 2 nests at once. - */ - - -kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size) { - - vm_map_offset_t vaddr, nvaddr; - pd_entry_t *pde,*npde; - unsigned int i; - uint64_t num_pde; - - // do validity tests - if (size & (pmap_nesting_size_min-1)) return KERN_INVALID_VALUE; - if(vstart & (pmap_nesting_size_min-1)) return KERN_INVALID_VALUE; - if(nstart & (pmap_nesting_size_min-1)) return KERN_INVALID_VALUE; - if((size >> 28) > 65536) return KERN_INVALID_VALUE; /* Max size we can nest is 16TB */ - if(size == 0) { - panic("pmap_nest: size is invalid - %016llX\n", size); - } - - PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START, - (int) grand, (int) subord, - (int) (vstart>>32), (int) vstart, 0); - - subord->pm_shared = TRUE; - nvaddr = (vm_map_offset_t)nstart; - num_pde = size >> PDESHIFT; - - PMAP_LOCK(subord); - for (i = 0; i < num_pde; i++) { - npde = pmap_pde(subord, nvaddr); - while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) { - PMAP_UNLOCK(subord); - pmap_expand(subord, nvaddr); // pmap_expand handles races - PMAP_LOCK(subord); - npde = pmap_pde(subord, nvaddr); - } - nvaddr += NBPDE; - } - - PMAP_UNLOCK(subord); - - vaddr = (vm_map_offset_t)vstart; - - PMAP_LOCK(grand); - - for (i = 0;i < num_pde; i++) { - pd_entry_t tpde; - - npde = pmap_pde(subord, nstart); - if (npde == 0) - panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart); - tpde = *npde; - nstart += NBPDE; - pde = pmap_pde(grand, vaddr); -/* Legacy mode does not require expansion. - * DRK: consider a debug mode test to verify that no PTEs are extant within - * this range. - */ - if ((0 == pde) && cpu_64bit) { - PMAP_UNLOCK(grand); - pmap_expand_pdpt(grand, vaddr); - PMAP_LOCK(grand); - pde = pmap_pde(grand, vaddr); - } - - if (pde == 0) - panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr); - vaddr += NBPDE; - pmap_store_pte(pde, tpde); - } - - /* XXX FBDP: why do we need to flush here ? */ - PMAP_UPDATE_TLBS(grand, vstart, vstart + size - 1); - - PMAP_UNLOCK(grand); - - PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0); - - return KERN_SUCCESS; -} - -/* - * kern_return_t pmap_unnest(grand, vaddr) - * - * grand = the pmap that we will nest subord into - * vaddr = start of range in pmap to be unnested - * - * Removes a pmap from another. This is used to implement shared segments. - * On the current PPC processors, this is limited to segment (256MB) aligned - * segment sized ranges. 
- */ - -kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) { - - pd_entry_t *pde; - unsigned int i; - unsigned int num_pde; - addr64_t vstart, vend; - - PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START, - (int) grand, - (int) (vaddr>>32), (int) vaddr, 0, 0); - - if ((size & (pmap_nesting_size_min-1)) || - (vaddr & (pmap_nesting_size_min-1))) { - panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n", - grand, vaddr, size); - } - - /* align everything to PDE boundaries */ - vstart = vaddr & ~(NBPDE-1); - vend = (vaddr + size + NBPDE - 1) & ~(NBPDE-1); - size = vend - vstart; - - PMAP_LOCK(grand); - - // invalidate all pdes for segment at vaddr in pmap grand - - num_pde = size >> PDESHIFT; - - vaddr = vstart; - for (i=0;i> 2); + deadline = mach_absolute_time() + (LockTimeOut); while (mach_absolute_time() < deadline) cpu_pause(); } - /* * Called with pmap locked, we: * - scan through per-cpu data to see which other cpus need to flush @@ -4573,8 +3043,12 @@ pmap_flush_tlbs(pmap_t pmap) */ while (cpus_to_respond != 0) { if (mach_absolute_time() > deadline) { - pmap_tlb_flush_timeout = TRUE; - pmap_cpuset_NMIPI(cpus_to_respond); + if (mp_recent_debugger_activity()) + continue; + if (!panic_active()) { + pmap_tlb_flush_timeout = TRUE; + pmap_cpuset_NMIPI(cpus_to_respond); + } panic("pmap_flush_tlbs() timeout: " "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx", pmap, cpus_to_respond); @@ -4594,7 +3068,6 @@ pmap_flush_tlbs(pmap_t pmap) } } } - /* * Flush local tlb if required. * We need this flush even if the pmap being changed @@ -4604,6 +3077,10 @@ pmap_flush_tlbs(pmap_t pmap) if (flush_self) flush_tlb(); + if ((pmap == kernel_pmap) && (flush_self != TRUE)) { + panic("pmap_flush_tlbs: pmap == kernel_pmap && flush_self != TRUE; kernel CR3: 0x%llX, CPU active CR3: 0x%llX, CPU Task Map: %d", kernel_pmap->pm_cr3, current_cpu_datap()->cpu_active_cr3, current_cpu_datap()->cpu_task_map); + } + PMAP_TRACE(PMAP_CODE(PMAP__FLUSH_TLBS) | DBG_FUNC_END, (int) pmap, cpus_to_signal, flush_self, 0, 0); } @@ -4732,3 +3209,4 @@ void dump_4GB_pdpt_thread(thread_t tp) #endif +
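The pmap_flush_tlbs() hunks above show only the changed lines; condensed into one place, the new wait-for-acknowledgement behavior is roughly the sketch below. mp_recent_debugger_activity(), panic_active(), pmap_tlb_flush_timeout, pmap_cpuset_NMIPI() and cpu_pause() are the symbols the diff itself uses; the standalone function name and the plain unsigned long mask are simplifications assumed here for illustration.

/* Sketch of the responder wait loop in pmap_flush_tlbs() after this change:
 * spin until every signalled CPU has cleared its bit in the shared mask;
 * once past the deadline, keep waiting if the debugger was recently active,
 * otherwise NMI the stragglers (unless a panic is already in progress) and
 * then panic. */
static void
tlb_flush_wait_sketch(pmap_t pmap, volatile unsigned long *cpus_to_respond,
		      uint64_t deadline)
{
	while (*cpus_to_respond != 0) {	/* remote CPUs clear their bit from the TLB IPI handler */
		cpu_pause();
		if (mach_absolute_time() > deadline) {
			if (mp_recent_debugger_activity())
				continue;	/* CPUs were held by the debugger; keep waiting */
			if (!panic_active()) {
				pmap_tlb_flush_timeout = TRUE;
				pmap_cpuset_NMIPI(*cpus_to_respond);
			}
			panic("pmap_flush_tlbs() timeout: pmap=%p cpus_to_respond=0x%lx",
			      pmap, *cpus_to_respond);
		}
	}
}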