X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/89b3af67bb32e691275bf6fa803d1834b2284115..21362eb3e66fd2c787aee132bce100a44d71a99c:/osfmk/i386/pmap.c?ds=sidebyside diff --git a/osfmk/i386/pmap.c b/osfmk/i386/pmap.c index a54c1ef33..cd845a8d0 100644 --- a/osfmk/i386/pmap.c +++ b/osfmk/i386/pmap.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. + * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -122,8 +122,6 @@ #include #include #include -#include -#include #if MACH_KDB #include @@ -137,35 +135,17 @@ #include #include -#include - -#include - -#ifdef IWANTTODEBUG -#undef DEBUG -#define DEBUG 1 -#define POSTCODE_DELAY 1 -#include -#endif /* IWANTTODEBUG */ /* * Forward declarations for internal functions. */ -void pmap_expand_pml4( - pmap_t map, - vm_map_offset_t v); - -void pmap_expand_pdpt( - pmap_t map, - vm_map_offset_t v); - void pmap_expand( pmap_t map, - vm_map_offset_t v); + vm_offset_t v); -static void pmap_remove_range( +extern void pmap_remove_range( pmap_t pmap, - vm_map_offset_t va, + vm_offset_t va, pt_entry_t *spte, pt_entry_t *epte); @@ -181,6 +161,9 @@ void phys_attribute_set( ppnum_t phys, int bits); +void pmap_growkernel( + vm_offset_t addr); + void pmap_set_reference( ppnum_t pn); @@ -189,21 +172,24 @@ void pmap_movepage( unsigned long to, vm_size_t size); +pt_entry_t * pmap_mapgetpte( + vm_map_t map, + vm_offset_t v); + boolean_t phys_page_exists( ppnum_t pn); -#ifdef PMAP_DEBUG -void dump_pmap(pmap_t); -void dump_4GB_pdpt(pmap_t p); -void dump_4GB_pdpt_thread(thread_t tp); -#endif +#ifndef set_dirbase +void set_dirbase(vm_offset_t dirbase); +#endif /* set_dirbase */ #define iswired(pte) ((pte) & INTEL_PTE_WIRED) -int nx_enabled = 1; /* enable no-execute protection */ - -int cpu_64bit = 0; +#define WRITE_PTE(pte_p, pte_entry) *(pte_p) = (pte_entry); +#define WRITE_PTE_FAST(pte_p, pte_entry) *(pte_p) = (pte_entry); +#define value_64bit(value) ((value) & 0xFFFFFFFF00000000LL) +#define low32(x) ((unsigned int)((x) & 0x00000000ffffffffLL)) /* * Private data structures. @@ -218,7 +204,7 @@ int cpu_64bit = 0; typedef struct pv_entry { struct pv_entry *next; /* next pv_entry */ pmap_t pmap; /* pmap where mapping lies */ - vm_map_offset_t va; /* virtual address for mapping */ + vm_offset_t va; /* virtual address for mapping */ } *pv_entry_t; #define PV_ENTRY_NULL ((pv_entry_t) 0) @@ -245,7 +231,7 @@ int mappingrecurse = 0; pv_free_list = pv_e->next; \ pv_free_count--; \ if (pv_free_count < PV_LOW_WATER_MARK) \ - if (hw_compare_and_store(0,1,(u_int *)&mappingrecurse)) \ + if (hw_compare_and_store(0,1,&mappingrecurse)) \ thread_call_enter(mapping_adjust_call); \ } \ simple_unlock(&pv_free_list_lock); \ @@ -261,7 +247,10 @@ int mappingrecurse = 0; zone_t pv_list_zone; /* zone of pv_entry structures */ +#ifdef PAE static zone_t pdpt_zone; +#endif + /* * Each entry in the pv_head_table is locked by a bit in the @@ -281,8 +270,14 @@ pmap_paddr_t vm_first_phys = (pmap_paddr_t) 0; pmap_paddr_t vm_last_phys = (pmap_paddr_t) 0; boolean_t pmap_initialized = FALSE;/* Has pmap_init completed? 
*/ +pmap_paddr_t kernel_vm_end = (pmap_paddr_t)0; + +#define GROW_KERNEL_FUNCTION_IMPLEMENTED 1 +#if GROW_KERNEL_FUNCTION_IMPLEMENTED /* not needed until growing kernel pmap */ static struct vm_object kptobj_object_store; static vm_object_t kptobj; +#endif + /* * Index into pv_head table, its lock bits, and the modify/reference @@ -313,7 +308,6 @@ char *pmap_phys_attributes; * page-directory entry. */ #define PDE_MAPPED_SIZE (pdetova(1)) -uint64_t pde_mapped_size; /* * Locking and TLB invalidation @@ -354,21 +348,23 @@ uint64_t pde_mapped_size; /* * We raise the interrupt level to splvm, to block interprocessor - * interrupts during pmap operations. We mark the cpu's cr3 inactive - * while interrupts are blocked. + * interrupts during pmap operations. We must take the CPU out of + * the cpus_active set while interrupts are blocked. */ -#define SPLVM(spl) { \ - spl = splhigh(); \ - CPU_CR3_MARK_INACTIVE(); \ +#define SPLVM(spl) { \ + spl = splhigh(); \ + mp_disable_preemption(); \ + i_bit_clear(cpu_number(), &cpus_active); \ + mp_enable_preemption(); \ } -#define SPLX(spl) { \ - if (current_cpu_datap()->cpu_tlb_invalid) \ - process_pmap_updates(); \ - CPU_CR3_MARK_ACTIVE(); \ - splx(spl); \ +#define SPLX(spl) { \ + mp_disable_preemption(); \ + i_bit_set(cpu_number(), &cpus_active); \ + mp_enable_preemption(); \ + splx(spl); \ } - + /* * Lock on pmap system */ @@ -415,8 +411,8 @@ extern int disableSerialOuput; #define LOOP_CHECK(msg, pmap) \ if (--loop_count == 0) { \ mp_disable_preemption(); \ - kprintf("%s: cpu %d pmap %x\n", \ - msg, cpu_number(), pmap); \ + kprintf("%s: cpu %d pmap %x, cpus_active 0x%x\n", \ + msg, cpu_number(), pmap, cpus_active); \ Debugger("deadlock detection"); \ mp_enable_preemption(); \ loop_count = max_lock_loops; \ @@ -426,15 +422,76 @@ extern int disableSerialOuput; #define LOOP_CHECK(msg, pmap) #endif /* USLOCK_DEBUG */ +#define PMAP_UPDATE_TLBS(pmap, s, e) \ +{ \ + cpu_set cpu_mask; \ + cpu_set users; \ + \ + mp_disable_preemption(); \ + cpu_mask = 1 << cpu_number(); \ + \ + /* Since the pmap is locked, other updates are locked */ \ + /* out, and any pmap_activate has finished. */ \ + \ + /* find other cpus using the pmap */ \ + users = (pmap)->cpus_using & ~cpu_mask; \ + if (users) { \ + LOOP_VAR; \ + /* signal them, and wait for them to finish */ \ + /* using the pmap */ \ + signal_cpus(users, (pmap), (s), (e)); \ + while (((pmap)->cpus_using & cpus_active & ~cpu_mask)) { \ + LOOP_CHECK("PMAP_UPDATE_TLBS", pmap); \ + cpu_pause(); \ + } \ + } \ + /* invalidate our own TLB if pmap is in use */ \ + \ + if ((pmap)->cpus_using & cpu_mask) { \ + INVALIDATE_TLB((pmap), (s), (e)); \ + } \ + \ + mp_enable_preemption(); \ +} -static void pmap_flush_tlbs(pmap_t pmap); +#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */ -#define PMAP_UPDATE_TLBS(pmap, s, e) \ - pmap_flush_tlbs(pmap) +#define INVALIDATE_TLB(m, s, e) { \ + flush_tlb(); \ +} +/* + * Structures to keep track of pending TLB invalidations + */ +cpu_set cpus_active; +cpu_set cpus_idle; -#define MAX_TBIS_SIZE 32 /* > this -> TBIA */ /* XXX */ +#define UPDATE_LIST_SIZE 4 +struct pmap_update_item { + pmap_t pmap; /* pmap to invalidate */ + vm_offset_t start; /* start address to invalidate */ + vm_offset_t end; /* end address to invalidate */ +}; + +typedef struct pmap_update_item *pmap_update_item_t; + +/* + * List of pmap updates. If the list overflows, + * the last entry is changed to invalidate all. 
+ */ +struct pmap_update_list { + decl_simple_lock_data(,lock) + int count; + struct pmap_update_item item[UPDATE_LIST_SIZE]; +} ; +typedef struct pmap_update_list *pmap_update_list_t; + +extern void signal_cpus( + cpu_set use_list, + pmap_t pmap, + vm_offset_t start, + vm_offset_t end); pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE]; @@ -442,12 +499,14 @@ pmap_memory_region_t pmap_memory_regions[PMAP_MEMORY_REGIONS_SIZE]; * Other useful macros. */ #define current_pmap() (vm_map_pmap(current_thread()->map)) +#define pmap_in_use(pmap, cpu) (((pmap)->cpus_using & (1 << (cpu))) != 0) struct pmap kernel_pmap_store; pmap_t kernel_pmap; -pd_entry_t high_shared_pde; -pd_entry_t commpage64_pde; +#ifdef PMAP_QUEUE +decl_simple_lock_data(,free_pmap_lock) +#endif struct zone *pmap_zone; /* zone of pmap structures */ @@ -455,9 +514,6 @@ int pmap_debug = 0; /* flag for debugging prints */ unsigned int inuse_ptepages_count = 0; /* debugging */ -addr64_t kernel64_cr3; -boolean_t no_shared_cr3 = FALSE; /* -no_shared_cr3 boot arg */ - /* * Pmap cache. Cache is threaded through ref_count field of pmap. * Max will eventually be constant -- variable for experimentation. @@ -468,11 +524,11 @@ pmap_t pmap_cache_list; int pmap_cache_count; decl_simple_lock_data(,pmap_cache_lock) +extern vm_offset_t hole_start, hole_end; + extern char end; static int nkpt; -extern uint32_t lowGlo; -extern void *version; pt_entry_t *DMAP1, *DMAP2; caddr_t DADDR1; @@ -483,7 +539,7 @@ caddr_t DADDR2; struct pmap_alias { vm_offset_t rpc; pmap_t pmap; - vm_map_offset_t va; + vm_offset_t va; int cookie; #define PMAP_ALIAS_COOKIE 0xdeadbeef } pmap_aliasbuf[PMAP_ALIAS_MAX]; @@ -492,183 +548,73 @@ extern vm_offset_t get_rpc(); #endif /* DEBUG_ALIAS */ -/* - * for legacy, returns the address of the pde entry. - * for 64 bit, causes the pdpt page containing the pde entry to be mapped, - * then returns the mapped address of the pde entry in that page - */ -pd_entry_t * -pmap_pde(pmap_t m, vm_map_offset_t v) -{ - pd_entry_t *pde; - if (!cpu_64bit || (m == kernel_pmap)) { - pde = (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT])); - } else { - assert(m); - assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); - pde = pmap64_pde(m, v); - } - return pde; -} - +#define pmap_pde(m, v) (&((m)->dirbase[(vm_offset_t)(v) >> PDESHIFT])) +#define pdir_pde(d, v) (d[(vm_offset_t)(v) >> PDESHIFT]) -/* - * the single pml4 page per pmap is allocated at pmap create time and exists - * for the duration of the pmap. we allocate this page in kernel vm (to save us one - * level of page table dynamic mapping. - * this returns the address of the requested pml4 entry in the top level page. 
- */ -static inline -pml4_entry_t * -pmap64_pml4(pmap_t pmap, vm_map_offset_t vaddr) +static __inline int +pmap_is_current(pmap_t pmap) { - return ((pml4_entry_t *)pmap->pm_hold + ((vm_offset_t)((vaddr>>PML4SHIFT)&(NPML4PG-1)))); + return (pmap == kernel_pmap || + (pmap->dirbase[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME)); } -/* - * maps in the pml4 page, if any, containing the pdpt entry requested - * and returns the address of the pdpt entry in that mapped page - */ -pdpt_entry_t * -pmap64_pdpt(pmap_t pmap, vm_map_offset_t vaddr) -{ - pml4_entry_t newpf; - pml4_entry_t *pml4; - int i; - - assert(pmap); - assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); - if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) { - return(0); - } - - pml4 = pmap64_pml4(pmap, vaddr); - - if (pml4 && ((*pml4 & INTEL_PTE_VALID))) { - - newpf = *pml4 & PG_FRAME; - - - for (i=PMAP_PDPT_FIRST_WINDOW; i < PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS; i++) { - if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) { - return((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) + - ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1)))); - } - } - - current_cpu_datap()->cpu_pmap->pdpt_window_index++; - if (current_cpu_datap()->cpu_pmap->pdpt_window_index > (PMAP_PDPT_FIRST_WINDOW+PMAP_PDPT_NWINDOWS-1)) - current_cpu_datap()->cpu_pmap->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW; - pmap_store_pte( - (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CMAP), - newpf | INTEL_PTE_RW | INTEL_PTE_VALID); - invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR)); - return ((pdpt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pdpt_window_index].prv_CADDR) + - ((vm_offset_t)((vaddr>>PDPTSHIFT)&(NPDPTPG-1)))); - } - - return (0); -} /* - * maps in the pdpt page, if any, containing the pde entry requested - * and returns the address of the pde entry in that mapped page + * return address of mapped pte for vaddr va in pmap pmap. 
*/ -pd_entry_t * -pmap64_pde(pmap_t pmap, vm_map_offset_t vaddr) -{ - pdpt_entry_t newpf; - pdpt_entry_t *pdpt; - int i; - - assert(pmap); - assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); - if ((vaddr > 0x00007FFFFFFFFFFFULL) && (vaddr < 0xFFFF800000000000ULL)) { - return(0); +pt_entry_t * +pmap_pte(pmap_t pmap, vm_offset_t va) +{ + pd_entry_t *pde; + pd_entry_t newpf; + + pde = pmap_pde(pmap, va); + if (*pde != 0) { + if (pmap_is_current(pmap)) + return( vtopte(va)); + newpf = *pde & PG_FRAME; + if (((*CM4) & PG_FRAME) != newpf) { + *CM4 = newpf | INTEL_PTE_RW | INTEL_PTE_VALID; + invlpg((u_int)CA4); + } + return (pt_entry_t *)CA4 + (i386_btop(va) & (NPTEPG-1)); } - - /* if (vaddr & (1ULL << 63)) panic("neg addr");*/ - pdpt = pmap64_pdpt(pmap, vaddr); - - if (pdpt && ((*pdpt & INTEL_PTE_VALID))) { - - newpf = *pdpt & PG_FRAME; - - for (i=PMAP_PDE_FIRST_WINDOW; i < PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS; i++) { - if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) { - return((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) + - ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1)))); - } - } - - current_cpu_datap()->cpu_pmap->pde_window_index++; - if (current_cpu_datap()->cpu_pmap->pde_window_index > (PMAP_PDE_FIRST_WINDOW+PMAP_PDE_NWINDOWS-1)) - current_cpu_datap()->cpu_pmap->pde_window_index = PMAP_PDE_FIRST_WINDOW; - pmap_store_pte( - (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CMAP), - newpf | INTEL_PTE_RW | INTEL_PTE_VALID); - invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR)); - return ((pd_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pde_window_index].prv_CADDR) + - ((vm_offset_t)((vaddr>>PDSHIFT)&(NPDPG-1)))); - } - - return (0); + return(0); } + +#define DEBUG_PTE_PAGE 0 - - -/* - * return address of mapped pte for vaddr va in pmap pmap. - * must be called with pre-emption or interrupts disabled - * if targeted pmap is not the kernel pmap - * since we may be passing back a virtual address that is - * associated with this cpu... pre-emption or interrupts - * must remain disabled until the caller is done using - * the pointer that was passed back . 
- * - * maps the pde page, if any, containing the pte in and returns - * the address of the pte in that mapped page - */ -pt_entry_t * -pmap_pte(pmap_t pmap, vm_map_offset_t vaddr) +#if DEBUG_PTE_PAGE +void +ptep_check( + ptep_t ptep) { - pd_entry_t *pde; - pd_entry_t newpf; - int i; - - assert(pmap); - pde = pmap_pde(pmap,vaddr); - - if (pde && ((*pde & INTEL_PTE_VALID))) { - if (pmap == kernel_pmap) { - return (vtopte(vaddr)); /* compat kernel still has pte's mapped */ - } + register pt_entry_t *pte, *epte; + int ctu, ctw; - assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0); - - newpf = *pde & PG_FRAME; - - for (i=PMAP_PTE_FIRST_WINDOW; i < PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS; i++) { - if (((*(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP)) & PG_FRAME) == newpf) { - return((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR) + - ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1))); - } + /* check the use and wired counts */ + if (ptep == PTE_PAGE_NULL) + return; + pte = pmap_pte(ptep->pmap, ptep->va); + epte = pte + INTEL_PGBYTES/sizeof(pt_entry_t); + ctu = 0; + ctw = 0; + while (pte < epte) { + if (pte->pfn != 0) { + ctu++; + if (pte->wired) + ctw++; } - - current_cpu_datap()->cpu_pmap->pte_window_index++; - if (current_cpu_datap()->cpu_pmap->pte_window_index > (PMAP_PTE_FIRST_WINDOW+PMAP_PTE_NWINDOWS-1)) - current_cpu_datap()->cpu_pmap->pte_window_index = PMAP_PTE_FIRST_WINDOW; - pmap_store_pte( - (current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CMAP), - newpf | INTEL_PTE_RW | INTEL_PTE_VALID); - invlpg((u_int)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR)); - return ((pt_entry_t *)(current_cpu_datap()->cpu_pmap->mapwindow[current_cpu_datap()->cpu_pmap->pte_window_index].prv_CADDR) + - ((vm_offset_t)i386_btop(vaddr) & (NPTEPG-1))); + pte++; } - return(0); + if (ctu != ptep->use_count || ctw != ptep->wired_count) { + printf("use %d wired %d - actual use %d wired %d\n", + ptep->use_count, ptep->wired_count, ctu, ctw); + panic("pte count"); + } } - +#endif /* DEBUG_PTE_PAGE */ /* * Map memory at initialization. 
The physical addresses being @@ -679,18 +625,17 @@ pmap_pte(pmap_t pmap, vm_map_offset_t vaddr) */ vm_offset_t pmap_map( - vm_offset_t virt, - vm_map_offset_t start_addr, - vm_map_offset_t end_addr, - vm_prot_t prot, - unsigned int flags) + register vm_offset_t virt, + register vm_offset_t start_addr, + register vm_offset_t end_addr, + register vm_prot_t prot) { - int ps; + register int ps; ps = PAGE_SIZE; while (start_addr < end_addr) { - pmap_enter(kernel_pmap, (vm_map_offset_t)virt, - (ppnum_t) i386_btop(start_addr), prot, flags, FALSE); + pmap_enter(kernel_pmap, + virt, (ppnum_t) i386_btop(start_addr), prot, 0, FALSE); virt += ps; start_addr += ps; } @@ -706,36 +651,30 @@ pmap_map( */ vm_offset_t pmap_map_bd( - vm_offset_t virt, - vm_map_offset_t start_addr, - vm_map_offset_t end_addr, - vm_prot_t prot, - unsigned int flags) + register vm_offset_t virt, + register vm_offset_t start_addr, + register vm_offset_t end_addr, + vm_prot_t prot) { - pt_entry_t template; - pt_entry_t *pte; + register pt_entry_t template; + register pt_entry_t *pte; template = pa_to_pte(start_addr) + | INTEL_PTE_NCACHE | INTEL_PTE_REF | INTEL_PTE_MOD | INTEL_PTE_WIRED | INTEL_PTE_VALID; - - if(flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) { - template |= INTEL_PTE_NCACHE; - if(!(flags & (VM_MEM_GUARDED | VM_WIMG_USE_DEFAULT))) - template |= INTEL_PTE_PTA; - } - if (prot & VM_PROT_WRITE) template |= INTEL_PTE_WRITE; + /* XXX move pmap_pte out of loop, once one pte mapped, all are */ while (start_addr < end_addr) { - pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt); + pte = pmap_pte(kernel_pmap, virt); if (pte == PT_ENTRY_NULL) { panic("pmap_map_bd: Invalid kernel address\n"); } - pmap_store_pte(pte, template); + WRITE_PTE_FAST(pte, template) pte_increment_pa(template); virt += PAGE_SIZE; start_addr += PAGE_SIZE; @@ -752,142 +691,6 @@ extern vm_offset_t etext; extern void *sectHIBB; extern int sectSizeHIB; - -vm_offset_t -pmap_high_shared_remap(enum high_fixed_addresses e, vm_offset_t va, int sz) -{ - vm_offset_t ve = pmap_index_to_virt(e); - pt_entry_t *ptep; - pmap_paddr_t pa; - int i; - - assert(0 == (va & PAGE_MASK)); /* expecting page aligned */ - ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ve); - - for (i=0; i< sz; i++) { - pa = (pmap_paddr_t) kvtophys(va); - pmap_store_pte(ptep, (pa & PG_FRAME) - | INTEL_PTE_VALID - | INTEL_PTE_GLOBAL - | INTEL_PTE_RW - | INTEL_PTE_REF - | INTEL_PTE_MOD); - va+= PAGE_SIZE; - ptep++; - } - return ve; -} - -vm_offset_t -pmap_cpu_high_shared_remap(int cpu, enum high_cpu_types e, vm_offset_t va, int sz) -{ - enum high_fixed_addresses a = e + HIGH_CPU_END * cpu; - return pmap_high_shared_remap(HIGH_FIXED_CPUS_BEGIN + a, va, sz); -} - -void pmap_init_high_shared(void); - -extern vm_offset_t gdtptr, idtptr; - -extern uint32_t low_intstack; - -extern struct fake_descriptor ldt_desc_pattern; -extern struct fake_descriptor tss_desc_pattern; - -extern char hi_remap_text, hi_remap_etext; -extern char t_zero_div; - -pt_entry_t *pte_unique_base; - -void -pmap_init_high_shared(void) -{ - - vm_offset_t haddr; - struct __gdt_desc_struct gdt_desc = {0,0,0}; - struct __idt_desc_struct idt_desc = {0,0,0}; -#if MACH_KDB - struct i386_tss *ttss; -#endif - - kprintf("HIGH_MEM_BASE 0x%x fixed per-cpu begin 0x%x\n", - HIGH_MEM_BASE,pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN)); - pte_unique_base = pmap_pte(kernel_pmap, (vm_map_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN)); - - if (i386_btop(&hi_remap_etext - &hi_remap_text + 1) > - HIGH_FIXED_TRAMPS_END - HIGH_FIXED_TRAMPS + 1) - 
panic("tramps too large"); - haddr = pmap_high_shared_remap(HIGH_FIXED_TRAMPS, - (vm_offset_t) &hi_remap_text, 3); - kprintf("tramp: 0x%x, ",haddr); - printf("hi mem tramps at 0x%x\n",haddr); - /* map gdt up high and update ptr for reload */ - haddr = pmap_high_shared_remap(HIGH_FIXED_GDT, - (vm_offset_t) master_gdt, 1); - __asm__ __volatile__("sgdt %0": "=m" (gdt_desc): :"memory"); - gdt_desc.address = haddr; - kprintf("GDT: 0x%x, ",haddr); - /* map ldt up high */ - haddr = pmap_high_shared_remap(HIGH_FIXED_LDT_BEGIN, - (vm_offset_t) master_ldt, - HIGH_FIXED_LDT_END - HIGH_FIXED_LDT_BEGIN + 1); - kprintf("LDT: 0x%x, ",haddr); - /* put new ldt addr into gdt */ - master_gdt[sel_idx(KERNEL_LDT)] = ldt_desc_pattern; - master_gdt[sel_idx(KERNEL_LDT)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(KERNEL_LDT)], 1); - master_gdt[sel_idx(USER_LDT)] = ldt_desc_pattern; - master_gdt[sel_idx(USER_LDT)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(USER_LDT)], 1); - - /* map idt up high */ - haddr = pmap_high_shared_remap(HIGH_FIXED_IDT, - (vm_offset_t) master_idt, 1); - __asm__ __volatile__("sidt %0" : "=m" (idt_desc)); - idt_desc.address = haddr; - kprintf("IDT: 0x%x, ", haddr); - /* remap ktss up high and put new high addr into gdt */ - haddr = pmap_high_shared_remap(HIGH_FIXED_KTSS, - (vm_offset_t) &master_ktss, 1); - master_gdt[sel_idx(KERNEL_TSS)] = tss_desc_pattern; - master_gdt[sel_idx(KERNEL_TSS)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(KERNEL_TSS)], 1); - kprintf("KTSS: 0x%x, ",haddr); -#if MACH_KDB - /* remap dbtss up high and put new high addr into gdt */ - haddr = pmap_high_shared_remap(HIGH_FIXED_DBTSS, - (vm_offset_t) &master_dbtss, 1); - master_gdt[sel_idx(DEBUG_TSS)] = tss_desc_pattern; - master_gdt[sel_idx(DEBUG_TSS)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(DEBUG_TSS)], 1); - ttss = (struct i386_tss *)haddr; - kprintf("DBTSS: 0x%x, ",haddr); -#endif /* MACH_KDB */ - - /* remap dftss up high and put new high addr into gdt */ - haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS, - (vm_offset_t) &master_dftss, 1); - master_gdt[sel_idx(DF_TSS)] = tss_desc_pattern; - master_gdt[sel_idx(DF_TSS)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(DF_TSS)], 1); - kprintf("DFTSS: 0x%x\n",haddr); - - /* remap mctss up high and put new high addr into gdt */ - haddr = pmap_high_shared_remap(HIGH_FIXED_DFTSS, - (vm_offset_t) &master_mctss, 1); - master_gdt[sel_idx(MC_TSS)] = tss_desc_pattern; - master_gdt[sel_idx(MC_TSS)].offset = (vm_offset_t) haddr; - fix_desc(&master_gdt[sel_idx(MC_TSS)], 1); - kprintf("MCTSS: 0x%x\n",haddr); - - __asm__ __volatile__("lgdt %0": "=m" (gdt_desc)); - __asm__ __volatile__("lidt %0": "=m" (idt_desc)); - kprintf("gdt/idt reloaded, "); - set_tr(KERNEL_TSS); - kprintf("tr reset to KERNEL_TSS\n"); -} - - /* * Bootstrap the system enough to run with virtual memory. * Map the kernel's code and data, and allocate the system page table. @@ -909,60 +712,50 @@ pmap_init_high_shared(void) void pmap_bootstrap( - __unused vm_offset_t load_start, - boolean_t IA32e) + __unused vm_offset_t load_start) { vm_offset_t va; pt_entry_t *pte; int i; int wpkernel, boot_arg; - pdpt_entry_t *pdpt; vm_last_addr = VM_MAX_KERNEL_ADDRESS; /* Set the highest address * known to VM */ + /* * The kernel's pmap is statically allocated so we don't * have to use pmap_create, which is unlikely to work * correctly at this part of the boot sequence. 
*/ - kernel_pmap = &kernel_pmap_store; +#ifdef PMAP_QUEUE + kernel_pmap->pmap_link.next = (queue_t)kernel_pmap; /* Set up anchor forward */ + kernel_pmap->pmap_link.prev = (queue_t)kernel_pmap; /* Set up anchor reverse */ +#endif kernel_pmap->ref_count = 1; - kernel_pmap->nx_enabled = FALSE; - kernel_pmap->pm_64bit = 0; kernel_pmap->pm_obj = (vm_object_t) NULL; kernel_pmap->dirbase = (pd_entry_t *)((unsigned int)IdlePTD | KERNBASE); - kernel_pmap->pdirbase = (pmap_paddr_t)((int)IdlePTD); - pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE ); - kernel_pmap->pm_pdpt = pdpt; - kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePDPT); + kernel_pmap->pdirbase = (pd_entry_t *)IdlePTD; +#ifdef PAE + kernel_pmap->pm_pdpt = (pd_entry_t *)((unsigned int)IdlePDPT | KERNBASE ); + kernel_pmap->pm_ppdpt = (vm_offset_t)IdlePDPT; +#endif va = (vm_offset_t)kernel_pmap->dirbase; /* setup self referential mapping(s) */ - for (i = 0; i< NPGPTD; i++, pdpt++) { + for (i = 0; i< NPGPTD; i++ ) { pmap_paddr_t pa; pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i)); - pmap_store_pte( - (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i), + * (pd_entry_t *) (kernel_pmap->dirbase + PTDPTDI + i) = (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF | - INTEL_PTE_MOD | INTEL_PTE_WIRED) ; - pmap_store_pte(pdpt, pa | INTEL_PTE_VALID); + INTEL_PTE_MOD | INTEL_PTE_WIRED ; +#ifdef PAE + kernel_pmap->pm_pdpt[i] = pa | INTEL_PTE_VALID; +#endif } - cpu_64bit = IA32e; - - lo_kernel_cr3 = kernel_pmap->pm_cr3; - current_cpu_datap()->cpu_kernel_cr3 = (addr64_t) kernel_pmap->pm_cr3; - - /* save the value we stuff into created pmaps to share the gdts etc */ - high_shared_pde = *pmap_pde(kernel_pmap, HIGH_MEM_BASE); - /* make sure G bit is on for high shared pde entry */ - high_shared_pde |= INTEL_PTE_GLOBAL; - pmap_store_pte(pmap_pde(kernel_pmap, HIGH_MEM_BASE), high_shared_pde); - nkpt = NKPT; - inuse_ptepages_count += NKPT; virtual_avail = (vm_offset_t)VADDR(KPTDI,0) + (vm_offset_t)first_avail; virtual_end = (vm_offset_t)(VM_MAX_KERNEL_ADDRESS); @@ -972,18 +765,25 @@ pmap_bootstrap( * mapping of pages. */ #define SYSMAP(c, p, v, n) \ - v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n) + v = (c)va; va += ((n)*INTEL_PGBYTES); p = pte; pte += (n); va = virtual_avail; - pte = vtopte(va); + pte = (pt_entry_t *) pmap_pte(kernel_pmap, va); + + /* + * CMAP1/CMAP2 are used for zeroing and copying pages. + * CMAP3 is used for ml_phys_read/write. + */ + SYSMAP(caddr_t, CM1, CA1, 1) + * (pt_entry_t *) CM1 = 0; + SYSMAP(caddr_t, CM2, CA2, 1) + * (pt_entry_t *) CM2 = 0; + SYSMAP(caddr_t, CM3, CA3, 1) + * (pt_entry_t *) CM3 = 0; - for (i=0; icpu_pmap->mapwindow[i].prv_CMAP), - (current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CADDR), - 1); - *current_cpu_datap()->cpu_pmap->mapwindow[i].prv_CMAP = 0; - } + /* used by pmap_pte */ + SYSMAP(caddr_t, CM4, CA4, 1) + * (pt_entry_t *) CM4 = 0; /* DMAP user for debugger */ SYSMAP(caddr_t, DMAP1, DADDR1, 1); @@ -997,109 +797,45 @@ pmap_bootstrap( virtual_avail = va; wpkernel = 1; - if (PE_parse_boot_arg("wpkernel", &boot_arg)) { - if (boot_arg == 0) - wpkernel = 0; + if (PE_parse_boot_arg("debug", &boot_arg)) { + if (boot_arg & DB_PRT) wpkernel = 0; + if (boot_arg & DB_NMI) wpkernel = 0; } - /* Remap kernel text readonly unless the "wpkernel" boot-arg is present - * and set to 0. 
- */ + /* remap kernel text readonly if not debugging or kprintfing */ if (wpkernel) { vm_offset_t myva; pt_entry_t *ptep; - for (myva = i386_round_page(MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) { + for (myva = i386_round_page(VM_MIN_KERNEL_ADDRESS + MP_BOOT + MP_BOOTSTACK); myva < etext; myva += PAGE_SIZE) { if (myva >= (vm_offset_t)sectHIBB && myva < ((vm_offset_t)sectHIBB + sectSizeHIB)) continue; - ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)myva); + ptep = pmap_pte(kernel_pmap, myva); if (ptep) - pmap_store_pte(ptep, *ptep & ~INTEL_PTE_RW); + *ptep &= ~INTEL_PTE_RW; } + flush_tlb(); } - /* no matter what, kernel page zero is not accessible */ - pte = pmap_pte(kernel_pmap, 0); - pmap_store_pte(pte, INTEL_PTE_INVALID); - - /* map lowmem global page into fixed addr 0x2000 */ - if (0 == (pte = pmap_pte(kernel_pmap,0x2000))) panic("lowmem pte"); - - pmap_store_pte(pte, kvtophys((vm_offset_t)&lowGlo)|INTEL_PTE_VALID|INTEL_PTE_REF|INTEL_PTE_MOD|INTEL_PTE_WIRED|INTEL_PTE_RW); - flush_tlb(); - simple_lock_init(&kernel_pmap->lock, 0); simple_lock_init(&pv_free_list_lock, 0); - pmap_init_high_shared(); - - pde_mapped_size = PDE_MAPPED_SIZE; - - if (cpu_64bit) { - pdpt_entry_t *ppdpt = (pdpt_entry_t *)IdlePDPT; - pdpt_entry_t *ppdpt64 = (pdpt_entry_t *)IdlePDPT64; - pdpt_entry_t *ppml4 = (pdpt_entry_t *)IdlePML4; - int istate = ml_set_interrupts_enabled(FALSE); - - /* - * Clone a new 64-bit 3rd-level page table directory, IdlePML4, - * with page bits set for the correct IA-32e operation and so that - * the legacy-mode IdlePDPT is retained for slave processor start-up. - * This is necessary due to the incompatible use of page bits between - * 64-bit and legacy modes. - */ - kernel_pmap->pm_cr3 = (pmap_paddr_t)((int)IdlePML4); /* setup in start.s for us */ - kernel_pmap->pm_pml4 = IdlePML4; - kernel_pmap->pm_pdpt = (pd_entry_t *) - ((unsigned int)IdlePDPT64 | KERNBASE ); -#define PAGE_BITS INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF - pmap_store_pte(kernel_pmap->pm_pml4, - (uint32_t)IdlePDPT64 | PAGE_BITS); - pmap_store_pte((ppdpt64+0), *(ppdpt+0) | PAGE_BITS); - pmap_store_pte((ppdpt64+1), *(ppdpt+1) | PAGE_BITS); - pmap_store_pte((ppdpt64+2), *(ppdpt+2) | PAGE_BITS); - pmap_store_pte((ppdpt64+3), *(ppdpt+3) | PAGE_BITS); - - /* - * The kernel is also mapped in the uber-sapce at the 4GB starting - * 0xFFFFFF80:00000000. This is the highest entry in the 4th-level. - */ - pmap_store_pte((ppml4+KERNEL_UBER_PML4_INDEX), *(ppml4+0)); - - kernel64_cr3 = (addr64_t) kernel_pmap->pm_cr3; - cpu_IA32e_enable(current_cpu_datap()); - current_cpu_datap()->cpu_is64bit = TRUE; - /* welcome to a 64 bit world */ - - /* Re-initialize and load descriptors */ - cpu_desc_init64(&cpu_data_master, TRUE); - cpu_desc_load64(&cpu_data_master); - fast_syscall_init64(); - - pde_mapped_size = 512*4096 ; - - ml_set_interrupts_enabled(istate); - - } - kernel_pmap->pm_hold = (vm_offset_t)kernel_pmap->pm_pml4; + /* invalidate user virtual addresses */ + memset((char *)kernel_pmap->dirbase, + 0, + (KPTDI) * sizeof(pd_entry_t)); kprintf("Kernel virtual space from 0x%x to 0x%x.\n", VADDR(KPTDI,0), virtual_end); - printf("PAE enabled\n"); - if (cpu_64bit){ - printf("64 bit mode enabled\n");kprintf("64 bit mode enabled\n"); } - +#ifdef PAE kprintf("Available physical space from 0x%llx to 0x%llx\n", avail_start, avail_end); - - /* - * By default for 64-bit users loaded at 4GB, share kernel mapping. - * But this may be overridden by the -no_shared_cr3 boot-arg. 
- */ - if (PE_parse_boot_arg("-no_shared_cr3", &no_shared_cr3)) { - kprintf("Shared kernel address space disabled\n"); - } + printf("PAE enabled\n"); +#else + kprintf("Available physical space from 0x%x to 0x%x\n", + avail_start, avail_end); +#endif } void @@ -1122,8 +858,8 @@ pmap_init(void) register long npages; vm_offset_t addr; register vm_size_t s; - vm_map_offset_t vaddr; - ppnum_t ppn; + vm_offset_t vaddr; + ppnum_t ppn; /* * Allocate memory for the pv_head_table and its lock bits, @@ -1162,8 +898,11 @@ pmap_init(void) pmap_zone = zinit(s, 400*s, 4096, "pmap"); /* XXX */ s = (vm_size_t) sizeof(struct pv_entry); pv_list_zone = zinit(s, 10000*s, 4096, "pv_list"); /* XXX */ +#ifdef PAE + // s = (vm_size_t) (sizeof(pdpt_entry_t) * NPGPTD); s = 63; pdpt_zone = zinit(s, 400*s, 4096, "pdpt"); /* XXX */ +#endif /* * Only now, when all of the data structures are allocated, @@ -1176,22 +915,23 @@ pmap_init(void) vm_first_phys = 0; vm_last_phys = avail_end; +#if GROW_KERNEL_FUNCTION_IMPLEMENTED kptobj = &kptobj_object_store; _vm_object_allocate((vm_object_size_t)NKPDE, kptobj); kernel_pmap->pm_obj = kptobj; +#endif /* create pv entries for kernel pages mapped by low level startup code. these have to exist so we can pmap_remove() e.g. kext pages from the middle of our addr space */ - vaddr = (vm_map_offset_t)0; + vaddr = (vm_offset_t)VM_MIN_KERNEL_ADDRESS; for (ppn = 0; ppn < i386_btop(avail_start) ; ppn++ ) { pv_entry_t pv_e; pv_e = pai_to_pvh(ppn); pv_e->va = vaddr; vaddr += PAGE_SIZE; - kernel_pmap->stats.resident_count++; pv_e->pmap = kernel_pmap; pv_e->next = PV_ENTRY_NULL; } @@ -1204,6 +944,10 @@ pmap_init(void) pmap_cache_list = PMAP_NULL; pmap_cache_count = 0; simple_lock_init(&pmap_cache_lock, 0); +#ifdef PMAP_QUEUE + simple_lock_init(&free_pmap_lock, 0); +#endif + } void @@ -1214,7 +958,7 @@ x86_lowmem_free(void) the actual pages that are released are determined by which pages the memory sizing code puts into the region table */ - ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base), + ml_static_mfree((vm_offset_t) i386_ptob(pmap_memory_regions[0].base)|VM_MIN_KERNEL_ADDRESS, (vm_size_t) i386_ptob(pmap_memory_regions[0].end - pmap_memory_regions[0].base)); } @@ -1264,20 +1008,15 @@ pmap_verify_free( */ pmap_t pmap_create( - vm_map_size_t sz, - boolean_t is_64bit) + vm_size_t size) { - register pmap_t p; - int i; - vm_offset_t va; - vm_size_t size; - pdpt_entry_t *pdpt; - pml4_entry_t *pml4p; - int template; - pd_entry_t *pdp; - spl_t s; - - size = (vm_size_t) sz; + register pmap_t p; +#ifdef PMAP_QUEUE + register pmap_t pro; + spl_t s; +#endif + register int i; + register vm_offset_t va; /* * A software use-only map doesn't even need a map. 
@@ -1290,168 +1029,61 @@ pmap_create( p = (pmap_t) zalloc(pmap_zone); if (PMAP_NULL == p) panic("pmap_create zalloc"); - - /* init counts now since we'll be bumping some */ + if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD)) + panic("pmap_create kmem_alloc_wired"); +#ifdef PAE + p->pm_hold = (vm_offset_t)zalloc(pdpt_zone); + if ((vm_offset_t)NULL == p->pm_hold) { + panic("pdpt zalloc"); + } + p->pm_pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31); + p->pm_ppdpt = kvtophys((vm_offset_t)p->pm_pdpt); /* XXX */ +#endif + if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPDEPG)))) + panic("pmap_create vm_object_allocate"); + memcpy(p->dirbase, + (void *)((unsigned int)IdlePTD | KERNBASE), + NBPTD); + va = (vm_offset_t)p->dirbase; + p->pdirbase = (pd_entry_t *)(kvtophys(va)); simple_lock_init(&p->lock, 0); + + /* setup self referential mapping(s) */ + for (i = 0; i< NPGPTD; i++ ) { + pmap_paddr_t pa; + pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i)); + * (pd_entry_t *) (p->dirbase + PTDPTDI + i) = + (pa & PG_FRAME) | INTEL_PTE_VALID | INTEL_PTE_RW | INTEL_PTE_REF | + INTEL_PTE_MOD | INTEL_PTE_WIRED ; +#ifdef PAE + p->pm_pdpt[i] = pa | INTEL_PTE_VALID; +#endif + } + + p->cpus_using = 0; p->stats.resident_count = 0; p->stats.wired_count = 0; p->ref_count = 1; - p->nx_enabled = 1; - p->pm_64bit = is_64bit; - p->pm_kernel_cr3 = FALSE; - p->pm_shared = FALSE; - - if (!cpu_64bit) { - /* legacy 32 bit setup */ - /* in the legacy case the pdpt layer is hardwired to 4 entries and each - * entry covers 1GB of addr space */ - if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->dirbase), NBPTD)) - panic("pmap_create kmem_alloc_wired"); - p->pm_hold = (vm_offset_t)zalloc(pdpt_zone); - if ((vm_offset_t)NULL == p->pm_hold) { - panic("pdpt zalloc"); - } - pdpt = (pdpt_entry_t *) (( p->pm_hold + 31) & ~31); - p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)pdpt); - if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPGPTD*NPTDPG)))) - panic("pmap_create vm_object_allocate"); - - memset((char *)p->dirbase, 0, NBPTD); - - va = (vm_offset_t)p->dirbase; - p->pdirbase = kvtophys(va); - - template = cpu_64bit ? 
INTEL_PTE_VALID|INTEL_PTE_RW|INTEL_PTE_USER|INTEL_PTE_REF : INTEL_PTE_VALID; - for (i = 0; i< NPGPTD; i++, pdpt++) { - pmap_paddr_t pa; - pa = (pmap_paddr_t) kvtophys(va + i386_ptob(i)); - pmap_store_pte(pdpt, pa | template); - } - - /* map the high shared pde */ - pmap_store_pte(pmap_pde(p, HIGH_MEM_BASE), high_shared_pde); - - } else { - - /* 64 bit setup */ - - /* alloc the pml4 page in kernel vm */ - if (KERN_SUCCESS != kmem_alloc_wired(kernel_map, (vm_offset_t *)(&p->pm_hold), PAGE_SIZE)) - panic("pmap_create kmem_alloc_wired pml4"); - - memset((char *)p->pm_hold, 0, PAGE_SIZE); - p->pm_cr3 = (pmap_paddr_t)kvtophys((vm_offset_t)p->pm_hold); - - inuse_ptepages_count++; - p->stats.resident_count++; - p->stats.wired_count++; - /* allocate the vm_objs to hold the pdpt, pde and pte pages */ - - if (NULL == (p->pm_obj_pml4 = vm_object_allocate((vm_object_size_t)(NPML4PGS)))) - panic("pmap_create pdpt obj"); - - if (NULL == (p->pm_obj_pdpt = vm_object_allocate((vm_object_size_t)(NPDPTPGS)))) - panic("pmap_create pdpt obj"); - - if (NULL == (p->pm_obj = vm_object_allocate((vm_object_size_t)(NPDEPGS)))) - panic("pmap_create pte obj"); +#ifdef PMAP_QUEUE + /* insert new pmap at head of queue hanging off kernel_pmap */ + SPLVM(s); + simple_lock(&free_pmap_lock); + p->pmap_link.next = (queue_t)kernel_pmap->pmap_link.next; + kernel_pmap->pmap_link.next = (queue_t)p; - /* uber space points to uber mapped kernel */ - s = splhigh(); - pml4p = pmap64_pml4(p, 0ULL); - pmap_store_pte((pml4p+KERNEL_UBER_PML4_INDEX),*kernel_pmap->pm_pml4); - if (!is_64bit) { - while ((pdp = pmap64_pde(p, (uint64_t)HIGH_MEM_BASE)) == PD_ENTRY_NULL) { - splx(s); - pmap_expand_pdpt(p, (uint64_t)HIGH_MEM_BASE); /* need room for another pde entry */ - s = splhigh(); - } - pmap_store_pte(pdp, high_shared_pde); - } + pro = (pmap_t) p->pmap_link.next; + p->pmap_link.prev = (queue_t)pro->pmap_link.prev; + pro->pmap_link.prev = (queue_t)p; - splx(s); - } + + simple_unlock(&free_pmap_lock); + SPLX(s); +#endif return(p); } -void -pmap_set_4GB_pagezero(pmap_t p) -{ - int spl; - pdpt_entry_t *user_pdptp; - pdpt_entry_t *kern_pdptp; - - assert(p->pm_64bit); - - /* Kernel-shared cr3 may be disabled by boot arg. */ - if (no_shared_cr3) - return; - - /* - * Set the bottom 4 3rd-level pte's to be the kernel's. - */ - spl = splhigh(); - while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) { - splx(spl); - pmap_expand_pml4(p, 0x0); - spl = splhigh(); - } - kern_pdptp = kernel_pmap->pm_pdpt; - pmap_store_pte(user_pdptp+0, *(kern_pdptp+0)); - pmap_store_pte(user_pdptp+1, *(kern_pdptp+1)); - pmap_store_pte(user_pdptp+2, *(kern_pdptp+2)); - pmap_store_pte(user_pdptp+3, *(kern_pdptp+3)); - - p->pm_kernel_cr3 = TRUE; - - splx(spl); - -} - -void -pmap_load_kernel_cr3(void) -{ - uint32_t kernel_cr3; - - assert(!ml_get_interrupts_enabled()); - - /* - * Reload cr3 with the true kernel cr3. - * Note: kernel's pml4 resides below 4GB physical. 
- */ - kernel_cr3 = current_cpu_datap()->cpu_kernel_cr3; - set_cr3(kernel_cr3); - current_cpu_datap()->cpu_active_cr3 = kernel_cr3; - current_cpu_datap()->cpu_task_map = TASK_MAP_32BIT; - current_cpu_datap()->cpu_tlb_invalid = FALSE; - __asm__ volatile("mfence"); -} - -void -pmap_clear_4GB_pagezero(pmap_t p) -{ - int spl; - pdpt_entry_t *user_pdptp; - - if (!p->pm_kernel_cr3) - return; - - spl = splhigh(); - user_pdptp = pmap64_pdpt(p, 0x0); - pmap_store_pte(user_pdptp+0, 0); - pmap_store_pte(user_pdptp+1, 0); - pmap_store_pte(user_pdptp+2, 0); - pmap_store_pte(user_pdptp+3, 0); - - p->pm_kernel_cr3 = FALSE; - - pmap_load_kernel_cr3(); - - splx(spl); -} - /* * Retire the given physical map from service. * Should only be called if the map contains @@ -1462,29 +1094,47 @@ void pmap_destroy( register pmap_t p) { + register pt_entry_t *pdep; register int c; spl_t s; -#if 0 - register pt_entry_t *pdep; register vm_page_t m; +#ifdef PMAP_QUEUE + register pmap_t pre,pro; #endif if (p == PMAP_NULL) return; + SPLVM(s); simple_lock(&p->lock); c = --p->ref_count; if (c == 0) { + register int my_cpu; + + mp_disable_preemption(); + my_cpu = cpu_number(); + /* * If some cpu is not using the physical pmap pointer that it * is supposed to be (see set_dirbase), we might be using the * pmap that is being destroyed! Make sure we are * physically on the right pmap: */ + /* force pmap/cr3 update */ PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_KERNEL_ADDRESS); + if (PMAP_REAL(my_cpu) == p) { + PMAP_CPU_CLR(p, my_cpu); + PMAP_REAL(my_cpu) = kernel_pmap; +#ifdef PAE + set_cr3((unsigned int)kernel_pmap->pm_ppdpt); +#else + set_cr3((unsigned int)kernel_pmap->pdirbase); +#endif + } + mp_enable_preemption(); } simple_unlock(&p->lock); SPLX(s); @@ -1493,21 +1143,31 @@ pmap_destroy( return; /* still in use */ } +#ifdef PMAP_QUEUE + /* remove from pmap queue */ + SPLVM(s); + simple_lock(&free_pmap_lock); + + pre = (pmap_t)p->pmap_link.prev; + pre->pmap_link.next = (queue_t)p->pmap_link.next; + pro = (pmap_t)p->pmap_link.next; + pro->pmap_link.prev = (queue_t)p->pmap_link.prev; + + simple_unlock(&free_pmap_lock); + SPLX(s); +#endif + /* * Free the memory maps, then the * pmap structure. */ - if (!cpu_64bit) { -#if 0 pdep = (pt_entry_t *)p->dirbase; while (pdep < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]) { - int ind; - + int ind; if (*pdep & INTEL_PTE_VALID) { - ind = pdep - (pt_entry_t *)&p->dirbase[0]; - + ind = pdep - (pt_entry_t *)&p->dirbase[0]; vm_object_lock(p->pm_obj); m = vm_page_lookup(p->pm_obj, (vm_object_offset_t)ind); if (m == VM_PAGE_NULL) { @@ -1516,49 +1176,25 @@ pmap_destroy( vm_page_lock_queues(); vm_page_free(m); inuse_ptepages_count--; - vm_object_unlock(p->pm_obj); vm_page_unlock_queues(); /* * Clear pdes, this might be headed for the cache. 
*/ - pmap_store_pte(pdep, 0); - pdep++; + *pdep++ = 0; } else { - pmap_store_pte(pdep, 0); - pdep++; + *pdep++ = 0; } } -#else - inuse_ptepages_count -= p->pm_obj->resident_page_count; -#endif - vm_object_deallocate(p->pm_obj); - kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD); - zfree(pdpt_zone, (void *)p->pm_hold); - } else { - - /* 64 bit */ - - pmap_unmap_sharedpage(p); - - /* free 64 bit mode structs */ - inuse_ptepages_count--; - kmem_free(kernel_map, (vm_offset_t)p->pm_hold, PAGE_SIZE); - - inuse_ptepages_count -= p->pm_obj_pml4->resident_page_count; - vm_object_deallocate(p->pm_obj_pml4); - - inuse_ptepages_count -= p->pm_obj_pdpt->resident_page_count; - vm_object_deallocate(p->pm_obj_pdpt); - - inuse_ptepages_count -= p->pm_obj->resident_page_count; - vm_object_deallocate(p->pm_obj); - - } + vm_object_deallocate(p->pm_obj); + kmem_free(kernel_map, (vm_offset_t)p->dirbase, NBPTD); +#ifdef PAE + zfree(pdpt_zone, (void *)p->pm_hold); +#endif zfree(pmap_zone, p); } @@ -1593,10 +1229,11 @@ pmap_reference( * Assumes that the pte-page exists. */ -static void +/* static */ +void pmap_remove_range( pmap_t pmap, - vm_map_offset_t vaddr, + vm_offset_t va, pt_entry_t *spte, pt_entry_t *epte) { @@ -1605,16 +1242,21 @@ pmap_remove_range( int pai; pmap_paddr_t pa; +#if DEBUG_PTE_PAGE + if (pmap != kernel_pmap) + ptep_check(get_pte_page(spte)); +#endif /* DEBUG_PTE_PAGE */ num_removed = 0; num_unwired = 0; for (cpte = spte; cpte < epte; - cpte++, vaddr += PAGE_SIZE) { + cpte++, va += PAGE_SIZE) { pa = pte_to_pa(*cpte); if (pa == 0) continue; + num_removed++; if (iswired(*cpte)) num_unwired++; @@ -1626,10 +1268,9 @@ pmap_remove_range( */ register pt_entry_t *lpte = cpte; - pmap_store_pte(lpte, 0); + *lpte = 0; continue; } - num_removed++; pai = pa_index(pa); LOCK_PVH(pai); @@ -1641,9 +1282,9 @@ pmap_remove_range( register pt_entry_t *lpte; lpte = cpte; - pmap_phys_attributes[pai] |= + pmap_phys_attributes[pai] |= *lpte & (PHYS_MODIFIED|PHYS_REFERENCED); - pmap_store_pte(lpte, 0); + *lpte = 0; } @@ -1658,7 +1299,7 @@ pmap_remove_range( if (pv_h->pmap == PMAP_NULL) { panic("pmap_remove: null pv_list!"); } - if (pv_h->va == vaddr && pv_h->pmap == pmap) { + if (pv_h->va == va && pv_h->pmap == pmap) { /* * Header is the pv_entry. 
Copy the next one * to header and free the next one (we cannot @@ -1680,7 +1321,7 @@ pmap_remove_range( if ((cur = prev->next) == PV_ENTRY_NULL) { panic("pmap-remove: mapping not in pv_list!"); } - } while (cur->va != vaddr || cur->pmap != pmap); + } while (cur->va != va || cur->pmap != pmap); prev->next = cur->next; PV_FREE(cur); } @@ -1729,31 +1370,39 @@ pmap_remove( spl_t spl; register pt_entry_t *pde; register pt_entry_t *spte, *epte; - addr64_t l64; - addr64_t orig_s64; + vm_offset_t l; + vm_offset_t s, e; + vm_offset_t orig_s; - if (map == PMAP_NULL || s64 == e64) + if (map == PMAP_NULL) return; PMAP_READ_LOCK(map, spl); - orig_s64 = s64; - - while (s64 < e64) { - l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size-1); - if (l64 > e64) - l64 = e64; - pde = pmap_pde(map, s64); - if (pde && (*pde & INTEL_PTE_VALID)) { - spte = (pt_entry_t *)pmap_pte(map, (s64 & ~(pde_mapped_size-1))); - spte = &spte[ptenum(s64)]; - epte = &spte[intel_btop(l64-s64)]; - pmap_remove_range(map, s64, spte, epte); + if (value_64bit(s64) || value_64bit(e64)) { + panic("pmap_remove addr overflow"); + } + + orig_s = s = (vm_offset_t)low32(s64); + e = (vm_offset_t)low32(e64); + + pde = pmap_pde(map, s); + + while (s < e) { + l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1); + if (l > e) + l = e; + if (*pde & INTEL_PTE_VALID) { + spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1))); + spte = &spte[ptenum(s)]; + epte = &spte[intel_btop(l-s)]; + pmap_remove_range(map, s, spte, epte); } - s64 = l64; + s = l; pde++; } - PMAP_UPDATE_TLBS(map, orig_s64, e64); + + PMAP_UPDATE_TLBS(map, orig_s, e); PMAP_READ_UNLOCK(map, spl); } @@ -1777,10 +1426,10 @@ pmap_page_protect( register pmap_t pmap; spl_t spl; boolean_t remove; - pmap_paddr_t phys; + pmap_paddr_t phys; assert(pn != vm_page_fictitious_addr); - + phys = (pmap_paddr_t)i386_ptob(pn); if (!valid_page(pn)) { /* * Not a managed page. @@ -1802,17 +1451,17 @@ pmap_page_protect( remove = TRUE; break; } - phys = (pmap_paddr_t)i386_ptob(pn); - pai = pa_index(phys); - pv_h = pai_to_pvh(pai); - /* * Lock the pmap system first, since we will be changing * several pmaps. */ + PMAP_WRITE_LOCK(spl); + pai = pa_index(phys); + pv_h = pai_to_pvh(pai); + /* * Walk down PV list, changing or removing all mappings. * We do not have to lock the pv_list because we have @@ -1820,95 +1469,93 @@ pmap_page_protect( */ if (pv_h->pmap != PMAP_NULL) { - prev = pv_e = pv_h; - - do { - register vm_map_offset_t vaddr; - - pmap = pv_e->pmap; - /* - * Lock the pmap to block pmap_extract and similar routines. - */ - simple_lock(&pmap->lock); - - vaddr = pv_e->va; - pte = pmap_pte(pmap, vaddr); - if(0 == pte) { - kprintf("pmap_page_protect pmap 0x%x pn 0x%x vaddr 0x%llx\n",pmap, pn, vaddr); - panic("pmap_page_protect"); - } - /* - * Consistency checks. - */ - /* assert(*pte & INTEL_PTE_VALID); XXX */ - /* assert(pte_to_phys(*pte) == phys); */ - + prev = pv_e = pv_h; + do { + register vm_offset_t va; + pmap = pv_e->pmap; + /* + * Lock the pmap to block pmap_extract and similar routines. + */ + simple_lock(&pmap->lock); - /* - * Remove the mapping if new protection is NONE - * or if write-protecting a kernel mapping. - */ - if (remove || pmap == kernel_pmap) { - /* - * Remove the mapping, collecting any modify bits. - */ - pmap_store_pte(pte, *pte & ~INTEL_PTE_VALID); + { - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); + va = pv_e->va; + pte = pmap_pte(pmap, va); - pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED); + /* + * Consistency checks. 
+ */ + /* assert(*pte & INTEL_PTE_VALID); XXX */ + /* assert(pte_to_phys(*pte) == phys); */ - pmap_store_pte(pte, 0); + } + /* + * Remove the mapping if new protection is NONE + * or if write-protecting a kernel mapping. + */ + if (remove || pmap == kernel_pmap) { + /* + * Remove the mapping, collecting any modify bits. + */ + { + pmap_phys_attributes[pai] |= + *pte & (PHYS_MODIFIED|PHYS_REFERENCED); + *pte++ = 0; + PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); + } - //XXX breaks DEBUG build assert(pmap->stats.resident_count >= 1); - pmap->stats.resident_count--; + assert(pmap->stats.resident_count >= 1); + pmap->stats.resident_count--; - /* - * Remove the pv_entry. - */ - if (pv_e == pv_h) { - /* - * Fix up head later. - */ - pv_h->pmap = PMAP_NULL; - } - else { - /* - * Delete this entry. - */ - prev->next = pv_e->next; - PV_FREE(pv_e); - } - } else { - /* - * Write-protect. - */ - pmap_store_pte(pte, *pte & ~INTEL_PTE_WRITE); - - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); - /* - * Advance prev. - */ - prev = pv_e; - } + /* + * Remove the pv_entry. + */ + if (pv_e == pv_h) { + /* + * Fix up head later. + */ + pv_h->pmap = PMAP_NULL; + } + else { + /* + * Delete this entry. + */ + prev->next = pv_e->next; + PV_FREE(pv_e); + } + } + else { + /* + * Write-protect. + */ - simple_unlock(&pmap->lock); + *pte &= ~INTEL_PTE_WRITE; + pte++; + PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); + /* + * Advance prev. + */ + prev = pv_e; + } - } while ((pv_e = prev->next) != PV_ENTRY_NULL); + simple_unlock(&pmap->lock); - /* - * If pv_head mapping was removed, fix it up. - */ - if (pv_h->pmap == PMAP_NULL) { - pv_e = pv_h->next; + } while ((pv_e = prev->next) != PV_ENTRY_NULL); - if (pv_e != PV_ENTRY_NULL) { - *pv_h = *pv_e; - PV_FREE(pv_e); - } + /* + * If pv_head mapping was removed, fix it up. + */ + if (pv_h->pmap == PMAP_NULL) { + pv_e = pv_h->next; + if (pv_e != PV_ENTRY_NULL) { + *pv_h = *pv_e; + PV_FREE(pv_e); } + } } + PMAP_WRITE_UNLOCK(spl); } @@ -1936,89 +1583,64 @@ unsigned int pmap_disconnect( void pmap_protect( pmap_t map, - vm_map_offset_t sva, - vm_map_offset_t eva, + vm_offset_t s, + vm_offset_t e, vm_prot_t prot) { register pt_entry_t *pde; register pt_entry_t *spte, *epte; - vm_map_offset_t lva; - vm_map_offset_t orig_sva; + vm_offset_t l; spl_t spl; - boolean_t set_NX; + vm_offset_t orig_s = s; + if (map == PMAP_NULL) return; - if (prot == VM_PROT_NONE) { - pmap_remove(map, sva, eva); + /* + * Determine the new protection. 
+ */ + switch (prot) { + case VM_PROT_READ: + case VM_PROT_READ|VM_PROT_EXECUTE: + break; + case VM_PROT_READ|VM_PROT_WRITE: + case VM_PROT_ALL: + return; /* nothing to do */ + default: + pmap_remove(map, (addr64_t)s, (addr64_t)e); return; } - if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !map->nx_enabled ) - set_NX = FALSE; - else - set_NX = TRUE; - SPLVM(spl); simple_lock(&map->lock); - orig_sva = sva; - while (sva < eva) { - lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1); - if (lva > eva) - lva = eva; - pde = pmap_pde(map, sva); - if (pde && (*pde & INTEL_PTE_VALID)) { - spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1))); - spte = &spte[ptenum(sva)]; - epte = &spte[intel_btop(lva-sva)]; + pde = pmap_pde(map, s); + while (s < e) { + l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1); + if (l > e) + l = e; + if (*pde & INTEL_PTE_VALID) { + spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1))); + spte = &spte[ptenum(s)]; + epte = &spte[intel_btop(l-s)]; while (spte < epte) { - if (*spte & INTEL_PTE_VALID) { - - if (prot & VM_PROT_WRITE) - pmap_store_pte(spte, *spte | INTEL_PTE_WRITE); - else - pmap_store_pte(spte, *spte & ~INTEL_PTE_WRITE); - - if (set_NX == TRUE) - pmap_store_pte(spte, *spte | INTEL_PTE_NX); - else - pmap_store_pte(spte, *spte & ~INTEL_PTE_NX); - - } + if (*spte & INTEL_PTE_VALID) + *spte &= ~INTEL_PTE_WRITE; spte++; } } - sva = lva; + s = l; pde++; } - PMAP_UPDATE_TLBS(map, orig_sva, eva); + + PMAP_UPDATE_TLBS(map, orig_s, e); simple_unlock(&map->lock); SPLX(spl); } -/* Map a (possibly) autogenned block */ -void -pmap_map_block( - pmap_t pmap, - addr64_t va, - ppnum_t pa, - uint32_t size, - vm_prot_t prot, - int attr, - __unused unsigned int flags) -{ - uint32_t page; - - for (page = 0; page < size; page++) { - pmap_enter(pmap, va, pa, prot, attr, TRUE); - va += PAGE_SIZE; - pa++; - } -} /* @@ -2036,7 +1658,7 @@ pmap_map_block( void pmap_enter( register pmap_t pmap, - vm_map_offset_t vaddr, + vm_offset_t v, ppnum_t pn, vm_prot_t prot, unsigned int flags, @@ -2049,26 +1671,19 @@ pmap_enter( pt_entry_t template; spl_t spl; pmap_paddr_t old_pa; - pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn); - boolean_t need_tlbflush = FALSE; - boolean_t set_NX; + pmap_paddr_t pa = (pmap_paddr_t)i386_ptob(pn); - XPR(0x80000000, "%x/%x: pmap_enter %x/%qx/%x\n", + XPR(0x80000000, "%x/%x: pmap_enter %x/%x/%x\n", current_thread(), current_thread(), - pmap, vaddr, pn); + pmap, v, pn); assert(pn != vm_page_fictitious_addr); if (pmap_debug) - printf("pmap(%qx, %x)\n", vaddr, pn); + printf("pmap(%x, %x)\n", v, pn); if (pmap == PMAP_NULL) return; - if ( (prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled ) - set_NX = FALSE; - else - set_NX = TRUE; - /* * Must allocate a new pvlist entry while we're unlocked; * zalloc may cause pageout (which will lock the pmap system). @@ -2086,13 +1701,13 @@ pmap_enter( * pages to map one VM page. */ - while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) { + while ((pte = pmap_pte(pmap, v)) == PT_ENTRY_NULL) { /* * Must unlock to expand the pmap. 
*/ PMAP_READ_UNLOCK(pmap, spl); - pmap_expand(pmap, vaddr); /* going to grow pde level page(s) */ + pmap_expand(pmap, v); PMAP_READ_LOCK(pmap, spl); } @@ -2108,7 +1723,7 @@ pmap_enter( template = pa_to_pte(pa) | INTEL_PTE_VALID; - if(VM_MEM_NOT_CACHEABLE == (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) { + if(flags & VM_MEM_NOT_CACHEABLE) { if(!(flags & VM_MEM_GUARDED)) template |= INTEL_PTE_PTA; template |= INTEL_PTE_NCACHE; @@ -2118,10 +1733,6 @@ pmap_enter( template |= INTEL_PTE_USER; if (prot & VM_PROT_WRITE) template |= INTEL_PTE_WRITE; - - if (set_NX == TRUE) - template |= INTEL_PTE_NX; - if (wired) { template |= INTEL_PTE_WIRED; if (!iswired(*pte)) @@ -2136,11 +1747,9 @@ pmap_enter( if (*pte & INTEL_PTE_MOD) template |= INTEL_PTE_MOD; + WRITE_PTE(pte, template) + pte++; - pmap_store_pte(pte, template); - pte++; - - need_tlbflush = TRUE; goto Done; } @@ -2151,7 +1760,6 @@ pmap_enter( * 2) Add pvlist entry for new mapping * 3) Enter new mapping. * - * SHARING FAULTS IS HORRIBLY BROKEN * SHARING_FAULTS complicates this slightly in that it cannot * replace the mapping, but must remove it (because adding the * pvlist entry for the new mapping may remove others), and @@ -2165,6 +1773,12 @@ pmap_enter( if (old_pa != (pmap_paddr_t) 0) { + +#if DEBUG_PTE_PAGE + if (pmap != kernel_pmap) + ptep_check(get_pte_page(pte)); +#endif /* DEBUG_PTE_PAGE */ + /* * Don't do anything to pages outside valid memory here. * Instead convince the code that enters a new mapping @@ -2185,8 +1799,8 @@ pmap_enter( pmap_phys_attributes[pai] |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED); + WRITE_PTE(pte, 0) - pmap_store_pte(pte, 0); /* * Remove the mapping from the pvlist for * this physical page. @@ -2198,8 +1812,7 @@ pmap_enter( if (pv_h->pmap == PMAP_NULL) { panic("pmap_enter: null pv_list!"); } - - if (pv_h->va == vaddr && pv_h->pmap == pmap) { + if (pv_h->va == v && pv_h->pmap == pmap) { /* * Header is the pv_entry. Copy the next one * to header and free the next one (we cannot @@ -2221,7 +1834,7 @@ pmap_enter( if ((cur = prev->next) == PV_ENTRY_NULL) { panic("pmap_enter: mapping not in pv_list!"); } - } while (cur->va != vaddr || cur->pmap != pmap); + } while (cur->va != v || cur->pmap != pmap); prev->next = cur->next; pv_e = cur; } @@ -2236,13 +1849,13 @@ pmap_enter( * one). Do removal part of accounting. */ old_pa = (pmap_paddr_t) 0; - + assert(pmap->stats.resident_count >= 1); + pmap->stats.resident_count--; if (iswired(*pte)) { assert(pmap->stats.wired_count >= 1); pmap->stats.wired_count--; } } - need_tlbflush = TRUE; } @@ -2256,7 +1869,7 @@ pmap_enter( pai = pa_index(pa); -#if SHARING_FAULTS /* this is horribly broken , do not enable */ +#if SHARING_FAULTS RetryPvList: /* * We can return here from the sharing fault code below @@ -2271,7 +1884,7 @@ RetryPvList: /* * No mappings yet */ - pv_h->va = vaddr; + pv_h->va = v; pv_h->pmap = pmap; pv_h->next = PV_ENTRY_NULL; } @@ -2284,13 +1897,13 @@ RetryPvList: */ pv_entry_t e = pv_h; while (e != PV_ENTRY_NULL) { - if (e->pmap == pmap && e->va == vaddr) + if (e->pmap == pmap && e->va == v) panic("pmap_enter: already in pv_list"); e = e->next; } } #endif /* DEBUG */ -#if SHARING_FAULTS /* broken, do not enable */ +#if SHARING_FAULTS { /* * do sharing faults. 
@@ -2316,7 +1929,6 @@ RetryPvList: */ pmap_remove_range(pmap, e->va, opte, opte + 1); - PMAP_UPDATE_TLBS(pmap, e->va, e->va + PAGE_SIZE); /* @@ -2370,7 +1982,7 @@ RetryPvList: if (logit) { pma = &pmap_aliasbuf[pmap_alias_index]; pma->pmap = pmap; - pma->va = vaddr; + pma->va = v; pma->rpc = rpc; pma->cookie = PMAP_ALIAS_COOKIE; if (++pmap_alias_index >= PMAP_ALIAS_MAX) @@ -2390,7 +2002,7 @@ RetryPvList: panic("pmap no pv_e's"); } } - pv_e->va = vaddr; + pv_e->va = v; pv_e->pmap = pmap; pv_e->next = pv_h->next; pv_h->next = pv_e; @@ -2400,18 +2012,13 @@ RetryPvList: pv_e = PV_ENTRY_NULL; } UNLOCK_PVH(pai); - - /* - * only count the mapping - * for 'managed memory' - */ - pmap->stats.resident_count++; } /* - * Step 3) Enter the mapping. + * Step 3) Enter and count the mapping. */ + pmap->stats.resident_count++; /* * Build a template to speed up entering - @@ -2429,22 +2036,18 @@ RetryPvList: template |= INTEL_PTE_USER; if (prot & VM_PROT_WRITE) template |= INTEL_PTE_WRITE; - - if (set_NX == TRUE) - template |= INTEL_PTE_NX; - if (wired) { template |= INTEL_PTE_WIRED; pmap->stats.wired_count++; } - pmap_store_pte(pte, template); + + WRITE_PTE(pte, template) Done: - if (need_tlbflush == TRUE) - PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE); + PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE); if (pv_e != PV_ENTRY_NULL) { - PV_FREE(pv_e); + PV_FREE(pv_e); } PMAP_READ_UNLOCK(pmap, spl); @@ -2460,7 +2063,7 @@ Done: void pmap_change_wiring( register pmap_t map, - vm_map_offset_t vaddr, + vm_offset_t v, boolean_t wired) { register pt_entry_t *pte; @@ -2473,7 +2076,7 @@ pmap_change_wiring( */ PMAP_READ_LOCK(map, spl); - if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL) + if ((pte = pmap_pte(map, v)) == PT_ENTRY_NULL) panic("pmap_change_wiring: pte missing"); if (wired && !iswired(*pte)) { @@ -2481,8 +2084,7 @@ pmap_change_wiring( * wiring down mapping */ map->stats.wired_count++; - pmap_store_pte(pte, *pte | INTEL_PTE_WIRED); - pte++; + *pte++ |= INTEL_PTE_WIRED; } else if (!wired && iswired(*pte)) { /* @@ -2490,8 +2092,7 @@ pmap_change_wiring( */ assert(map->stats.wired_count >= 1); map->stats.wired_count--; - pmap_store_pte(pte, *pte & ~INTEL_PTE_WIRED); - pte++; + *pte++ &= ~INTEL_PTE_WIRED; } PMAP_READ_UNLOCK(map, spl); @@ -2506,18 +2107,18 @@ ppnum_t pmap_find_phys(pmap_t pmap, addr64_t va) { pt_entry_t *ptp; + vm_offset_t a32; ppnum_t ppn; - mp_disable_preemption(); - - ptp = pmap_pte(pmap, va); + if (value_64bit(va)) + panic("pmap_find_phys 64 bit value"); + a32 = (vm_offset_t) low32(va); + ptp = pmap_pte(pmap, a32); if (PT_ENTRY_NULL == ptp) { ppn = 0; } else { ppn = (ppnum_t) i386_btop(pte_to_pa(*ptp)); } - mp_enable_preemption(); - return ppn; } @@ -2534,40 +2135,54 @@ pmap_find_phys(pmap_t pmap, addr64_t va) vm_offset_t pmap_extract( register pmap_t pmap, - vm_map_offset_t vaddr) + vm_offset_t va) { - ppnum_t ppn; - vm_offset_t paddr; + ppnum_t ppn; + vm_offset_t vaddr; - paddr = (vm_offset_t)0; - ppn = pmap_find_phys(pmap, vaddr); - if (ppn) { - paddr = ((vm_offset_t)i386_ptob(ppn)) | (vaddr & INTEL_OFFMASK); - } - return (paddr); + vaddr = (vm_offset_t)0; + ppn = pmap_find_phys(pmap, (addr64_t)va); + if (ppn) { + vaddr = ((vm_offset_t)i386_ptob(ppn)) | (va & INTEL_OFFMASK); + } + return (vaddr); } + +/* + * Routine: pmap_expand + * + * Expands a pmap to be able to map the specified virtual address. + * + * Allocates new virtual memory for the P0 or P1 portion of the + * pmap, then re-maps the physical pages that were in the old + * pmap to be in the new pmap. 
+ * + * Must be called with the pmap system and the pmap unlocked, + * since these must be unlocked to use vm_allocate or vm_deallocate. + * Thus it must be called in a loop that checks whether the map + * has been expanded enough. + * (We won't loop forever, since page tables aren't shrunk.) + */ void -pmap_expand_pml4( - pmap_t map, - vm_map_offset_t vaddr) +pmap_expand( + register pmap_t map, + register vm_offset_t v) { + pt_entry_t *pdp; register vm_page_t m; register pmap_paddr_t pa; - uint64_t i; + register int i; spl_t spl; ppnum_t pn; - pml4_entry_t *pml4p; - - if (kernel_pmap == map) panic("expand kernel pml4"); - spl = splhigh(); - pml4p = pmap64_pml4(map, vaddr); - splx(spl); - if (PML4_ENTRY_NULL == pml4p) panic("pmap_expand_pml4 no pml4p"); + if (map == kernel_pmap) { + pmap_growkernel(v); + return; + } /* - * Allocate a VM page for the pml4 page + * Allocate a VM page for the level 2 page table entries. */ while ((m = vm_page_grab()) == VM_PAGE_NULL) VM_PAGE_WAIT(); @@ -2578,25 +2193,14 @@ pmap_expand_pml4( */ pn = m->phys_page; pa = i386_ptob(pn); - i = pml4idx(map, vaddr); - - vm_object_lock(map->pm_obj_pml4); -#if 0 /* DEBUG */ - if (0 != vm_page_lookup(map->pm_obj_pml4, (vm_object_offset_t)i)) { - kprintf("pmap_expand_pml4: obj_pml4 not empty, pmap 0x%x pm_obj_pml4 0x%x vaddr 0x%llx i 0x%llx\n", - map, map->pm_obj_pml4, vaddr, i); - } -#endif - vm_page_insert(m, map->pm_obj_pml4, (vm_object_offset_t)i); - + i = pdenum(map, v); + vm_object_lock(map->pm_obj); + vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i); vm_page_lock_queues(); vm_page_wire(m); - - vm_page_unlock_queues(); - vm_object_unlock(map->pm_obj_pml4); inuse_ptepages_count++; - map->stats.resident_count++; - map->stats.wired_count++; + vm_object_unlock(map->pm_obj); + vm_page_unlock_queues(); /* * Zero the page. @@ -2607,17 +2211,14 @@ pmap_expand_pml4( /* * See if someone else expanded us first */ - if (pmap64_pdpt(map, vaddr) != PDPT_ENTRY_NULL) { + if (pmap_pte(map, v) != PT_ENTRY_NULL) { PMAP_READ_UNLOCK(map, spl); - vm_object_lock(map->pm_obj_pml4); + vm_object_lock(map->pm_obj); vm_page_lock_queues(); vm_page_free(m); inuse_ptepages_count--; - map->stats.resident_count--; - map->stats.wired_count--; - vm_page_unlock_queues(); - vm_object_unlock(map->pm_obj_pml4); + vm_object_unlock(map->pm_obj); return; } @@ -2627,292 +2228,95 @@ pmap_expand_pml4( * set several page directory entries. */ - pml4p = pmap64_pml4(map, vaddr); /* refetch under lock */ - - pmap_store_pte(pml4p, pa_to_pte(pa) - | INTEL_PTE_VALID - | INTEL_PTE_USER - | INTEL_PTE_WRITE); + pdp = &map->dirbase[pdenum(map, v)]; + *pdp = pa_to_pte(pa) + | INTEL_PTE_VALID + | INTEL_PTE_USER + | INTEL_PTE_WRITE; PMAP_READ_UNLOCK(map, spl); - return; - } +/* + * Copy the range specified by src_addr/len + * from the source map to the range dst_addr/len + * in the destination map. + * + * This routine is only advisory and need not do anything. 
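pmap_expand, shown above, is meant to be called in a loop and to tolerate a racing expander: it grabs and zeroes a page with no locks held, then rechecks under the lock and throws the page away if someone else got there first. A compressed sketch of that double-checked pattern, with a pthread mutex and calloc standing in for the pmap lock and vm_page_grab.

/* Sketch only: double-checked page-table allocation in the style of pmap_expand. */
#include <pthread.h>
#include <stdlib.h>

static pthread_mutex_t map_lock = PTHREAD_MUTEX_INITIALIZER;

#define NPDES 16
static void *pde_table[NPDES];          /* one slot per page-directory entry */

/* Ensure pde_table[i] points at a zeroed "page table" page. */
static void toy_expand(int i)
{
        for (;;) {
                pthread_mutex_lock(&map_lock);
                if (pde_table[i] != NULL) {          /* already mapped: done */
                        pthread_mutex_unlock(&map_lock);
                        return;
                }
                pthread_mutex_unlock(&map_lock);

                /* Allocate and zero outside the lock, as pmap_expand does. */
                void *page = calloc(1, 4096);
                if (page == NULL)
                        continue;                    /* VM_PAGE_WAIT analogue */

                pthread_mutex_lock(&map_lock);
                if (pde_table[i] != NULL) {          /* someone expanded us first */
                        pthread_mutex_unlock(&map_lock);
                        free(page);
                        return;
                }
                pde_table[i] = page;                 /* publish the new table */
                pthread_mutex_unlock(&map_lock);
                return;
        }
}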
+ */ +#if 0 void -pmap_expand_pdpt( - pmap_t map, - vm_map_offset_t vaddr) +pmap_copy( + pmap_t dst_pmap, + pmap_t src_pmap, + vm_offset_t dst_addr, + vm_size_t len, + vm_offset_t src_addr) { - register vm_page_t m; - register pmap_paddr_t pa; - uint64_t i; - spl_t spl; - ppnum_t pn; - pdpt_entry_t *pdptp; - - if (kernel_pmap == map) panic("expand kernel pdpt"); - - spl = splhigh(); - while ((pdptp = pmap64_pdpt(map, vaddr)) == PDPT_ENTRY_NULL) { - splx(spl); - pmap_expand_pml4(map, vaddr); /* need room for another pdpt entry */ - spl = splhigh(); - } - splx(spl); - +#ifdef lint + dst_pmap++; src_pmap++; dst_addr++; len++; src_addr++; +#endif /* lint */ +} +#endif/* 0 */ - /* - * Allocate a VM page for the pdpt page - */ - while ((m = vm_page_grab()) == VM_PAGE_NULL) - VM_PAGE_WAIT(); +/* + * pmap_sync_page_data_phys(ppnum_t pa) + * + * Invalidates all of the instruction cache on a physical page and + * pushes any dirty data from the data cache for the same physical page + * Not required in i386. + */ +void +pmap_sync_page_data_phys(__unused ppnum_t pa) +{ + return; +} - /* - * put the page into the pmap's obj list so it - * can be found later. - */ - pn = m->phys_page; - pa = i386_ptob(pn); - i = pdptidx(map, vaddr); +/* + * pmap_sync_page_attributes_phys(ppnum_t pa) + * + * Write back and invalidate all cachelines on a physical page. + */ +void +pmap_sync_page_attributes_phys(ppnum_t pa) +{ + cache_flush_page_phys(pa); +} - vm_object_lock(map->pm_obj_pdpt); -#if 0 /* DEBUG */ - if (0 != vm_page_lookup(map->pm_obj_pdpt, (vm_object_offset_t)i)) { - kprintf("pmap_expand_pdpt: obj_pdpt not empty, pmap 0x%x pm_obj_pdpt 0x%x vaddr 0x%llx i 0x%llx\n", - map, map->pm_obj_pdpt, vaddr, i); - } -#endif - vm_page_insert(m, map->pm_obj_pdpt, (vm_object_offset_t)i); +int collect_ref; +int collect_unref; - vm_page_lock_queues(); - vm_page_wire(m); +/* + * Routine: pmap_collect + * Function: + * Garbage collects the physical map system for + * pages which are no longer used. + * Success need not be guaranteed -- that is, there + * may well be pages which are not referenced, but + * others may be collected. + * Usage: + * Called by the pageout daemon when pages are scarce. + */ +void +pmap_collect( + pmap_t p) +{ + register pt_entry_t *pdp, *ptp; + pt_entry_t *eptp; + int wired; + spl_t spl; - vm_page_unlock_queues(); - vm_object_unlock(map->pm_obj_pdpt); - inuse_ptepages_count++; - map->stats.resident_count++; - map->stats.wired_count++; + if (p == PMAP_NULL) + return; - /* - * Zero the page. - */ - pmap_zero_page(pn); + if (p == kernel_pmap) + return; - PMAP_READ_LOCK(map, spl); /* - * See if someone else expanded us first + * Garbage collect map. */ - if (pmap64_pde(map, vaddr) != PD_ENTRY_NULL) { - PMAP_READ_UNLOCK(map, spl); - vm_object_lock(map->pm_obj_pdpt); - vm_page_lock_queues(); - vm_page_free(m); - inuse_ptepages_count--; - map->stats.resident_count--; - map->stats.wired_count--; - - vm_page_unlock_queues(); - vm_object_unlock(map->pm_obj_pdpt); - return; - } - - /* - * Set the page directory entry for this page table. - * If we have allocated more than one hardware page, - * set several page directory entries. - */ - - pdptp = pmap64_pdpt(map, vaddr); /* refetch under lock */ - - pmap_store_pte(pdptp, pa_to_pte(pa) - | INTEL_PTE_VALID - | INTEL_PTE_USER - | INTEL_PTE_WRITE); - - PMAP_READ_UNLOCK(map, spl); - - return; - -} - - - -/* - * Routine: pmap_expand - * - * Expands a pmap to be able to map the specified virtual address. 
- * - * Allocates new virtual memory for the P0 or P1 portion of the - * pmap, then re-maps the physical pages that were in the old - * pmap to be in the new pmap. - * - * Must be called with the pmap system and the pmap unlocked, - * since these must be unlocked to use vm_allocate or vm_deallocate. - * Thus it must be called in a loop that checks whether the map - * has been expanded enough. - * (We won't loop forever, since page tables aren't shrunk.) - */ -void -pmap_expand( - pmap_t map, - vm_map_offset_t vaddr) -{ - pt_entry_t *pdp; - register vm_page_t m; - register pmap_paddr_t pa; - uint64_t i; - spl_t spl; - ppnum_t pn; - - /* - * if not the kernel map (while we are still compat kernel mode) - * and we are 64 bit, propagate expand upwards - */ - - if (cpu_64bit && (map != kernel_pmap)) { - spl = splhigh(); - while ((pdp = pmap64_pde(map, vaddr)) == PD_ENTRY_NULL) { - splx(spl); - pmap_expand_pdpt(map, vaddr); /* need room for another pde entry */ - spl = splhigh(); - } - splx(spl); - } else { - pdp = pmap_pde(map, vaddr); - } - - - /* - * Allocate a VM page for the pde entries. - */ - while ((m = vm_page_grab()) == VM_PAGE_NULL) - VM_PAGE_WAIT(); - - /* - * put the page into the pmap's obj list so it - * can be found later. - */ - pn = m->phys_page; - pa = i386_ptob(pn); - i = pdeidx(map, vaddr); - - vm_object_lock(map->pm_obj); -#if 0 /* DEBUG */ - if (0 != vm_page_lookup(map->pm_obj, (vm_object_offset_t)i)) { - kprintf("pmap_expand: obj not empty, pmap 0x%x pm_obj 0x%x vaddr 0x%llx i 0x%llx\n", - map, map->pm_obj, vaddr, i); - } -#endif - vm_page_insert(m, map->pm_obj, (vm_object_offset_t)i); - - vm_page_lock_queues(); - vm_page_wire(m); - inuse_ptepages_count++; - - vm_page_unlock_queues(); - vm_object_unlock(map->pm_obj); - - /* - * Zero the page. - */ - pmap_zero_page(pn); - - PMAP_READ_LOCK(map, spl); - /* - * See if someone else expanded us first - */ - if (pmap_pte(map, vaddr) != PT_ENTRY_NULL) { - PMAP_READ_UNLOCK(map, spl); - vm_object_lock(map->pm_obj); - - vm_page_lock_queues(); - vm_page_free(m); - inuse_ptepages_count--; - - vm_page_unlock_queues(); - vm_object_unlock(map->pm_obj); - return; - } - - pdp = pmap_pde(map, vaddr); /* refetch while locked */ - - /* - * Set the page directory entry for this page table. - * If we have allocated more than one hardware page, - * set several page directory entries. - */ - - pmap_store_pte(pdp, pa_to_pte(pa) - | INTEL_PTE_VALID - | INTEL_PTE_USER - | INTEL_PTE_WRITE); - - - PMAP_READ_UNLOCK(map, spl); - - return; -} - - -/* - * pmap_sync_page_data_phys(ppnum_t pa) - * - * Invalidates all of the instruction cache on a physical page and - * pushes any dirty data from the data cache for the same physical page - * Not required in i386. - */ -void -pmap_sync_page_data_phys(__unused ppnum_t pa) -{ - return; -} - -/* - * pmap_sync_page_attributes_phys(ppnum_t pa) - * - * Write back and invalidate all cachelines on a physical page. - */ -void -pmap_sync_page_attributes_phys(ppnum_t pa) -{ - cache_flush_page_phys(pa); -} - -int collect_ref; -int collect_unref; - -/* - * Routine: pmap_collect - * Function: - * Garbage collects the physical map system for - * pages which are no longer used. - * Success need not be guaranteed -- that is, there - * may well be pages which are not referenced, but - * others may be collected. - * Usage: - * Called by the pageout daemon when pages are scarce. 
- */ -void -pmap_collect( - pmap_t p) -{ - register pt_entry_t *pdp, *ptp; - pt_entry_t *eptp; - int wired; - spl_t spl; - - if (p == PMAP_NULL) - return; - - if (p == kernel_pmap) - return; - - /* - * Garbage collect map. - */ - PMAP_READ_LOCK(p, spl); + PMAP_READ_LOCK(p, spl); for (pdp = (pt_entry_t *)p->dirbase; pdp < (pt_entry_t *)&p->dirbase[(UMAXPTDI+1)]; @@ -2920,7 +2324,7 @@ pmap_collect( { if (*pdp & INTEL_PTE_VALID) { if(*pdp & INTEL_PTE_REF) { - pmap_store_pte(pdp, *pdp & ~INTEL_PTE_REF); + *pdp &= ~INTEL_PTE_REF; collect_ref++; } else { collect_unref++; @@ -2953,7 +2357,7 @@ pmap_collect( /* * Invalidate the page directory pointer. */ - pmap_store_pte(pdp, 0x0); + *pdp = 0x0; PMAP_READ_UNLOCK(p, spl); @@ -2980,21 +2384,32 @@ pmap_collect( } } PMAP_UPDATE_TLBS(p, VM_MIN_ADDRESS, VM_MAX_ADDRESS); - PMAP_READ_UNLOCK(p, spl); return; } +/* + * Routine: pmap_kernel + * Function: + * Returns the physical map handle for the kernel. + */ +#if 0 +pmap_t +pmap_kernel(void) +{ + return (kernel_pmap); +} +#endif/* 0 */ void pmap_copy_page(src, dst) ppnum_t src; ppnum_t dst; { - bcopy_phys((addr64_t)i386_ptob(src), - (addr64_t)i386_ptob(dst), - PAGE_SIZE); + bcopy_phys((addr64_t)i386_ptob(src), + (addr64_t)i386_ptob(dst), + PAGE_SIZE); } @@ -3015,8 +2430,8 @@ pmap_copy_page(src, dst) void pmap_pageable( __unused pmap_t pmap, - __unused vm_map_offset_t start_addr, - __unused vm_map_offset_t end_addr, + __unused vm_offset_t start_addr, + __unused vm_offset_t end_addr, __unused boolean_t pageable) { #ifdef lint @@ -3076,7 +2491,7 @@ phys_attribute_clear( simple_lock(&pmap->lock); { - register vm_map_offset_t va; + register vm_offset_t va; va = pv_e->va; pte = pmap_pte(pmap, va); @@ -3093,8 +2508,7 @@ phys_attribute_clear( * Clear modify or reference bits. */ - pmap_store_pte(pte, *pte & ~bits); - pte++; + *pte++ &= ~bits; PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE); } simple_unlock(&pmap->lock); @@ -3131,24 +2545,15 @@ phys_attribute_test( return (FALSE); } - phys = i386_ptob(pn); - pai = pa_index(phys); - /* - * super fast check... if bits already collected - * no need to take any locks... - * if not set, we need to recheck after taking - * the lock in case they got pulled in while - * we were waiting for the lock - */ - if (pmap_phys_attributes[pai] & bits) - return (TRUE); - pv_h = pai_to_pvh(pai); - /* * Lock the pmap system first, since we will be checking * several pmaps. 
*/ + PMAP_WRITE_LOCK(spl); + phys = i386_ptob(pn); + pai = pa_index(phys); + pv_h = pai_to_pvh(pai); if (pmap_phys_attributes[pai] & bits) { PMAP_WRITE_UNLOCK(spl); @@ -3173,7 +2578,7 @@ phys_attribute_test( simple_lock(&pmap->lock); { - register vm_map_offset_t va; + register vm_offset_t va; va = pv_e->va; pte = pmap_pte(pmap, va); @@ -3339,49 +2744,45 @@ pmap_clear_refmod(ppnum_t pa, unsigned int mask) void pmap_modify_pages( pmap_t map, - vm_map_offset_t sva, - vm_map_offset_t eva) + vm_offset_t s, + vm_offset_t e) { spl_t spl; register pt_entry_t *pde; register pt_entry_t *spte, *epte; - vm_map_offset_t lva; - vm_map_offset_t orig_sva; + vm_offset_t l; + vm_offset_t orig_s = s; if (map == PMAP_NULL) return; PMAP_READ_LOCK(map, spl); - orig_sva = sva; - while (sva && sva < eva) { - lva = (sva + pde_mapped_size) & ~(pde_mapped_size-1); - if (lva > eva) - lva = eva; - pde = pmap_pde(map, sva); - if (pde && (*pde & INTEL_PTE_VALID)) { - spte = (pt_entry_t *)pmap_pte(map, (sva & ~(pde_mapped_size-1))); - if (lva) { - spte = &spte[ptenum(sva)]; - epte = &spte[intel_btop(lva-sva)]; + pde = pmap_pde(map, s); + while (s && s < e) { + l = (s + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE-1); + if (l > e) + l = e; + if (*pde & INTEL_PTE_VALID) { + spte = (pt_entry_t *)pmap_pte(map, (s & ~(PDE_MAPPED_SIZE-1))); + if (l) { + spte = &spte[ptenum(s)]; + epte = &spte[intel_btop(l-s)]; } else { - epte = &spte[intel_btop(pde_mapped_size)]; - spte = &spte[ptenum(sva)]; + epte = &spte[intel_btop(PDE_MAPPED_SIZE)]; + spte = &spte[ptenum(s)]; } while (spte < epte) { if (*spte & INTEL_PTE_VALID) { - pmap_store_pte(spte, *spte - | INTEL_PTE_MOD - | INTEL_PTE_WRITE); + *spte |= (INTEL_PTE_MOD | INTEL_PTE_WRITE); } spte++; } } - sva = lva; + s = l; pde++; } - PMAP_UPDATE_TLBS(map, orig_sva, eva); - + PMAP_UPDATE_TLBS(map, orig_s, e); PMAP_READ_UNLOCK(map, spl); } @@ -3401,6 +2802,210 @@ flush_dcache(__unused vm_offset_t addr, return; } +/* +* TLB Coherence Code (TLB "shootdown" code) +* +* Threads that belong to the same task share the same address space and +* hence share a pmap. However, they may run on distinct cpus and thus +* have distinct TLBs that cache page table entries. In order to guarantee +* the TLBs are consistent, whenever a pmap is changed, all threads that +* are active in that pmap must have their TLB updated. To keep track of +* this information, the set of cpus that are currently using a pmap is +* maintained within each pmap structure (cpus_using). Pmap_activate() and +* pmap_deactivate add and remove, respectively, a cpu from this set. +* Since the TLBs are not addressable over the bus, each processor must +* flush its own TLB; a processor that needs to invalidate another TLB +* needs to interrupt the processor that owns that TLB to signal the +* update. +* +* Whenever a pmap is updated, the lock on that pmap is locked, and all +* cpus using the pmap are signaled to invalidate. All threads that need +* to activate a pmap must wait for the lock to clear to await any updates +* in progress before using the pmap. They must ACQUIRE the lock to add +* their cpu to the cpus_using set. An implicit assumption made +* throughout the TLB code is that all kernel code that runs at or higher +* than splvm blocks out update interrupts, and that such code does not +* touch pageable pages. +* +* A shootdown interrupt serves another function besides signaling a +* processor to invalidate. 
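The cpus_active / cpus_using bookkeeping described in the comment above is one bit per processor in a word. A standalone sketch of the set operations involved; the kernel uses i_bit_set/i_bit_clear and atomic memory operations, whereas these are plain, single-threaded stand-ins.

/* Sketch only: per-cpu bitmask bookkeeping for TLB shootdown. */
#include <stdint.h>

typedef uint32_t cpu_set;          /* one bit per cpu, as in the comment above */

static cpu_set cpus_active;        /* cpus that may currently use some pmap */

static void cpu_mark_active(int cpu)   { cpus_active |=  (cpu_set)1 << cpu; }
static void cpu_mark_inactive(int cpu) { cpus_active &= ~((cpu_set)1 << cpu); }

/* cpus other than "self" that are both using the pmap and active,
 * i.e. the set the signalling processor has to wait for */
static cpu_set cpus_to_wait_for(cpu_set pmap_cpus_using, int self)
{
        cpu_set mask = (cpu_set)1 << self;
        return pmap_cpus_using & cpus_active & ~mask;
}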
The interrupt routine (pmap_update_interrupt) +* waits for the both the pmap lock (and the kernel pmap lock) to clear, +* preventing user code from making implicit pmap updates while the +* sending processor is performing its update. (This could happen via a +* user data write reference that turns on the modify bit in the page +* table). It must wait for any kernel updates that may have started +* concurrently with a user pmap update because the IPC code +* changes mappings. +* Spinning on the VALUES of the locks is sufficient (rather than +* having to acquire the locks) because any updates that occur subsequent +* to finding the lock unlocked will be signaled via another interrupt. +* (This assumes the interrupt is cleared before the low level interrupt code +* calls pmap_update_interrupt()). +* +* The signaling processor must wait for any implicit updates in progress +* to terminate before continuing with its update. Thus it must wait for an +* acknowledgement of the interrupt from each processor for which such +* references could be made. For maintaining this information, a set +* cpus_active is used. A cpu is in this set if and only if it can +* use a pmap. When pmap_update_interrupt() is entered, a cpu is removed from +* this set; when all such cpus are removed, it is safe to update. +* +* Before attempting to acquire the update lock on a pmap, a cpu (A) must +* be at least at the priority of the interprocessor interrupt +* (splip<=splvm). Otherwise, A could grab a lock and be interrupted by a +* kernel update; it would spin forever in pmap_update_interrupt() trying +* to acquire the user pmap lock it had already acquired. Furthermore A +* must remove itself from cpus_active. Otherwise, another cpu holding +* the lock (B) could be in the process of sending an update signal to A, +* and thus be waiting for A to remove itself from cpus_active. If A is +* spinning on the lock at priority this will never happen and a deadlock +* will result. +*/ + +/* + * Signal another CPU that it must flush its TLB + */ +void +signal_cpus( + cpu_set use_list, + pmap_t pmap, + vm_offset_t start_addr, + vm_offset_t end_addr) +{ + register int which_cpu, j; + register pmap_update_list_t update_list_p; + + while ((which_cpu = ffs((unsigned long)use_list)) != 0) { + which_cpu -= 1; /* convert to 0 origin */ + + update_list_p = cpu_update_list(which_cpu); + simple_lock(&update_list_p->lock); + + j = update_list_p->count; + if (j >= UPDATE_LIST_SIZE) { + /* + * list overflowed. Change last item to + * indicate overflow. 
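signal_cpus, continued below, appends a (pmap, start, end) triple to a small per-cpu list; when the list is full it widens the last slot into a flush-everything request rather than dropping the update. A sketch of that overflow policy with a locally defined list (NULL stands in for the kernel-pmap marker used by the real code).

/* Sketch only: bounded TLB-update list with overflow-to-full-flush. */
#include <stddef.h>

#define TOY_UPDATE_LIST_SIZE 4

struct update_item {
        void         *pmap;
        unsigned long start, end;
};

struct update_list {
        int                count;
        struct update_item item[TOY_UPDATE_LIST_SIZE];
};

#define WHOLE_SPACE_START 0UL
#define WHOLE_SPACE_END   (~0UL)     /* stand-ins for VM_MIN/VM_MAX addresses */

static void queue_update(struct update_list *l, void *pmap,
                         unsigned long s, unsigned long e)
{
        if (l->count >= TOY_UPDATE_LIST_SIZE) {
                /* overflowed: widen the last entry to "flush everything" */
                l->item[TOY_UPDATE_LIST_SIZE - 1].pmap  = NULL;
                l->item[TOY_UPDATE_LIST_SIZE - 1].start = WHOLE_SPACE_START;
                l->item[TOY_UPDATE_LIST_SIZE - 1].end   = WHOLE_SPACE_END;
        } else {
                l->item[l->count].pmap  = pmap;
                l->item[l->count].start = s;
                l->item[l->count].end   = e;
                l->count++;
        }
}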
+ */ + update_list_p->item[UPDATE_LIST_SIZE-1].pmap = kernel_pmap; + update_list_p->item[UPDATE_LIST_SIZE-1].start = VM_MIN_ADDRESS; + update_list_p->item[UPDATE_LIST_SIZE-1].end = VM_MAX_KERNEL_ADDRESS; + } + else { + update_list_p->item[j].pmap = pmap; + update_list_p->item[j].start = start_addr; + update_list_p->item[j].end = end_addr; + update_list_p->count = j+1; + } + cpu_update_needed(which_cpu) = TRUE; + simple_unlock(&update_list_p->lock); + + /* if its the kernel pmap, ignore cpus_idle */ + if (((cpus_idle & (1 << which_cpu)) == 0) || + (pmap == kernel_pmap) || PMAP_REAL(which_cpu) == pmap) + { + i386_signal_cpu(which_cpu, MP_TLB_FLUSH, ASYNC); + } + use_list &= ~(1 << which_cpu); + } +} + +void +process_pmap_updates( + register pmap_t my_pmap) +{ + register int my_cpu; + register pmap_update_list_t update_list_p; + register int j; + register pmap_t pmap; + + mp_disable_preemption(); + my_cpu = cpu_number(); + update_list_p = cpu_update_list(my_cpu); + simple_lock(&update_list_p->lock); + + for (j = 0; j < update_list_p->count; j++) { + pmap = update_list_p->item[j].pmap; + if (pmap == my_pmap || + pmap == kernel_pmap) { + + if (pmap->ref_count <= 0) { + PMAP_CPU_CLR(pmap, my_cpu); + PMAP_REAL(my_cpu) = kernel_pmap; +#ifdef PAE + set_cr3((unsigned int)kernel_pmap->pm_ppdpt); +#else + set_cr3((unsigned int)kernel_pmap->pdirbase); +#endif + } else + INVALIDATE_TLB(pmap, + update_list_p->item[j].start, + update_list_p->item[j].end); + } + } + update_list_p->count = 0; + cpu_update_needed(my_cpu) = FALSE; + simple_unlock(&update_list_p->lock); + mp_enable_preemption(); +} + +/* + * Interrupt routine for TBIA requested from other processor. + * This routine can also be called at all interrupts time if + * the cpu was idle. Some driver interrupt routines might access + * newly allocated vm. (This is the case for hd) + */ +void +pmap_update_interrupt(void) +{ + register int my_cpu; + spl_t s; + register pmap_t my_pmap; + + mp_disable_preemption(); + my_cpu = cpu_number(); + + /* + * Raise spl to splvm (above splip) to block out pmap_extract + * from IO code (which would put this cpu back in the active + * set). + */ + s = splhigh(); + + my_pmap = PMAP_REAL(my_cpu); + + if (!(my_pmap && pmap_in_use(my_pmap, my_cpu))) + my_pmap = kernel_pmap; + + do { + LOOP_VAR; + + /* + * Indicate that we're not using either user or kernel + * pmap. + */ + i_bit_clear(my_cpu, &cpus_active); + + /* + * Wait for any pmap updates in progress, on either user + * or kernel pmap. 
+ */ + while (*(volatile int *)(&my_pmap->lock.interlock.lock_data) || + *(volatile int *)(&kernel_pmap->lock.interlock.lock_data)) { + LOOP_CHECK("pmap_update_interrupt", my_pmap); + cpu_pause(); + } + + process_pmap_updates(my_pmap); + + i_bit_set(my_cpu, &cpus_active); + + } while (cpu_update_needed(my_cpu)); + + splx(s); + mp_enable_preemption(); +} + #if MACH_KDB /* show phys page mappings and attributes */ @@ -3445,7 +3050,7 @@ void db_kvtophys( vm_offset_t vaddr) { - db_printf("0x%qx", kvtophys(vaddr)); + db_printf("0x%x", kvtophys(vaddr)); } /* @@ -3456,7 +3061,7 @@ db_show_vaddrs( pt_entry_t *dirbase) { pt_entry_t *ptep, *pdep, tmp; - unsigned int x, y, pdecnt, ptecnt; + int x, y, pdecnt, ptecnt; if (dirbase == 0) { dirbase = kernel_pmap->dirbase; @@ -3465,7 +3070,7 @@ db_show_vaddrs( db_printf("need a dirbase...\n"); return; } - dirbase = (pt_entry_t *) (int) ((unsigned long) dirbase & ~INTEL_OFFMASK); + dirbase = (pt_entry_t *) ((unsigned long) dirbase & ~INTEL_OFFMASK); db_printf("dirbase: 0x%x\n", dirbase); @@ -3510,24 +3115,153 @@ pmap_list_resident_pages( } #endif /* MACH_VM_DEBUG */ +#ifdef MACH_BSD +/* + * pmap_pagemove + * + * BSD support routine to reassign virtual addresses. + */ + +void +pmap_movepage(unsigned long from, unsigned long to, vm_size_t size) +{ + spl_t spl; + pt_entry_t *pte, saved_pte; + + /* Lock the kernel map */ + PMAP_READ_LOCK(kernel_pmap, spl); + + + while (size > 0) { + pte = pmap_pte(kernel_pmap, from); + if (pte == NULL) + panic("pmap_pagemove from pte NULL"); + saved_pte = *pte; + PMAP_READ_UNLOCK(kernel_pmap, spl); + + pmap_enter(kernel_pmap, to, (ppnum_t)i386_btop(i386_trunc_page(*pte)), + VM_PROT_READ|VM_PROT_WRITE, 0, *pte & INTEL_PTE_WIRED); + + pmap_remove(kernel_pmap, (addr64_t)from, (addr64_t)(from+PAGE_SIZE)); + + PMAP_READ_LOCK(kernel_pmap, spl); + pte = pmap_pte(kernel_pmap, to); + if (pte == NULL) + panic("pmap_pagemove 'to' pte NULL"); + + *pte = saved_pte; + + from += PAGE_SIZE; + to += PAGE_SIZE; + size -= PAGE_SIZE; + } + /* Get the processors to update the TLBs */ + PMAP_UPDATE_TLBS(kernel_pmap, from, from+size); + PMAP_UPDATE_TLBS(kernel_pmap, to, to+size); + + PMAP_READ_UNLOCK(kernel_pmap, spl); + +} +#endif /* MACH_BSD */ /* temporary workaround */ boolean_t -coredumpok(__unused vm_map_t map, __unused vm_offset_t va) +coredumpok(vm_map_t map, vm_offset_t va) { -#if 0 pt_entry_t *ptep; ptep = pmap_pte(map->pmap, va); if (0 == ptep) return FALSE; return ((*ptep & (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)) != (INTEL_PTE_NCACHE | INTEL_PTE_WIRED)); -#else - return TRUE; +} + +/* + * grow the number of kernel page table entries, if needed + */ +void +pmap_growkernel(vm_offset_t addr) +{ +#if GROW_KERNEL_FUNCTION_IMPLEMENTED + struct pmap *pmap; + int s; + vm_offset_t ptppaddr; + ppnum_t ppn; + vm_page_t nkpg; + pd_entry_t newpdir = 0; + + /* + * Serialize. + * Losers return to try again until the winner completes the work. 
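pmap_growkernel, whose body follows, rounds the target address up past a page-table boundary and then walks forward one table span at a time, skipping slots another caller already filled. A toy rendering of that loop; the constants and callbacks are local assumptions, not the kernel's pdir_pde or allocator.

/* Sketch only: grow-to-cover loop in the style of pmap_growkernel. */
#include <stdint.h>

#define TOY_PAGE_SIZE 4096UL
#define TOY_NPTEPG    1024UL                         /* PTEs per table, non-PAE i386 */
#define PT_SPAN       (TOY_PAGE_SIZE * TOY_NPTEPG)   /* 4 MB mapped per page table */

static uintptr_t kernel_vm_end;   /* toy state; the real code discovers this lazily */

/* Advance past 'a' to the next page-table boundary, the same
 * (a + span) & ~(span - 1) arithmetic applied to 'addr' below. */
static uintptr_t next_pt_boundary(uintptr_t a)
{
        return (a + PT_SPAN) & ~(PT_SPAN - 1);
}

/* Grow the mapped region until it covers 'addr'.  pde_present() and
 * install_pt() are toy callbacks standing in for the pdir_pde() test and
 * the allocate/zero/enter sequence. */
static void toy_growkernel(uintptr_t addr,
                           int  (*pde_present)(uintptr_t),
                           void (*install_pt)(uintptr_t))
{
        addr = next_pt_boundary(addr);
        while (kernel_vm_end < addr) {
                if (!pde_present(kernel_vm_end))
                        install_pt(kernel_vm_end);
                kernel_vm_end = next_pt_boundary(kernel_vm_end);
        }
}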
+ */ + if (kptobj == 0) panic("growkernel 0"); + if (!vm_object_lock_try(kptobj)) { + return; + } + + vm_page_lock_queues(); + + s = splhigh(); + + /* + * If this is the first time thru, locate the end of the + * kernel page table entries and set nkpt to the current + * number of kernel page table pages + */ + + if (kernel_vm_end == 0) { + kernel_vm_end = KERNBASE; + nkpt = 0; + + while (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + nkpt++; + } + } + + /* + * Now allocate and map the required number of page tables + */ + addr = (addr + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + while (kernel_vm_end < addr) { + if (pdir_pde(kernel_pmap->dirbase, kernel_vm_end)) { + kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1); + continue; /* someone already filled this one */ + } + + nkpg = vm_page_alloc(kptobj, nkpt); + if (!nkpg) + panic("pmap_growkernel: no memory to grow kernel"); + + nkpt++; + vm_page_wire(nkpg); + ppn = nkpg->phys_page; + pmap_zero_page(ppn); + ptppaddr = i386_ptob(ppn); + newpdir = (pd_entry_t) (ptppaddr | INTEL_PTE_VALID | + INTEL_PTE_RW | INTEL_PTE_REF | INTEL_PTE_MOD); + pdir_pde(kernel_pmap->dirbase, kernel_vm_end) = newpdir; + + simple_lock(&free_pmap_lock); + for (pmap = (struct pmap *)kernel_pmap->pmap_link.next; + pmap != kernel_pmap ; + pmap = (struct pmap *)pmap->pmap_link.next ) { + *pmap_pde(pmap, kernel_vm_end) = newpdir; + } + simple_unlock(&free_pmap_lock); + } + splx(s); + vm_page_unlock_queues(); + vm_object_unlock(kptobj); #endif } +pt_entry_t * +pmap_mapgetpte(vm_map_t map, vm_offset_t v) +{ + return pmap_pte(map->pmap, v); +} boolean_t phys_page_exists( @@ -3584,88 +3318,27 @@ mapping_adjust() } void -pmap_commpage32_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt) +pmap_commpage_init(vm_offset_t kernel_commpage, vm_offset_t user_commpage, int cnt) { int i; pt_entry_t *opte, *npte; pt_entry_t pte; - for (i = 0; i < cnt; i++) { - opte = pmap_pte(kernel_pmap, (vm_map_offset_t)kernel_commpage); + opte = pmap_pte(kernel_pmap, kernel_commpage); if (0 == opte) panic("kernel_commpage"); + npte = pmap_pte(kernel_pmap, user_commpage); + if (0 == npte) panic("user_commpage"); pte = *opte | INTEL_PTE_USER|INTEL_PTE_GLOBAL; pte &= ~INTEL_PTE_WRITE; // ensure read only - npte = pmap_pte(kernel_pmap, (vm_map_offset_t)user_commpage); - if (0 == npte) panic("user_commpage"); - pmap_store_pte(npte, pte); + WRITE_PTE_FAST(npte, pte); kernel_commpage += INTEL_PGBYTES; user_commpage += INTEL_PGBYTES; } } -#define PMAP_COMMPAGE64_CNT (_COMM_PAGE64_AREA_USED/PAGE_SIZE) -pt_entry_t pmap_commpage64_ptes[PMAP_COMMPAGE64_CNT]; - -void -pmap_commpage64_init(vm_offset_t kernel_commpage, __unused vm_map_offset_t user_commpage, int cnt) -{ - spl_t s; - int i; - pt_entry_t *kptep; - - s = splhigh(); - for (i = 0; i< cnt; i++) { - kptep = pmap_pte(kernel_pmap, (uint64_t)kernel_commpage + (i*PAGE_SIZE)); - if ((0 == kptep) || (0 == (*kptep & INTEL_PTE_VALID))) panic("pmap_commpage64_init pte"); - pmap_commpage64_ptes[i] = ((*kptep & ~INTEL_PTE_WRITE) | INTEL_PTE_USER); - } - splx(s); - -} - -void -pmap_map_sharedpage(__unused task_t task, pmap_t p) -{ - pt_entry_t *ptep; - spl_t s; - int i; - - if (!p->pm_64bit) return; - /* setup high 64 bit commpage */ - s = splhigh(); - while ((ptep = pmap_pte(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS)) == PD_ENTRY_NULL) { - splx(s); - pmap_expand(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS); - s = splhigh(); - } - - 
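The commpage setup above derives the user-visible PTE from the kernel one by adding the user (and global) bits and stripping write permission, so the shared page is read-only from user space. In isolation, with stand-in bit values rather than the INTEL_PTE_* macros:

/* Sketch only: make a user-mapped, read-only alias of a kernel mapping. */
#include <stdint.h>

#define PTE_WRITE  0x002ULL
#define PTE_USER   0x004ULL
#define PTE_GLOBAL 0x100ULL            /* local stand-ins */

static uint64_t commpage_user_pte(uint64_t kernel_pte)
{
        uint64_t pte = kernel_pte | PTE_USER | PTE_GLOBAL;
        pte &= ~PTE_WRITE;             /* ensure read only */
        return pte;
}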
for (i = 0; i< PMAP_COMMPAGE64_CNT; i++) { - ptep = pmap_pte(p, (uint64_t)_COMM_PAGE64_BASE_ADDRESS + (i*PAGE_SIZE)); - if (0 == ptep) panic("pmap_map_sharedpage"); - pmap_store_pte(ptep, pmap_commpage64_ptes[i]); - } - splx(s); - -} - -void -pmap_unmap_sharedpage(pmap_t pmap) -{ - spl_t s; - pt_entry_t *ptep; - int i; - - if (!pmap->pm_64bit) return; - s = splhigh(); - for (i = 0; i< PMAP_COMMPAGE64_CNT; i++) { - ptep = pmap_pte(pmap, (uint64_t)_COMM_PAGE64_BASE_ADDRESS + (i*PAGE_SIZE)); - if (ptep) pmap_store_pte(ptep, 0); - } - splx(s); -} - static cpu_pmap_t cpu_pmap_master; +static struct pmap_update_list cpu_update_list_master; struct cpu_pmap * pmap_cpu_alloc(boolean_t is_boot_cpu) @@ -3673,13 +3346,13 @@ pmap_cpu_alloc(boolean_t is_boot_cpu) int ret; int i; cpu_pmap_t *cp; + pmap_update_list_t up; vm_offset_t address; - vm_map_address_t mapaddr; vm_map_entry_t entry; - pt_entry_t *pte; if (is_boot_cpu) { cp = &cpu_pmap_master; + up = &cpu_update_list_master; } else { /* * The per-cpu pmap data structure itself. @@ -3693,494 +3366,57 @@ pmap_cpu_alloc(boolean_t is_boot_cpu) bzero((void *)cp, sizeof(cpu_pmap_t)); /* - * The temporary windows used for copy/zero - see loose_ends.c + * The tlb flush update list. */ - ret = vm_map_find_space(kernel_map, - &mapaddr, PMAP_NWINDOWS*PAGE_SIZE, (vm_map_offset_t)0, 0, &entry); + ret = kmem_alloc(kernel_map, + (vm_offset_t *) &up, sizeof(*up)); if (ret != KERN_SUCCESS) { - printf("pmap_cpu_alloc() " - "vm_map_find_space ret=%d\n", ret); + printf("pmap_cpu_alloc() failed ret=%d\n", ret); pmap_cpu_free(cp); return NULL; } - address = (vm_offset_t)mapaddr; - - for (i = 0; i < PMAP_NWINDOWS; i++, address += PAGE_SIZE) { - while ((pte = pmap_pte(kernel_pmap, (vm_map_offset_t)address)) == 0) - pmap_expand(kernel_pmap, (vm_map_offset_t)address); - * (int *) pte = 0; - cp->mapwindow[i].prv_CADDR = (caddr_t) address; - cp->mapwindow[i].prv_CMAP = pte; - } - vm_map_unlock(kernel_map); - } - - cp->pdpt_window_index = PMAP_PDPT_FIRST_WINDOW; - cp->pde_window_index = PMAP_PDE_FIRST_WINDOW; - cp->pte_window_index = PMAP_PTE_FIRST_WINDOW; - - return cp; -} - -void -pmap_cpu_free(struct cpu_pmap *cp) -{ - if (cp != NULL && cp != &cpu_pmap_master) { - kfree((void *) cp, sizeof(cpu_pmap_t)); - } -} - - -mapwindow_t * -pmap_get_mapwindow(pt_entry_t pentry) -{ - mapwindow_t *mp; - int i; - boolean_t istate; - - /* - * can be called from hardware interrupt context - * so we need to protect the lookup process - */ - istate = ml_set_interrupts_enabled(FALSE); - - /* - * Note: 0th map reserved for pmap_pte() - */ - for (i = PMAP_NWINDOWS_FIRSTFREE; i < PMAP_NWINDOWS; i++) { - mp = ¤t_cpu_datap()->cpu_pmap->mapwindow[i]; - - if (*mp->prv_CMAP == 0) { - *mp->prv_CMAP = pentry; - break; - } - } - if (i >= PMAP_NWINDOWS) - mp = NULL; - (void) ml_set_interrupts_enabled(istate); - - return (mp); -} - - -/* - * kern_return_t pmap_nest(grand, subord, vstart, size) - * - * grand = the pmap that we will nest subord into - * subord = the pmap that goes into the grand - * vstart = start of range in pmap to be inserted - * nstart = start of range in pmap nested pmap - * size = Size of nest area (up to 16TB) - * - * Inserts a pmap into another. This is used to implement shared segments. - * - * on x86 this is very limited right now. must be exactly 1 segment. - * - * Note that we depend upon higher level VM locks to insure that things don't change while - * we are doing this. For example, VM should not be doing any pmap enters while it is nesting - * or do 2 nests at once. 
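pmap_nest, which begins below, opens with pure arithmetic validation: the size and both start addresses must be 256 MB aligned and the size must not exceed 16 TB. Those tests reduce to mask-and-shift checks like the following sketch; the constants are written out locally, and the full routine additionally insists on exactly one 256 MB segment.

/* Sketch only: argument validation in the style of pmap_nest. */
#include <stdint.h>
#include <stdbool.h>

#define NEST_ALIGN_MASK 0x0FFFFFFFULL   /* 256 MB - 1 */
#define NEST_MAX_UNITS  65536ULL        /* 65536 * 256 MB = 16 TB */

static bool nest_args_ok(uint64_t vstart, uint64_t nstart, uint64_t size)
{
        if (size == 0)                      return false;
        if (size & NEST_ALIGN_MASK)         return false;  /* multiple of 256 MB */
        if ((size >> 28) > NEST_MAX_UNITS)  return false;  /* at most 16 TB */
        if (vstart & NEST_ALIGN_MASK)       return false;  /* 256 MB aligned */
        if (nstart & NEST_ALIGN_MASK)       return false;
        return true;
}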
- */ - - -kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t vstart, addr64_t nstart, uint64_t size) { - - vm_map_offset_t vaddr, nvaddr; - pd_entry_t *pde,*npde; - unsigned int i, need_flush; - unsigned int num_pde; - spl_t s; - - // do validity tests - - if(size & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this for multiples of 256MB */ - if((size >> 28) > 65536) return KERN_INVALID_VALUE; /* Max size we can nest is 16TB */ - if(vstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this aligned to 256MB */ - if(nstart & 0x0FFFFFFFULL) return KERN_INVALID_VALUE; /* We can only do this aligned to 256MB */ - if(size == 0) { - panic("pmap_nest: size is invalid - %016llX\n", size); - } - if ((size >> 28) != 1) panic("pmap_nest: size 0x%llx must be 0x%x", size, NBPDE); - - subord->pm_shared = TRUE; - // prepopulate subord pmap pde's if necessary - - if (cpu_64bit) { - s = splhigh(); - while (PD_ENTRY_NULL == (npde = pmap_pde(subord, nstart))) { - splx(s); - pmap_expand(subord, nstart); - s = splhigh(); - } - splx(s); - } - - PMAP_READ_LOCK(subord,s); - nvaddr = (vm_map_offset_t)nstart; - need_flush = 0; - num_pde = size >> PDESHIFT; - - for (i=0;i> PDESHIFT; - - for (i=0;inx_enabled = 0; -} - -void -pt_fake_zone_info(int *count, vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, - vm_size_t *alloc_size, int *collectable, int *exhaustable) -{ - *count = inuse_ptepages_count; - *cur_size = PAGE_SIZE * inuse_ptepages_count; - *max_size = PAGE_SIZE * (inuse_ptepages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count); - *elem_size = PAGE_SIZE; - *alloc_size = PAGE_SIZE; - - *collectable = 1; - *exhaustable = 0; -} - -vm_offset_t pmap_cpu_high_map_vaddr(int cpu, enum high_cpu_types e) -{ - enum high_fixed_addresses a; - a = e + HIGH_CPU_END * cpu; - return pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a); -} - -vm_offset_t pmap_high_map_vaddr(enum high_cpu_types e) -{ - return pmap_cpu_high_map_vaddr(cpu_number(), e); -} - -vm_offset_t pmap_high_map(pt_entry_t pte, enum high_cpu_types e) -{ - enum high_fixed_addresses a; - vm_offset_t vaddr; - - a = e + HIGH_CPU_END * cpu_number(); - vaddr = (vm_offset_t)pmap_index_to_virt(HIGH_FIXED_CPUS_BEGIN + a); - *(pte_unique_base + a) = pte; - - /* TLB flush for this page for this cpu */ - invlpg((uintptr_t)vaddr); - - return vaddr; -} - - -/* - * Called with pmap locked, we: - * - scan through per-cpu data to see which other cpus need to flush - * - send an IPI to each non-idle cpu to be flushed - * - wait for all to signal back that they are inactive or we see that - * they are in an interrupt handler or at a safe point - * - flush the local tlb is active for this pmap - * - return ... the caller will unlock the pmap - */ -void -pmap_flush_tlbs(pmap_t pmap) -{ - unsigned int cpu; - unsigned int cpu_bit; - cpu_set cpus_to_signal; - unsigned int my_cpu = cpu_number(); - pmap_paddr_t pmap_cr3 = pmap->pm_cr3; - boolean_t flush_self = FALSE; - uint64_t deadline; - - assert(!ml_get_interrupts_enabled()); - - /* - * Scan other cpus for matching active or task CR3. - * For idle cpus (with no active map) we mark them invalid but - * don't signal -- they'll check as they go busy. - * Note: for the kernel pmap we look for 64-bit shared address maps. 
- */ - cpus_to_signal = 0; - for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { - if (!cpu_datap(cpu)->cpu_running) - continue; - if ((cpu_datap(cpu)->cpu_task_cr3 == pmap_cr3) || - (CPU_GET_ACTIVE_CR3(cpu) == pmap_cr3) || - (pmap->pm_shared) || - ((pmap == kernel_pmap) && - (!CPU_CR3_IS_ACTIVE(cpu) || - cpu_datap(cpu)->cpu_task_map == TASK_MAP_64BIT_SHARED))) { - if (cpu == my_cpu) { - flush_self = TRUE; - continue; - } - cpu_datap(cpu)->cpu_tlb_invalid = TRUE; - __asm__ volatile("mfence"); - - if (CPU_CR3_IS_ACTIVE(cpu)) { - cpus_to_signal |= cpu_bit; - i386_signal_cpu(cpu, MP_TLB_FLUSH, ASYNC); - } - } - } - - if (cpus_to_signal) { - KERNEL_DEBUG(0xef800024 | DBG_FUNC_START, cpus_to_signal, 0, 0, 0, 0); - - deadline = mach_absolute_time() + LockTimeOut; /* - * Wait for those other cpus to acknowledge + * The temporary windows used for copy/zero - see loose_ends.c */ - for (cpu = 0, cpu_bit = 1; cpu < real_ncpus; cpu++, cpu_bit <<= 1) { - while ((cpus_to_signal & cpu_bit) != 0) { - if (!cpu_datap(cpu)->cpu_running || - cpu_datap(cpu)->cpu_tlb_invalid == FALSE || - !CPU_CR3_IS_ACTIVE(cpu)) { - cpus_to_signal &= ~cpu_bit; - break; - } - if (mach_absolute_time() > deadline) - panic("pmap_flush_tlbs() " - "timeout pmap=%p cpus_to_signal=%p", - pmap, cpus_to_signal); - cpu_pause(); + for (i = 0; i < PMAP_NWINDOWS; i++) { + ret = vm_map_find_space(kernel_map, + &address, PAGE_SIZE, 0, &entry); + if (ret != KERN_SUCCESS) { + printf("pmap_cpu_alloc() " + "vm_map_find_space ret=%d\n", ret); + pmap_cpu_free(cp); + return NULL; } - if (cpus_to_signal == 0) - break; + vm_map_unlock(kernel_map); + + cp->mapwindow[i].prv_CADDR = (caddr_t) address; + cp->mapwindow[i].prv_CMAP = vtopte(address); + * (int *) cp->mapwindow[i].prv_CMAP = 0; + + kprintf("pmap_cpu_alloc() " + "window=%d CADDR=0x%x CMAP=0x%x\n", + i, address, vtopte(address)); } - KERNEL_DEBUG(0xef800024 | DBG_FUNC_END, cpus_to_signal, 0, 0, 0, 0); } /* - * Flush local tlb if required. - * We need this flush even if the pmap being changed - * is the user map... in case we do a copyin/out - * before returning to user mode. 
+ * Set up the pmap request list */ - if (flush_self) - flush_tlb(); + cp->update_list = up; + simple_lock_init(&up->lock, 0); + up->count = 0; + return cp; } void -process_pmap_updates(void) -{ - flush_tlb(); - - current_cpu_datap()->cpu_tlb_invalid = FALSE; - __asm__ volatile("mfence"); -} - -void -pmap_update_interrupt(void) -{ - KERNEL_DEBUG(0xef800028 | DBG_FUNC_START, 0, 0, 0, 0, 0); - - assert(!ml_get_interrupts_enabled()); - - process_pmap_updates(); - - KERNEL_DEBUG(0xef800028 | DBG_FUNC_END, 0, 0, 0, 0, 0); -} - - -unsigned int pmap_cache_attributes(ppnum_t pn) { - - if (!pmap_valid_page(pn)) - return (VM_WIMG_IO); - - return (VM_WIMG_COPYBACK); -} - -#ifdef PMAP_DEBUG -void -pmap_dump(pmap_t p) -{ - int i; - - kprintf("pmap 0x%x\n",p); - - kprintf(" pm_cr3 0x%llx\n",p->pm_cr3); - kprintf(" pm_pml4 0x%x\n",p->pm_pml4); - kprintf(" pm_pdpt 0x%x\n",p->pm_pdpt); - - kprintf(" pml4[0] 0x%llx\n",*p->pm_pml4); - for (i=0;i<8;i++) - kprintf(" pdpt[%d] 0x%llx\n",i, p->pm_pdpt[i]); -} - -void pmap_dump_wrap(void) -{ - pmap_dump(current_cpu_datap()->cpu_active_thread->task->map->pmap); -} - -void -dump_4GB_pdpt(pmap_t p) +pmap_cpu_free(struct cpu_pmap *cp) { - int spl; - pdpt_entry_t *user_pdptp; - pdpt_entry_t *kern_pdptp; - pdpt_entry_t *pml4p; - - spl = splhigh(); - while ((user_pdptp = pmap64_pdpt(p, 0x0)) == PDPT_ENTRY_NULL) { - splx(spl); - pmap_expand_pml4(p, 0x0); - spl = splhigh(); + if (cp != NULL && cp != &cpu_pmap_master) { + if (cp->update_list != NULL) + kfree((void *) cp->update_list, + sizeof(*cp->update_list)); + kfree((void *) cp, sizeof(cpu_pmap_t)); } - kern_pdptp = kernel_pmap->pm_pdpt; - if (kern_pdptp == NULL) - panic("kern_pdptp == NULL"); - kprintf("dump_4GB_pdpt(%p)\n" - "kern_pdptp=%p (phys=0x%016llx)\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "user_pdptp=%p (phys=0x%016llx)\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n", - p, kern_pdptp, kvtophys(kern_pdptp), - kern_pdptp+0, *(kern_pdptp+0), - kern_pdptp+1, *(kern_pdptp+1), - kern_pdptp+2, *(kern_pdptp+2), - kern_pdptp+3, *(kern_pdptp+3), - kern_pdptp+4, *(kern_pdptp+4), - user_pdptp, kvtophys(user_pdptp), - user_pdptp+0, *(user_pdptp+0), - user_pdptp+1, *(user_pdptp+1), - user_pdptp+2, *(user_pdptp+2), - user_pdptp+3, *(user_pdptp+3), - user_pdptp+4, *(user_pdptp+4)); - kprintf("user pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n", - p->pm_cr3, p->pm_hold, p->pm_pml4); - pml4p = (pdpt_entry_t *)p->pm_hold; - if (pml4p == NULL) - panic("user pml4p == NULL"); - kprintf("\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n", - pml4p+0, *(pml4p), - pml4p+KERNEL_UBER_PML4_INDEX, *(pml4p+KERNEL_UBER_PML4_INDEX)); - kprintf("kern pm_cr3=0x%016llx pm_hold=0x%08x pm_pml4=0x%08x\n", - kernel_pmap->pm_cr3, kernel_pmap->pm_hold, kernel_pmap->pm_pml4); - pml4p = (pdpt_entry_t *)kernel_pmap->pm_hold; - if (pml4p == NULL) - panic("kern pml4p == NULL"); - kprintf("\t 0x%08x: 0x%016llx\n" - "\t 0x%08x: 0x%016llx\n", - pml4p+0, *(pml4p), - pml4p+511, *(pml4p+511)); - splx(spl); -} - -void dump_4GB_pdpt_thread(thread_t tp) -{ - dump_4GB_pdpt(tp->map->pmap); } - - -#endif