/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <vm/vm_map.h>
#include <i386/pmap_internal.h>
void		pmap_remove_range(
			pmap_t		pmap,
			vm_map_offset_t	start_vaddr,
			pt_entry_t	*spte,
			pt_entry_t	*epte);
/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on a NBPDE boundary.
 */

/* These symbols may be referenced directly by VM */
uint64_t pmap_nesting_size_min = NBPDE;
uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
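/*
 * Added clarification (hedged, not original source text; values follow from
 * the NBPDE == 2MiB note above): pmap_nesting_size_min is the smallest
 * nestable granule, and 0 - (uint64_t)NBPDE wraps to 0xFFFFFFFFFFE00000ULL,
 * the largest 2MiB-aligned 64-bit value, so pmap_nesting_size_max effectively
 * imposes no bound beyond alignment (pmap_nest() itself further limits a
 * request to 16TB below).  For example:
 *
 *	(0x40000000ULL & (pmap_nesting_size_min - 1)) == 0	1GiB: aligned, accepted
 *	(0x00100000ULL & (pmap_nesting_size_min - 1)) != 0	1MiB: rejected
 */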
/*
 *	kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
 *
 *	grand    = the pmap into which subord will be nested
 *	subord   = the pmap that is inserted into grand
 *	va_start = start of the range in grand to be inserted
 *	nstart   = start of the corresponding range in the nested (subord) pmap
 *	size     = size of the nested area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *
 *	Note that we depend upon higher-level VM locks to ensure that things don't
 *	change while we are doing this.  For example, VM should not be doing any
 *	pmap enters while it is nesting, nor performing two nests at once.
 */
/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */
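/*
 * A minimal usage sketch (added; the caller and the base/size identifiers are
 * hypothetical -- in practice the Mach VM shared-region code drives this --
 * but the call shape and alignment rules are those enforced by pmap_nest()
 * below):
 *
 *	// both offsets must be equal and pmap_nesting_size_min aligned
 *	kern_return_t kr = pmap_nest(task_pmap,			// "grand"
 *				     shared_region_pmap,	// "subord"
 *				     shared_base,		// va_start
 *				     shared_base,		// nstart (must equal va_start)
 *				     shared_size);		// multiple of 2MiB
 *	if (kr != KERN_SUCCESS)
 *		panic("nesting shared region failed: %d", kr);
 */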
kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
	vm_map_offset_t	vaddr, nvaddr;
	pd_entry_t	*pde, *npde;
	unsigned int	i;
	uint64_t	num_pde;
	if ((size & (pmap_nesting_size_min-1)) ||
	    (va_start & (pmap_nesting_size_min-1)) ||
	    (nstart & (pmap_nesting_size_min-1)) ||
	    ((size >> 28) > 65536))	/* Max size we can nest is 16TB */
		return KERN_INVALID_VALUE;
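	/*
	 * Added note on the limit check above (a clarification, not original
	 * source text): (size >> 28) > 65536 rejects any size whose upper bits
	 * exceed 65536 << 28 = 2^44 bytes = 16TiB (the shift discards the low
	 * 28 bits), which is the "16TB" maximum noted in the comment.
	 */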
84 panic("pmap_nest: size is invalid - %016llX\n", size
);
87 if (va_start
!= nstart
)
88 panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start
, nstart
);
	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
		   (uintptr_t) grand, (uintptr_t) subord,
		   (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);

	nvaddr = (vm_map_offset_t)nstart;
	num_pde = size >> PDESHIFT;

	subord->pm_shared = TRUE;
	for (i = 0; i < num_pde;) {
		if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {

			npde = pmap64_pdpt(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				pmap_expand_pdpt(subord, nvaddr);
				npde = pmap64_pdpt(subord, nvaddr);
			}
			*npde |= INTEL_PDPTE_NESTED;
			nvaddr += NBPDPT;
			i += (uint32_t)NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				pmap_expand(subord, nvaddr);
				npde = pmap_pde(subord, nvaddr);
			}
			nvaddr += NBPDE;
			i++;
		}
	}
	vaddr = (vm_map_offset_t)va_start;

	for (i = 0; i < num_pde;) {
		pd_entry_t tpde;

		if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
			npde = pmap64_pdpt(subord, vaddr);
			if (npde == 0)
				panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
			tpde = *npde;
			pde = pmap64_pdpt(grand, vaddr);
			if (0 == pde) {
				pmap_expand_pml4(grand, vaddr);
				pde = pmap64_pdpt(grand, vaddr);
			}
			if (0 == pde)
				panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
			pmap_store_pte(pde, tpde);
			vaddr += NBPDPT;
			i += (uint32_t) NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nstart);
			if (npde == 0)
				panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
			tpde = *npde;
			nstart += NBPDE;
			pde = pmap_pde(grand, vaddr);
			if ((0 == pde) && cpu_64bit) {
				pmap_expand_pdpt(grand, vaddr);
				pde = pmap_pde(grand, vaddr);
			}
			if (0 == pde)
				panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
			pmap_store_pte(pde, tpde);
			vaddr += NBPDE;
			i++;
		}
	}

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}
/*
 *	kern_return_t pmap_unnest(grand, vaddr, size)
 *
 *	grand = the pmap from which the nested range will be removed
 *	vaddr = start of the range in grand to be un-nested
 *	size  = size of the range to be un-nested
 *
 *	Removes a pmap from another.  This is used to implement shared segments.
 */
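/*
 * A minimal usage sketch (added; the identifiers are hypothetical, the call
 * shape is that of pmap_unnest() below): tearing down the nesting established
 * in the pmap_nest() example above would look like
 *
 *	kern_return_t kr = pmap_unnest(task_pmap, shared_base, shared_size);
 *
 * where shared_base and shared_size must be aligned to pmap_nesting_size_min,
 * exactly as the alignment check in pmap_unnest() requires.
 */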
kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
	addr64_t	va_start, va_end;
	uint64_t	npdpt = PMAP_INVALID_PDPTNUM;
	pd_entry_t	*pde;
	unsigned int	i;
	uint64_t	num_pde;

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
		   (uintptr_t) grand,
		   (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);

	if ((size & (pmap_nesting_size_min-1)) ||
	    (vaddr & (pmap_nesting_size_min-1))) {
		panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
		      grand, vaddr, size);
	}
	/* align everything to PDE boundaries */
	va_start = vaddr & ~(NBPDE-1);
	va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
	size = va_end - va_start;

	num_pde = size >> PDESHIFT;
	for (i = 0; i < num_pde; ) {
		if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
			npdpt = pdptnum(grand, vaddr);
			pde = pmap64_pdpt(grand, vaddr);
			if (pde && (*pde & INTEL_PDPTE_NESTED)) {
				pmap_store_pte(pde, (pd_entry_t)0);
				i += (uint32_t) NPDEPG;
				vaddr += NBPDPT;
				continue;
			}
		}
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0)
			panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
		pmap_store_pte(pde, (pd_entry_t)0);
		i++;
		vaddr += NBPDE;
	}

	PMAP_UPDATE_TLBS(grand, va_start, va_end);

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}
/* Invoked by the Mach VM to determine the platform-specific unnest region */

boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
	boolean_t rval = FALSE;
	pd_entry_t *pdpte;
	pdpte = pmap64_pdpt(p, *s);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*s = ((*s) & ~(NBPDPT - 1));
		rval = TRUE;
	}
	pdpte = pmap64_pdpt(p, *e);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*e = ((*e + NBPDPT) & ~(NBPDPT-1));
		rval = TRUE;
	}

	return rval;
}
/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
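/*
 * Added worked example (not original source text): for a 2MiB large-page
 * mapping the PDE supplies the 2MiB-aligned frame and ptenum(va) supplies the
 * 4KiB page index within it.  E.g. if the PDE maps physical 0x40000000 and
 * va is the mapping base plus 0x5000, then
 *
 *	ppn = i386_btop(0x40000000) + ptenum(va) = 0x40000 + 5
 */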
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	pt_entry_t	*ptp;
	pd_entry_t	*pdep;
	ppnum_t		ppn = 0;
	pd_entry_t	pde;
	pt_entry_t	pte;

	mp_disable_preemption();

	/* This refcount test is a band-aid--several infrastructural changes
	 * are necessary to eliminate invocation of this routine from arbitrary
	 * contexts.
	 */
	if (!pmap->ref_count)
		goto pfp_exit;

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
		if (pde & INTEL_PTE_PS) {
			ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
			ppn += (ppnum_t) ptenum(va);
		} else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
				ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
			}
		}
	}
pfp_exit:
	mp_enable_preemption();

	return ppn;
}
/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed.
 */
void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
	pv_rooted_entry_t	pv_h, pv_e;
	pv_hashed_entry_t	pvh_e, nexth;
	vm_map_offset_t		vaddr;
	pmap_t			pmap;
	pt_entry_t		*ptep;

	assert(IS_MANAGED_PAGE(pn));

	pv_h = pai_to_pvh(pn);
	/* TODO: translate the PHYS_* bits to PTE bits, while they're
	 * currently identical, they may not remain so
	 * Potential optimization (here and in page_protect),
	 * parallel shootdowns, check for redundant
	 * attribute modifications.
	 */

	/*
	 * Alter attributes on all mappings
	 */
	if (pv_h->pmap != PMAP_NULL) {
		pv_e = pv_h;
		pvh_e = (pv_hashed_entry_t)pv_e;

		do {
			pmap = pv_e->pmap;
			vaddr = pv_e->va;
			ptep = pmap_pte(pmap, vaddr);

			if (0 == ptep)
				panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);

			nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
			pmap_update_pte(ptep, *ptep, (*ptep & ~PHYS_CACHEABILITY_MASK) | attributes);
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			pvh_e = nexth;
		} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
	}
}
void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	if (dofilter) {
		CPU_CR3_MARK_INACTIVE();
	} else {
		CPU_CR3_MARK_ACTIVE();
		__asm__ volatile("mfence");
		if (current_cpu_datap()->cpu_tlb_invalid)
			process_pmap_updates();
	}
}
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
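/*
 * A minimal usage sketch (added; the fault-handling caller shown here is
 * hypothetical, only the parameter order follows the declaration below): the
 * VM layer resolves a fault to a physical page and then establishes the
 * translation, e.g.
 *
 *	pmap_enter(vm_map_pmap(map),		// target pmap
 *		   vaddr,			// page-aligned VA
 *		   pn,				// physical page number
 *		   VM_PROT_READ | VM_PROT_WRITE,
 *		   0,				// flags (e.g. VM_MEM_NOT_CACHEABLE)
 *		   FALSE);			// not wired
 */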
void
pmap_enter(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr,
	ppnum_t			pn,
	vm_prot_t		prot,
	unsigned int		flags,
	boolean_t		wired)
{
	pt_entry_t		*pte;
	pv_rooted_entry_t	pv_h;
	int			pai;
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_new;
	pt_entry_t		template;
	pt_entry_t		old_pte;
	pmap_paddr_t		old_pa;
	pmap_paddr_t		pa = (pmap_paddr_t) i386_ptob(pn);
	boolean_t		need_tlbflush = FALSE;
	boolean_t		set_NX;
	char			oattr;
	boolean_t		old_pa_locked;
	/* 2MiB mappings are confined to x86_64 by VM */
	boolean_t		superpage = flags & VM_MEM_SUPERPAGE;
	vm_object_t		delpage_pm_obj = NULL;
	int			delpage_pde_index = 0;

	assert(pn != vm_page_fictitious_addr);

	if (pmap == PMAP_NULL)
		return;
	if (pn == vm_page_guard_addr)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
		   pmap,
		   (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
		   pn, prot);

	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
		set_NX = FALSE;
	else
		set_NX = TRUE;
	/*
	 *	Must allocate a new pvlist entry while we're unlocked;
	 *	zalloc may cause pageout (which will lock the pmap system).
	 *	If we determine we need a pvlist entry, we will unlock
	 *	and allocate one.  Then we will retry, throwing away
	 *	the allocated entry later (if we no longer need it).
	 */
	pvh_new = PV_HASHED_ENTRY_NULL;
Retry:
	pvh_e = PV_HASHED_ENTRY_NULL;
	/*
	 *	Expand pmap to include this pte.  Assume that
	 *	pmap is always expanded to include enough hardware
	 *	pages to map one VM page.
	 */
	if (superpage) {
		while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
			/* need room for another pde entry */
			pmap_expand_pdpt(pmap, vaddr);
		}
	} else {
		while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
			/*
			 * Must unlock to expand the pmap
			 * going to grow pde level page(s)
			 */
			pmap_expand(pmap, vaddr);
		}
	}
	if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
		/*
		 * There is still an empty page table mapped that
		 * was used for a previous base page mapping.
		 * Remember the PDE and the PDE index, so that we
		 * can free the page at the end of this function.
		 */
		delpage_pde_index = (int)pdeidx(pmap, vaddr);
		delpage_pm_obj = pmap->pm_obj;
	}

	old_pa = pte_to_pa(*pte);
	pai = pa_index(old_pa);
	old_pa_locked = FALSE;
	/*
	 * if we have a previous managed page, lock the pv entry now. after
	 * we lock it, check to see if someone beat us to the lock and if so
	 * drop the lock
	 */
	if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);
		old_pa_locked = TRUE;
		old_pa = pte_to_pa(*pte);
		if (0 == old_pa) {
			UNLOCK_PVH(pai);	/* another path beat us to it */
			old_pa_locked = FALSE;
		}
	}
	/*
	 *	Special case if the incoming physical page is already mapped
	 *	at this same virtual address.
	 */
	if (old_pa == pa) {
		pt_entry_t old_attributes =
		    *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);

		/*
		 *	May be changing its wired attribute or protection
		 */
		template = pa_to_pte(pa) | INTEL_PTE_VALID;
		template |= pmap_get_cache_attributes(pa_index(pa));

		if (VM_MEM_NOT_CACHEABLE ==
		    (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
			if (!(flags & VM_MEM_GUARDED))
				template |= INTEL_PTE_PTA;
			template |= INTEL_PTE_NCACHE;
		}
		if (pmap != kernel_pmap)
			template |= INTEL_PTE_USER;
		if (prot & VM_PROT_WRITE)
			template |= INTEL_PTE_WRITE;

		if (set_NX)
			template |= INTEL_PTE_NX;

		if (wired) {
			template |= INTEL_PTE_WIRED;
			if (!iswired(old_attributes))
				OSAddAtomic(+1,
				    &pmap->stats.wired_count);
		} else {
			if (iswired(old_attributes)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1,
				    &pmap->stats.wired_count);
			}
		}
		if (superpage)		/* this path can not be used */
			template |= INTEL_PTE_PS;	/* to change the page size! */
		/* Determine delta, PV locked */
		need_tlbflush =
		    ((old_attributes ^ template) != INTEL_PTE_WIRED);

		/* store modified PTE and preserve RC bits */
		pmap_update_pte(pte, *pte,
		    template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
		if (old_pa_locked) {
			UNLOCK_PVH(pai);
			old_pa_locked = FALSE;
		}
		goto Done;
	}
	/*
	 * Outline of code from here:
	 *   1) If va was mapped, update TLBs, remove the mapping
	 *      and remove old pvlist entry.
	 *   2) Add pvlist entry for new mapping
	 *   3) Enter new mapping.
	 *
	 * If the old physical page is not managed step 1) is skipped
	 * (except for updating the TLBs), and the mapping is
	 * overwritten at step 3).  If the new physical page is not
	 * managed, step 2) is skipped.
	 */
	if (old_pa != (pmap_paddr_t) 0) {
		/*
		 *	Don't do anything to pages outside valid memory here.
		 *	Instead convince the code that enters a new mapping
		 *	to overwrite the old one.
		 */
		/* invalidate the PTE */
		pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
		/* propagate invalidate everywhere */
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
		/* remember reference and change */
		old_pte = *pte;
		oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
		/* completely invalidate the PTE */
		pmap_store_pte(pte, 0);

		if (IS_MANAGED_PAGE(pai)) {
			pmap_assert(old_pa_locked == TRUE);
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1,
			    &pmap->stats.resident_count);

			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1,
				    &pmap->stats.wired_count);
			}
			pmap_phys_attributes[pai] |= oattr;

			/*
			 *	Remove the mapping from the pvlist for
			 *	this physical page.
			 *	We'll end up with either a rooted pv or a
			 *	hashed pv.
			 */
			pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);

		} else {
			/*
			 *	old_pa is not managed.
			 *	Do removal part of accounting.
			 */
			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1,
				    &pmap->stats.wired_count);
			}
		}
	}
	/*
	 * if we had a previously managed page locked, unlock it now
	 */
	if (old_pa_locked) {
		UNLOCK_PVH(pai);
		old_pa_locked = FALSE;
	}

	pai = pa_index(pa);	/* now working with new incoming phys page */
	if (IS_MANAGED_PAGE(pai)) {
		/*
		 *	Step 2) Enter the mapping in the PV list for this
		 *	physical page.
		 */
		pv_h = pai_to_pvh(pai);

		LOCK_PVH(pai);

		if (pv_h->pmap == PMAP_NULL) {
			/*
			 *	No mappings yet, use rooted pv
			 */
			pv_h->va = vaddr;
			pv_h->pmap = pmap;
			queue_init(&pv_h->qlink);
		} else {
			/*
			 *	Add new pv_hashed_entry after header.
			 */
			if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
				pvh_e = pvh_new;
				pvh_new = PV_HASHED_ENTRY_NULL;
			} else if (PV_HASHED_ENTRY_NULL == pvh_e) {
				PV_HASHED_ALLOC(&pvh_e);
				if (PV_HASHED_ENTRY_NULL == pvh_e) {
					/*
					 * the pv list is empty. if we are on
					 * the kernel pmap we'll use one of
					 * the special private kernel pv_e's,
					 * else, we need to unlock
					 * everything, zalloc a pv_e, and
					 * restart bringing in the pv_e with
					 * us in the middle
					 */
					if (kernel_pmap == pmap) {
						PV_HASHED_KERN_ALLOC(&pvh_e);
					} else {
						UNLOCK_PVH(pai);
						pmap_pv_throttle(pmap);
						pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
						goto Retry;
					}
				}
			}

			if (PV_HASHED_ENTRY_NULL == pvh_e)
				panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");

			pvh_e->va = vaddr;
			pvh_e->pmap = pmap;
			pvh_e->ppn = pn;
			pv_hash_add(pvh_e, pv_h);

			/*
			 *	Remember that we used the pvlist entry.
			 */
			pvh_e = PV_HASHED_ENTRY_NULL;
		}
		/*
		 * only count the mapping
		 * for 'managed memory'
		 */
		OSAddAtomic(+1, &pmap->stats.resident_count);
		if (pmap->stats.resident_count > pmap->stats.resident_max) {
			pmap->stats.resident_max = pmap->stats.resident_count;
		}
	} else if (last_managed_page == 0) {
		/* Account for early mappings created before "managed pages"
		 * are determined. Consider consulting the available DRAM map.
		 */
		OSAddAtomic(+1, &pmap->stats.resident_count);
	}
	/*
	 * Step 3) Enter the mapping.
	 *
	 * Build a template to speed up entering -
	 * only the pfn changes.
	 */
	template = pa_to_pte(pa) | INTEL_PTE_VALID;
	/*
	 * DRK: It may be worth asserting on cache attribute flags that diverge
	 * from the existing physical page attributes.
	 */
	template |= pmap_get_cache_attributes(pa_index(pa));

	if (flags & VM_MEM_NOT_CACHEABLE) {
		if (!(flags & VM_MEM_GUARDED))
			template |= INTEL_PTE_PTA;
		template |= INTEL_PTE_NCACHE;
	}
	if (pmap != kernel_pmap)
		template |= INTEL_PTE_USER;
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;
	if (set_NX)
		template |= INTEL_PTE_NX;
	if (wired) {
		template |= INTEL_PTE_WIRED;
		OSAddAtomic(+1, &pmap->stats.wired_count);
	}
	if (superpage)
		template |= INTEL_PTE_PS;
	pmap_store_pte(pte, template);
	/*
	 * if this was a managed page we delayed unlocking the pv until here
	 * to prevent pmap_page_protect et al from finding it until the pte
	 * has been modified
	 */
	if (IS_MANAGED_PAGE(pai)) {
		UNLOCK_PVH(pai);
	}
Done:
	if (need_tlbflush == TRUE)
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);

	if (pvh_e != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
	}
	if (pvh_new != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
	}

	if (delpage_pm_obj) {
		vm_page_t m;

		vm_object_lock(delpage_pm_obj);
		m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
		if (m == VM_PAGE_NULL)
			panic("pmap_enter: pte page not in object");
		vm_object_unlock(delpage_pm_obj);

		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(PAGE_SIZE);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */
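/*
 * Added illustration (not original source text): the caller passes a
 * half-open window of PTEs within one pte-page, exactly as pmap_remove()
 * does further below:
 *
 *	spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
 *	spte = &spte[ptenum(s64)];
 *	epte = &spte[intel_btop(l64 - s64)];
 *	pmap_remove_range(map, s64, spte, epte);
 */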
void
pmap_remove_range(
	pmap_t			pmap,
	vm_map_offset_t		start_vaddr,
	pt_entry_t		*spte,
	pt_entry_t		*epte)
{
	pt_entry_t		*cpte;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_e;
	int			pvh_cnt = 0;
	int			num_removed, num_unwired, num_found, num_invalid;
	int			pai;
	pmap_paddr_t		pa;
	vm_map_offset_t		vaddr;

	num_removed = 0;
	num_unwired = 0;
	num_found   = 0;
	num_invalid = 0;
#if defined(__i386__)
	if (pmap != kernel_pmap &&
	    pmap->pm_task_map == TASK_MAP_32BIT &&
	    start_vaddr >= HIGH_MEM_BASE) {
		/*
		 * The range is in the "high_shared_pde" which is shared
		 * between the kernel and all 32-bit tasks.  It holds
		 * the 32-bit commpage but also the trampolines, GDT, etc...
		 * so we can't let user tasks remove anything from it.
		 */
		return;
	}
#endif
837 for (cpte
= spte
, vaddr
= start_vaddr
;
839 cpte
++, vaddr
+= PAGE_SIZE_64
) {
840 pt_entry_t p
= *cpte
;
852 if (!IS_MANAGED_PAGE(pai
)) {
854 * Outside range of managed physical memory.
855 * Just remove the mappings.
857 pmap_store_pte(cpte
, 0);
861 if ((p
& INTEL_PTE_VALID
) == 0)
864 /* invalidate the PTE */
865 pmap_update_pte(cpte
, *cpte
, (*cpte
& ~INTEL_PTE_VALID
));
	if (num_found == 0) {
		/* nothing was changed: we're done */
		goto update_counts;
	}

	/* propagate the invalidates to other CPUs */

	PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {

		pa = pte_to_pa(*cpte);
		if (pa == 0)
			continue;

		pai = pa_index(pa);

		LOCK_PVH(pai);

		pa = pte_to_pa(*cpte);
		if (pa == 0) {
			UNLOCK_PVH(pai);
			continue;
		}
		num_removed++;

		if (iswired(*cpte))
			num_unwired++;
		/*
		 * Get the modify and reference bits, then
		 * nuke the entry in the page table
		 */
		/* remember reference and change */
		pmap_phys_attributes[pai] |=
			(char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));

		/*
		 * Remove the mapping from the pvlist for this physical page.
		 */
		pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);

		/* completely invalidate the PTE */
		pmap_store_pte(cpte, 0);
		UNLOCK_PVH(pai);

		if (pvh_e != PV_HASHED_ENTRY_NULL) {
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL) {
				pvh_et = pvh_e;
			}
			pvh_cnt++;
		}
	}

	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
update_counts:
	/*
	 *	Update the counts
	 */
#if TESTING
	if (pmap->stats.resident_count < num_removed)
		panic("pmap_remove_range: resident_count");
#endif
	assert(pmap->stats.resident_count >= num_removed);
	OSAddAtomic(-num_removed, &pmap->stats.resident_count);

#if TESTING
	if (pmap->stats.wired_count < num_unwired)
		panic("pmap_remove_range: wired_count");
#endif
	assert(pmap->stats.wired_count >= num_unwired);
	OSAddAtomic(-num_unwired, &pmap->stats.wired_count);

	return;
}
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
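/*
 * A minimal usage sketch (added; the address values and the caller are
 * hypothetical, the rounding requirement is the one stated above):
 *
 *	addr64_t start = vm_map_trunc_page(some_va);
 *	addr64_t end   = vm_map_round_page(some_va + some_len);
 *	pmap_remove(map->pmap, start, end);
 */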
void
pmap_remove(
	pmap_t		map,
	addr64_t	s64,
	addr64_t	e64)
{
	pt_entry_t	*pde;
	pt_entry_t	*spte, *epte;
	addr64_t	l64;
	uint64_t	deadline;

	if (map == PMAP_NULL || s64 == e64)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
		   map,
		   (uint32_t) (s64 >> 32), s64,
		   (uint32_t) (e64 >> 32), e64);
	/*
	 *	Check that address range in the kernel does not overlap the stacks.
	 *	We initialize local static min/max variables once to avoid making
	 *	2 function calls for every remove.  Note also that these functions
	 *	both return 0 before kernel stacks have been initialized, and hence
	 *	the panic is not triggered in this case.
	 */
	if (map == kernel_pmap) {
		static vm_offset_t kernel_stack_min = 0;
		static vm_offset_t kernel_stack_max = 0;

		if (kernel_stack_min == 0) {
			kernel_stack_min = min_valid_stack_address();
			kernel_stack_max = max_valid_stack_address();
		}
		if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
		    (kernel_stack_min < e64 && e64 <= kernel_stack_max))
			panic("pmap_remove() attempted in kernel stack");
	}
	/*
	 * The values of kernel_stack_min and kernel_stack_max are no longer
	 * relevant now that we allocate kernel stacks in the kernel map,
	 * so the old code above no longer applies.  If we wanted to check that
	 * we weren't removing a mapping of a page in a kernel stack we'd
	 * mark the PTE with an unused bit and check that here.
	 */
	deadline = rdtsc64() + max_preemption_latency_tsc;

	while (s64 < e64) {
		l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
		if (l64 > e64)
			l64 = e64;
		pde = pmap_pde(map, s64);

		if (pde && (*pde & INTEL_PTE_VALID)) {
			if (*pde & INTEL_PTE_PS) {
				/*
				 * If we're removing a superpage, pmap_remove_range()
				 * must work on level 2 instead of level 1; and we're
				 * only passing a single level 2 entry instead of a
				 * level 1 range.
				 */
				spte = pde;
				epte = spte+1; /* excluded */
			} else {
				spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];
			}
			pmap_remove_range(map, s64, spte, epte);
		}
		s64 = l64;

		if (s64 < e64 && rdtsc64() >= deadline) {
			deadline = rdtsc64() + max_preemption_latency_tsc;
		}
	}

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
		   map, 0, 0, 0, 0);
}
/*
 *	Routine:	pmap_page_protect
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
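/*
 * Added illustration (not original source text; behavior as implemented by
 * the switch on 'prot' below): pmap_page_protect(pn, VM_PROT_READ | VM_PROT_EXECUTE)
 * leaves the mappings in place but write-protects them, while
 * pmap_page_protect(pn, VM_PROT_NONE) removes every mapping of the page.
 */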
void
pmap_page_protect(
	ppnum_t		pn,
	vm_prot_t	prot)
{
	boolean_t		remove;
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	nexth;
	int			pvh_cnt = 0;
	pv_rooted_entry_t	pv_h;
	pv_rooted_entry_t	pv_e;
	pv_hashed_entry_t	pvh_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
		   pn, prot, 0, 0, 0);

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	case VM_PROT_ALL:
		return;		/* nothing to do */
	default:
		remove = TRUE;
		break;
	}
	pv_h = pai_to_pvh(pai);

	/*
	 * Walk down PV list, if any, changing or removing all mappings.
	 */
	if (pv_h->pmap == PMAP_NULL)
		goto done;

	pv_e = pv_h;
	pvh_e = (pv_hashed_entry_t) pv_e;	/* cheat */

	do {
		vm_map_offset_t vaddr;

		pmap = pv_e->pmap;
		vaddr = pv_e->va;
		pte = pmap_pte(pmap, vaddr);

		pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
		    "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);

		if (0 == pte) {
			panic("pmap_page_protect() "
			      "pmap=%p pn=0x%x vaddr=0x%llx\n",
			      pmap, pn, vaddr);
		}
		nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
		/*
		 * Remove the mapping if new protection is NONE
		 */
		if (remove) {
			/*
			 *	Remove the mapping, collecting dirty bits.
			 */
			pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID);

			/* Remove per-pmap wired count */
			if (iswired(*pte)) {
				OSAddAtomic(-1, &pmap->stats.wired_count);
			}

			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
			pmap_phys_attributes[pai] |=
				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_store_pte(pte, 0);

#if TESTING
			if (pmap->stats.resident_count < 1)
				panic("pmap_page_protect: resident_count");
#endif
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1, &pmap->stats.resident_count);

			/*
			 * Deal with the pv_rooted_entry.
			 */
			if (pv_e == pv_h) {
				/*
				 * Fix up head later.
				 */
				pv_h->pmap = PMAP_NULL;
			} else {
				/*
				 * Delete this entry.
				 */
				pv_hash_remove(pvh_e);
				pvh_e->qlink.next = (queue_entry_t) pvh_eh;
				pvh_eh = pvh_e;

				if (pvh_et == PV_HASHED_ENTRY_NULL)
					pvh_et = pvh_e;
				pvh_cnt++;
			}
		} else {
			/*
			 * Write-protect, after opportunistic refmod collect
			 */
			pmap_phys_attributes[pai] |=
				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE);
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
		}
		pvh_e = nexth;
	} while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
	/*
	 * If pv_head mapping was removed, fix it up.
	 */
	if (pv_h->pmap == PMAP_NULL) {
		pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);

		if (pvh_e != (pv_hashed_entry_t) pv_h) {
			pv_hash_remove(pvh_e);
			pv_h->pmap = pvh_e->pmap;
			pv_h->va = pvh_e->va;
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL)
				pvh_et = pvh_e;
			pvh_cnt++;
		}
	}
done:
	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}
/*
 *	Clear specified attribute bits.
 */
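/*
 * Added note (hedged, not original source text): this routine is typically
 * reached through thin wrappers elsewhere in the pmap layer, e.g. something
 * of the form
 *
 *	void pmap_clear_modify(ppnum_t pn)    { phys_attribute_clear(pn, PHYS_MODIFIED); }
 *	void pmap_clear_reference(ppnum_t pn) { phys_attribute_clear(pn, PHYS_REFERENCED); }
 *
 * (the wrapper names are assumptions here; only phys_attribute_clear() itself
 * is defined below).
 */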
void
phys_attribute_clear(
	ppnum_t		pn,
	int		bits)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	char			attributes = 0;
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
		   pn, bits, 0, 0, 0);

	pv_h = pai_to_pvh(pai);
	/*
	 * Walk down PV list, clearing all modify or reference bits.
	 * We do not have to lock the pv_list because we have
	 * the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		pv_e = (pv_hashed_entry_t)pv_h;

		do {
			vm_map_offset_t va;

			pmap = pv_e->pmap;
			va = pv_e->va;

			/*
			 * Clear modify and/or reference bits.
			 */
			pte = pmap_pte(pmap, va);
			attributes |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_update_pte(pte, *pte, (*pte & ~bits));
			/* Ensure all processors using this translation
			 * invalidate this TLB entry. The invalidation *must*
			 * follow the PTE update, to ensure that the TLB
			 * shadow of the 'D' bit (in particular) is
			 * synchronized with the updated PTE.
			 */
			PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while (pv_e != (pv_hashed_entry_t)pv_h);
	}
	/* Opportunistic refmod collection, annulled
	 * if both REF and MOD are being cleared.
	 */
	pmap_phys_attributes[pai] |= attributes;
	pmap_phys_attributes[pai] &= (~bits);

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}
/*
 *	Check specified attribute bits.
 */
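/*
 * Added note (hedged, not original source text): as with the clear routine
 * above, callers usually arrive via small predicates, e.g. something like
 *
 *	boolean_t pmap_is_modified(ppnum_t pn)   { return phys_attribute_test(pn, PHYS_MODIFIED) != 0; }
 *	boolean_t pmap_is_referenced(ppnum_t pn) { return phys_attribute_test(pn, PHYS_REFERENCED) != 0; }
 *
 * (wrapper names assumed; only phys_attribute_test() itself appears below).
 */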
int
phys_attribute_test(
	ppnum_t		pn,
	int		bits)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	int			attributes = 0;
!= vm_page_fictitious_addr
);
1332 if (pn
== vm_page_guard_addr
)
1335 pai
= ppn_to_pai(pn
);
1337 if (!IS_MANAGED_PAGE(pai
)) {
1339 * Not a managed page.
1345 * Fast check... if bits already collected
1346 * no need to take any locks...
1347 * if not set, we need to recheck after taking
1348 * the lock in case they got pulled in while
1349 * we were waiting for the lock
1351 if ((pmap_phys_attributes
[pai
] & bits
) == bits
)
1354 pv_h
= pai_to_pvh(pai
);
	attributes = pmap_phys_attributes[pai] & bits;
	/*
	 * Walk down PV list, checking the mappings until we
	 * reach the end or we've found the desired attributes.
	 */
	if (attributes != bits &&
	    pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		pv_e = (pv_hashed_entry_t)pv_h;

		do {
			vm_map_offset_t va;

			pmap = pv_e->pmap;
			va = pv_e->va;
			/*
			 * pick up modify and/or reference bits from mapping
			 */
			pte = pmap_pte(pmap, va);
			attributes |= (int)(*pte & bits);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while ((attributes != bits) &&
			 (pv_e != (pv_hashed_entry_t)pv_h));
	}
	pmap_phys_attributes[pai] |= attributes;

	return (attributes);
}
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *		The mapping must already exist in the pmap.
 */
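/*
 * Added illustration (hedged, not original source text): the VM wiring path
 * would typically call this once per page after faulting the page in, e.g.
 *
 *	pmap_change_wiring(map->pmap, vaddr, TRUE);	// wire
 *	...
 *	pmap_change_wiring(map->pmap, vaddr, FALSE);	// unwire
 *
 * (the caller shown is hypothetical; the routine itself is defined below and
 * requires that a mapping for vaddr already exists).
 */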
void
pmap_change_wiring(
	pmap_t		map,
	vm_map_offset_t	vaddr,
	boolean_t	wired)
{
	pt_entry_t	*pte;

	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
		panic("pmap_change_wiring: pte missing");

	if (wired && !iswired(*pte)) {
		/*
		 * wiring down mapping
		 */
		OSAddAtomic(+1, &map->stats.wired_count);
		pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED));
	}
	else if (!wired && iswired(*pte)) {
		/*
		 * unwiring mapping
		 */
		assert(map->stats.wired_count >= 1);
		OSAddAtomic(-1, &map->stats.wired_count);
		pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED));
	}
}