apple/xnu (blob cc584a9a11b85a3b198788c2da2129031e798d1f): osfmk/i386/pmap_x86_common.c
1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <vm/pmap.h>
29 #include <vm/vm_map.h>
30 #include <kern/ledger.h>
31 #include <i386/pmap_internal.h>
32
33 void pmap_remove_range(
34 pmap_t pmap,
35 vm_map_offset_t va,
36 pt_entry_t *spte,
37 pt_entry_t *epte);
38
39 void pmap_remove_range_options(
40 pmap_t pmap,
41 vm_map_offset_t va,
42 pt_entry_t *spte,
43 pt_entry_t *epte,
44 int options);
45
46 void pmap_reusable_range(
47 pmap_t pmap,
48 vm_map_offset_t va,
49 pt_entry_t *spte,
50 pt_entry_t *epte,
51 boolean_t reusable);
52
53 uint32_t pmap_update_clear_pte_count;
54
55 /*
56 * The Intel platform can nest at the PDE level, i.e. NBPDE (2MB) at a time,
57 * on an NBPDE boundary.
58 */
59
60 /* These symbols may be referenced directly by VM */
61 uint64_t pmap_nesting_size_min = NBPDE;
62 uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
63
64 /*
65 * kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
66 *
67 * grand = the pmap that we will nest subord into
68 * subord = the pmap that is nested into grand
69 * va_start = start of the range in grand at which subord is inserted
70 * nstart = start of the corresponding range in the nested (subord) pmap
71 * size = size of the nested area (up to 16TB)
72 *
73 * Inserts a pmap into another. This is used to implement shared segments.
74 *
75 * Note that we depend upon higher-level VM locks to ensure that things don't change while
76 * we are doing this. For example, the VM should not be doing any pmap enters while it is
77 * nesting, nor performing two nests at once.
78 */
79
80 /*
81 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
82 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
83 * container and the "grand" parent. A minor optimization to consider for the
84 * future: make the "subord" truly a container rather than a full-fledged
85 * pagetable hierarchy which can be unnecessarily sparse (DRK).
86 */
87
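/*
 * Illustrative call (hypothetical pmaps and range, not taken from this file):
 * the VM layer might nest a shared-region pmap into a task pmap as follows.
 * Both addresses and the size must be aligned to pmap_nesting_size_min
 * (NBPDE), and va_start must equal nstart:
 *
 *	kr = pmap_nest(task_pmap, shared_region_pmap,
 *	               va_start, va_start, nested_size);
 */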
88 kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
89 vm_map_offset_t vaddr, nvaddr;
90 pd_entry_t *pde,*npde;
91 unsigned int i;
92 uint64_t num_pde;
93
94 if ((size & (pmap_nesting_size_min-1)) ||
95 (va_start & (pmap_nesting_size_min-1)) ||
96 (nstart & (pmap_nesting_size_min-1)) ||
97 ((size >> 28) > 65536)) /* Max size we can nest is 16TB */
98 return KERN_INVALID_VALUE;
99
100 if(size == 0) {
101 panic("pmap_nest: size is invalid - %016llX\n", size);
102 }
103
104 if (va_start != nstart)
105 panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);
106
107 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
108 (uintptr_t) grand, (uintptr_t) subord,
109 (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);
110
111 nvaddr = (vm_map_offset_t)nstart;
112 num_pde = size >> PDESHIFT;
113
114 PMAP_LOCK(subord);
115
116 subord->pm_shared = TRUE;
117
118 for (i = 0; i < num_pde;) {
119 if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {
120
121 npde = pmap64_pdpt(subord, nvaddr);
122
123 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
124 PMAP_UNLOCK(subord);
125 pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
126 PMAP_LOCK(subord);
127 npde = pmap64_pdpt(subord, nvaddr);
128 }
129 *npde |= INTEL_PDPTE_NESTED;
130 nvaddr += NBPDPT;
131 i += (uint32_t)NPDEPG;
132 }
133 else {
134 npde = pmap_pde(subord, nvaddr);
135
136 while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
137 PMAP_UNLOCK(subord);
138 pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
139 PMAP_LOCK(subord);
140 npde = pmap_pde(subord, nvaddr);
141 }
142 nvaddr += NBPDE;
143 i++;
144 }
145 }
146
147 PMAP_UNLOCK(subord);
148
149 vaddr = (vm_map_offset_t)va_start;
150
151 PMAP_LOCK(grand);
152
153 for (i = 0;i < num_pde;) {
154 pd_entry_t tpde;
155
156 if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
157 npde = pmap64_pdpt(subord, vaddr);
158 if (npde == 0)
159 panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
160 tpde = *npde;
161 pde = pmap64_pdpt(grand, vaddr);
162 if (0 == pde) {
163 PMAP_UNLOCK(grand);
164 pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
165 PMAP_LOCK(grand);
166 pde = pmap64_pdpt(grand, vaddr);
167 }
168 if (pde == 0)
169 panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
170 pmap_store_pte(pde, tpde);
171 vaddr += NBPDPT;
172 i += (uint32_t) NPDEPG;
173 }
174 else {
175 npde = pmap_pde(subord, nstart);
176 if (npde == 0)
177 panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
178 tpde = *npde;
179 nstart += NBPDE;
180 pde = pmap_pde(grand, vaddr);
181 if ((0 == pde) && cpu_64bit) {
182 PMAP_UNLOCK(grand);
183 pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
184 PMAP_LOCK(grand);
185 pde = pmap_pde(grand, vaddr);
186 }
187
188 if (pde == 0)
189 panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
190 vaddr += NBPDE;
191 pmap_store_pte(pde, tpde);
192 i++;
193 }
194 }
195
196 PMAP_UNLOCK(grand);
197
198 PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
199
200 return KERN_SUCCESS;
201 }
202
203 /*
204 * kern_return_t pmap_unnest(grand, vaddr)
205 *
206 * grand = the pmap that we will un-nest subord from
207 * vaddr = start of range in pmap to be unnested
208 *
209 * Removes a pmap from another. This is used to implement shared segments.
210 */
211
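/*
 * Illustrative call (hypothetical values): undoing a nesting such as the one
 * sketched above, with the same alignment requirements on vaddr and size:
 *
 *	kr = pmap_unnest(task_pmap, va_start, nested_size);
 */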
212 kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
213
214 pd_entry_t *pde;
215 unsigned int i;
216 uint64_t num_pde;
217 addr64_t va_start, va_end;
218 uint64_t npdpt = PMAP_INVALID_PDPTNUM;
219
220 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
221 (uintptr_t) grand,
222 (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);
223
224 if ((size & (pmap_nesting_size_min-1)) ||
225 (vaddr & (pmap_nesting_size_min-1))) {
226 panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
227 grand, vaddr, size);
228 }
229
230 /* align everything to PDE boundaries */
231 va_start = vaddr & ~(NBPDE-1);
232 va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
233 size = va_end - va_start;
234
235 PMAP_LOCK(grand);
236
237 num_pde = size >> PDESHIFT;
238 vaddr = va_start;
239
240 for (i = 0; i < num_pde; ) {
241 if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
242 npdpt = pdptnum(grand, vaddr);
243 pde = pmap64_pdpt(grand, vaddr);
244 if (pde && (*pde & INTEL_PDPTE_NESTED)) {
245 pmap_store_pte(pde, (pd_entry_t)0);
246 i += (uint32_t) NPDEPG;
247 vaddr += NBPDPT;
248 continue;
249 }
250 }
251 pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
252 if (pde == 0)
253 panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
254 pmap_store_pte(pde, (pd_entry_t)0);
255 i++;
256 vaddr += NBPDE;
257 }
258
259 PMAP_UPDATE_TLBS(grand, va_start, va_end);
260
261 PMAP_UNLOCK(grand);
262
263 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
264
265 return KERN_SUCCESS;
266 }
267
268 /* Invoked by the Mach VM to determine the platform-specific unnest region */
269
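/*
 * If either end of [*s, *e) falls within a PDPT entry marked
 * INTEL_PDPTE_NESTED, the range is widened outward to the enclosing NBPDPT
 * (1GiB) boundaries and TRUE is returned so that whole nested PDPT entries
 * are unnested.
 */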
270 boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
271 pd_entry_t *pdpte;
272 boolean_t rval = FALSE;
273
274 if (!cpu_64bit)
275 return rval;
276
277 PMAP_LOCK(p);
278
279 pdpte = pmap64_pdpt(p, *s);
280 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
281 *s &= ~(NBPDPT -1);
282 rval = TRUE;
283 }
284
285 pdpte = pmap64_pdpt(p, *e);
286 if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
287 *e = ((*e + NBPDPT) & ~(NBPDPT -1));
288 rval = TRUE;
289 }
290
291 PMAP_UNLOCK(p);
292
293 return rval;
294 }
295
296 /*
297 * pmap_find_phys returns the (4K) physical page number containing a
298 * given virtual address in a given pmap.
299 * Note that pmap_pte may return a PDE if this virtual address is
300 * mapped by a large page; this is taken into account so that the
301 * correct page number is returned in that case.
302 */
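/*
 * Illustrative use (hypothetical variables): translating a kernel virtual
 * address to its physical page number; a return value of 0 means there is
 * no valid translation:
 *
 *	ppnum_t ppn = pmap_find_phys(kernel_pmap, (addr64_t)kva);
 */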
303 ppnum_t
304 pmap_find_phys(pmap_t pmap, addr64_t va)
305 {
306 pt_entry_t *ptp;
307 pd_entry_t *pdep;
308 ppnum_t ppn = 0;
309 pd_entry_t pde;
310 pt_entry_t pte;
311
312 mp_disable_preemption();
313
314 /* This refcount test is a band-aid--several infrastructural changes
315 * are necessary to eliminate invocation of this routine from arbitrary
316 * contexts.
317 */
318
319 if (!pmap->ref_count)
320 goto pfp_exit;
321
322 pdep = pmap_pde(pmap, va);
323
324 if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
325 if (pde & INTEL_PTE_PS) {
326 ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
327 ppn += (ppnum_t) ptenum(va);
328 }
329 else {
330 ptp = pmap_pte(pmap, va);
331 if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
332 ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
333 }
334 }
335 }
336 pfp_exit:
337 mp_enable_preemption();
338
339 return ppn;
340 }
341
342 /*
343 * Update cache attributes for all extant managed mappings.
344 * Assumes PV for this page is locked, and that the page
345 * is managed.
346 */
347
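/*
 * The update walks the PV list rooted at pai_to_pvh(pn), rewrites the
 * cacheability bits (PHYS_CACHEABILITY_MASK) in each mapping's PTE, and
 * issues a TLB shootdown for each mapping.
 */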
348 void
349 pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
350 pv_rooted_entry_t pv_h, pv_e;
351 pv_hashed_entry_t pvh_e, nexth;
352 vm_map_offset_t vaddr;
353 pmap_t pmap;
354 pt_entry_t *ptep;
355
356 assert(IS_MANAGED_PAGE(pn));
357
358 pv_h = pai_to_pvh(pn);
359 /* TODO: translate the PHYS_* bits to PTE bits; while they're
360 * currently identical, they may not remain so.
361 * Potential optimizations (here and in page_protect):
362 * parallel shootdowns, and checking for redundant
363 * attribute modifications.
364 */
365
366 /*
367 * Alter attributes on all mappings
368 */
369 if (pv_h->pmap != PMAP_NULL) {
370 pv_e = pv_h;
371 pvh_e = (pv_hashed_entry_t)pv_e;
372
373 do {
374 pmap = pv_e->pmap;
375 vaddr = pv_e->va;
376 ptep = pmap_pte(pmap, vaddr);
377
378 if (0 == ptep)
379 panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
380
381 nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
382 pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
383 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
384 pvh_e = nexth;
385 } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
386 }
387 }
388
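/*
 * With dofilter TRUE, the current CPU's CR3 is marked inactive so that TLB
 * coherency (shootdown) interrupts are filtered for it; with FALSE, CR3 is
 * marked active again and any invalidation posted while filtered is
 * processed immediately.
 */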
389 void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
390 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
391
392 if (dofilter) {
393 CPU_CR3_MARK_INACTIVE();
394 } else {
395 CPU_CR3_MARK_ACTIVE();
396 mfence();
397 if (current_cpu_datap()->cpu_tlb_invalid)
398 process_pmap_updates();
399 }
400 }
401
402
403 /*
404 * Insert the given physical page (p) at
405 * the specified virtual address (v) in the
406 * target physical map with the protection requested.
407 *
408 * If specified, the page will be wired down, meaning
409 * that the related pte cannot be reclaimed.
410 *
411 * NB: This is the only routine which MAY NOT lazy-evaluate
412 * or lose information. That is, this routine must actually
413 * insert this page into the given map NOW.
414 */
415
416 void
417 pmap_enter(
418 register pmap_t pmap,
419 vm_map_offset_t vaddr,
420 ppnum_t pn,
421 vm_prot_t prot,
422 vm_prot_t fault_type,
423 unsigned int flags,
424 boolean_t wired)
425 {
426 (void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
427 }
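/*
 * Illustrative call (hypothetical values): entering a single writable, wired
 * kernel mapping through the wrapper above:
 *
 *	pmap_enter(kernel_pmap, vaddr, pn,
 *	           VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, TRUE);
 */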
428
429
430 kern_return_t
431 pmap_enter_options(
432 register pmap_t pmap,
433 vm_map_offset_t vaddr,
434 ppnum_t pn,
435 vm_prot_t prot,
436 __unused vm_prot_t fault_type,
437 unsigned int flags,
438 boolean_t wired,
439 unsigned int options,
440 void *arg)
441 {
442 pt_entry_t *pte;
443 pv_rooted_entry_t pv_h;
444 ppnum_t pai;
445 pv_hashed_entry_t pvh_e;
446 pv_hashed_entry_t pvh_new;
447 pt_entry_t template;
448 pmap_paddr_t old_pa;
449 pmap_paddr_t pa = (pmap_paddr_t) i386_ptob(pn);
450 boolean_t need_tlbflush = FALSE;
451 boolean_t set_NX;
452 char oattr;
453 boolean_t old_pa_locked;
454 /* 2MiB mappings are confined to x86_64 by VM */
455 boolean_t superpage = flags & VM_MEM_SUPERPAGE;
456 vm_object_t delpage_pm_obj = NULL;
457 int delpage_pde_index = 0;
458 pt_entry_t old_pte;
459 kern_return_t kr_expand;
460
461 pmap_intr_assert();
462
463 if (pmap == PMAP_NULL)
464 return KERN_INVALID_ARGUMENT;
465
466 /* N.B. We can be supplied a zero page frame in the NOENTER case; it's an
467 * unused value in that scenario.
468 */
469 assert(pn != vm_page_fictitious_addr);
470
471 if (pn == vm_page_guard_addr)
472 return KERN_INVALID_ARGUMENT;
473
474 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
475 pmap,
476 (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
477 pn, prot);
478
479 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
480 set_NX = FALSE;
481 else
482 set_NX = TRUE;
483
484 if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
485 set_NX = FALSE;
486 }
487
488 /*
489 * Must allocate a new pvlist entry while we're unlocked;
490 * zalloc may cause pageout (which will lock the pmap system).
491 * If we determine we need a pvlist entry, we will unlock
492 * and allocate one. Then we will retry, throwing away
493 * the allocated entry later (if we no longer need it).
494 */
495
496 pvh_new = PV_HASHED_ENTRY_NULL;
497 Retry:
498 pvh_e = PV_HASHED_ENTRY_NULL;
499
500 PMAP_LOCK(pmap);
501
502 /*
503 * Expand pmap to include this pte. Assume that
504 * pmap is always expanded to include enough hardware
505 * pages to map one VM page.
506 */
507 if(superpage) {
508 while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
509 /* need room for another pde entry */
510 PMAP_UNLOCK(pmap);
511 kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
512 if (kr_expand != KERN_SUCCESS)
513 return kr_expand;
514 PMAP_LOCK(pmap);
515 }
516 } else {
517 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
518 /*
519 * Must unlock to expand the pmap
520 * going to grow pde level page(s)
521 */
522 PMAP_UNLOCK(pmap);
523 kr_expand = pmap_expand(pmap, vaddr, options);
524 if (kr_expand != KERN_SUCCESS)
525 return kr_expand;
526 PMAP_LOCK(pmap);
527 }
528 }
529 if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
530 PMAP_UNLOCK(pmap);
531 return KERN_SUCCESS;
532 }
533
534 if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
535 /*
536 * There is still an empty page table mapped that
537 * was used for a previous base page mapping.
538 * Remember the PDE and the PDE index, so that we
539 * can free the page at the end of this function.
540 */
541 delpage_pde_index = (int)pdeidx(pmap, vaddr);
542 delpage_pm_obj = pmap->pm_obj;
543 *pte = 0;
544 }
545
546 old_pa = pte_to_pa(*pte);
547 pai = pa_index(old_pa);
548 old_pa_locked = FALSE;
549
550 if (old_pa == 0 &&
551 (*pte & INTEL_PTE_COMPRESSED)) {
552 /* one less "compressed" */
553 OSAddAtomic64(-1, &pmap->stats.compressed);
554 /* marker will be cleared below */
555 }
556
557 /*
558 * If we have a previous managed page, lock the pv entry now. After
559 * we lock it, check whether someone beat us to the lock and, if so,
560 * drop the lock.
561 */
562 if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
563 LOCK_PVH(pai);
564 old_pa_locked = TRUE;
565 old_pa = pte_to_pa(*pte);
566 if (0 == old_pa) {
567 UNLOCK_PVH(pai); /* another path beat us to it */
568 old_pa_locked = FALSE;
569 }
570 }
571
572 /*
573 * Special case if the incoming physical page is already mapped
574 * at this address.
575 */
576 if (old_pa == pa) {
577 pt_entry_t old_attributes =
578 *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);
579
580 /*
581 * May be changing its wired attribute or protection
582 */
583
584 template = pa_to_pte(pa) | INTEL_PTE_VALID;
585 template |= pmap_get_cache_attributes(pa_index(pa));
586
587 if (VM_MEM_NOT_CACHEABLE ==
588 (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
589 if (!(flags & VM_MEM_GUARDED))
590 template |= INTEL_PTE_PTA;
591 template |= INTEL_PTE_NCACHE;
592 }
593 if (pmap != kernel_pmap)
594 template |= INTEL_PTE_USER;
595 if (prot & VM_PROT_WRITE) {
596 template |= INTEL_PTE_WRITE;
597 }
598
599 if (set_NX)
600 template |= INTEL_PTE_NX;
601
602 if (wired) {
603 template |= INTEL_PTE_WIRED;
604 if (!iswired(old_attributes)) {
605 OSAddAtomic(+1, &pmap->stats.wired_count);
606 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
607 }
608 } else {
609 if (iswired(old_attributes)) {
610 assert(pmap->stats.wired_count >= 1);
611 OSAddAtomic(-1, &pmap->stats.wired_count);
612 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
613 }
614 }
615 if (superpage) /* this path cannot be used */
616 template |= INTEL_PTE_PS; /* to change the page size! */
617
618 if (old_attributes == template)
619 goto dont_update_pte;
620
621 /* Determine delta, PV locked */
622 need_tlbflush =
623 ((old_attributes ^ template) != INTEL_PTE_WIRED);
624
625 if (need_tlbflush == TRUE && !(old_attributes & INTEL_PTE_WRITE)) {
626 if ((old_attributes ^ template) == INTEL_PTE_WRITE)
627 need_tlbflush = FALSE;
628 }
629
630 /* store modified PTE and preserve RC bits */
631 pt_entry_t npte, opte;
632 do {
633 opte = *pte;
634 npte = template | (opte & (INTEL_PTE_REF | INTEL_PTE_MOD));
635 } while (!pmap_cmpx_pte(pte, opte, npte));
636 dont_update_pte:
637 if (old_pa_locked) {
638 UNLOCK_PVH(pai);
639 old_pa_locked = FALSE;
640 }
641 goto Done;
642 }
643
644 /*
645 * Outline of code from here:
646 * 1) If va was mapped, update TLBs, remove the mapping
647 * and remove old pvlist entry.
648 * 2) Add pvlist entry for new mapping
649 * 3) Enter new mapping.
650 *
651 * If the old physical page is not managed step 1) is skipped
652 * (except for updating the TLBs), and the mapping is
653 * overwritten at step 3). If the new physical page is not
654 * managed, step 2) is skipped.
655 */
656
657 if (old_pa != (pmap_paddr_t) 0) {
658
659 /*
660 * Don't do anything to pages outside valid memory here.
661 * Instead convince the code that enters a new mapping
662 * to overwrite the old one.
663 */
664
665 /* invalidate the PTE */
666 pmap_update_pte(pte, INTEL_PTE_VALID, 0);
667 /* propagate invalidate everywhere */
668 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
669 /* remember reference and change */
670 old_pte = *pte;
671 oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
672 /* completely invalidate the PTE */
673 pmap_store_pte(pte, 0);
674
675 if (IS_MANAGED_PAGE(pai)) {
676 pmap_assert(old_pa_locked == TRUE);
677 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
678 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
679 assert(pmap->stats.resident_count >= 1);
680 OSAddAtomic(-1, &pmap->stats.resident_count);
681 if (pmap != kernel_pmap) {
682 if (IS_REUSABLE_PAGE(pai)) {
683 assert(pmap->stats.reusable > 0);
684 OSAddAtomic(-1, &pmap->stats.reusable);
685 } else if (IS_INTERNAL_PAGE(pai)) {
686 assert(pmap->stats.internal > 0);
687 OSAddAtomic(-1, &pmap->stats.internal);
688 } else {
689 assert(pmap->stats.external > 0);
690 OSAddAtomic(-1, &pmap->stats.external);
691 }
692 }
693 if (iswired(*pte)) {
694 assert(pmap->stats.wired_count >= 1);
695 OSAddAtomic(-1, &pmap->stats.wired_count);
696 pmap_ledger_debit(pmap, task_ledgers.wired_mem,
697 PAGE_SIZE);
698 }
699 pmap_phys_attributes[pai] |= oattr;
700
701 /*
702 * Remove the mapping from the pvlist for
703 * this physical page.
704 * We'll end up with either a rooted pv or a
705 * hashed pv
706 */
707 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);
708
709 } else {
710
711 /*
712 * old_pa is not managed.
713 * Do removal part of accounting.
714 */
715
716 if (pmap != kernel_pmap) {
717 #if 00
718 assert(pmap->stats.device > 0);
719 OSAddAtomic(-1, &pmap->stats.device);
720 #endif
721 }
722 if (iswired(*pte)) {
723 assert(pmap->stats.wired_count >= 1);
724 OSAddAtomic(-1, &pmap->stats.wired_count);
725 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
726 }
727 }
728 }
729
730 /*
731 * if we had a previously managed page locked, unlock it now
732 */
733 if (old_pa_locked) {
734 UNLOCK_PVH(pai);
735 old_pa_locked = FALSE;
736 }
737
738 pai = pa_index(pa); /* now working with new incoming phys page */
739 if (IS_MANAGED_PAGE(pai)) {
740
741 /*
742 * Step 2) Enter the mapping in the PV list for this
743 * physical page.
744 */
745 pv_h = pai_to_pvh(pai);
746
747 LOCK_PVH(pai);
748
749 if (pv_h->pmap == PMAP_NULL) {
750 /*
751 * No mappings yet, use rooted pv
752 */
753 pv_h->va = vaddr;
754 pv_h->pmap = pmap;
755 queue_init(&pv_h->qlink);
756
757 if (options & PMAP_OPTIONS_INTERNAL) {
758 pmap_phys_attributes[pai] |= PHYS_INTERNAL;
759 } else {
760 pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
761 }
762 if (options & PMAP_OPTIONS_REUSABLE) {
763 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
764 } else {
765 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
766 }
767 } else {
768 /*
769 * Add new pv_hashed_entry after header.
770 */
771 if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
772 pvh_e = pvh_new;
773 pvh_new = PV_HASHED_ENTRY_NULL;
774 } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
775 PV_HASHED_ALLOC(&pvh_e);
776 if (PV_HASHED_ENTRY_NULL == pvh_e) {
777 /*
778 * The pv free list is empty. If we are on
779 * the kernel pmap we'll use one of
780 * the special private kernel pv_e's;
781 * otherwise, we need to unlock
782 * everything, zalloc a pv_e, and
783 * restart, bringing the new pv_e in
784 * with us.
785 */
786 if (kernel_pmap == pmap) {
787 PV_HASHED_KERN_ALLOC(&pvh_e);
788 } else {
789 UNLOCK_PVH(pai);
790 PMAP_UNLOCK(pmap);
791 pmap_pv_throttle(pmap);
792 pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
793 goto Retry;
794 }
795 }
796 }
797
798 if (PV_HASHED_ENTRY_NULL == pvh_e)
799 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
800
801 pvh_e->va = vaddr;
802 pvh_e->pmap = pmap;
803 pvh_e->ppn = pn;
804 pv_hash_add(pvh_e, pv_h);
805
806 /*
807 * Remember that we used the pvlist entry.
808 */
809 pvh_e = PV_HASHED_ENTRY_NULL;
810 }
811
812 /*
813 * only count the mapping
814 * for 'managed memory'
815 */
816 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
817 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
818 OSAddAtomic(+1, &pmap->stats.resident_count);
819 if (pmap->stats.resident_count > pmap->stats.resident_max) {
820 pmap->stats.resident_max = pmap->stats.resident_count;
821 }
822 if (pmap != kernel_pmap) {
823 if (IS_REUSABLE_PAGE(pai)) {
824 OSAddAtomic(+1, &pmap->stats.reusable);
825 PMAP_STATS_PEAK(pmap->stats.reusable);
826 } else if (IS_INTERNAL_PAGE(pai)) {
827 OSAddAtomic(+1, &pmap->stats.internal);
828 PMAP_STATS_PEAK(pmap->stats.internal);
829 } else {
830 OSAddAtomic(+1, &pmap->stats.external);
831 PMAP_STATS_PEAK(pmap->stats.external);
832 }
833 }
834 } else if (last_managed_page == 0) {
835 /* Account for early mappings created before "managed pages"
836 * are determined. Consider consulting the available DRAM map.
837 */
838 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
839 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
840 OSAddAtomic(+1, &pmap->stats.resident_count);
841 if (pmap != kernel_pmap) {
842 #if 00
843 OSAddAtomic(+1, &pmap->stats.device);
844 PMAP_STATS_PEAK(pmap->stats.device);
845 #endif
846 }
847 }
848 /*
849 * Step 3) Enter the mapping.
850 *
851 * Build a template to speed up entering -
852 * only the pfn changes.
853 */
854 template = pa_to_pte(pa) | INTEL_PTE_VALID;
855 /*
856 * DRK: It may be worth asserting on cache attribute flags that diverge
857 * from the existing physical page attributes.
858 */
859
860 template |= pmap_get_cache_attributes(pa_index(pa));
861
862 if (flags & VM_MEM_NOT_CACHEABLE) {
863 if (!(flags & VM_MEM_GUARDED))
864 template |= INTEL_PTE_PTA;
865 template |= INTEL_PTE_NCACHE;
866 }
867 if (pmap != kernel_pmap)
868 template |= INTEL_PTE_USER;
869 if (prot & VM_PROT_WRITE)
870 template |= INTEL_PTE_WRITE;
871 if (set_NX)
872 template |= INTEL_PTE_NX;
873 if (wired) {
874 template |= INTEL_PTE_WIRED;
875 OSAddAtomic(+1, & pmap->stats.wired_count);
876 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
877 }
878 if (superpage)
879 template |= INTEL_PTE_PS;
880 pmap_store_pte(pte, template);
881
882 /*
883 * if this was a managed page we delayed unlocking the pv until here
884 * to prevent pmap_page_protect et al from finding it until the pte
885 * has been stored
886 */
887 if (IS_MANAGED_PAGE(pai)) {
888 UNLOCK_PVH(pai);
889 }
890 Done:
891 if (need_tlbflush == TRUE) {
892 if (options & PMAP_OPTIONS_NOFLUSH)
893 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
894 else
895 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
896 }
897 if (pvh_e != PV_HASHED_ENTRY_NULL) {
898 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
899 }
900 if (pvh_new != PV_HASHED_ENTRY_NULL) {
901 PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
902 }
903 PMAP_UNLOCK(pmap);
904
905 if (delpage_pm_obj) {
906 vm_page_t m;
907
908 vm_object_lock(delpage_pm_obj);
909 m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
910 if (m == VM_PAGE_NULL)
911 panic("pmap_enter: pte page not in object");
912 vm_object_unlock(delpage_pm_obj);
913 VM_PAGE_FREE(m);
914 OSAddAtomic(-1, &inuse_ptepages_count);
915 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
916 }
917
918 PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
919 return KERN_SUCCESS;
920 }
921
922 /*
923 * Remove a range of hardware page-table entries.
924 * The entries given are the first (inclusive)
925 * and last (exclusive) entries for the VM pages.
926 * The virtual address is the va for the first pte.
927 *
928 * The pmap must be locked.
929 * If the pmap is not the kernel pmap, the range must lie
930 * entirely within one pte-page. This is NOT checked.
931 * Assumes that the pte-page exists.
932 */
933
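/*
 * pmap_remove_range_options() below proceeds in two passes: the first pass
 * clears INTEL_PTE_VALID in each PTE to "freeze" it and is followed by a
 * single TLB shootdown for the whole range; the second pass collects the
 * referenced/modified bits, removes the PV list entries, and zeroes the
 * PTEs.
 */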
934 void
935 pmap_remove_range(
936 pmap_t pmap,
937 vm_map_offset_t start_vaddr,
938 pt_entry_t *spte,
939 pt_entry_t *epte)
940 {
941 pmap_remove_range_options(pmap, start_vaddr, spte, epte, 0);
942 }
943
944 void
945 pmap_remove_range_options(
946 pmap_t pmap,
947 vm_map_offset_t start_vaddr,
948 pt_entry_t *spte,
949 pt_entry_t *epte,
950 int options)
951 {
952 pt_entry_t *cpte;
953 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
954 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
955 pv_hashed_entry_t pvh_e;
956 int pvh_cnt = 0;
957 int num_removed, num_unwired, num_found, num_invalid;
958 int num_device, num_external, num_internal, num_reusable;
959 uint64_t num_compressed;
960 ppnum_t pai;
961 pmap_paddr_t pa;
962 vm_map_offset_t vaddr;
963
964 num_removed = 0;
965 num_unwired = 0;
966 num_found = 0;
967 num_invalid = 0;
968 num_device = 0;
969 num_external = 0;
970 num_internal = 0;
971 num_reusable = 0;
972 num_compressed = 0;
973 /* invalidate the PTEs first to "freeze" them */
974 for (cpte = spte, vaddr = start_vaddr;
975 cpte < epte;
976 cpte++, vaddr += PAGE_SIZE_64) {
977 pt_entry_t p = *cpte;
978
979 pa = pte_to_pa(p);
980 if (pa == 0) {
981 if (pmap != kernel_pmap &&
982 (options & PMAP_OPTIONS_REMOVE) &&
983 (p & INTEL_PTE_COMPRESSED)) {
984 /* one less "compressed" */
985 num_compressed++;
986 /* clear marker */
987 /* XXX probably does not need to be atomic! */
988 pmap_update_pte(cpte, INTEL_PTE_COMPRESSED, 0);
989 }
990 continue;
991 }
992 num_found++;
993
994 if (iswired(p))
995 num_unwired++;
996
997 pai = pa_index(pa);
998
999 if (!IS_MANAGED_PAGE(pai)) {
1000 /*
1001 * Outside range of managed physical memory.
1002 * Just remove the mappings.
1003 */
1004 pmap_store_pte(cpte, 0);
1005 num_device++;
1006 continue;
1007 }
1008
1009 if ((p & INTEL_PTE_VALID) == 0)
1010 num_invalid++;
1011
1012 /* invalidate the PTE */
1013 pmap_update_pte(cpte, INTEL_PTE_VALID, 0);
1014 }
1015
1016 if (num_found == 0) {
1017 /* nothing was changed: we're done */
1018 goto update_counts;
1019 }
1020
1021 /* propagate the invalidates to other CPUs */
1022
1023 PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
1024
1025 for (cpte = spte, vaddr = start_vaddr;
1026 cpte < epte;
1027 cpte++, vaddr += PAGE_SIZE_64) {
1028
1029 pa = pte_to_pa(*cpte);
1030 if (pa == 0)
1031 continue;
1032
1033 pai = pa_index(pa);
1034
1035 LOCK_PVH(pai);
1036
1037 pa = pte_to_pa(*cpte);
1038 if (pa == 0) {
1039 UNLOCK_PVH(pai);
1040 continue;
1041 }
1042 num_removed++;
1043 if (IS_REUSABLE_PAGE(pai)) {
1044 num_reusable++;
1045 } else if (IS_INTERNAL_PAGE(pai)) {
1046 num_internal++;
1047 } else {
1048 num_external++;
1049 }
1050
1051 /*
1052 * Get the modify and reference bits, then
1053 * nuke the entry in the page table
1054 */
1055 /* remember reference and change */
1056 pmap_phys_attributes[pai] |=
1057 (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
1058
1059 /*
1060 * Remove the mapping from the pvlist for this physical page.
1061 */
1062 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);
1063
1064 /* completely invalidate the PTE */
1065 pmap_store_pte(cpte, 0);
1066
1067 UNLOCK_PVH(pai);
1068
1069 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1070 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1071 pvh_eh = pvh_e;
1072
1073 if (pvh_et == PV_HASHED_ENTRY_NULL) {
1074 pvh_et = pvh_e;
1075 }
1076 pvh_cnt++;
1077 }
1078 } /* for loop */
1079
1080 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1081 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1082 }
1083 update_counts:
1084 /*
1085 * Update the counts
1086 */
1087 #if TESTING
1088 if (pmap->stats.resident_count < num_removed)
1089 panic("pmap_remove_range: resident_count");
1090 #endif
1091 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
1092 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(num_removed));
1093 assert(pmap->stats.resident_count >= num_removed);
1094 OSAddAtomic(-num_removed, &pmap->stats.resident_count);
1095
1096 if (pmap != kernel_pmap) {
1097 #if 00
1098 assert(pmap->stats.device >= num_device);
1099 if (num_device)
1100 OSAddAtomic(-num_device, &pmap->stats.device);
1101 #endif /* 00 */
1102 assert(pmap->stats.external >= num_external);
1103 if (num_external)
1104 OSAddAtomic(-num_external, &pmap->stats.external);
1105 assert(pmap->stats.internal >= num_internal);
1106 if (num_internal)
1107 OSAddAtomic(-num_internal, &pmap->stats.internal);
1108 assert(pmap->stats.reusable >= num_reusable);
1109 if (num_reusable)
1110 OSAddAtomic(-num_reusable, &pmap->stats.reusable);
1111 assert(pmap->stats.compressed >= num_compressed);
1112 if (num_compressed)
1113 OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
1114 }
1115
1116 #if TESTING
1117 if (pmap->stats.wired_count < num_unwired)
1118 panic("pmap_remove_range: wired_count");
1119 #endif
1120 assert(pmap->stats.wired_count >= num_unwired);
1121 OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
1122 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
1123
1124 return;
1125 }
1126
1127
1128 /*
1129 * Remove the given range of addresses
1130 * from the specified map.
1131 *
1132 * It is assumed that the start and end are properly
1133 * rounded to the hardware page size.
1134 */
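/*
 * Illustrative call (hypothetical, page-aligned bounds): removing all
 * mappings in a range from a user pmap:
 *
 *	pmap_remove(user_pmap, start, end);
 */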
1135 void
1136 pmap_remove(
1137 pmap_t map,
1138 addr64_t s64,
1139 addr64_t e64)
1140 {
1141 pmap_remove_options(map, s64, e64, 0);
1142 }
1143
1144 void
1145 pmap_remove_options(
1146 pmap_t map,
1147 addr64_t s64,
1148 addr64_t e64,
1149 int options)
1150 {
1151 pt_entry_t *pde;
1152 pt_entry_t *spte, *epte;
1153 addr64_t l64;
1154 uint64_t deadline;
1155
1156 pmap_intr_assert();
1157
1158 if (map == PMAP_NULL || s64 == e64)
1159 return;
1160
1161 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
1162 map,
1163 (uint32_t) (s64 >> 32), s64,
1164 (uint32_t) (e64 >> 32), e64);
1165
1166
1167 PMAP_LOCK(map);
1168
1169 #if 0
1170 /*
1171 * Check that address range in the kernel does not overlap the stacks.
1172 * We initialize local static min/max variables once to avoid making
1173 * 2 function calls for every remove. Note also that these functions
1174 * both return 0 before kernel stacks have been initialized, and hence
1175 * the panic is not triggered in this case.
1176 */
1177 if (map == kernel_pmap) {
1178 static vm_offset_t kernel_stack_min = 0;
1179 static vm_offset_t kernel_stack_max = 0;
1180
1181 if (kernel_stack_min == 0) {
1182 kernel_stack_min = min_valid_stack_address();
1183 kernel_stack_max = max_valid_stack_address();
1184 }
1185 if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
1186 (kernel_stack_min < e64 && e64 <= kernel_stack_max))
1187 panic("pmap_remove() attempted in kernel stack");
1188 }
1189 #else
1190
1191 /*
1192 * The values of kernel_stack_min and kernel_stack_max are no longer
1193 * relevant now that we allocate kernel stacks in the kernel map,
1194 * so the old code above no longer applies. If we wanted to check that
1195 * we weren't removing a mapping of a page in a kernel stack we'd
1196 * mark the PTE with an unused bit and check that here.
1197 */
1198
1199 #endif
1200
1201 deadline = rdtsc64() + max_preemption_latency_tsc;
1202
1203 while (s64 < e64) {
1204 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
1205 if (l64 > e64)
1206 l64 = e64;
1207 pde = pmap_pde(map, s64);
1208
1209 if (pde && (*pde & INTEL_PTE_VALID)) {
1210 if (*pde & INTEL_PTE_PS) {
1211 /*
1212 * If we're removing a superpage, pmap_remove_range()
1213 * must work on level 2 instead of level 1; and we're
1214 * only passing a single level 2 entry instead of a
1215 * level 1 range.
1216 */
1217 spte = pde;
1218 epte = spte+1; /* excluded */
1219 } else {
1220 spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
1221 spte = &spte[ptenum(s64)];
1222 epte = &spte[intel_btop(l64 - s64)];
1223 }
1224 pmap_remove_range_options(map, s64, spte, epte,
1225 options);
1226 }
1227 s64 = l64;
1228
1229 if (s64 < e64 && rdtsc64() >= deadline) {
1230 PMAP_UNLOCK(map);
1231 PMAP_LOCK(map);
1232 deadline = rdtsc64() + max_preemption_latency_tsc;
1233 }
1234 }
1235
1236 PMAP_UNLOCK(map);
1237
1238 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
1239 map, 0, 0, 0, 0);
1240
1241 }
1242
1243 void
1244 pmap_page_protect(
1245 ppnum_t pn,
1246 vm_prot_t prot)
1247 {
1248 pmap_page_protect_options(pn, prot, 0, NULL);
1249 }
1250
1251 /*
1252 * Routine: pmap_page_protect_options
1253 *
1254 * Function:
1255 * Lower the permission for all mappings to a given
1256 * page.
1257 */
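/*
 * Illustrative calls (hypothetical page number): per the protection switch
 * below, VM_PROT_READ write-protects every mapping of the page, while
 * VM_PROT_NONE removes every mapping:
 *
 *	pmap_page_protect(pn, VM_PROT_READ);
 *	pmap_page_protect(pn, VM_PROT_NONE);
 */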
1258 void
1259 pmap_page_protect_options(
1260 ppnum_t pn,
1261 vm_prot_t prot,
1262 unsigned int options,
1263 void *arg)
1264 {
1265 pv_hashed_entry_t pvh_eh = PV_HASHED_ENTRY_NULL;
1266 pv_hashed_entry_t pvh_et = PV_HASHED_ENTRY_NULL;
1267 pv_hashed_entry_t nexth;
1268 int pvh_cnt = 0;
1269 pv_rooted_entry_t pv_h;
1270 pv_rooted_entry_t pv_e;
1271 pv_hashed_entry_t pvh_e;
1272 pt_entry_t *pte;
1273 int pai;
1274 pmap_t pmap;
1275 boolean_t remove;
1276 pt_entry_t new_pte_value;
1277
1278 pmap_intr_assert();
1279 assert(pn != vm_page_fictitious_addr);
1280 if (pn == vm_page_guard_addr)
1281 return;
1282
1283 pai = ppn_to_pai(pn);
1284
1285 if (!IS_MANAGED_PAGE(pai)) {
1286 /*
1287 * Not a managed page.
1288 */
1289 return;
1290 }
1291 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
1292 pn, prot, 0, 0, 0);
1293
1294 /*
1295 * Determine the new protection.
1296 */
1297 switch (prot) {
1298 case VM_PROT_READ:
1299 case VM_PROT_READ | VM_PROT_EXECUTE:
1300 remove = FALSE;
1301 break;
1302 case VM_PROT_ALL:
1303 return; /* nothing to do */
1304 default:
1305 remove = TRUE;
1306 break;
1307 }
1308
1309 pv_h = pai_to_pvh(pai);
1310
1311 LOCK_PVH(pai);
1312
1313
1314 /*
1315 * Walk down PV list, if any, changing or removing all mappings.
1316 */
1317 if (pv_h->pmap == PMAP_NULL)
1318 goto done;
1319
1320 pv_e = pv_h;
1321 pvh_e = (pv_hashed_entry_t) pv_e; /* cheat */
1322
1323 do {
1324 vm_map_offset_t vaddr;
1325
1326 pmap = pv_e->pmap;
1327 vaddr = pv_e->va;
1328 pte = pmap_pte(pmap, vaddr);
1329
1330 pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
1331 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
1332
1333 if (0 == pte) {
1334 panic("pmap_page_protect() "
1335 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1336 pmap, pn, vaddr);
1337 }
1338 nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
1339
1340 /*
1341 * Remove the mapping if new protection is NONE
1342 */
1343 if (remove) {
1344
1345 /* Remove per-pmap wired count */
1346 if (iswired(*pte)) {
1347 OSAddAtomic(-1, &pmap->stats.wired_count);
1348 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1349 }
1350
1351 if (pmap != kernel_pmap &&
1352 (options & PMAP_OPTIONS_COMPRESSOR) &&
1353 IS_INTERNAL_PAGE(pai)) {
1354 /* adjust "reclaimed" stats */
1355 OSAddAtomic64(+1, &pmap->stats.compressed);
1356 PMAP_STATS_PEAK(pmap->stats.compressed);
1357 pmap->stats.compressed_lifetime++;
1358 /* mark this PTE as having been "reclaimed" */
1359 new_pte_value = INTEL_PTE_COMPRESSED;
1360 } else {
1361 new_pte_value = 0;
1362 }
1363
1364 if (options & PMAP_OPTIONS_NOREFMOD) {
1365 pmap_store_pte(pte, new_pte_value);
1366
1367 if (options & PMAP_OPTIONS_NOFLUSH)
1368 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1369 else
1370 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1371 } else {
1372 /*
1373 * Remove the mapping, collecting dirty bits.
1374 */
1375 pmap_update_pte(pte, INTEL_PTE_VALID, 0);
1376
1377 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1378 pmap_phys_attributes[pai] |=
1379 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1380 pmap_store_pte(pte, new_pte_value);
1381 }
1382 #if TESTING
1383 if (pmap->stats.resident_count < 1)
1384 panic("pmap_page_protect: resident_count");
1385 #endif
1386 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1387 assert(pmap->stats.resident_count >= 1);
1388 OSAddAtomic(-1, &pmap->stats.resident_count);
1389 if (options & PMAP_OPTIONS_COMPRESSOR) {
1390 /*
1391 * This removal is only being done so we can send this page to
1392 * the compressor; therefore it mustn't affect total task footprint.
1393 */
1394 pmap_ledger_credit(pmap, task_ledgers.phys_compressed, PAGE_SIZE);
1395 } else {
1396 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1397 }
1398
1399 if (pmap != kernel_pmap) {
1400 if (IS_REUSABLE_PAGE(pai)) {
1401 assert(pmap->stats.reusable > 0);
1402 OSAddAtomic(-1, &pmap->stats.reusable);
1403 } else if (IS_INTERNAL_PAGE(pai)) {
1404 assert(pmap->stats.internal > 0);
1405 OSAddAtomic(-1, &pmap->stats.internal);
1406 } else {
1407 assert(pmap->stats.external > 0);
1408 OSAddAtomic(-1, &pmap->stats.external);
1409 }
1410 }
1411
1412 /*
1413 * Deal with the pv_rooted_entry.
1414 */
1415
1416 if (pv_e == pv_h) {
1417 /*
1418 * Fix up head later.
1419 */
1420 pv_h->pmap = PMAP_NULL;
1421 } else {
1422 /*
1423 * Delete this entry.
1424 */
1425 pv_hash_remove(pvh_e);
1426 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1427 pvh_eh = pvh_e;
1428
1429 if (pvh_et == PV_HASHED_ENTRY_NULL)
1430 pvh_et = pvh_e;
1431 pvh_cnt++;
1432 }
1433 } else {
1434 /*
1435 * Write-protect, after opportunistic refmod collect
1436 */
1437 pmap_phys_attributes[pai] |=
1438 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1439 pmap_update_pte(pte, INTEL_PTE_WRITE, 0);
1440
1441 if (options & PMAP_OPTIONS_NOFLUSH)
1442 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1443 else
1444 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1445 }
1446 pvh_e = nexth;
1447 } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
1448
1449
1450 /*
1451 * If pv_head mapping was removed, fix it up.
1452 */
1453 if (pv_h->pmap == PMAP_NULL) {
1454 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
1455
1456 if (pvh_e != (pv_hashed_entry_t) pv_h) {
1457 pv_hash_remove(pvh_e);
1458 pv_h->pmap = pvh_e->pmap;
1459 pv_h->va = pvh_e->va;
1460 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1461 pvh_eh = pvh_e;
1462
1463 if (pvh_et == PV_HASHED_ENTRY_NULL)
1464 pvh_et = pvh_e;
1465 pvh_cnt++;
1466 }
1467 }
1468 if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1469 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1470 }
1471 done:
1472 UNLOCK_PVH(pai);
1473
1474 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
1475 0, 0, 0, 0, 0);
1476 }
1477
1478
1479 /*
1480 * Clear specified attribute bits.
1481 */
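/*
 * Illustrative call (hypothetical page number): clearing the modified bit on
 * every mapping of a physical page:
 *
 *	phys_attribute_clear(pn, PHYS_MODIFIED, 0, NULL);
 */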
1482 void
1483 phys_attribute_clear(
1484 ppnum_t pn,
1485 int bits,
1486 unsigned int options,
1487 void *arg)
1488 {
1489 pv_rooted_entry_t pv_h;
1490 pv_hashed_entry_t pv_e;
1491 pt_entry_t *pte;
1492 int pai;
1493 pmap_t pmap;
1494 char attributes = 0;
1495
1496 pmap_intr_assert();
1497 assert(pn != vm_page_fictitious_addr);
1498 if (pn == vm_page_guard_addr)
1499 return;
1500
1501 pai = ppn_to_pai(pn);
1502
1503 if (!IS_MANAGED_PAGE(pai)) {
1504 /*
1505 * Not a managed page.
1506 */
1507 return;
1508 }
1509
1510 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
1511 pn, bits, 0, 0, 0);
1512
1513 pv_h = pai_to_pvh(pai);
1514
1515 LOCK_PVH(pai);
1516
1517 /*
1518 * Walk down PV list, clearing all modify or reference bits.
1519 * We do not have to lock the pv_list separately because we hold
1520 * the PV lock for this page (LOCK_PVH above).
1521 */
1522 if (pv_h->pmap != PMAP_NULL) {
1523 /*
1524 * There are some mappings.
1525 */
1526
1527 pv_e = (pv_hashed_entry_t)pv_h;
1528
1529 do {
1530 vm_map_offset_t va;
1531
1532 pmap = pv_e->pmap;
1533 va = pv_e->va;
1534
1535 /*
1536 * Clear modify and/or reference bits.
1537 */
1538 pte = pmap_pte(pmap, va);
1539 attributes |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1540 pmap_update_pte(pte, bits, 0);
1541 /* Ensure all processors using this translation
1542 * invalidate this TLB entry. The invalidation *must*
1543 * follow the PTE update, to ensure that the TLB
1544 * shadow of the 'D' bit (in particular) is
1545 * synchronized with the updated PTE.
1546 */
1547 if (options & PMAP_OPTIONS_NOFLUSH) {
1548 if (arg)
1549 PMAP_UPDATE_TLBS_DELAYED(pmap, va, va + PAGE_SIZE, (pmap_flush_context *)arg);
1550 } else
1551 PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);
1552
1553 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1554
1555 } while (pv_e != (pv_hashed_entry_t)pv_h);
1556 }
1557 /* Opportunistic refmod collection, annulled
1558 * if both REF and MOD are being cleared.
1559 */
1560
1561 pmap_phys_attributes[pai] |= attributes;
1562 pmap_phys_attributes[pai] &= (~bits);
1563
1564 UNLOCK_PVH(pai);
1565
1566 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
1567 0, 0, 0, 0, 0);
1568 }
1569
1570 /*
1571 * Check specified attribute bits.
1572 */
1573 int
1574 phys_attribute_test(
1575 ppnum_t pn,
1576 int bits)
1577 {
1578 pv_rooted_entry_t pv_h;
1579 pv_hashed_entry_t pv_e;
1580 pt_entry_t *pte;
1581 int pai;
1582 pmap_t pmap;
1583 int attributes = 0;
1584
1585 pmap_intr_assert();
1586 assert(pn != vm_page_fictitious_addr);
1587 if (pn == vm_page_guard_addr)
1588 return 0;
1589
1590 pai = ppn_to_pai(pn);
1591
1592 if (!IS_MANAGED_PAGE(pai)) {
1593 /*
1594 * Not a managed page.
1595 */
1596 return 0;
1597 }
1598
1599 /*
1600 * Fast check... if bits already collected
1601 * no need to take any locks...
1602 * if not set, we need to recheck after taking
1603 * the lock in case they got pulled in while
1604 * we were waiting for the lock
1605 */
1606 if ((pmap_phys_attributes[pai] & bits) == bits)
1607 return bits;
1608
1609 pv_h = pai_to_pvh(pai);
1610
1611 LOCK_PVH(pai);
1612
1613 attributes = pmap_phys_attributes[pai] & bits;
1614
1615
1616 /*
1617 * Walk down PV list, checking the mappings until we
1618 * reach the end or we've found the desired attributes.
1619 */
1620 if (attributes != bits &&
1621 pv_h->pmap != PMAP_NULL) {
1622 /*
1623 * There are some mappings.
1624 */
1625 pv_e = (pv_hashed_entry_t)pv_h;
1626 do {
1627 vm_map_offset_t va;
1628
1629 pmap = pv_e->pmap;
1630 va = pv_e->va;
1631 /*
1632 * pick up modify and/or reference bits from mapping
1633 */
1634
1635 pte = pmap_pte(pmap, va);
1636 attributes |= (int)(*pte & bits);
1637
1638 pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1639
1640 } while ((attributes != bits) &&
1641 (pv_e != (pv_hashed_entry_t)pv_h));
1642 }
1643 pmap_phys_attributes[pai] |= attributes;
1644
1645 UNLOCK_PVH(pai);
1646 return (attributes);
1647 }
1648
1649 /*
1650 * Routine: pmap_change_wiring
1651 * Function: Change the wiring attribute for a map/virtual-address
1652 * pair.
1653 * In/out conditions:
1654 * The mapping must already exist in the pmap.
1655 */
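/*
 * Illustrative call (hypothetical values): unwiring an existing mapping:
 *
 *	pmap_change_wiring(map, vaddr, FALSE);
 */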
1656 void
1657 pmap_change_wiring(
1658 pmap_t map,
1659 vm_map_offset_t vaddr,
1660 boolean_t wired)
1661 {
1662 pt_entry_t *pte;
1663
1664 PMAP_LOCK(map);
1665
1666 if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
1667 panic("pmap_change_wiring: pte missing");
1668
1669 if (wired && !iswired(*pte)) {
1670 /*
1671 * wiring down mapping
1672 */
1673 pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
1674 OSAddAtomic(+1, &map->stats.wired_count);
1675 pmap_update_pte(pte, 0, INTEL_PTE_WIRED);
1676 }
1677 else if (!wired && iswired(*pte)) {
1678 /*
1679 * unwiring mapping
1680 */
1681 assert(map->stats.wired_count >= 1);
1682 OSAddAtomic(-1, &map->stats.wired_count);
1683 pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
1684 pmap_update_pte(pte, INTEL_PTE_WIRED, 0);
1685 }
1686
1687 PMAP_UNLOCK(map);
1688 }
1689
1690 /*
1691 * "Backdoor" direct map routine for early mappings.
1692 * Useful for mapping memory outside the range
1693 * Sets A, D and NC if requested
1694 */
1695
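/*
 * Illustrative call (hypothetical addresses): creating an early, uncached
 * mapping of a device range; the return value is the virtual address just
 * past the new mappings:
 *
 *	next_virt = pmap_map_bd(virt, phys_start, phys_end,
 *	                        VM_PROT_READ | VM_PROT_WRITE,
 *	                        VM_MEM_NOT_CACHEABLE);
 */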
1696 vm_offset_t
1697 pmap_map_bd(
1698 vm_offset_t virt,
1699 vm_map_offset_t start_addr,
1700 vm_map_offset_t end_addr,
1701 vm_prot_t prot,
1702 unsigned int flags)
1703 {
1704 pt_entry_t template;
1705 pt_entry_t *pte;
1706 spl_t spl;
1707 vm_offset_t base = virt;
1708 template = pa_to_pte(start_addr)
1709 | INTEL_PTE_REF
1710 | INTEL_PTE_MOD
1711 | INTEL_PTE_WIRED
1712 | INTEL_PTE_VALID;
1713
1714 if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
1715 template |= INTEL_PTE_NCACHE;
1716 if (!(flags & (VM_MEM_GUARDED)))
1717 template |= INTEL_PTE_PTA;
1718 }
1719
1720 #if defined(__x86_64__)
1721 if ((prot & VM_PROT_EXECUTE) == 0)
1722 template |= INTEL_PTE_NX;
1723 #endif
1724
1725 if (prot & VM_PROT_WRITE)
1726 template |= INTEL_PTE_WRITE;
1727
1728 while (start_addr < end_addr) {
1729 spl = splhigh();
1730 pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
1731 if (pte == PT_ENTRY_NULL) {
1732 panic("pmap_map_bd: Invalid kernel address\n");
1733 }
1734 pmap_store_pte(pte, template);
1735 splx(spl);
1736 pte_increment_pa(template);
1737 virt += PAGE_SIZE;
1738 start_addr += PAGE_SIZE;
1739 }
1740 flush_tlb_raw();
1741 PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
1742 return(virt);
1743 }
1744
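/*
 * pmap_reusable() walks [s64, e64) in PDE-sized chunks and, for each mapped
 * page, sets or clears PHYS_REUSABLE via pmap_reusable_range(), adjusting
 * the pmap's reusable/internal/external counters; superpage mappings are
 * skipped.
 */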
1745 void
1746 pmap_reusable(
1747 pmap_t pmap,
1748 addr64_t s64,
1749 addr64_t e64,
1750 boolean_t reusable)
1751 {
1752 pt_entry_t *pde;
1753 pt_entry_t *spte, *epte;
1754 addr64_t l64;
1755 uint64_t deadline;
1756
1757 pmap_intr_assert();
1758
1759 if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
1760 return;
1761
1762 PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_START,
1763 pmap,
1764 (uint32_t) (s64 >> 32), s64,
1765 (uint32_t) (e64 >> 32), e64);
1766
1767 PMAP_LOCK(pmap);
1768
1769 deadline = rdtsc64() + max_preemption_latency_tsc;
1770
1771 while (s64 < e64) {
1772 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
1773 if (l64 > e64)
1774 l64 = e64;
1775 pde = pmap_pde(pmap, s64);
1776
1777 if (pde && (*pde & INTEL_PTE_VALID)) {
1778 if (*pde & INTEL_PTE_PS) {
1779 /* superpage: not supported */
1780 } else {
1781 spte = pmap_pte(pmap,
1782 (s64 & ~(pde_mapped_size - 1)));
1783 spte = &spte[ptenum(s64)];
1784 epte = &spte[intel_btop(l64 - s64)];
1785 pmap_reusable_range(pmap, s64, spte, epte,
1786 reusable);
1787 }
1788 }
1789 s64 = l64;
1790
1791 if (s64 < e64 && rdtsc64() >= deadline) {
1792 PMAP_UNLOCK(pmap);
1793 PMAP_LOCK(pmap);
1794 deadline = rdtsc64() + max_preemption_latency_tsc;
1795 }
1796 }
1797
1798 PMAP_UNLOCK(pmap);
1799
1800 PMAP_TRACE(PMAP_CODE(PMAP__REUSABLE) | DBG_FUNC_END,
1801 pmap, reusable, 0, 0, 0);
1802 }
1803
1804 void
1805 pmap_reusable_range(
1806 pmap_t pmap,
1807 vm_map_offset_t start_vaddr,
1808 pt_entry_t *spte,
1809 pt_entry_t *epte,
1810 boolean_t reusable)
1811 {
1812 pt_entry_t *cpte;
1813 int num_external, num_internal, num_reusable;
1814 ppnum_t pai;
1815 pmap_paddr_t pa;
1816 vm_map_offset_t vaddr;
1817
1818 num_external = 0;
1819 num_internal = 0;
1820 num_reusable = 0;
1821
1822 for (cpte = spte, vaddr = start_vaddr;
1823 cpte < epte;
1824 cpte++, vaddr += PAGE_SIZE_64) {
1825
1826 pa = pte_to_pa(*cpte);
1827 if (pa == 0)
1828 continue;
1829
1830 pai = pa_index(pa);
1831
1832 LOCK_PVH(pai);
1833
1834 pa = pte_to_pa(*cpte);
1835 if (pa == 0) {
1836 UNLOCK_PVH(pai);
1837 continue;
1838 }
1839 if (reusable) {
1840 /* we want to set "reusable" */
1841 if (IS_REUSABLE_PAGE(pai)) {
1842 /* already reusable: no change */
1843 } else {
1844 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
1845 /* one more "reusable" */
1846 num_reusable++;
1847 if (IS_INTERNAL_PAGE(pai)) {
1848 /* one less "internal" */
1849 num_internal--;
1850 } else {
1851 /* one less "external" */
1852 num_external--;
1853 }
1854 }
1855 } else {
1856 /* we want to clear "reusable" */
1857 if (IS_REUSABLE_PAGE(pai)) {
1858 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
1859 /* one less "reusable" */
1860 num_reusable--;
1861 if (IS_INTERNAL_PAGE(pai)) {
1862 /* one more "internal" */
1863 num_internal++;
1864 } else {
1865 /* one more "external" */
1866 num_external++;
1867 }
1868 } else {
1869 /* already not reusable: no change */
1870 }
1871 }
1872
1873 UNLOCK_PVH(pai);
1874
1875 } /* for loop */
1876
1877 /*
1878 * Update the counts
1879 */
1880 if (pmap != kernel_pmap) {
1881 if (num_external) {
1882 OSAddAtomic(num_external, &pmap->stats.external);
1883 PMAP_STATS_PEAK(pmap->stats.external);
1884 }
1885 assert(pmap->stats.external >= 0);
1886 if (num_internal) {
1887 OSAddAtomic(num_internal, &pmap->stats.internal);
1888 PMAP_STATS_PEAK(pmap->stats.internal);
1889 }
1890 assert(pmap->stats.internal >= 0);
1891 if (num_reusable) {
1892 OSAddAtomic(num_reusable, &pmap->stats.reusable);
1893 PMAP_STATS_PEAK(pmap->stats.reusable);
1894 }
1895 assert(pmap->stats.reusable >= 0);
1896 }
1897
1898 return;
1899 }
1900
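/*
 * pmap_query_resident() returns the number of PTEs with a non-zero physical
 * address in [s64, e64) for a user pmap; superpage mappings are not counted.
 */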
1901 unsigned int
1902 pmap_query_resident(
1903 pmap_t pmap,
1904 addr64_t s64,
1905 addr64_t e64)
1906 {
1907 pt_entry_t *pde;
1908 pt_entry_t *spte, *epte;
1909 addr64_t l64;
1910 uint64_t deadline;
1911 unsigned int result;
1912
1913 pmap_intr_assert();
1914
1915 if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64)
1916 return 0;
1917
1918 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
1919 pmap,
1920 (uint32_t) (s64 >> 32), s64,
1921 (uint32_t) (e64 >> 32), e64);
1922
1923 result = 0;
1924
1925 PMAP_LOCK(pmap);
1926
1927 deadline = rdtsc64() + max_preemption_latency_tsc;
1928
1929 while (s64 < e64) {
1930 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
1931 if (l64 > e64)
1932 l64 = e64;
1933 pde = pmap_pde(pmap, s64);
1934
1935 if (pde && (*pde & INTEL_PTE_VALID)) {
1936 if (*pde & INTEL_PTE_PS) {
1937 /* superpage: not supported */
1938 } else {
1939 spte = pmap_pte(pmap,
1940 (s64 & ~(pde_mapped_size - 1)));
1941 spte = &spte[ptenum(s64)];
1942 epte = &spte[intel_btop(l64 - s64)];
1943
1944 for (; spte < epte; spte++) {
1945 if (pte_to_pa(*spte) != 0) {
1946 result++;
1947 }
1948 }
1949
1950 }
1951 }
1952 s64 = l64;
1953
1954 if (s64 < e64 && rdtsc64() >= deadline) {
1955 PMAP_UNLOCK(pmap);
1956 PMAP_LOCK(pmap);
1957 deadline = rdtsc64() + max_preemption_latency_tsc;
1958 }
1959 }
1960
1961 PMAP_UNLOCK(pmap);
1962
1963 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
1964 pmap, 0, 0, 0, 0);
1965
1966 return result;
1967 }