/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
29 #include <mach_assert.h>
32 #include <vm/vm_map.h>
33 #include <kern/ledger.h>
34 #include <i386/pmap_internal.h>
36 void pmap_remove_range(
42 void pmap_remove_range_options(
49 void pmap_reusable_range(
56 uint32_t pmap_update_clear_pte_count
;
/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on a NBPDE boundary.
 */
63 /* These symbols may be referenced directly by VM */
64 uint64_t pmap_nesting_size_min
= NBPDE
;
65 uint64_t pmap_nesting_size_max
= 0 - (uint64_t)NBPDE
;
/*
 *	kern_return_t pmap_nest(grand, subord, va_start, size)
 *
 *	grand  = the pmap that we will nest subord into
 *	subord = the pmap that goes into the grand
 *	va_start  = start of range in pmap to be inserted
 *	nstart  = start of range in pmap nested pmap
 *	size   = Size of nest area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *
 *	Note that we depend upon higher level VM locks to insure that things don't change while
 *	we are doing this.  For example, VM should not be doing any pmap enters while it is nesting
 *	or do 2 nests at once.
 */

/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */
91 kern_return_t
pmap_nest(pmap_t grand
, pmap_t subord
, addr64_t va_start
, addr64_t nstart
, uint64_t size
) {
92 vm_map_offset_t vaddr
, nvaddr
;
93 pd_entry_t
*pde
,*npde
;
97 if ((size
& (pmap_nesting_size_min
-1)) ||
98 (va_start
& (pmap_nesting_size_min
-1)) ||
99 (nstart
& (pmap_nesting_size_min
-1)) ||
100 ((size
>> 28) > 65536)) /* Max size we can nest is 16TB */
101 return KERN_INVALID_VALUE
;
104 panic("pmap_nest: size is invalid - %016llX\n", size
);
107 if (va_start
!= nstart
)
108 panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start
, nstart
);
110 PMAP_TRACE(PMAP_CODE(PMAP__NEST
) | DBG_FUNC_START
,
111 (uintptr_t) grand
, (uintptr_t) subord
,
112 (uintptr_t) (va_start
>>32), (uintptr_t) va_start
, 0);
114 nvaddr
= (vm_map_offset_t
)nstart
;
115 num_pde
= size
>> PDESHIFT
;
119 subord
->pm_shared
= TRUE
;
121 for (i
= 0; i
< num_pde
;) {
122 if (((nvaddr
& PDPTMASK
) == 0) && (num_pde
- i
) >= NPDEPG
&& cpu_64bit
) {
124 npde
= pmap64_pdpt(subord
, nvaddr
);
126 while (0 == npde
|| ((*npde
& INTEL_PTE_VALID
) == 0)) {
128 pmap_expand_pdpt(subord
, nvaddr
, PMAP_EXPAND_OPTIONS_NONE
);
130 npde
= pmap64_pdpt(subord
, nvaddr
);
132 *npde
|= INTEL_PDPTE_NESTED
;
134 i
+= (uint32_t)NPDEPG
;
137 npde
= pmap_pde(subord
, nvaddr
);
139 while (0 == npde
|| ((*npde
& INTEL_PTE_VALID
) == 0)) {
141 pmap_expand(subord
, nvaddr
, PMAP_EXPAND_OPTIONS_NONE
);
143 npde
= pmap_pde(subord
, nvaddr
);
152 vaddr
= (vm_map_offset_t
)va_start
;
156 for (i
= 0;i
< num_pde
;) {
159 if (((vaddr
& PDPTMASK
) == 0) && ((num_pde
- i
) >= NPDEPG
) && cpu_64bit
) {
160 npde
= pmap64_pdpt(subord
, vaddr
);
162 panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord
, vaddr
);
164 pde
= pmap64_pdpt(grand
, vaddr
);
167 pmap_expand_pml4(grand
, vaddr
, PMAP_EXPAND_OPTIONS_NONE
);
169 pde
= pmap64_pdpt(grand
, vaddr
);
172 panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand
, vaddr
);
173 pmap_store_pte(pde
, tpde
);
175 i
+= (uint32_t) NPDEPG
;
178 npde
= pmap_pde(subord
, nstart
);
180 panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord
, nstart
);
183 pde
= pmap_pde(grand
, vaddr
);
184 if ((0 == pde
) && cpu_64bit
) {
186 pmap_expand_pdpt(grand
, vaddr
, PMAP_EXPAND_OPTIONS_NONE
);
188 pde
= pmap_pde(grand
, vaddr
);
192 panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand
, vaddr
);
194 pmap_store_pte(pde
, tpde
);
201 PMAP_TRACE(PMAP_CODE(PMAP__NEST
) | DBG_FUNC_END
, 0, 0, 0, 0, 0);
/*
 *	kern_return_t pmap_unnest(grand, vaddr)
 *
 *	grand  = the pmap that we will un-nest subord from
 *	vaddr  = start of range in pmap to be unnested
 *
 *	Removes a pmap from another. This is used to implement shared segments.
 */
215 kern_return_t
pmap_unnest(pmap_t grand
, addr64_t vaddr
, uint64_t size
) {
220 addr64_t va_start
, va_end
;
221 uint64_t npdpt
= PMAP_INVALID_PDPTNUM
;
223 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST
) | DBG_FUNC_START
,
225 (uintptr_t) (vaddr
>>32), (uintptr_t) vaddr
, 0, 0);
227 if ((size
& (pmap_nesting_size_min
-1)) ||
228 (vaddr
& (pmap_nesting_size_min
-1))) {
229 panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
233 /* align everything to PDE boundaries */
234 va_start
= vaddr
& ~(NBPDE
-1);
235 va_end
= (vaddr
+ size
+ NBPDE
- 1) & ~(NBPDE
-1);
236 size
= va_end
- va_start
;
240 num_pde
= size
>> PDESHIFT
;
243 for (i
= 0; i
< num_pde
; ) {
244 if ((pdptnum(grand
, vaddr
) != npdpt
) && cpu_64bit
) {
245 npdpt
= pdptnum(grand
, vaddr
);
246 pde
= pmap64_pdpt(grand
, vaddr
);
247 if (pde
&& (*pde
& INTEL_PDPTE_NESTED
)) {
248 pmap_store_pte(pde
, (pd_entry_t
)0);
249 i
+= (uint32_t) NPDEPG
;
254 pde
= pmap_pde(grand
, (vm_map_offset_t
)vaddr
);
256 panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand
, vaddr
);
257 pmap_store_pte(pde
, (pd_entry_t
)0);
262 PMAP_UPDATE_TLBS(grand
, va_start
, va_end
);
266 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST
) | DBG_FUNC_END
, 0, 0, 0, 0, 0);
/* Invoked by the Mach VM to determine the platform specific unnest region */
273 boolean_t
pmap_adjust_unnest_parameters(pmap_t p
, vm_map_offset_t
*s
, vm_map_offset_t
*e
) {
275 boolean_t rval
= FALSE
;
282 pdpte
= pmap64_pdpt(p
, *s
);
283 if (pdpte
&& (*pdpte
& INTEL_PDPTE_NESTED
)) {
288 pdpte
= pmap64_pdpt(p
, *e
);
289 if (pdpte
&& (*pdpte
& INTEL_PDPTE_NESTED
)) {
290 *e
= ((*e
+ NBPDPT
) & ~(NBPDPT
-1));
/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
307 pmap_find_phys(pmap_t pmap
, addr64_t va
)
315 mp_disable_preemption();
317 /* This refcount test is a band-aid--several infrastructural changes
318 * are necessary to eliminate invocation of this routine from arbitrary
322 if (!pmap
->ref_count
)
325 pdep
= pmap_pde(pmap
, va
);
327 if ((pdep
!= PD_ENTRY_NULL
) && ((pde
= *pdep
) & INTEL_PTE_VALID
)) {
328 if (pde
& INTEL_PTE_PS
) {
329 ppn
= (ppnum_t
) i386_btop(pte_to_pa(pde
));
330 ppn
+= (ppnum_t
) ptenum(va
);
333 ptp
= pmap_pte(pmap
, va
);
334 if ((PT_ENTRY_NULL
!= ptp
) && (((pte
= *ptp
) & INTEL_PTE_VALID
) != 0)) {
335 ppn
= (ppnum_t
) i386_btop(pte_to_pa(pte
));
340 mp_enable_preemption();
/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed.
 */
352 pmap_update_cache_attributes_locked(ppnum_t pn
, unsigned attributes
) {
353 pv_rooted_entry_t pv_h
, pv_e
;
354 pv_hashed_entry_t pvh_e
, nexth
;
355 vm_map_offset_t vaddr
;
359 assert(IS_MANAGED_PAGE(pn
));
361 pv_h
= pai_to_pvh(pn
);
362 /* TODO: translate the PHYS_* bits to PTE bits, while they're
363 * currently identical, they may not remain so
364 * Potential optimization (here and in page_protect),
365 * parallel shootdowns, check for redundant
366 * attribute modifications.
370 * Alter attributes on all mappings
372 if (pv_h
->pmap
!= PMAP_NULL
) {
374 pvh_e
= (pv_hashed_entry_t
)pv_e
;
379 ptep
= pmap_pte(pmap
, vaddr
);
382 panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap
, pn
, vaddr
, kernel_pmap
);
384 nexth
= (pv_hashed_entry_t
)queue_next(&pvh_e
->qlink
);
385 pmap_update_pte(ptep
, PHYS_CACHEABILITY_MASK
, attributes
);
386 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
388 } while ((pv_e
= (pv_rooted_entry_t
)nexth
) != pv_h
);
392 void x86_filter_TLB_coherency_interrupts(boolean_t dofilter
) {
393 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
396 CPU_CR3_MARK_INACTIVE();
398 CPU_CR3_MARK_ACTIVE();
400 if (current_cpu_datap()->cpu_tlb_invalid
)
401 process_pmap_updates();
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
421 register pmap_t pmap
,
422 vm_map_offset_t vaddr
,
425 vm_prot_t fault_type
,
429 (void) pmap_enter_options(pmap
, vaddr
, pn
, prot
, fault_type
, flags
, wired
, PMAP_EXPAND_OPTIONS_NONE
, NULL
);
435 register pmap_t pmap
,
436 vm_map_offset_t vaddr
,
439 __unused vm_prot_t fault_type
,
442 unsigned int options
,
446 pv_rooted_entry_t pv_h
;
448 pv_hashed_entry_t pvh_e
;
449 pv_hashed_entry_t pvh_new
;
452 pmap_paddr_t pa
= (pmap_paddr_t
) i386_ptob(pn
);
453 boolean_t need_tlbflush
= FALSE
;
456 boolean_t old_pa_locked
;
457 /* 2MiB mappings are confined to x86_64 by VM */
458 boolean_t superpage
= flags
& VM_MEM_SUPERPAGE
;
459 vm_object_t delpage_pm_obj
= NULL
;
460 uint64_t delpage_pde_index
= 0;
462 kern_return_t kr_expand
;
466 if (pmap
== PMAP_NULL
)
467 return KERN_INVALID_ARGUMENT
;
469 /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
470 * unused value for that scenario.
472 assert(pn
!= vm_page_fictitious_addr
);
474 if (pn
== vm_page_guard_addr
)
475 return KERN_INVALID_ARGUMENT
;
477 PMAP_TRACE(PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_START
,
479 (uint32_t) (vaddr
>> 32), (uint32_t) vaddr
,
482 if ((prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !pmap
->nx_enabled
)
487 if (__improbable(set_NX
&& (pmap
== kernel_pmap
) && ((pmap_disable_kstack_nx
&& (flags
& VM_MEM_STACK
)) || (pmap_disable_kheap_nx
&& !(flags
& VM_MEM_STACK
))))) {
492 * Must allocate a new pvlist entry while we're unlocked;
493 * zalloc may cause pageout (which will lock the pmap system).
494 * If we determine we need a pvlist entry, we will unlock
495 * and allocate one. Then we will retry, throughing away
496 * the allocated entry later (if we no longer need it).
499 pvh_new
= PV_HASHED_ENTRY_NULL
;
501 pvh_e
= PV_HASHED_ENTRY_NULL
;
506 * Expand pmap to include this pte. Assume that
507 * pmap is always expanded to include enough hardware
508 * pages to map one VM page.
511 while ((pte
= pmap64_pde(pmap
, vaddr
)) == PD_ENTRY_NULL
) {
512 /* need room for another pde entry */
514 kr_expand
= pmap_expand_pdpt(pmap
, vaddr
, options
);
515 if (kr_expand
!= KERN_SUCCESS
)
520 while ((pte
= pmap_pte(pmap
, vaddr
)) == PT_ENTRY_NULL
) {
522 * Must unlock to expand the pmap
523 * going to grow pde level page(s)
526 kr_expand
= pmap_expand(pmap
, vaddr
, options
);
527 if (kr_expand
!= KERN_SUCCESS
)
532 if (options
& PMAP_EXPAND_OPTIONS_NOENTER
) {
537 if (superpage
&& *pte
&& !(*pte
& INTEL_PTE_PS
)) {
539 * There is still an empty page table mapped that
540 * was used for a previous base page mapping.
541 * Remember the PDE and the PDE index, so that we
542 * can free the page at the end of this function.
544 delpage_pde_index
= pdeidx(pmap
, vaddr
);
545 delpage_pm_obj
= pmap
->pm_obj
;
549 old_pa
= pte_to_pa(*pte
);
550 pai
= pa_index(old_pa
);
551 old_pa_locked
= FALSE
;
554 (*pte
& INTEL_PTE_COMPRESSED
)) {
555 /* one less "compressed" */
556 OSAddAtomic64(-1, &pmap
->stats
.compressed
);
557 /* marker will be cleared below */
561 * if we have a previous managed page, lock the pv entry now. after
562 * we lock it, check to see if someone beat us to the lock and if so
565 if ((0 != old_pa
) && IS_MANAGED_PAGE(pai
)) {
567 old_pa_locked
= TRUE
;
568 old_pa
= pte_to_pa(*pte
);
570 UNLOCK_PVH(pai
); /* another path beat us to it */
571 old_pa_locked
= FALSE
;
576 * Special case if the incoming physical page is already mapped
580 pt_entry_t old_attributes
=
581 *pte
& ~(INTEL_PTE_REF
| INTEL_PTE_MOD
);
584 * May be changing its wired attribute or protection
587 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
588 template |= pmap_get_cache_attributes(pa_index(pa
));
590 if (VM_MEM_NOT_CACHEABLE
==
591 (flags
& (VM_MEM_NOT_CACHEABLE
| VM_WIMG_USE_DEFAULT
))) {
592 if (!(flags
& VM_MEM_GUARDED
))
593 template |= INTEL_PTE_PTA
;
594 template |= INTEL_PTE_NCACHE
;
596 if (pmap
!= kernel_pmap
)
597 template |= INTEL_PTE_USER
;
598 if (prot
& VM_PROT_WRITE
) {
599 template |= INTEL_PTE_WRITE
;
603 template |= INTEL_PTE_NX
;
606 template |= INTEL_PTE_WIRED
;
607 if (!iswired(old_attributes
)) {
608 OSAddAtomic(+1, &pmap
->stats
.wired_count
);
609 pmap_ledger_credit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
612 if (iswired(old_attributes
)) {
613 assert(pmap
->stats
.wired_count
>= 1);
614 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
615 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
618 if (superpage
) /* this path can not be used */
619 template |= INTEL_PTE_PS
; /* to change the page size! */
621 if (old_attributes
== template)
622 goto dont_update_pte
;
624 /* Determine delta, PV locked */
626 ((old_attributes
^ template) != INTEL_PTE_WIRED
);
628 if (need_tlbflush
== TRUE
&& !(old_attributes
& INTEL_PTE_WRITE
)) {
629 if ((old_attributes
^ template) == INTEL_PTE_WRITE
)
630 need_tlbflush
= FALSE
;
633 /* store modified PTE and preserve RC bits */
634 pt_entry_t npte
, opte
;;
637 npte
= template | (opte
& (INTEL_PTE_REF
| INTEL_PTE_MOD
));
638 } while (!pmap_cmpx_pte(pte
, opte
, npte
));
642 old_pa_locked
= FALSE
;
648 * Outline of code from here:
649 * 1) If va was mapped, update TLBs, remove the mapping
650 * and remove old pvlist entry.
651 * 2) Add pvlist entry for new mapping
652 * 3) Enter new mapping.
654 * If the old physical page is not managed step 1) is skipped
655 * (except for updating the TLBs), and the mapping is
656 * overwritten at step 3). If the new physical page is not
657 * managed, step 2) is skipped.
660 if (old_pa
!= (pmap_paddr_t
) 0) {
663 * Don't do anything to pages outside valid memory here.
664 * Instead convince the code that enters a new mapping
665 * to overwrite the old one.
668 /* invalidate the PTE */
669 pmap_update_pte(pte
, INTEL_PTE_VALID
, 0);
670 /* propagate invalidate everywhere */
671 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
672 /* remember reference and change */
674 oattr
= (char) (old_pte
& (PHYS_MODIFIED
| PHYS_REFERENCED
));
675 /* completely invalidate the PTE */
676 pmap_store_pte(pte
, 0);
678 if (IS_MANAGED_PAGE(pai
)) {
679 pmap_assert(old_pa_locked
== TRUE
);
680 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
681 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
682 assert(pmap
->stats
.resident_count
>= 1);
683 OSAddAtomic(-1, &pmap
->stats
.resident_count
);
684 if (pmap
!= kernel_pmap
) {
685 if (IS_REUSABLE_PAGE(pai
)) {
686 assert(pmap
->stats
.reusable
> 0);
687 OSAddAtomic(-1, &pmap
->stats
.reusable
);
688 } else if (IS_INTERNAL_PAGE(pai
)) {
689 assert(pmap
->stats
.internal
> 0);
690 OSAddAtomic(-1, &pmap
->stats
.internal
);
692 assert(pmap
->stats
.external
> 0);
693 OSAddAtomic(-1, &pmap
->stats
.external
);
697 assert(pmap
->stats
.wired_count
>= 1);
698 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
699 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
,
702 pmap_phys_attributes
[pai
] |= oattr
;
705 * Remove the mapping from the pvlist for
706 * this physical page.
707 * We'll end up with either a rooted pv or a
710 pvh_e
= pmap_pv_remove(pmap
, vaddr
, (ppnum_t
*) &pai
, &old_pte
);
715 * old_pa is not managed.
716 * Do removal part of accounting.
719 if (pmap
!= kernel_pmap
) {
721 assert(pmap
->stats
.device
> 0);
722 OSAddAtomic(-1, &pmap
->stats
.device
);
726 assert(pmap
->stats
.wired_count
>= 1);
727 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
728 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
734 * if we had a previously managed paged locked, unlock it now
738 old_pa_locked
= FALSE
;
741 pai
= pa_index(pa
); /* now working with new incoming phys page */
742 if (IS_MANAGED_PAGE(pai
)) {
745 * Step 2) Enter the mapping in the PV list for this
748 pv_h
= pai_to_pvh(pai
);
752 if (pv_h
->pmap
== PMAP_NULL
) {
754 * No mappings yet, use rooted pv
758 queue_init(&pv_h
->qlink
);
760 if (options
& PMAP_OPTIONS_INTERNAL
) {
761 pmap_phys_attributes
[pai
] |= PHYS_INTERNAL
;
763 pmap_phys_attributes
[pai
] &= ~PHYS_INTERNAL
;
765 if (options
& PMAP_OPTIONS_REUSABLE
) {
766 pmap_phys_attributes
[pai
] |= PHYS_REUSABLE
;
768 pmap_phys_attributes
[pai
] &= ~PHYS_REUSABLE
;
772 * Add new pv_hashed_entry after header.
774 if ((PV_HASHED_ENTRY_NULL
== pvh_e
) && pvh_new
) {
776 pvh_new
= PV_HASHED_ENTRY_NULL
;
777 } else if (PV_HASHED_ENTRY_NULL
== pvh_e
) {
778 PV_HASHED_ALLOC(&pvh_e
);
779 if (PV_HASHED_ENTRY_NULL
== pvh_e
) {
781 * the pv list is empty. if we are on
782 * the kernel pmap we'll use one of
783 * the special private kernel pv_e's,
784 * else, we need to unlock
785 * everything, zalloc a pv_e, and
786 * restart bringing in the pv_e with
789 if (kernel_pmap
== pmap
) {
790 PV_HASHED_KERN_ALLOC(&pvh_e
);
794 pmap_pv_throttle(pmap
);
795 pvh_new
= (pv_hashed_entry_t
) zalloc(pv_hashed_list_zone
);
801 if (PV_HASHED_ENTRY_NULL
== pvh_e
)
802 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
807 pv_hash_add(pvh_e
, pv_h
);
810 * Remember that we used the pvlist entry.
812 pvh_e
= PV_HASHED_ENTRY_NULL
;
816 * only count the mapping
817 * for 'managed memory'
819 pmap_ledger_credit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
820 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
821 OSAddAtomic(+1, &pmap
->stats
.resident_count
);
822 if (pmap
->stats
.resident_count
> pmap
->stats
.resident_max
) {
823 pmap
->stats
.resident_max
= pmap
->stats
.resident_count
;
825 if (pmap
!= kernel_pmap
) {
826 if (IS_REUSABLE_PAGE(pai
)) {
827 OSAddAtomic(+1, &pmap
->stats
.reusable
);
828 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
829 } else if (IS_INTERNAL_PAGE(pai
)) {
830 OSAddAtomic(+1, &pmap
->stats
.internal
);
831 PMAP_STATS_PEAK(pmap
->stats
.internal
);
833 OSAddAtomic(+1, &pmap
->stats
.external
);
834 PMAP_STATS_PEAK(pmap
->stats
.external
);
837 } else if (last_managed_page
== 0) {
838 /* Account for early mappings created before "managed pages"
839 * are determined. Consider consulting the available DRAM map.
841 pmap_ledger_credit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
842 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
843 OSAddAtomic(+1, &pmap
->stats
.resident_count
);
844 if (pmap
!= kernel_pmap
) {
846 OSAddAtomic(+1, &pmap
->stats
.device
);
847 PMAP_STATS_PEAK(pmap
->stats
.device
);
852 * Step 3) Enter the mapping.
854 * Build a template to speed up entering -
855 * only the pfn changes.
857 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
859 * DRK: It may be worth asserting on cache attribute flags that diverge
860 * from the existing physical page attributes.
863 template |= pmap_get_cache_attributes(pa_index(pa
));
865 if (flags
& VM_MEM_NOT_CACHEABLE
) {
866 if (!(flags
& VM_MEM_GUARDED
))
867 template |= INTEL_PTE_PTA
;
868 template |= INTEL_PTE_NCACHE
;
870 if (pmap
!= kernel_pmap
)
871 template |= INTEL_PTE_USER
;
872 if (prot
& VM_PROT_WRITE
)
873 template |= INTEL_PTE_WRITE
;
875 template |= INTEL_PTE_NX
;
877 template |= INTEL_PTE_WIRED
;
878 OSAddAtomic(+1, & pmap
->stats
.wired_count
);
879 pmap_ledger_credit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
882 template |= INTEL_PTE_PS
;
883 pmap_store_pte(pte
, template);
886 * if this was a managed page we delayed unlocking the pv until here
887 * to prevent pmap_page_protect et al from finding it until the pte
890 if (IS_MANAGED_PAGE(pai
)) {
894 if (need_tlbflush
== TRUE
) {
895 if (options
& PMAP_OPTIONS_NOFLUSH
)
896 PMAP_UPDATE_TLBS_DELAYED(pmap
, vaddr
, vaddr
+ PAGE_SIZE
, (pmap_flush_context
*)arg
);
898 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
900 if (pvh_e
!= PV_HASHED_ENTRY_NULL
) {
901 PV_HASHED_FREE_LIST(pvh_e
, pvh_e
, 1);
903 if (pvh_new
!= PV_HASHED_ENTRY_NULL
) {
904 PV_HASHED_KERN_FREE_LIST(pvh_new
, pvh_new
, 1);
908 if (delpage_pm_obj
) {
911 vm_object_lock(delpage_pm_obj
);
912 m
= vm_page_lookup(delpage_pm_obj
, (delpage_pde_index
* PAGE_SIZE
));
913 if (m
== VM_PAGE_NULL
)
914 panic("pmap_enter: pte page not in object");
915 vm_object_unlock(delpage_pm_obj
);
917 OSAddAtomic(-1, &inuse_ptepages_count
);
918 PMAP_ZINFO_PFREE(pmap
, PAGE_SIZE
);
921 PMAP_TRACE(PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_END
, 0, 0, 0, 0, 0);
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */
940 vm_map_offset_t start_vaddr
,
944 pmap_remove_range_options(pmap
, start_vaddr
, spte
, epte
, 0);
948 pmap_remove_range_options(
950 vm_map_offset_t start_vaddr
,
956 pv_hashed_entry_t pvh_et
= PV_HASHED_ENTRY_NULL
;
957 pv_hashed_entry_t pvh_eh
= PV_HASHED_ENTRY_NULL
;
958 pv_hashed_entry_t pvh_e
;
960 int num_removed
, num_unwired
, num_found
, num_invalid
;
961 int num_device
, num_external
, num_internal
, num_reusable
;
962 uint64_t num_compressed
;
965 vm_map_offset_t vaddr
;
976 /* invalidate the PTEs first to "freeze" them */
977 for (cpte
= spte
, vaddr
= start_vaddr
;
979 cpte
++, vaddr
+= PAGE_SIZE_64
) {
980 pt_entry_t p
= *cpte
;
984 if (pmap
!= kernel_pmap
&&
985 (options
& PMAP_OPTIONS_REMOVE
) &&
986 (p
& INTEL_PTE_COMPRESSED
)) {
987 /* one less "compressed" */
990 /* XXX probably does not need to be atomic! */
991 pmap_update_pte(cpte
, INTEL_PTE_COMPRESSED
, 0);
1002 if (!IS_MANAGED_PAGE(pai
)) {
1004 * Outside range of managed physical memory.
1005 * Just remove the mappings.
1007 pmap_store_pte(cpte
, 0);
1012 if ((p
& INTEL_PTE_VALID
) == 0)
1015 /* invalidate the PTE */
1016 pmap_update_pte(cpte
, INTEL_PTE_VALID
, 0);
1019 if (num_found
== 0) {
1020 /* nothing was changed: we're done */
1024 /* propagate the invalidates to other CPUs */
1026 PMAP_UPDATE_TLBS(pmap
, start_vaddr
, vaddr
);
1028 for (cpte
= spte
, vaddr
= start_vaddr
;
1030 cpte
++, vaddr
+= PAGE_SIZE_64
) {
1032 pa
= pte_to_pa(*cpte
);
1040 pa
= pte_to_pa(*cpte
);
1046 if (IS_REUSABLE_PAGE(pai
)) {
1048 } else if (IS_INTERNAL_PAGE(pai
)) {
1055 * Get the modify and reference bits, then
1056 * nuke the entry in the page table
1058 /* remember reference and change */
1059 pmap_phys_attributes
[pai
] |=
1060 (char) (*cpte
& (PHYS_MODIFIED
| PHYS_REFERENCED
));
1063 * Remove the mapping from the pvlist for this physical page.
1065 pvh_e
= pmap_pv_remove(pmap
, vaddr
, (ppnum_t
*) &pai
, cpte
);
1067 /* completely invalidate the PTE */
1068 pmap_store_pte(cpte
, 0);
1072 if (pvh_e
!= PV_HASHED_ENTRY_NULL
) {
1073 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1076 if (pvh_et
== PV_HASHED_ENTRY_NULL
) {
1083 if (pvh_eh
!= PV_HASHED_ENTRY_NULL
) {
1084 PV_HASHED_FREE_LIST(pvh_eh
, pvh_et
, pvh_cnt
);
1091 if (pmap
->stats
.resident_count
< num_removed
)
1092 panic("pmap_remove_range: resident_count");
1094 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, machine_ptob(num_removed
));
1095 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, machine_ptob(num_removed
));
1096 assert(pmap
->stats
.resident_count
>= num_removed
);
1097 OSAddAtomic(-num_removed
, &pmap
->stats
.resident_count
);
1099 if (pmap
!= kernel_pmap
) {
1101 assert(pmap
->stats
.device
>= num_device
);
1103 OSAddAtomic(-num_device
, &pmap
->stats
.device
);
1105 assert(pmap
->stats
.external
>= num_external
);
1107 OSAddAtomic(-num_external
, &pmap
->stats
.external
);
1108 assert(pmap
->stats
.internal
>= num_internal
);
1110 OSAddAtomic(-num_internal
, &pmap
->stats
.internal
);
1111 assert(pmap
->stats
.reusable
>= num_reusable
);
1113 OSAddAtomic(-num_reusable
, &pmap
->stats
.reusable
);
1114 assert(pmap
->stats
.compressed
>= num_compressed
);
1116 OSAddAtomic64(-num_compressed
, &pmap
->stats
.compressed
);
1120 if (pmap
->stats
.wired_count
< num_unwired
)
1121 panic("pmap_remove_range: wired_count");
1123 assert(pmap
->stats
.wired_count
>= num_unwired
);
1124 OSAddAtomic(-num_unwired
, &pmap
->stats
.wired_count
);
1125 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, machine_ptob(num_unwired
));
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
1144 pmap_remove_options(map
, s64
, e64
, 0);
1148 pmap_remove_options(
1155 pt_entry_t
*spte
, *epte
;
1161 if (map
== PMAP_NULL
|| s64
== e64
)
1164 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE
) | DBG_FUNC_START
,
1166 (uint32_t) (s64
>> 32), s64
,
1167 (uint32_t) (e64
>> 32), e64
);
1174 * Check that address range in the kernel does not overlap the stacks.
1175 * We initialize local static min/max variables once to avoid making
1176 * 2 function calls for every remove. Note also that these functions
1177 * both return 0 before kernel stacks have been initialized, and hence
1178 * the panic is not triggered in this case.
1180 if (map
== kernel_pmap
) {
1181 static vm_offset_t kernel_stack_min
= 0;
1182 static vm_offset_t kernel_stack_max
= 0;
1184 if (kernel_stack_min
== 0) {
1185 kernel_stack_min
= min_valid_stack_address();
1186 kernel_stack_max
= max_valid_stack_address();
1188 if ((kernel_stack_min
<= s64
&& s64
< kernel_stack_max
) ||
1189 (kernel_stack_min
< e64
&& e64
<= kernel_stack_max
))
1190 panic("pmap_remove() attempted in kernel stack");
1195 * The values of kernel_stack_min and kernel_stack_max are no longer
1196 * relevant now that we allocate kernel stacks in the kernel map,
1197 * so the old code above no longer applies. If we wanted to check that
1198 * we weren't removing a mapping of a page in a kernel stack we'd
1199 * mark the PTE with an unused bit and check that here.
1204 deadline
= rdtsc64() + max_preemption_latency_tsc
;
1207 l64
= (s64
+ pde_mapped_size
) & ~(pde_mapped_size
- 1);
1210 pde
= pmap_pde(map
, s64
);
1212 if (pde
&& (*pde
& INTEL_PTE_VALID
)) {
1213 if (*pde
& INTEL_PTE_PS
) {
1215 * If we're removing a superpage, pmap_remove_range()
1216 * must work on level 2 instead of level 1; and we're
1217 * only passing a single level 2 entry instead of a
1221 epte
= spte
+1; /* excluded */
1223 spte
= pmap_pte(map
, (s64
& ~(pde_mapped_size
- 1)));
1224 spte
= &spte
[ptenum(s64
)];
1225 epte
= &spte
[intel_btop(l64
- s64
)];
1227 pmap_remove_range_options(map
, s64
, spte
, epte
,
1232 if (s64
< e64
&& rdtsc64() >= deadline
) {
1234 /* TODO: Rapid release/reacquisition can defeat
1235 * the "backoff" intent here; either consider a
1236 * fair spinlock, or a scheme whereby each lock
1237 * attempt marks the processor as within a spinlock
1238 * acquisition, and scan CPUs here to determine
1239 * if a backoff is necessary, to avoid sacrificing
1240 * performance in the common case.
1243 deadline
= rdtsc64() + max_preemption_latency_tsc
;
1249 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE
) | DBG_FUNC_END
,
1259 pmap_page_protect_options(pn
, prot
, 0, NULL
);
/*
 *	Routine:	pmap_page_protect_options
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
1270 pmap_page_protect_options(
1273 unsigned int options
,
1276 pv_hashed_entry_t pvh_eh
= PV_HASHED_ENTRY_NULL
;
1277 pv_hashed_entry_t pvh_et
= PV_HASHED_ENTRY_NULL
;
1278 pv_hashed_entry_t nexth
;
1280 pv_rooted_entry_t pv_h
;
1281 pv_rooted_entry_t pv_e
;
1282 pv_hashed_entry_t pvh_e
;
1287 pt_entry_t new_pte_value
;
1290 assert(pn
!= vm_page_fictitious_addr
);
1291 if (pn
== vm_page_guard_addr
)
1294 pai
= ppn_to_pai(pn
);
1296 if (!IS_MANAGED_PAGE(pai
)) {
1298 * Not a managed page.
1302 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT
) | DBG_FUNC_START
,
1306 * Determine the new protection.
1310 case VM_PROT_READ
| VM_PROT_EXECUTE
:
1314 return; /* nothing to do */
1320 pv_h
= pai_to_pvh(pai
);
1326 * Walk down PV list, if any, changing or removing all mappings.
1328 if (pv_h
->pmap
== PMAP_NULL
)
1332 pvh_e
= (pv_hashed_entry_t
) pv_e
; /* cheat */
1335 vm_map_offset_t vaddr
;
1339 pte
= pmap_pte(pmap
, vaddr
);
1341 pmap_assert2((pa_index(pte_to_pa(*pte
)) == pn
),
1342 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn
, pmap
, vaddr
, *pte
);
1345 panic("pmap_page_protect() "
1346 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1349 nexth
= (pv_hashed_entry_t
) queue_next(&pvh_e
->qlink
);
1352 * Remove the mapping if new protection is NONE
1356 /* Remove per-pmap wired count */
1357 if (iswired(*pte
)) {
1358 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
1359 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
1362 if (pmap
!= kernel_pmap
&&
1363 (options
& PMAP_OPTIONS_COMPRESSOR
) &&
1364 IS_INTERNAL_PAGE(pai
)) {
1365 /* adjust "reclaimed" stats */
1366 OSAddAtomic64(+1, &pmap
->stats
.compressed
);
1367 PMAP_STATS_PEAK(pmap
->stats
.compressed
);
1368 pmap
->stats
.compressed_lifetime
++;
1369 /* mark this PTE as having been "reclaimed" */
1370 new_pte_value
= INTEL_PTE_COMPRESSED
;
1375 if (options
& PMAP_OPTIONS_NOREFMOD
) {
1376 pmap_store_pte(pte
, new_pte_value
);
1378 if (options
& PMAP_OPTIONS_NOFLUSH
)
1379 PMAP_UPDATE_TLBS_DELAYED(pmap
, vaddr
, vaddr
+ PAGE_SIZE
, (pmap_flush_context
*)arg
);
1381 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
1384 * Remove the mapping, collecting dirty bits.
1386 pmap_update_pte(pte
, INTEL_PTE_VALID
, 0);
1388 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+PAGE_SIZE
);
1389 pmap_phys_attributes
[pai
] |=
1390 *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1391 pmap_store_pte(pte
, new_pte_value
);
1394 if (pmap
->stats
.resident_count
< 1)
1395 panic("pmap_page_protect: resident_count");
1397 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
1398 assert(pmap
->stats
.resident_count
>= 1);
1399 OSAddAtomic(-1, &pmap
->stats
.resident_count
);
1400 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
1402 * This removal is only being done so we can send this page to
1403 * the compressor; therefore it mustn't affect total task footprint.
1405 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, PAGE_SIZE
);
1407 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
1410 if (pmap
!= kernel_pmap
) {
1411 if (IS_REUSABLE_PAGE(pai
)) {
1412 assert(pmap
->stats
.reusable
> 0);
1413 OSAddAtomic(-1, &pmap
->stats
.reusable
);
1414 } else if (IS_INTERNAL_PAGE(pai
)) {
1415 assert(pmap
->stats
.internal
> 0);
1416 OSAddAtomic(-1, &pmap
->stats
.internal
);
1418 assert(pmap
->stats
.external
> 0);
1419 OSAddAtomic(-1, &pmap
->stats
.external
);
1424 * Deal with the pv_rooted_entry.
1429 * Fix up head later.
1431 pv_h
->pmap
= PMAP_NULL
;
1434 * Delete this entry.
1436 pv_hash_remove(pvh_e
);
1437 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1440 if (pvh_et
== PV_HASHED_ENTRY_NULL
)
1446 * Write-protect, after opportunistic refmod collect
1448 pmap_phys_attributes
[pai
] |=
1449 *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1450 pmap_update_pte(pte
, INTEL_PTE_WRITE
, 0);
1452 if (options
& PMAP_OPTIONS_NOFLUSH
)
1453 PMAP_UPDATE_TLBS_DELAYED(pmap
, vaddr
, vaddr
+ PAGE_SIZE
, (pmap_flush_context
*)arg
);
1455 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+PAGE_SIZE
);
1458 } while ((pv_e
= (pv_rooted_entry_t
) nexth
) != pv_h
);
1462 * If pv_head mapping was removed, fix it up.
1464 if (pv_h
->pmap
== PMAP_NULL
) {
1465 pvh_e
= (pv_hashed_entry_t
) queue_next(&pv_h
->qlink
);
1467 if (pvh_e
!= (pv_hashed_entry_t
) pv_h
) {
1468 pv_hash_remove(pvh_e
);
1469 pv_h
->pmap
= pvh_e
->pmap
;
1470 pv_h
->va
= pvh_e
->va
;
1471 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1474 if (pvh_et
== PV_HASHED_ENTRY_NULL
)
1479 if (pvh_eh
!= PV_HASHED_ENTRY_NULL
) {
1480 PV_HASHED_FREE_LIST(pvh_eh
, pvh_et
, pvh_cnt
);
1485 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT
) | DBG_FUNC_END
,
1491 * Clear specified attribute bits.
1494 phys_attribute_clear(
1497 unsigned int options
,
1500 pv_rooted_entry_t pv_h
;
1501 pv_hashed_entry_t pv_e
;
1505 char attributes
= 0;
1506 boolean_t is_internal
, is_reusable
;
1508 if ((bits
& PHYS_MODIFIED
) &&
1509 (options
& PMAP_OPTIONS_NOFLUSH
) &&
1511 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
1512 "should not clear 'modified' without flushing TLBs\n",
1513 pn
, bits
, options
, arg
);
1517 assert(pn
!= vm_page_fictitious_addr
);
1518 if (pn
== vm_page_guard_addr
)
1521 pai
= ppn_to_pai(pn
);
1523 if (!IS_MANAGED_PAGE(pai
)) {
1525 * Not a managed page.
1530 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_START
,
1533 pv_h
= pai_to_pvh(pai
);
1538 * Walk down PV list, clearing all modify or reference bits.
1539 * We do not have to lock the pv_list because we have
1542 if (pv_h
->pmap
!= PMAP_NULL
) {
1544 * There are some mappings.
1547 is_internal
= IS_INTERNAL_PAGE(pai
);
1548 is_reusable
= IS_REUSABLE_PAGE(pai
);
1550 pv_e
= (pv_hashed_entry_t
)pv_h
;
1561 pte
= pmap_pte(pmap
, va
);
1562 /* grab ref/mod bits from this PTE */
1563 pte_bits
= (*pte
& (PHYS_MODIFIED
|
1565 /* propagate to page's global attributes */
1566 attributes
|= pte_bits
;
1567 /* which bits to clear for this PTE? */
1572 * Clear modify and/or reference bits.
1575 pmap_update_pte(pte
, bits
, 0);
1577 /* Ensure all processors using this translation
1578 * invalidate this TLB entry. The invalidation
1579 * *must* follow the PTE update, to ensure that
1580 * the TLB shadow of the 'D' bit (in particular)
1581 * is synchronized with the updated PTE.
1583 if (! (options
& PMAP_OPTIONS_NOFLUSH
)) {
1584 /* flush TLBS now */
1585 PMAP_UPDATE_TLBS(pmap
,
1589 /* delayed TLB flush: add "pmap" info */
1590 PMAP_UPDATE_TLBS_DELAYED(
1594 (pmap_flush_context
*)arg
);
1596 /* no TLB flushing at all */
1600 /* update pmap "reusable" stats */
1601 if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
1603 pmap
!= kernel_pmap
) {
1604 /* one less "reusable" */
1605 assert(pmap
->stats
.reusable
> 0);
1606 OSAddAtomic(-1, &pmap
->stats
.reusable
);
1608 /* one more "internal" */
1609 OSAddAtomic(+1, &pmap
->stats
.internal
);
1610 PMAP_STATS_PEAK(pmap
->stats
.internal
);
1612 /* one more "external" */
1613 OSAddAtomic(+1, &pmap
->stats
.external
);
1614 PMAP_STATS_PEAK(pmap
->stats
.external
);
1616 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
1618 pmap
!= kernel_pmap
) {
1619 /* one more "reusable" */
1620 OSAddAtomic(+1, &pmap
->stats
.reusable
);
1621 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
1623 /* one less "internal" */
1624 assert(pmap
->stats
.internal
> 0);
1625 OSAddAtomic(-1, &pmap
->stats
.internal
);
1627 /* one less "external" */
1628 assert(pmap
->stats
.external
> 0);
1629 OSAddAtomic(-1, &pmap
->stats
.external
);
1633 pv_e
= (pv_hashed_entry_t
)queue_next(&pv_e
->qlink
);
1635 } while (pv_e
!= (pv_hashed_entry_t
)pv_h
);
1637 /* Opportunistic refmod collection, annulled
1638 * if both REF and MOD are being cleared.
1641 pmap_phys_attributes
[pai
] |= attributes
;
1642 pmap_phys_attributes
[pai
] &= (~bits
);
1644 /* update this page's "reusable" status */
1645 if (options
& PMAP_OPTIONS_CLEAR_REUSABLE
) {
1646 pmap_phys_attributes
[pai
] &= ~PHYS_REUSABLE
;
1647 } else if (options
& PMAP_OPTIONS_SET_REUSABLE
) {
1648 pmap_phys_attributes
[pai
] |= PHYS_REUSABLE
;
1653 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_END
,
1658 * Check specified attribute bits.
/*
 * phys_attribute_test:
 * Returns the subset of 'bits' (ref/mod attribute bits) that are set for
 * physical page 'pn'.  First tries a lock-free fast check of the cached
 * pmap_phys_attributes[] entry; if not all requested bits are already
 * collected there, walks the page's PV list and ORs in the live bits from
 * each mapping's PTE, stopping early once all requested bits are found.
 * Collected bits are written back to pmap_phys_attributes[pai].
 *
 * NOTE(review): damaged extraction -- the function signature's parameter
 * lines, some early returns, the 'do {' opening the PV walk, and the
 * lines extracting 'pmap'/'va' per PV entry are missing here.
 */
1661 phys_attribute_test(
1665 pv_rooted_entry_t pv_h
;
1666 pv_hashed_entry_t pv_e
;
1673 assert(pn
!= vm_page_fictitious_addr
);
1674 if (pn
== vm_page_guard_addr
)
1677 pai
= ppn_to_pai(pn
);
1679 if (!IS_MANAGED_PAGE(pai
)) {
1681 * Not a managed page.
1687 * Fast check... if bits already collected
1688 * no need to take any locks...
1689 * if not set, we need to recheck after taking
1690 * the lock in case they got pulled in while
1691 * we were waiting for the lock
1693 if ((pmap_phys_attributes
[pai
] & bits
) == bits
)
1696 pv_h
= pai_to_pvh(pai
);
/* Re-read under the lock: bits may have been collected meanwhile. */
1700 attributes
= pmap_phys_attributes
[pai
] & bits
;
1704 * Walk down PV list, checking the mappings until we
1705 * reach the end or we've found the desired attributes.
1707 if (attributes
!= bits
&&
1708 pv_h
->pmap
!= PMAP_NULL
) {
1710 * There are some mappings.
1712 pv_e
= (pv_hashed_entry_t
)pv_h
;
1719 * pick up modify and/or reference bits from mapping
1722 pte
= pmap_pte(pmap
, va
);
1723 attributes
|= (int)(*pte
& bits
);
1725 pv_e
= (pv_hashed_entry_t
)queue_next(&pv_e
->qlink
);
/* Stop early once every requested bit has been observed. */
1727 } while ((attributes
!= bits
) &&
1728 (pv_e
!= (pv_hashed_entry_t
)pv_h
));
/* Cache what we found so the next caller can take the fast path. */
1730 pmap_phys_attributes
[pai
] |= attributes
;
1733 return (attributes
);
1737 * Routine: pmap_change_wiring
1738 * Function: Change the wiring attribute for a map/virtual-address
1740 * In/out conditions:
1741 * The mapping must already exist in the pmap.
/*
 * Toggles INTEL_PTE_WIRED on the PTE for (map, vaddr) and keeps the
 * per-pmap wired_count stat and the wired_mem ledger in sync.  Panics if
 * no PTE exists -- per the header comment the mapping must already exist.
 *
 * NOTE(review): damaged extraction -- the signature lines for 'map' and
 * 'wired', the local 'pte' declaration, and the PMAP lock/unlock calls
 * one would expect around the PTE access are not visible here; confirm
 * against the pristine file.
 */
1746 vm_map_offset_t vaddr
,
1753 if ((pte
= pmap_pte(map
, vaddr
)) == PT_ENTRY_NULL
)
1754 panic("pmap_change_wiring: pte missing");
1756 if (wired
&& !iswired(*pte
)) {
1758 * wiring down mapping
/* Account first (ledger + stat), then mark the PTE wired. */
1760 pmap_ledger_credit(map
, task_ledgers
.wired_mem
, PAGE_SIZE
);
1761 OSAddAtomic(+1, &map
->stats
.wired_count
);
1762 pmap_update_pte(pte
, 0, INTEL_PTE_WIRED
);
1764 else if (!wired
&& iswired(*pte
)) {
/* Unwiring: reverse the accounting and clear the wired bit. */
1768 assert(map
->stats
.wired_count
>= 1);
1769 OSAddAtomic(-1, &map
->stats
.wired_count
);
1770 pmap_ledger_debit(map
, task_ledgers
.wired_mem
, PAGE_SIZE
);
1771 pmap_update_pte(pte
, INTEL_PTE_WIRED
, 0);
1778 * "Backdoor" direct map routine for early mappings.
1779 * Useful for mapping memory outside the range
1780 * Sets A, D and NC if requested
/*
 * pmap_map_bd:
 * Maps the physical range [start_addr, end_addr) at kernel virtual
 * address 'virt' by writing PTEs directly into the kernel pmap.  Builds a
 * template PTE from start_addr plus cacheability (VM_MEM_NOT_CACHEABLE /
 * PTA), NX (when VM_PROT_EXECUTE absent, x86_64 only) and write bits,
 * then steps both the template's physical address and start_addr one
 * page at a time.  Finishes with a TLB flush over the mapped span.
 *
 * NOTE(review): damaged extraction -- the signature lines for 'virt',
 * 'prot' and 'flags', the 'pte' declaration, and the 'virt += PAGE_SIZE'
 * style loop-advance line are not visible here.  'template' is an
 * ordinary identifier in C (only a keyword in C++).
 */
1786 vm_map_offset_t start_addr
,
1787 vm_map_offset_t end_addr
,
1791 pt_entry_t
template;
1794 vm_offset_t base
= virt
;
1795 template = pa_to_pte(start_addr
)
/* Uncached mapping requested (and not "use default"): set NCACHE,
 * and PTA additionally unless the mapping is guarded. */
1801 if ((flags
& (VM_MEM_NOT_CACHEABLE
| VM_WIMG_USE_DEFAULT
)) == VM_MEM_NOT_CACHEABLE
) {
1802 template |= INTEL_PTE_NCACHE
;
1803 if (!(flags
& (VM_MEM_GUARDED
)))
1804 template |= INTEL_PTE_PTA
;
1807 #if defined(__x86_64__)
1808 if ((prot
& VM_PROT_EXECUTE
) == 0)
1809 template |= INTEL_PTE_NX
;
1812 if (prot
& VM_PROT_WRITE
)
1813 template |= INTEL_PTE_WRITE
;
/* One PTE per page: store template, then advance its PA and the cursor. */
1815 while (start_addr
< end_addr
) {
1817 pte
= pmap_pte(kernel_pmap
, (vm_map_offset_t
)virt
);
1818 if (pte
== PT_ENTRY_NULL
) {
1819 panic("pmap_map_bd: Invalid kernel address\n");
1821 pmap_store_pte(pte
, template);
1823 pte_increment_pa(template);
1825 start_addr
+= PAGE_SIZE
;
/* start_addr == end_addr after the loop, so this flushes [base, base). 
 * NOTE(review): presumably the intent is the full mapped span -- the
 * extraction may have dropped a saved-length variable; confirm. */
1828 PMAP_UPDATE_TLBS(kernel_pmap
, base
, base
+ end_addr
- start_addr
);
/*
 * pmap_query_resident:
 * Scans the virtual range [s64, e64) of 'pmap' one page-directory span at
 * a time, testing each PTE with pte_to_pa() != 0 to detect resident
 * pages.  Bails out for the null/kernel pmap or an empty range, skips
 * superpage PDEs (unsupported), and periodically drops out when the TSC
 * deadline passes -- presumably to release the pmap lock and allow
 * preemption (the lock/unlock lines are not visible here; TODO confirm).
 *
 * NOTE(review): damaged extraction -- the return type, parameter lines,
 * the accumulation of the resident count into 'result', the l64 clamp to
 * e64, and the 's64 = l64' loop advance are missing from this view.
 */
1833 pmap_query_resident(
1839 pt_entry_t
*spte
, *epte
;
1842 unsigned int result
;
1846 if (pmap
== PMAP_NULL
|| pmap
== kernel_pmap
|| s64
== e64
)
1849 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT
) | DBG_FUNC_START
,
1851 (uint32_t) (s64
>> 32), s64
,
1852 (uint32_t) (e64
>> 32), e64
);
1858 deadline
= rdtsc64() + max_preemption_latency_tsc
;
/* l64: end of the current PDE-aligned span containing s64. */
1861 l64
= (s64
+ pde_mapped_size
) & ~(pde_mapped_size
- 1);
1864 pde
= pmap_pde(pmap
, s64
);
1866 if (pde
&& (*pde
& INTEL_PTE_VALID
)) {
1867 if (*pde
& INTEL_PTE_PS
) {
1868 /* superpage: not supported */
1870 spte
= pmap_pte(pmap
,
1871 (s64
& ~(pde_mapped_size
- 1)));
1872 spte
= &spte
[ptenum(s64
)];
1873 epte
= &spte
[intel_btop(l64
- s64
)];
/* Walk the PTEs of this span; a nonzero PA means a resident page. */
1875 for (; spte
< epte
; spte
++) {
1876 if (pte_to_pa(*spte
) != 0) {
/* Preemption check between spans: re-arm the TSC deadline. */
1885 if (s64
< e64
&& rdtsc64() >= deadline
) {
1888 deadline
= rdtsc64() + max_preemption_latency_tsc
;
1894 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT
) | DBG_FUNC_END
,
1903 __unused pmap_t pmap
,
1905 __unused
char *procname
)
1908 #endif /* MACH_ASSERT */