osfmk/i386/pmap_x86_common.c

   1 /*
   2  * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28
  29 #include <mach_assert.h>
  30
  31 #include <vm/pmap.h>
  32 #include <vm/vm_map.h>
  33 #include <kern/ledger.h>
  34 #include <i386/pmap_internal.h>
  35
  36 void            pmap_remove_range(
  37                         pmap_t          pmap,
  38                         vm_map_offset_t va,
  39                         pt_entry_t      *spte,
  40                         pt_entry_t      *epte);
  41
  42 void            pmap_remove_range_options(
  43                         pmap_t          pmap,
  44                         vm_map_offset_t va,
  45                         pt_entry_t      *spte,
  46                         pt_entry_t      *epte,
  47                         int             options);
  48
  49 void            pmap_reusable_range(
  50                         pmap_t          pmap,
  51                         vm_map_offset_t va,
  52                         pt_entry_t      *spte,
  53                         pt_entry_t      *epte,
  54                         boolean_t       reusable);
  55
  56 uint32_t pmap_update_clear_pte_count;
  57
  58 /*
  59  * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
  60  * on a NBPDE boundary.
  61  */
  62
  63 /* These symbols may be referenced directly by VM */
  64 uint64_t pmap_nesting_size_min = NBPDE;
  65 uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
  66
  67 /*
  68  *      kern_return_t pmap_nest(grand, subord, va_start, size)
  69  *
  70  *      grand  = the pmap that we will nest subord into
  71  *      subord = the pmap that goes into the grand
  72  *      va_start  = start of range in pmap to be inserted
  73  *      nstart  = start of range in pmap nested pmap
  74  *      size   = Size of nest area (up to 16TB)
  75  *
  76  *      Inserts a pmap into another.  This is used to implement shared segments.
  77  *
  78  *      Note that we depend upon higher level VM locks to insure that things don't change while
  79  *      we are doing this.  For example, VM should not be doing any pmap enters while it is nesting
  80  *      or do 2 nests at once.
  81  */
  82
  83 /*
  84  * This routine can nest subtrees either at the PDPT level (1GiB) or at the
  85  * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
  86  * container and the "grand" parent. A minor optimization to consider for the
  87  * future: make the "subord" truly a container rather than a full-fledged
  88  * pagetable hierarchy which can be unnecessarily sparse (DRK).
  89  */
  90
  91 kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
  92         vm_map_offset_t vaddr, nvaddr;
  93         pd_entry_t      *pde,*npde;
  94         unsigned int    i;
  95         uint64_t        num_pde;
  96
  97         assert(!is_ept_pmap(grand));
  98         assert(!is_ept_pmap(subord));
  99
 100         if ((size & (pmap_nesting_size_min-1)) ||
 101             (va_start & (pmap_nesting_size_min-1)) ||
 102             (nstart & (pmap_nesting_size_min-1)) ||
 103             ((size >> 28) > 65536))     /* Max size we can nest is 16TB */
 104                 return KERN_INVALID_VALUE;
 105
 106         if(size == 0) {
 107                 panic("pmap_nest: size is invalid - %016llX\n", size);
 108         }
 109
 110         if (va_start != nstart)
 111                 panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);
 112
 113         PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
 114         (uintptr_t) grand, (uintptr_t) subord,
 115             (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);
 116
 117         nvaddr = (vm_map_offset_t)nstart;
 118         num_pde = size >> PDESHIFT;
 119
 120         PMAP_LOCK(subord);
 121
 122         subord->pm_shared = TRUE;
 123
 124         for (i = 0; i < num_pde;) {
 125                 if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {
 126
 127                         npde = pmap64_pdpt(subord, nvaddr);
 128
 129                         while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
 130                                 PMAP_UNLOCK(subord);
 131                                 pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
 132                                 PMAP_LOCK(subord);
 133                                 npde = pmap64_pdpt(subord, nvaddr);
 134                         }
 135                         *npde |= INTEL_PDPTE_NESTED;
 136                         nvaddr += NBPDPT;
 137                         i += (uint32_t)NPDEPG;
 138                 }
 139                 else {
 140                         npde = pmap_pde(subord, nvaddr);
 141
 142                         while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
 143                                 PMAP_UNLOCK(subord);
 144                                 pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
 145                                 PMAP_LOCK(subord);
 146                                 npde = pmap_pde(subord, nvaddr);
 147                         }
 148                         nvaddr += NBPDE;
 149                         i++;
 150                 }
 151         }
 152
 153         PMAP_UNLOCK(subord);
 154
 155         vaddr = (vm_map_offset_t)va_start;
 156
 157         PMAP_LOCK(grand);
 158
 159         for (i = 0;i < num_pde;) {
 160                 pd_entry_t tpde;
 161
 162                 if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
 163                         npde = pmap64_pdpt(subord, vaddr);
 164                         if (npde == 0)
 165                                 panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
 166                         tpde = *npde;
 167                         pde = pmap64_pdpt(grand, vaddr);
 168                         if (0 == pde) {
 169                                 PMAP_UNLOCK(grand);
 170                                 pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
 171                                 PMAP_LOCK(grand);
 172                                 pde = pmap64_pdpt(grand, vaddr);
 173                         }
 174                         if (pde == 0)
 175                                 panic("pmap_nest: no PDPT, grand  %p vaddr 0x%llx", grand, vaddr);
 176                         pmap_store_pte(pde, tpde);
 177                         vaddr += NBPDPT;
 178                         i += (uint32_t) NPDEPG;
 179                 }
 180                 else {
 181                         npde = pmap_pde(subord, nstart);
 182                         if (npde == 0)
 183                                 panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
 184                         tpde = *npde;
 185                         nstart += NBPDE;
 186                         pde = pmap_pde(grand, vaddr);
 187                         if ((0 == pde) && cpu_64bit) {
 188                                 PMAP_UNLOCK(grand);
 189                                 pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
 190                                 PMAP_LOCK(grand);
 191                                 pde = pmap_pde(grand, vaddr);
 192                         }
 193
 194                         if (pde == 0)
 195                                 panic("pmap_nest: no pde, grand  %p vaddr 0x%llx", grand, vaddr);
 196                         vaddr += NBPDE;
 197                         pmap_store_pte(pde, tpde);
 198                         i++;
 199                 }
 200         }
 201
 202         PMAP_UNLOCK(grand);
 203
 204         PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
 205
 206         return KERN_SUCCESS;
 207 }
 208
 209 /*
 210  *      kern_return_t pmap_unnest(grand, vaddr)
 211  *
 212  *      grand  = the pmap that we will un-nest subord from
 213  *      vaddr  = start of range in pmap to be unnested
 214  *
 215  *      Removes a pmap from another.  This is used to implement shared segments.
 216  */
 217
 218 kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
 219
 220         pd_entry_t *pde;
 221         unsigned int i;
 222         uint64_t num_pde;
 223         addr64_t va_start, va_end;
 224         uint64_t npdpt = PMAP_INVALID_PDPTNUM;
 225
 226         PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
 227             (uintptr_t) grand,
 228             (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);
 229
 230         if ((size & (pmap_nesting_size_min-1)) ||
 231             (vaddr & (pmap_nesting_size_min-1))) {
 232                 panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
 233                     grand, vaddr, size);
 234         }
 235
 236         assert(!is_ept_pmap(grand));
 237
 238         /* align everything to PDE boundaries */
 239         va_start = vaddr & ~(NBPDE-1);
 240         va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
 241         size = va_end - va_start;
 242
 243         PMAP_LOCK(grand);
 244
 245         num_pde = size >> PDESHIFT;
 246         vaddr = va_start;
 247
 248         for (i = 0; i < num_pde; ) {
 249                 if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
 250                         npdpt = pdptnum(grand, vaddr);
 251                         pde = pmap64_pdpt(grand, vaddr);
 252                         if (pde && (*pde & INTEL_PDPTE_NESTED)) {
 253                                 pmap_store_pte(pde, (pd_entry_t)0);
 254                                 i += (uint32_t) NPDEPG;
 255                                 vaddr += NBPDPT;
 256                                 continue;
 257                         }
 258                 }
 259                 pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
 260                 if (pde == 0)
 261                         panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
 262                 pmap_store_pte(pde, (pd_entry_t)0);
 263                 i++;
 264                 vaddr += NBPDE;
 265         }
 266
 267         PMAP_UPDATE_TLBS(grand, va_start, va_end);
 268
 269         PMAP_UNLOCK(grand);
 270
 271         PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);
 272
 273         return KERN_SUCCESS;
 274 }
 275
 276 kern_return_t
 277 pmap_unnest_options(
 278         pmap_t grand,
 279         addr64_t vaddr,
 280         __unused uint64_t size,
 281         __unused unsigned int options) {
 282         return pmap_unnest(grand, vaddr, size);
 283 }
 284
 285 /* Invoked by the Mach VM to determine the platform specific unnest region */
 286
 287 boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
 288         pd_entry_t *pdpte;
 289         boolean_t rval = FALSE;
 290
 291         if (!cpu_64bit)
 292                 return rval;
 293
 294         PMAP_LOCK(p);
 295
 296         pdpte = pmap64_pdpt(p, *s);
 297         if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
 298                 *s &= ~(NBPDPT -1);
 299                 rval = TRUE;
 300         }
 301
 302         pdpte = pmap64_pdpt(p, *e);
 303         if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
 304                 *e = ((*e + NBPDPT) & ~(NBPDPT -1));
 305                 rval = TRUE;
 306         }
 307
 308         PMAP_UNLOCK(p);
 309
 310         return rval;
 311 }
 312
 313 /*
 314  * pmap_find_phys returns the (4K) physical page number containing a
 315  * given virtual address in a given pmap.
 316  * Note that pmap_pte may return a pde if this virtual address is
 317  * mapped by a large page and this is taken into account in order
 318  * to return the correct page number in this case.
 319  */
 320 ppnum_t
 321 pmap_find_phys(pmap_t pmap, addr64_t va)
 322 {
 323         pt_entry_t      *ptp;
 324         pd_entry_t      *pdep;
 325         ppnum_t         ppn = 0;
 326         pd_entry_t      pde;
 327         pt_entry_t      pte;
 328         boolean_t       is_ept;
 329
 330         is_ept = is_ept_pmap(pmap);
 331
 332         mp_disable_preemption();
 333
 334         /* This refcount test is a band-aid--several infrastructural changes
 335          * are necessary to eliminate invocation of this routine from arbitrary
 336          * contexts.
 337          */
 338
 339         if (!pmap->ref_count)
 340                 goto pfp_exit;
 341
 342         pdep = pmap_pde(pmap, va);
 343
 344         if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
 345                 if (pde & PTE_PS) {
 346                         ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
 347                         ppn += (ppnum_t) ptenum(va);
 348                 }
 349                 else {
 350                         ptp = pmap_pte(pmap, va);
 351                         if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
 352                                 ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
 353                         }
 354                 }
 355         }
 356 pfp_exit:
 357         mp_enable_preemption();
 358
 359         return ppn;
 360 }
 361
 362 /*
 363  * Update cache attributes for all extant managed mappings.
 364  * Assumes PV for this page is locked, and that the page
 365  * is managed. We assume that this physical page may be mapped in
 366  * both EPT and normal Intel PTEs, so we convert the attributes
 367  * to the corresponding format for each pmap.
 368  *
 369  * We assert that the passed set of attributes is a subset of the
 370  * PHYS_CACHEABILITY_MASK.
 371  */
 372 void
 373 pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
 374         pv_rooted_entry_t       pv_h, pv_e;
 375         pv_hashed_entry_t       pvh_e, nexth;
 376         vm_map_offset_t vaddr;
 377         pmap_t  pmap;
 378         pt_entry_t      *ptep;
 379         boolean_t       is_ept;
 380         unsigned        ept_attributes;
 381
 382         assert(IS_MANAGED_PAGE(pn));
 383         assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);
 384
 385         /* We don't support the PTA bit for EPT PTEs */
 386         if (attributes & INTEL_PTE_NCACHE)
 387                 ept_attributes = INTEL_EPT_NCACHE;
 388         else
 389                 ept_attributes = INTEL_EPT_WB;
 390
 391         pv_h = pai_to_pvh(pn);
 392         /* TODO: translate the PHYS_* bits to PTE bits, while they're
 393          * currently identical, they may not remain so
 394          * Potential optimization (here and in page_protect),
 395          * parallel shootdowns, check for redundant
 396          * attribute modifications.
 397          */
 398
 399         /*
 400          * Alter attributes on all mappings
 401          */
 402         if (pv_h->pmap != PMAP_NULL) {
 403                 pv_e = pv_h;
 404                 pvh_e = (pv_hashed_entry_t)pv_e;
 405
 406                 do {
 407                         pmap = pv_e->pmap;
 408                         vaddr = pv_e->va;
 409                         ptep = pmap_pte(pmap, vaddr);
 410
 411                         if (0 == ptep)
 412                                 panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
 413
 414                         is_ept = is_ept_pmap(pmap);
 415
 416                         nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
 417                         if (!is_ept) {
 418                                 pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
 419                         } else {
 420                                 pmap_update_pte(ptep, INTEL_EPT_CACHE_MASK, ept_attributes);
 421                         }
 422                         PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
 423                         pvh_e = nexth;
 424                 } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
 425         }
 426 }
 427
 428 void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
 429         assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
 430
 431         if (dofilter) {
 432                 CPU_CR3_MARK_INACTIVE();
 433         } else {
 434                 CPU_CR3_MARK_ACTIVE();
 435                 mfence();
 436                 if (current_cpu_datap()->cpu_tlb_invalid)
 437                         process_pmap_updates();
 438         }
 439 }
 440
 441
 442 /*
 443  *      Insert the given physical page (p) at
 444  *      the specified virtual address (v) in the
 445  *      target physical map with the protection requested.
 446  *
 447  *      If specified, the page will be wired down, meaning
 448  *      that the related pte cannot be reclaimed.
 449  *
 450  *      NB:  This is the only routine which MAY NOT lazy-evaluate
 451  *      or lose information.  That is, this routine must actually
 452  *      insert this page into the given map NOW.
 453  */
 454
 455 void
 456 pmap_enter(
 457         register pmap_t         pmap,
 458         vm_map_offset_t         vaddr,
 459         ppnum_t                 pn,
 460         vm_prot_t               prot,
 461         vm_prot_t               fault_type,
 462         unsigned int            flags,
 463         boolean_t               wired)
 464 {
 465         (void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
 466 }
 467
 468
 469 kern_return_t
 470 pmap_enter_options(
 471         register pmap_t         pmap,
 472         vm_map_offset_t         vaddr,
 473         ppnum_t                 pn,
 474         vm_prot_t               prot,
 475         __unused vm_prot_t      fault_type,
 476         unsigned int            flags,
 477         boolean_t               wired,
 478         unsigned int            options,
 479         void                    *arg)
 480 {
 481         pt_entry_t              *pte;
 482         pv_rooted_entry_t       pv_h;
 483         ppnum_t                 pai;
 484         pv_hashed_entry_t       pvh_e;
 485         pv_hashed_entry_t       pvh_new;
 486         pt_entry_t              template;
 487         pmap_paddr_t            old_pa;
 488         pmap_paddr_t            pa = (pmap_paddr_t) i386_ptob(pn);
 489         boolean_t               need_tlbflush = FALSE;
 490         boolean_t               set_NX;
 491         char                    oattr;
 492         boolean_t               old_pa_locked;
 493         /* 2MiB mappings are confined to x86_64 by VM */
 494         boolean_t               superpage = flags & VM_MEM_SUPERPAGE;
 495         vm_object_t             delpage_pm_obj = NULL;
 496         uint64_t                delpage_pde_index = 0;
 497         pt_entry_t              old_pte;
 498         kern_return_t           kr_expand;
 499         boolean_t               is_ept;
 500
 501         pmap_intr_assert();
 502
 503         if (pmap == PMAP_NULL)
 504                 return KERN_INVALID_ARGUMENT;
 505
 506         is_ept = is_ept_pmap(pmap);
 507
 508         /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
 509          * unused value for that scenario.
 510          */
 511         assert(pn != vm_page_fictitious_addr);
 512
 513         if (pn == vm_page_guard_addr)
 514                 return KERN_INVALID_ARGUMENT;
 515
 516         PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
 517             pmap,
 518             (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
 519             pn, prot);
 520
 521         if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
 522                 set_NX = FALSE;
 523         else
 524                 set_NX = TRUE;
 525
 526         if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
 527                 set_NX = FALSE;
 528         }
 529
 530         /*
 531          *      Must allocate a new pvlist entry while we're unlocked;
 532          *      zalloc may cause pageout (which will lock the pmap system).
 533          *      If we determine we need a pvlist entry, we will unlock
 534          *      and allocate one.  Then we will retry, throughing away
 535          *      the allocated entry later (if we no longer need it).
 536          */
 537
 538         pvh_new = PV_HASHED_ENTRY_NULL;
 539 Retry:
 540         pvh_e = PV_HASHED_ENTRY_NULL;
 541
 542         PMAP_LOCK(pmap);
 543
 544         /*
 545          *      Expand pmap to include this pte.  Assume that
 546          *      pmap is always expanded to include enough hardware
 547          *      pages to map one VM page.
 548          */
 549          if(superpage) {
 550                 while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
 551                         /* need room for another pde entry */
 552                         PMAP_UNLOCK(pmap);
 553                         kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
 554                         if (kr_expand != KERN_SUCCESS)
 555                                 return kr_expand;
 556                         PMAP_LOCK(pmap);
 557                 }
 558         } else {
 559                 while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
 560                         /*
 561                          * Must unlock to expand the pmap
 562                          * going to grow pde level page(s)
 563                          */
 564                         PMAP_UNLOCK(pmap);
 565                         kr_expand = pmap_expand(pmap, vaddr, options);
 566                         if (kr_expand != KERN_SUCCESS)
 567                                 return kr_expand;
 568                         PMAP_LOCK(pmap);
 569                 }
 570         }
 571         if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
 572                 PMAP_UNLOCK(pmap);
 573                 return KERN_SUCCESS;
 574         }
 575
 576         if (superpage && *pte && !(*pte & PTE_PS)) {
 577                 /*
 578                  * There is still an empty page table mapped that
 579                  * was used for a previous base page mapping.
 580                  * Remember the PDE and the PDE index, so that we
 581                  * can free the page at the end of this function.
 582                  */
 583                 delpage_pde_index = pdeidx(pmap, vaddr);
 584                 delpage_pm_obj = pmap->pm_obj;
 585                 *pte = 0;
 586         }
 587
 588         old_pa = pte_to_pa(*pte);
 589         pai = pa_index(old_pa);
 590         old_pa_locked = FALSE;
 591
 592         if (old_pa == 0 &&
 593             (*pte & PTE_COMPRESSED)) {
 594                 /* one less "compressed" */
 595                 OSAddAtomic64(-1, &pmap->stats.compressed);
 596                 /* marker will be cleared below */
 597         }
 598
 599         /*
 600          * if we have a previous managed page, lock the pv entry now. after
 601          * we lock it, check to see if someone beat us to the lock and if so
 602          * drop the lock
 603          */
 604         if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
 605                 LOCK_PVH(pai);
 606                 old_pa_locked = TRUE;
 607                 old_pa = pte_to_pa(*pte);
 608                 if (0 == old_pa) {
 609                         UNLOCK_PVH(pai);        /* another path beat us to it */
 610                         old_pa_locked = FALSE;
 611                 }
 612         }
 613
 614         /*
 615          *      Special case if the incoming physical page is already mapped
 616          *      at this address.
 617          */
 618         if (old_pa == pa) {
 619                 pt_entry_t old_attributes =
 620                     *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept));
 621
 622                 /*
 623                  *      May be changing its wired attribute or protection
 624                  */
 625
 626                 template =  pa_to_pte(pa);
 627
 628                 /* ?: WORTH ASSERTING THAT AT LEAST ONE RWX (implicit valid) PASSED FOR EPT? */
 629                 if (!is_ept) {
 630                         template |= INTEL_PTE_VALID;
 631                 } else {
 632                         template |= INTEL_EPT_IPTA;
 633                 }
 634
 635                 template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
 636
 637                 /*
 638                  * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
 639                  */
 640                 if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
 641                     (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
 642                         if (!(flags & VM_MEM_GUARDED))
 643                                 template |= INTEL_PTE_PTA;
 644                         template |= INTEL_PTE_NCACHE;
 645                 }
 646                 if (pmap != kernel_pmap && !is_ept)
 647                         template |= INTEL_PTE_USER;
 648
 649                 if (prot & VM_PROT_READ)
 650                         template |= PTE_READ(is_ept);
 651
 652                 if (prot & VM_PROT_WRITE) {
 653                         template |= PTE_WRITE(is_ept);
 654                         if (is_ept && !pmap_ept_support_ad) {
 655                                 template |= PTE_MOD(is_ept);
 656                                 if (old_pa_locked) {
 657                                         assert(IS_MANAGED_PAGE(pai));
 658                                         pmap_phys_attributes[pai] |= PHYS_MODIFIED;
 659                                 }
 660                         }
 661                 }
 662                 if (prot & VM_PROT_EXECUTE) {
 663                         assert(set_NX == 0);
 664                         template = pte_set_ex(template, is_ept);
 665                 }
 666
 667                 if (set_NX)
 668                         template = pte_remove_ex(template, is_ept);
 669
 670                 if (wired) {
 671                         template |= PTE_WIRED;
 672                         if (!iswired(old_attributes))  {
 673                                 OSAddAtomic(+1, &pmap->stats.wired_count);
 674                                 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
 675                         }
 676                 } else {
 677                         if (iswired(old_attributes)) {
 678                                 assert(pmap->stats.wired_count >= 1);
 679                                 OSAddAtomic(-1, &pmap->stats.wired_count);
 680                                 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
 681                         }
 682                 }
 683
 684                 if (superpage)          /* this path can not be used */
 685                         template |= PTE_PS;     /* to change the page size! */
 686
 687                 if (old_attributes == template)
 688                         goto dont_update_pte;
 689
 690                 /* Determine delta, PV locked */
 691                 need_tlbflush =
 692                     ((old_attributes ^ template) != PTE_WIRED);
 693
 694                 if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
 695                         if ((old_attributes ^ template) == PTE_WRITE(is_ept))
 696                                 need_tlbflush = FALSE;
 697                 }
 698
 699                 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
 700                 if (is_ept && !pmap_ept_support_ad) {
 701                         template |= PTE_REF(is_ept);
 702                         if (old_pa_locked) {
 703                                 assert(IS_MANAGED_PAGE(pai));
 704                                 pmap_phys_attributes[pai] |= PHYS_REFERENCED;
 705                         }
 706                 }
 707
 708                 /* store modified PTE and preserve RC bits */
 709                 pt_entry_t npte, opte;;
 710                 do {
 711                         opte = *pte;
 712                         npte = template | (opte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
 713                 } while (!pmap_cmpx_pte(pte, opte, npte));
 714 dont_update_pte:
 715                 if (old_pa_locked) {
 716                         UNLOCK_PVH(pai);
 717                         old_pa_locked = FALSE;
 718                 }
 719                 goto Done;
 720         }
 721
 722         /*
 723          *      Outline of code from here:
 724          *         1) If va was mapped, update TLBs, remove the mapping
 725          *            and remove old pvlist entry.
 726          *         2) Add pvlist entry for new mapping
 727          *         3) Enter new mapping.
 728          *
 729          *      If the old physical page is not managed step 1) is skipped
 730          *      (except for updating the TLBs), and the mapping is
 731          *      overwritten at step 3).  If the new physical page is not
 732          *      managed, step 2) is skipped.
 733          */
 734
 735         if (old_pa != (pmap_paddr_t) 0) {
 736
 737                 /*
 738                  *      Don't do anything to pages outside valid memory here.
 739                  *      Instead convince the code that enters a new mapping
 740                  *      to overwrite the old one.
 741                  */
 742
 743                 /* invalidate the PTE */
 744                 pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
 745                 /* propagate invalidate everywhere */
 746                 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
 747                 /* remember reference and change */
 748                 old_pte = *pte;
 749                 oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
 750                 /* completely invalidate the PTE */
 751                 pmap_store_pte(pte, 0);
 752
 753                 if (IS_MANAGED_PAGE(pai)) {
 754                         pmap_assert(old_pa_locked == TRUE);
 755                         pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
 756                         pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
 757                         assert(pmap->stats.resident_count >= 1);
 758                         OSAddAtomic(-1, &pmap->stats.resident_count);
 759                         if (pmap != kernel_pmap) {
 760                                 if (IS_REUSABLE_PAGE(pai)) {
 761                                         assert(pmap->stats.reusable > 0);
 762                                         OSAddAtomic(-1, &pmap->stats.reusable);
 763                                 } else if (IS_INTERNAL_PAGE(pai)) {
 764                                         assert(pmap->stats.internal > 0);
 765                                         OSAddAtomic(-1, &pmap->stats.internal);
 766                                 } else {
 767                                         assert(pmap->stats.external > 0);
 768                                         OSAddAtomic(-1, &pmap->stats.external);
 769                                 }
 770                         }
 771                         if (iswired(*pte)) {
 772                                 assert(pmap->stats.wired_count >= 1);
 773                                 OSAddAtomic(-1, &pmap->stats.wired_count);
 774                                 pmap_ledger_debit(pmap, task_ledgers.wired_mem,
 775                                     PAGE_SIZE);
 776                         }
 777
 778                         if (!is_ept) {
 779                                 pmap_phys_attributes[pai] |= oattr;
 780                         } else {
 781                                 pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
 782                         }
 783
 784                         /*
 785                          *      Remove the mapping from the pvlist for
 786                          *      this physical page.
 787                          *      We'll end up with either a rooted pv or a
 788                          *      hashed pv
 789                          */
 790                         pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);
 791
 792                 } else {
 793
 794                         /*
 795                          *      old_pa is not managed.
 796                          *      Do removal part of accounting.
 797                          */
 798
 799                         if (pmap != kernel_pmap) {
 800 #if 00
 801                                 assert(pmap->stats.device > 0);
 802                                 OSAddAtomic(-1, &pmap->stats.device);
 803 #endif
 804                         }
 805                         if (iswired(*pte)) {
 806                                 assert(pmap->stats.wired_count >= 1);
 807                                 OSAddAtomic(-1, &pmap->stats.wired_count);
 808                                 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
 809                         }
 810                 }
 811         }
 812
 813         /*
 814          * if we had a previously managed paged locked, unlock it now
 815          */
 816         if (old_pa_locked) {
 817                 UNLOCK_PVH(pai);
 818                 old_pa_locked = FALSE;
 819         }
 820
 821         pai = pa_index(pa);     /* now working with new incoming phys page */
 822         if (IS_MANAGED_PAGE(pai)) {
 823
 824                 /*
 825                  *      Step 2) Enter the mapping in the PV list for this
 826                  *      physical page.
 827                  */
 828                 pv_h = pai_to_pvh(pai);
 829
 830                 LOCK_PVH(pai);
 831
 832                 if (pv_h->pmap == PMAP_NULL) {
 833                         /*
 834                          *      No mappings yet, use rooted pv
 835                          */
 836                         pv_h->va = vaddr;
 837                         pv_h->pmap = pmap;
 838                         queue_init(&pv_h->qlink);
 839
 840                         if (options & PMAP_OPTIONS_INTERNAL) {
 841                                 pmap_phys_attributes[pai] |= PHYS_INTERNAL;
 842                         } else {
 843                                 pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
 844                         }
 845                         if (options & PMAP_OPTIONS_REUSABLE) {
 846                                 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
 847                         } else {
 848                                 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
 849                         }
 850                 } else {
 851                         /*
 852                          *      Add new pv_hashed_entry after header.
 853                          */
 854                         if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
 855                                 pvh_e = pvh_new;
 856                                 pvh_new = PV_HASHED_ENTRY_NULL;
 857                         } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
 858                                 PV_HASHED_ALLOC(&pvh_e);
 859                                 if (PV_HASHED_ENTRY_NULL == pvh_e) {
 860                                         /*
 861                                          * the pv list is empty. if we are on
 862                                          * the kernel pmap we'll use one of
 863                                          * the special private kernel pv_e's,
 864                                          * else, we need to unlock
 865                                          * everything, zalloc a pv_e, and
 866                                          * restart bringing in the pv_e with
 867                                          * us.
 868                                          */
 869                                         if (kernel_pmap == pmap) {
 870                                                 PV_HASHED_KERN_ALLOC(&pvh_e);
 871                                         } else {
 872                                                 UNLOCK_PVH(pai);
 873                                                 PMAP_UNLOCK(pmap);
 874                                                 pmap_pv_throttle(pmap);
 875                                                 pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
 876                                                 goto Retry;
 877                                         }
 878                                 }
 879                         }
 880
 881                         if (PV_HASHED_ENTRY_NULL == pvh_e)
 882                                 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
 883
 884                         pvh_e->va = vaddr;
 885                         pvh_e->pmap = pmap;
 886                         pvh_e->ppn = pn;
 887                         pv_hash_add(pvh_e, pv_h);
 888
 889                         /*
 890                          *      Remember that we used the pvlist entry.
 891                          */
 892                         pvh_e = PV_HASHED_ENTRY_NULL;
 893                 }
 894
 895                 /*
 896                  * only count the mapping
 897                  * for 'managed memory'
 898                  */
 899                 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
 900                 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
 901                 OSAddAtomic(+1,  &pmap->stats.resident_count);
 902                 if (pmap->stats.resident_count > pmap->stats.resident_max) {
 903                         pmap->stats.resident_max = pmap->stats.resident_count;
 904                 }
 905                 if (pmap != kernel_pmap) {
 906                         if (IS_REUSABLE_PAGE(pai)) {
 907                                 OSAddAtomic(+1, &pmap->stats.reusable);
 908                                 PMAP_STATS_PEAK(pmap->stats.reusable);
 909                         } else if (IS_INTERNAL_PAGE(pai)) {
 910                                 OSAddAtomic(+1, &pmap->stats.internal);
 911                                 PMAP_STATS_PEAK(pmap->stats.internal);
 912                         } else {
 913                                 OSAddAtomic(+1, &pmap->stats.external);
 914                                 PMAP_STATS_PEAK(pmap->stats.external);
 915                         }
 916                 }
 917         } else if (last_managed_page == 0) {
 918                 /* Account for early mappings created before "managed pages"
 919                  * are determined. Consider consulting the available DRAM map.
 920                  */
 921                 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
 922                 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
 923                 OSAddAtomic(+1,  &pmap->stats.resident_count);
 924                 if (pmap != kernel_pmap) {
 925 #if 00
 926                         OSAddAtomic(+1, &pmap->stats.device);
 927                         PMAP_STATS_PEAK(pmap->stats.device);
 928 #endif
 929                 }
 930         }
 931         /*
 932          * Step 3) Enter the mapping.
 933          *
 934          *      Build a template to speed up entering -
 935          *      only the pfn changes.
 936          */
 937         template = pa_to_pte(pa);
 938
 939         if (!is_ept) {
 940                 template |= INTEL_PTE_VALID;
 941         } else {
 942                 template |= INTEL_EPT_IPTA;
 943         }
 944
 945
 946         /*
 947          * DRK: It may be worth asserting on cache attribute flags that diverge
 948          * from the existing physical page attributes.
 949          */
 950
 951         template |= pmap_get_cache_attributes(pa_index(pa), is_ept);
 952
 953         /*
 954          * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
 955          */
 956         if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
 957                 if (!(flags & VM_MEM_GUARDED))
 958                         template |= INTEL_PTE_PTA;
 959                 template |= INTEL_PTE_NCACHE;
 960         }
 961         if (pmap != kernel_pmap && !is_ept)
 962                 template |= INTEL_PTE_USER;
 963         if (prot & VM_PROT_READ)
 964                 template |= PTE_READ(is_ept);
 965         if (prot & VM_PROT_WRITE) {
 966                 template |= PTE_WRITE(is_ept);
 967                 if (is_ept && !pmap_ept_support_ad) {
 968                         template |= PTE_MOD(is_ept);
 969                         if (IS_MANAGED_PAGE(pai))
 970                                 pmap_phys_attributes[pai] |= PHYS_MODIFIED;
 971                 }
 972         }
 973         if (prot & VM_PROT_EXECUTE) {
 974                 assert(set_NX == 0);
 975                 template = pte_set_ex(template, is_ept);
 976         }
 977
 978         if (set_NX)
 979                 template = pte_remove_ex(template, is_ept);
 980         if (wired) {
 981                 template |= INTEL_PTE_WIRED;
 982                 OSAddAtomic(+1,  & pmap->stats.wired_count);
 983                 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
 984         }
 985         if (superpage)
 986                 template |= INTEL_PTE_PS;
 987
 988         /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
 989         if (is_ept && !pmap_ept_support_ad) {
 990                 template |= PTE_REF(is_ept);
 991                 if (IS_MANAGED_PAGE(pai))
 992                         pmap_phys_attributes[pai] |= PHYS_REFERENCED;
 993         }
 994
 995         pmap_store_pte(pte, template);
 996
 997         /*
 998          * if this was a managed page we delayed unlocking the pv until here
 999          * to prevent pmap_page_protect et al from finding it until the pte
1000          * has been stored
1001          */
1002         if (IS_MANAGED_PAGE(pai)) {
1003                 UNLOCK_PVH(pai);
1004         }
1005 Done:
1006         if (need_tlbflush == TRUE) {
1007                 if (options & PMAP_OPTIONS_NOFLUSH)
1008                         PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1009                 else
1010                         PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1011         }
1012         if (pvh_e != PV_HASHED_ENTRY_NULL) {
1013                 PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
1014         }
1015         if (pvh_new != PV_HASHED_ENTRY_NULL) {
1016                 PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
1017         }
1018         PMAP_UNLOCK(pmap);
1019
1020         if (delpage_pm_obj) {
1021                 vm_page_t m;
1022
1023                 vm_object_lock(delpage_pm_obj);
1024                 m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
1025                 if (m == VM_PAGE_NULL)
1026                     panic("pmap_enter: pte page not in object");
1027                 VM_PAGE_FREE(m);
1028                 vm_object_unlock(delpage_pm_obj);
1029                 OSAddAtomic(-1,  &inuse_ptepages_count);
1030                 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
1031         }
1032
1033         PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
1034         return KERN_SUCCESS;
1035 }
1036
1037 /*
1038  *      Remove a range of hardware page-table entries.
1039  *      The entries given are the first (inclusive)
1040  *      and last (exclusive) entries for the VM pages.
1041  *      The virtual address is the va for the first pte.
1042  *
1043  *      The pmap must be locked.
1044  *      If the pmap is not the kernel pmap, the range must lie
1045  *      entirely within one pte-page.  This is NOT checked.
1046  *      Assumes that the pte-page exists.
1047  */
1048
1049 void
1050 pmap_remove_range(
1051         pmap_t                  pmap,
1052         vm_map_offset_t         start_vaddr,
1053         pt_entry_t              *spte,
1054         pt_entry_t              *epte)
1055 {
1056         pmap_remove_range_options(pmap, start_vaddr, spte, epte,
1057                                   PMAP_OPTIONS_REMOVE);
1058 }
1059
1060 void
1061 pmap_remove_range_options(
1062         pmap_t                  pmap,
1063         vm_map_offset_t         start_vaddr,
1064         pt_entry_t              *spte,
1065         pt_entry_t              *epte,
1066         int                     options)
1067 {
1068         pt_entry_t              *cpte;
1069         pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
1070         pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
1071         pv_hashed_entry_t       pvh_e;
1072         int                     pvh_cnt = 0;
1073         int                     num_removed, num_unwired, num_found, num_invalid;
1074         int                     num_device, num_external, num_internal, num_reusable;
1075         uint64_t                num_compressed;
1076         ppnum_t                 pai;
1077         pmap_paddr_t            pa;
1078         vm_map_offset_t         vaddr;
1079         boolean_t               is_ept = is_ept_pmap(pmap);
1080
1081         num_removed = 0;
1082         num_unwired = 0;
1083         num_found   = 0;
1084         num_invalid = 0;
1085         num_device  = 0;
1086         num_external = 0;
1087         num_internal = 0;
1088         num_reusable = 0;
1089         num_compressed = 0;
1090         /* invalidate the PTEs first to "freeze" them */
1091         for (cpte = spte, vaddr = start_vaddr;
1092              cpte < epte;
1093              cpte++, vaddr += PAGE_SIZE_64) {
1094                 pt_entry_t p = *cpte;
1095
1096                 pa = pte_to_pa(p);
1097                 if (pa == 0) {
1098                         if (pmap != kernel_pmap &&
1099                             (options & PMAP_OPTIONS_REMOVE) &&
1100                             (p & PTE_COMPRESSED)) {
1101                                 /* one less "compressed" */
1102                                 num_compressed++;
1103                                 /* clear marker */
1104                                 /* XXX probably does not need to be atomic! */
1105                                 pmap_update_pte(cpte, PTE_COMPRESSED, 0);
1106                         }
1107                         continue;
1108                 }
1109                 num_found++;
1110
1111                 if (iswired(p))
1112                         num_unwired++;
1113
1114                 pai = pa_index(pa);
1115
1116                 if (!IS_MANAGED_PAGE(pai)) {
1117                         /*
1118                          *      Outside range of managed physical memory.
1119                          *      Just remove the mappings.
1120                          */
1121                         pmap_store_pte(cpte, 0);
1122                         num_device++;
1123                         continue;
1124                 }
1125
1126                 if ((p & PTE_VALID_MASK(is_ept)) == 0)
1127                         num_invalid++;
1128
1129                 /* invalidate the PTE */
1130                 pmap_update_pte(cpte, PTE_VALID_MASK(is_ept), 0);
1131         }
1132
1133         if (num_found == 0) {
1134                 /* nothing was changed: we're done */
1135                 goto update_counts;
1136         }
1137
1138         /* propagate the invalidates to other CPUs */
1139
1140         PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
1141
1142         for (cpte = spte, vaddr = start_vaddr;
1143              cpte < epte;
1144              cpte++, vaddr += PAGE_SIZE_64) {
1145
1146                 pa = pte_to_pa(*cpte);
1147                 if (pa == 0)
1148                         continue;
1149
1150                 pai = pa_index(pa);
1151
1152                 LOCK_PVH(pai);
1153
1154                 pa = pte_to_pa(*cpte);
1155                 if (pa == 0) {
1156                         UNLOCK_PVH(pai);
1157                         continue;
1158                 }
1159                 num_removed++;
1160                 if (IS_REUSABLE_PAGE(pai)) {
1161                         num_reusable++;
1162                 } else if (IS_INTERNAL_PAGE(pai)) {
1163                         num_internal++;
1164                 } else {
1165                         num_external++;
1166                 }
1167
1168                 /*
1169                  * Get the modify and reference bits, then
1170                  * nuke the entry in the page table
1171                  */
1172                 /* remember reference and change */
1173                 pmap_phys_attributes[pai] |=
1174                         (char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));
1175
1176                 /*
1177                  * Remove the mapping from the pvlist for this physical page.
1178                  */
1179                 pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);
1180
1181                 /* completely invalidate the PTE */
1182                 pmap_store_pte(cpte, 0);
1183
1184                 UNLOCK_PVH(pai);
1185
1186                 if (pvh_e != PV_HASHED_ENTRY_NULL) {
1187                         pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1188                         pvh_eh = pvh_e;
1189
1190                         if (pvh_et == PV_HASHED_ENTRY_NULL) {
1191                                 pvh_et = pvh_e;
1192                         }
1193                         pvh_cnt++;
1194                 }
1195         } /* for loop */
1196
1197         if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1198                 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1199         }
1200 update_counts:
1201         /*
1202          *      Update the counts
1203          */
1204 #if TESTING
1205         if (pmap->stats.resident_count < num_removed)
1206                 panic("pmap_remove_range: resident_count");
1207 #endif
1208         pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
1209         pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(num_removed));
1210         assert(pmap->stats.resident_count >= num_removed);
1211         OSAddAtomic(-num_removed,  &pmap->stats.resident_count);
1212
1213         if (pmap != kernel_pmap) {
1214 #if 00
1215                 assert(pmap->stats.device >= num_device);
1216                 if (num_device)
1217                         OSAddAtomic(-num_device, &pmap->stats.device);
1218 #endif /* 00 */
1219                 assert(pmap->stats.external >= num_external);
1220                 if (num_external)
1221                         OSAddAtomic(-num_external, &pmap->stats.external);
1222                 assert(pmap->stats.internal >= num_internal);
1223                 if (num_internal)
1224                         OSAddAtomic(-num_internal, &pmap->stats.internal);
1225                 assert(pmap->stats.reusable >= num_reusable);
1226                 if (num_reusable)
1227                         OSAddAtomic(-num_reusable, &pmap->stats.reusable);
1228                 assert(pmap->stats.compressed >= num_compressed);
1229                 if (num_compressed)
1230                         OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
1231         }
1232
1233 #if TESTING
1234         if (pmap->stats.wired_count < num_unwired)
1235                 panic("pmap_remove_range: wired_count");
1236 #endif
1237         assert(pmap->stats.wired_count >= num_unwired);
1238         OSAddAtomic(-num_unwired,  &pmap->stats.wired_count);
1239         pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
1240
1241         return;
1242 }
1243
1244
1245 /*
1246  *      Remove the given range of addresses
1247  *      from the specified map.
1248  *
1249  *      It is assumed that the start and end are properly
1250  *      rounded to the hardware page size.
1251  */
1252 void
1253 pmap_remove(
1254         pmap_t          map,
1255         addr64_t        s64,
1256         addr64_t        e64)
1257 {
1258         pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
1259 }
1260
1261 void
1262 pmap_remove_options(
1263         pmap_t          map,
1264         addr64_t        s64,
1265         addr64_t        e64,
1266         int             options)
1267 {
1268         pt_entry_t     *pde;
1269         pt_entry_t     *spte, *epte;
1270         addr64_t        l64;
1271         uint64_t        deadline;
1272         boolean_t       is_ept;
1273
1274         pmap_intr_assert();
1275
1276         if (map == PMAP_NULL || s64 == e64)
1277                 return;
1278
1279         is_ept = is_ept_pmap(map);
1280
1281         PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
1282                    map,
1283                    (uint32_t) (s64 >> 32), s64,
1284                    (uint32_t) (e64 >> 32), e64);
1285
1286
1287         PMAP_LOCK(map);
1288
1289 #if 0
1290         /*
1291          * Check that address range in the kernel does not overlap the stacks.
1292          * We initialize local static min/max variables once to avoid making
1293          * 2 function calls for every remove. Note also that these functions
1294          * both return 0 before kernel stacks have been initialized, and hence
1295          * the panic is not triggered in this case.
1296          */
1297         if (map == kernel_pmap) {
1298                 static vm_offset_t kernel_stack_min = 0;
1299                 static vm_offset_t kernel_stack_max = 0;
1300
1301                 if (kernel_stack_min == 0) {
1302                         kernel_stack_min = min_valid_stack_address();
1303                         kernel_stack_max = max_valid_stack_address();
1304                 }
1305                 if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
1306                     (kernel_stack_min < e64 && e64 <= kernel_stack_max))
1307                         panic("pmap_remove() attempted in kernel stack");
1308         }
1309 #else
1310
1311         /*
1312          * The values of kernel_stack_min and kernel_stack_max are no longer
1313          * relevant now that we allocate kernel stacks in the kernel map,
1314          * so the old code above no longer applies.  If we wanted to check that
1315          * we weren't removing a mapping of a page in a kernel stack we'd
1316          * mark the PTE with an unused bit and check that here.
1317          */
1318
1319 #endif
1320
1321         deadline = rdtsc64() + max_preemption_latency_tsc;
1322
1323         while (s64 < e64) {
1324                 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
1325                 if (l64 > e64)
1326                         l64 = e64;
1327                 pde = pmap_pde(map, s64);
1328
1329                 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
1330                         if (*pde & PTE_PS) {
1331                                 /*
1332                                  * If we're removing a superpage, pmap_remove_range()
1333                                  * must work on level 2 instead of level 1; and we're
1334                                  * only passing a single level 2 entry instead of a
1335                                  * level 1 range.
1336                                  */
1337                                 spte = pde;
1338                                 epte = spte+1; /* excluded */
1339                         } else {
1340                                 spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
1341                                 spte = &spte[ptenum(s64)];
1342                                 epte = &spte[intel_btop(l64 - s64)];
1343                         }
1344                         pmap_remove_range_options(map, s64, spte, epte,
1345                                                   options);
1346                 }
1347                 s64 = l64;
1348
1349                 if (s64 < e64 && rdtsc64() >= deadline) {
1350                         PMAP_UNLOCK(map)
1351                             /* TODO: Rapid release/reacquisition can defeat
1352                              * the "backoff" intent here; either consider a
1353                              * fair spinlock, or a scheme whereby each lock
1354                              * attempt marks the processor as within a spinlock
1355                              * acquisition, and scan CPUs here to determine
1356                              * if a backoff is necessary, to avoid sacrificing
1357                              * performance in the common case.
1358                              */
1359                         PMAP_LOCK(map)
1360                         deadline = rdtsc64() + max_preemption_latency_tsc;
1361                 }
1362         }
1363
1364         PMAP_UNLOCK(map);
1365
1366         PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
1367                    map, 0, 0, 0, 0);
1368
1369 }
1370
1371 void
1372 pmap_page_protect(
1373         ppnum_t         pn,
1374         vm_prot_t       prot)
1375 {
1376         pmap_page_protect_options(pn, prot, 0, NULL);
1377 }
1378
1379 /*
1380  *      Routine:        pmap_page_protect_options
1381  *
1382  *      Function:
1383  *              Lower the permission for all mappings to a given
1384  *              page.
1385  */
1386 void
1387 pmap_page_protect_options(
1388         ppnum_t         pn,
1389         vm_prot_t       prot,
1390         unsigned int    options,
1391         void            *arg)
1392 {
1393         pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
1394         pv_hashed_entry_t       pvh_et = PV_HASHED_ENTRY_NULL;
1395         pv_hashed_entry_t       nexth;
1396         int                     pvh_cnt = 0;
1397         pv_rooted_entry_t       pv_h;
1398         pv_rooted_entry_t       pv_e;
1399         pv_hashed_entry_t       pvh_e;
1400         pt_entry_t              *pte;
1401         int                     pai;
1402         pmap_t                  pmap;
1403         boolean_t               remove;
1404         pt_entry_t              new_pte_value;
1405         boolean_t               is_ept;
1406
1407         pmap_intr_assert();
1408         assert(pn != vm_page_fictitious_addr);
1409         if (pn == vm_page_guard_addr)
1410                 return;
1411
1412         pai = ppn_to_pai(pn);
1413
1414         if (!IS_MANAGED_PAGE(pai)) {
1415                 /*
1416                  *      Not a managed page.
1417                  */
1418                 return;
1419         }
1420         PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
1421                    pn, prot, 0, 0, 0);
1422
1423         /*
1424          * Determine the new protection.
1425          */
1426         switch (prot) {
1427         case VM_PROT_READ:
1428         case VM_PROT_READ | VM_PROT_EXECUTE:
1429                 remove = FALSE;
1430                 break;
1431         case VM_PROT_ALL:
1432                 return;         /* nothing to do */
1433         default:
1434                 remove = TRUE;
1435                 break;
1436         }
1437
1438         pv_h = pai_to_pvh(pai);
1439
1440         LOCK_PVH(pai);
1441
1442
1443         /*
1444          * Walk down PV list, if any, changing or removing all mappings.
1445          */
1446         if (pv_h->pmap == PMAP_NULL)
1447                 goto done;
1448
1449         pv_e = pv_h;
1450         pvh_e = (pv_hashed_entry_t) pv_e;       /* cheat */
1451
1452         do {
1453                 vm_map_offset_t vaddr;
1454
1455                 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1456                     (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
1457                         /* page was modified, so it will be compressed */
1458                         options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1459                         options |= PMAP_OPTIONS_COMPRESSOR;
1460                 }
1461
1462                 pmap = pv_e->pmap;
1463                 is_ept = is_ept_pmap(pmap);
1464                 vaddr = pv_e->va;
1465                 pte = pmap_pte(pmap, vaddr);
1466
1467                 pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
1468                     "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);
1469
1470                 if (0 == pte) {
1471                         panic("pmap_page_protect() "
1472                                 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1473                                 pmap, pn, vaddr);
1474                 }
1475                 nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
1476
1477                 /*
1478                  * Remove the mapping if new protection is NONE
1479                  */
1480                 if (remove) {
1481
1482                         /* Remove per-pmap wired count */
1483                         if (iswired(*pte)) {
1484                                 OSAddAtomic(-1, &pmap->stats.wired_count);
1485                                 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
1486                         }
1487
1488                         if (pmap != kernel_pmap &&
1489                             (options & PMAP_OPTIONS_COMPRESSOR) &&
1490                             IS_INTERNAL_PAGE(pai)) {
1491                                 /* mark this PTE as having been "reclaimed" */
1492                                 new_pte_value = PTE_COMPRESSED;
1493                         } else {
1494                                 new_pte_value = 0;
1495                         }
1496
1497                         if (options & PMAP_OPTIONS_NOREFMOD) {
1498                                 pmap_store_pte(pte, new_pte_value);
1499
1500                                 if (options & PMAP_OPTIONS_NOFLUSH)
1501                                         PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1502                                 else
1503                                         PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
1504                         } else {
1505                                 /*
1506                                  * Remove the mapping, collecting dirty bits.
1507                                  */
1508                                 pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
1509
1510                                 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1511                                 if ((options &
1512                                      PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
1513                                     ! (pmap_phys_attributes[pai] &
1514                                        PHYS_MODIFIED) &&
1515                                     (*pte & PHYS_MODIFIED)) {
1516                                         /*
1517                                          * Page is actually "modified" and
1518                                          * will be compressed.  Start
1519                                          * accounting for it as "compressed".
1520                                          */
1521                                         options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
1522                                         options |= PMAP_OPTIONS_COMPRESSOR;
1523                                         new_pte_value = PTE_COMPRESSED;
1524                                 }
1525                                 if (!is_ept) {
1526                                         pmap_phys_attributes[pai] |=
1527                                                 *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1528                                 } else {
1529                                         pmap_phys_attributes[pai] |=
1530                                                 ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1531                                 }
1532                                 pmap_store_pte(pte, new_pte_value);
1533                         }
1534
1535                         if (new_pte_value == PTE_COMPRESSED) {
1536                                 /* one more "compressed" page */
1537                                 OSAddAtomic64(+1, &pmap->stats.compressed);
1538                                 PMAP_STATS_PEAK(pmap->stats.compressed);
1539                                 pmap->stats.compressed_lifetime++;
1540                         }
1541
1542 #if TESTING
1543                         if (pmap->stats.resident_count < 1)
1544                                 panic("pmap_page_protect: resident_count");
1545 #endif
1546                         pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
1547                         assert(pmap->stats.resident_count >= 1);
1548                         OSAddAtomic(-1,  &pmap->stats.resident_count);
1549                         if (options & PMAP_OPTIONS_COMPRESSOR) {
1550                                 /*
1551                                  * This removal is only being done so we can send this page to
1552                                  * the compressor; therefore it mustn't affect total task footprint.
1553                                  */
1554                                 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
1555                         } else {
1556                                 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
1557                         }
1558
1559                         if (pmap != kernel_pmap) {
1560                                 if (IS_REUSABLE_PAGE(pai)) {
1561                                         assert(pmap->stats.reusable > 0);
1562                                         OSAddAtomic(-1, &pmap->stats.reusable);
1563                                 } else if (IS_INTERNAL_PAGE(pai)) {
1564                                         assert(pmap->stats.internal > 0);
1565                                         OSAddAtomic(-1, &pmap->stats.internal);
1566                                 } else {
1567                                         assert(pmap->stats.external > 0);
1568                                         OSAddAtomic(-1, &pmap->stats.external);
1569                                 }
1570                         }
1571
1572                         /*
1573                          * Deal with the pv_rooted_entry.
1574                          */
1575
1576                         if (pv_e == pv_h) {
1577                                 /*
1578                                  * Fix up head later.
1579                                  */
1580                                 pv_h->pmap = PMAP_NULL;
1581                         } else {
1582                                 /*
1583                                  * Delete this entry.
1584                                  */
1585                                 pv_hash_remove(pvh_e);
1586                                 pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1587                                 pvh_eh = pvh_e;
1588
1589                                 if (pvh_et == PV_HASHED_ENTRY_NULL)
1590                                         pvh_et = pvh_e;
1591                                 pvh_cnt++;
1592                         }
1593                 } else {
1594                         /*
1595                          * Write-protect, after opportunistic refmod collect
1596                          */
1597                         if (!is_ept) {
1598                                 pmap_phys_attributes[pai] |=
1599                                         *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
1600                         } else {
1601                                 pmap_phys_attributes[pai] |=
1602                                         ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
1603                         }
1604                         pmap_update_pte(pte, PTE_WRITE(is_ept), 0);
1605
1606                         if (options & PMAP_OPTIONS_NOFLUSH)
1607                                 PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
1608                         else
1609                                 PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
1610                 }
1611                 pvh_e = nexth;
1612         } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
1613
1614
1615         /*
1616          * If pv_head mapping was removed, fix it up.
1617          */
1618         if (pv_h->pmap == PMAP_NULL) {
1619                 pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);
1620
1621                 if (pvh_e != (pv_hashed_entry_t) pv_h) {
1622                         pv_hash_remove(pvh_e);
1623                         pv_h->pmap = pvh_e->pmap;
1624                         pv_h->va = pvh_e->va;
1625                         pvh_e->qlink.next = (queue_entry_t) pvh_eh;
1626                         pvh_eh = pvh_e;
1627
1628                         if (pvh_et == PV_HASHED_ENTRY_NULL)
1629                                 pvh_et = pvh_e;
1630                         pvh_cnt++;
1631                 }
1632         }
1633         if (pvh_eh != PV_HASHED_ENTRY_NULL) {
1634                 PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
1635         }
1636 done:
1637         UNLOCK_PVH(pai);
1638
1639         PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
1640                    0, 0, 0, 0, 0);
1641 }
1642
1643
1644 /*
1645  *      Clear specified attribute bits.
1646  */
1647 void
1648 phys_attribute_clear(
1649         ppnum_t         pn,
1650         int             bits,
1651         unsigned int    options,
1652         void            *arg)
1653 {
1654         pv_rooted_entry_t       pv_h;
1655         pv_hashed_entry_t       pv_e;
1656         pt_entry_t              *pte;
1657         int                     pai;
1658         pmap_t                  pmap;
1659         char                    attributes = 0;
1660         boolean_t               is_internal, is_reusable, is_ept;
1661         int                     ept_bits_to_clear;
1662         boolean_t               ept_keep_global_mod = FALSE;
1663
1664         if ((bits & PHYS_MODIFIED) &&
1665             (options & PMAP_OPTIONS_NOFLUSH) &&
1666             arg == NULL) {
1667                 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
1668                       "should not clear 'modified' without flushing TLBs\n",
1669                       pn, bits, options, arg);
1670         }
1671
1672         /* We only support converting MOD and REF bits for EPT PTEs in this function */
1673         assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);
1674
1675         ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));
1676
1677         pmap_intr_assert();
1678         assert(pn != vm_page_fictitious_addr);
1679         if (pn == vm_page_guard_addr)
1680                 return;
1681
1682         pai = ppn_to_pai(pn);
1683
1684         if (!IS_MANAGED_PAGE(pai)) {
1685                 /*
1686                  *      Not a managed page.
1687                  */
1688                 return;
1689         }
1690
1691         PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
1692                    pn, bits, 0, 0, 0);
1693
1694         pv_h = pai_to_pvh(pai);
1695
1696         LOCK_PVH(pai);
1697
1698
1699         /*
1700          * Walk down PV list, clearing all modify or reference bits.
1701          * We do not have to lock the pv_list because we have
1702          * the per-pmap lock
1703          */
1704         if (pv_h->pmap != PMAP_NULL) {
1705                 /*
1706                  * There are some mappings.
1707                  */
1708
1709                 is_internal = IS_INTERNAL_PAGE(pai);
1710                 is_reusable = IS_REUSABLE_PAGE(pai);
1711
1712                 pv_e = (pv_hashed_entry_t)pv_h;
1713
1714                 do {
1715                         vm_map_offset_t va;
1716                         char pte_bits;
1717
1718                         pmap = pv_e->pmap;
1719                         is_ept = is_ept_pmap(pmap);
1720                         va = pv_e->va;
1721                         pte_bits = 0;
1722
1723                         if (bits) {
1724                                 pte = pmap_pte(pmap, va);
1725                                 /* grab ref/mod bits from this PTE */
1726                                 pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
1727                                 /* propagate to page's global attributes */
1728                                 if (!is_ept) {
1729                                         attributes |= pte_bits;
1730                                 } else {
1731                                         attributes |= ept_refmod_to_physmap(pte_bits);
1732                                         if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
1733                                                 ept_keep_global_mod = TRUE;
1734                                         }
1735                                 }
1736                                 /* which bits to clear for this PTE? */
1737                                 if (!is_ept) {
1738                                         pte_bits &= bits;
1739                                 } else {
1740                                         pte_bits &= ept_bits_to_clear;
1741                                 }
1742                         }
1743
1744                          /*
1745                           * Clear modify and/or reference bits.
1746                           */
1747                         if (pte_bits) {
1748                                 pmap_update_pte(pte, bits, 0);
1749
1750                                 /* Ensure all processors using this translation
1751                                  * invalidate this TLB entry. The invalidation
1752                                  * *must* follow the PTE update, to ensure that
1753                                  * the TLB shadow of the 'D' bit (in particular)
1754                                  * is synchronized with the updated PTE.
1755                                  */
1756                                 if (! (options & PMAP_OPTIONS_NOFLUSH)) {
1757                                         /* flush TLBS now */
1758                                         PMAP_UPDATE_TLBS(pmap,
1759                                                          va,
1760                                                          va + PAGE_SIZE);
1761                                 } else if (arg) {
1762                                         /* delayed TLB flush: add "pmap" info */
1763                                         PMAP_UPDATE_TLBS_DELAYED(
1764                                                 pmap,
1765                                                 va,
1766                                                 va + PAGE_SIZE,
1767                                                 (pmap_flush_context *)arg);
1768                                 } else {
1769                                         /* no TLB flushing at all */
1770                                 }
1771                         }
1772
1773                         /* update pmap "reusable" stats */
1774                         if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
1775                             is_reusable &&
1776                             pmap != kernel_pmap) {
1777                                 /* one less "reusable" */
1778                                 assert(pmap->stats.reusable > 0);
1779                                 OSAddAtomic(-1, &pmap->stats.reusable);
1780                                 if (is_internal) {
1781                                         /* one more "internal" */
1782                                         OSAddAtomic(+1, &pmap->stats.internal);
1783                                         PMAP_STATS_PEAK(pmap->stats.internal);
1784                                 } else {
1785                                         /* one more "external" */
1786                                         OSAddAtomic(+1, &pmap->stats.external);
1787                                         PMAP_STATS_PEAK(pmap->stats.external);
1788                                 }
1789                         } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
1790                                    !is_reusable &&
1791                                    pmap != kernel_pmap) {
1792                                 /* one more "reusable" */
1793                                 OSAddAtomic(+1, &pmap->stats.reusable);
1794                                 PMAP_STATS_PEAK(pmap->stats.reusable);
1795                                 if (is_internal) {
1796                                         /* one less "internal" */
1797                                         assert(pmap->stats.internal > 0);
1798                                         OSAddAtomic(-1, &pmap->stats.internal);
1799                                 } else {
1800                                         /* one less "external" */
1801                                         assert(pmap->stats.external > 0);
1802                                         OSAddAtomic(-1, &pmap->stats.external);
1803                                 }
1804                         }
1805
1806                         pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1807
1808                 } while (pv_e != (pv_hashed_entry_t)pv_h);
1809         }
1810         /* Opportunistic refmod collection, annulled
1811          * if both REF and MOD are being cleared.
1812          */
1813
1814         pmap_phys_attributes[pai] |= attributes;
1815
1816         if (ept_keep_global_mod) {
1817                 /*
1818                  * If the hardware doesn't support AD bits for EPT PTEs and someone is
1819                  * requesting that we clear the modified bit for a phys page, we need
1820                  * to ensure that there are no EPT mappings for the page with the
1821                  * modified bit set. If there are, we cannot clear the global modified bit.
1822                  */
1823                 bits &= ~PHYS_MODIFIED;
1824         }
1825         pmap_phys_attributes[pai] &= ~(bits);
1826
1827         /* update this page's "reusable" status */
1828         if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
1829                 pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
1830         } else if (options & PMAP_OPTIONS_SET_REUSABLE) {
1831                 pmap_phys_attributes[pai] |= PHYS_REUSABLE;
1832         }
1833
1834         UNLOCK_PVH(pai);
1835
1836         PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
1837                    0, 0, 0, 0, 0);
1838 }
1839
1840 /*
1841  *      Check specified attribute bits.
1842  */
1843 int
1844 phys_attribute_test(
1845         ppnum_t         pn,
1846         int             bits)
1847 {
1848         pv_rooted_entry_t       pv_h;
1849         pv_hashed_entry_t       pv_e;
1850         pt_entry_t              *pte;
1851         int                     pai;
1852         pmap_t                  pmap;
1853         int                     attributes = 0;
1854         boolean_t               is_ept;
1855
1856         pmap_intr_assert();
1857         assert(pn != vm_page_fictitious_addr);
1858         assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
1859         if (pn == vm_page_guard_addr)
1860                 return 0;
1861
1862         pai = ppn_to_pai(pn);
1863
1864         if (!IS_MANAGED_PAGE(pai)) {
1865                 /*
1866                  *      Not a managed page.
1867                  */
1868                 return 0;
1869         }
1870
1871         /*
1872          * Fast check...  if bits already collected
1873          * no need to take any locks...
1874          * if not set, we need to recheck after taking
1875          * the lock in case they got pulled in while
1876          * we were waiting for the lock
1877          */
1878         if ((pmap_phys_attributes[pai] & bits) == bits)
1879                 return bits;
1880
1881         pv_h = pai_to_pvh(pai);
1882
1883         LOCK_PVH(pai);
1884
1885         attributes = pmap_phys_attributes[pai] & bits;
1886
1887
1888         /*
1889          * Walk down PV list, checking the mappings until we
1890          * reach the end or we've found the desired attributes.
1891          */
1892         if (attributes != bits &&
1893             pv_h->pmap != PMAP_NULL) {
1894                 /*
1895                  * There are some mappings.
1896                  */
1897                 pv_e = (pv_hashed_entry_t)pv_h;
1898                 do {
1899                         vm_map_offset_t va;
1900
1901                         pmap = pv_e->pmap;
1902                         is_ept = is_ept_pmap(pmap);
1903                         va = pv_e->va;
1904                         /*
1905                          * pick up modify and/or reference bits from mapping
1906                          */
1907
1908                         pte = pmap_pte(pmap, va);
1909                         if (!is_ept) {
1910                                 attributes |= (int)(*pte & bits);
1911                         } else {
1912                                 attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
1913
1914                         }
1915
1916                         pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);
1917
1918                 } while ((attributes != bits) &&
1919                          (pv_e != (pv_hashed_entry_t)pv_h));
1920         }
1921         pmap_phys_attributes[pai] |= attributes;
1922
1923         UNLOCK_PVH(pai);
1924         return (attributes);
1925 }
1926
1927 /*
1928  *      Routine:        pmap_change_wiring
1929  *      Function:       Change the wiring attribute for a map/virtual-address
1930  *                      pair.
1931  *      In/out conditions:
1932  *                      The mapping must already exist in the pmap.
1933  */
1934 void
1935 pmap_change_wiring(
1936         pmap_t          map,
1937         vm_map_offset_t vaddr,
1938         boolean_t       wired)
1939 {
1940         pt_entry_t      *pte;
1941
1942         PMAP_LOCK(map);
1943
1944         if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
1945                 panic("pmap_change_wiring: pte missing");
1946
1947         if (wired && !iswired(*pte)) {
1948                 /*
1949                  * wiring down mapping
1950                  */
1951                 pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
1952                 OSAddAtomic(+1,  &map->stats.wired_count);
1953                 pmap_update_pte(pte, 0, PTE_WIRED);
1954         }
1955         else if (!wired && iswired(*pte)) {
1956                 /*
1957                  * unwiring mapping
1958                  */
1959                 assert(map->stats.wired_count >= 1);
1960                 OSAddAtomic(-1,  &map->stats.wired_count);
1961                 pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
1962                 pmap_update_pte(pte, PTE_WIRED, 0);
1963         }
1964
1965         PMAP_UNLOCK(map);
1966 }
1967
1968 /*
1969  *      "Backdoor" direct map routine for early mappings.
1970  *      Useful for mapping memory outside the range
1971  *      Sets A, D and NC if requested
1972  */
1973
1974 vm_offset_t
1975 pmap_map_bd(
1976         vm_offset_t     virt,
1977         vm_map_offset_t start_addr,
1978         vm_map_offset_t end_addr,
1979         vm_prot_t       prot,
1980         unsigned int    flags)
1981 {
1982         pt_entry_t      template;
1983         pt_entry_t      *pte;
1984         spl_t           spl;
1985         vm_offset_t     base = virt;
1986         template = pa_to_pte(start_addr)
1987                 | INTEL_PTE_REF
1988                 | INTEL_PTE_MOD
1989                 | INTEL_PTE_WIRED
1990                 | INTEL_PTE_VALID;
1991
1992         if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
1993                 template |= INTEL_PTE_NCACHE;
1994                 if (!(flags & (VM_MEM_GUARDED)))
1995                         template |= INTEL_PTE_PTA;
1996         }
1997
1998 #if    defined(__x86_64__)
1999         if ((prot & VM_PROT_EXECUTE) == 0)
2000                 template |= INTEL_PTE_NX;
2001 #endif
2002
2003         if (prot & VM_PROT_WRITE)
2004                 template |= INTEL_PTE_WRITE;
2005
2006         while (start_addr < end_addr) {
2007                 spl = splhigh();
2008                 pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
2009                 if (pte == PT_ENTRY_NULL) {
2010                         panic("pmap_map_bd: Invalid kernel address\n");
2011                 }
2012                 pmap_store_pte(pte, template);
2013                 splx(spl);
2014                 pte_increment_pa(template);
2015                 virt += PAGE_SIZE;
2016                 start_addr += PAGE_SIZE;
2017         }
2018         flush_tlb_raw();
2019         PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
2020         return(virt);
2021 }
2022
2023 unsigned int
2024 pmap_query_resident(
2025         pmap_t          pmap,
2026         addr64_t        s64,
2027         addr64_t        e64,
2028         unsigned int    *compressed_count_p)
2029 {
2030         pt_entry_t     *pde;
2031         pt_entry_t     *spte, *epte;
2032         addr64_t        l64;
2033         uint64_t        deadline;
2034         unsigned int    result;
2035         boolean_t       is_ept;
2036         unsigned int    compressed_count;
2037
2038         pmap_intr_assert();
2039
2040         if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
2041                 if (compressed_count_p) {
2042                         *compressed_count_p = 0;
2043                 }
2044                 return 0;
2045         }
2046
2047         is_ept = is_ept_pmap(pmap);
2048
2049         PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
2050                    pmap,
2051                    (uint32_t) (s64 >> 32), s64,
2052                    (uint32_t) (e64 >> 32), e64);
2053
2054         result = 0;
2055         compressed_count = 0;
2056
2057         PMAP_LOCK(pmap);
2058
2059         deadline = rdtsc64() + max_preemption_latency_tsc;
2060
2061         while (s64 < e64) {
2062                 l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
2063                 if (l64 > e64)
2064                         l64 = e64;
2065                 pde = pmap_pde(pmap, s64);
2066
2067                 if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
2068                         if (*pde & PTE_PS) {
2069                                 /* superpage: not supported */
2070                         } else {
2071                                 spte = pmap_pte(pmap,
2072                                                 (s64 & ~(pde_mapped_size - 1)));
2073                                 spte = &spte[ptenum(s64)];
2074                                 epte = &spte[intel_btop(l64 - s64)];
2075
2076                                 for (; spte < epte; spte++) {
2077                                         if (pte_to_pa(*spte) != 0) {
2078                                                 result++;
2079                                         } else if (*spte & PTE_COMPRESSED) {
2080                                                 compressed_count++;
2081                                         }
2082                                 }
2083
2084                         }
2085                 }
2086                 s64 = l64;
2087
2088                 if (s64 < e64 && rdtsc64() >= deadline) {
2089                         PMAP_UNLOCK(pmap);
2090                         PMAP_LOCK(pmap);
2091                         deadline = rdtsc64() + max_preemption_latency_tsc;
2092                 }
2093         }
2094
2095         PMAP_UNLOCK(pmap);
2096
2097         PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
2098                    pmap, 0, 0, 0, 0);
2099
2100         if (compressed_count_p) {
2101                 *compressed_count_p = compressed_count;
2102         }
2103         return result;
2104 }
2105
2106 #if MACH_ASSERT
2107 void
2108 pmap_set_process(
2109         __unused pmap_t pmap,
2110         __unused int pid,
2111         __unused char *procname)
2112 {
2113 }
2114 #endif /* MACH_ASSERT */