/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_assert.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <kern/ledger.h>
#include <i386/pmap_internal.h>

void pmap_remove_range(
    pmap_t          pmap,
    vm_map_offset_t va,
    pt_entry_t      *spte,
    pt_entry_t      *epte);

void pmap_remove_range_options(
    pmap_t          pmap,
    vm_map_offset_t va,
    pt_entry_t      *spte,
    pt_entry_t      *epte,
    int             options);

void pmap_reusable_range(
    pmap_t          pmap,
    vm_map_offset_t va,
    pt_entry_t      *spte,
    pt_entry_t      *epte,
    boolean_t       reusable);

uint32_t pmap_update_clear_pte_count;
/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on a NBPDE boundary.
 */

/* These symbols may be referenced directly by VM */
uint64_t pmap_nesting_size_min = NBPDE;
uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
/*
 * kern_return_t pmap_nest(grand, subord, va_start, size)
 *
 * grand  = the pmap that we will nest subord into
 * subord = the pmap that goes into the grand
 * va_start = start of range in pmap to be inserted
 * nstart   = start of range in the nested pmap
 * size     = Size of nest area (up to 16TB)
 *
 * Inserts a pmap into another. This is used to implement shared segments.
 *
 * Note that we depend upon higher level VM locks to ensure that things don't change while
 * we are doing this. For example, VM should not be doing any pmap enters while it is nesting
 * or do 2 nests at once.
 *
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */
kern_return_t
pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size)
{
    vm_map_offset_t vaddr, nvaddr;
    pd_entry_t      *pde, *npde;
    unsigned int    i;
    uint64_t        num_pde;

    assert(!is_ept_pmap(grand));
    assert(!is_ept_pmap(subord));

    if ((size & (pmap_nesting_size_min - 1)) ||
        (va_start & (pmap_nesting_size_min - 1)) ||
        (nstart & (pmap_nesting_size_min - 1)) ||
        ((size >> 28) > 65536)) {   /* Max size we can nest is 16TB */
        return KERN_INVALID_VALUE;
    }

    if (size == 0) {
        panic("pmap_nest: size is invalid - %016llX\n", size);
    }

    if (va_start != nstart) {
        panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);
    }

    PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
        VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
        VM_KERNEL_ADDRHIDE(va_start));

    nvaddr = (vm_map_offset_t)nstart;
    num_pde = size >> PDESHIFT;

    PMAP_LOCK_EXCLUSIVE(subord);

    subord->pm_shared = TRUE;

    for (i = 0; i < num_pde;) {
        if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG) {
            npde = pmap64_pdpt(subord, nvaddr);

            while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
                PMAP_UNLOCK_EXCLUSIVE(subord);
                pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK_EXCLUSIVE(subord);
                npde = pmap64_pdpt(subord, nvaddr);
            }
            *npde |= INTEL_PDPTE_NESTED;
            nvaddr += NBPDPT;
            i += (uint32_t)NPDEPG;
        } else {
            npde = pmap_pde(subord, nvaddr);

            while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
                PMAP_UNLOCK_EXCLUSIVE(subord);
                pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK_EXCLUSIVE(subord);
                npde = pmap_pde(subord, nvaddr);
            }
            nvaddr += NBPDE;
            i++;
        }
    }

    PMAP_UNLOCK_EXCLUSIVE(subord);

    vaddr = (vm_map_offset_t)va_start;

    PMAP_LOCK_EXCLUSIVE(grand);

    for (i = 0; i < num_pde;) {
        pd_entry_t tpde;

        if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG)) {
            npde = pmap64_pdpt(subord, vaddr);
            if (npde == 0) {
                panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
            }
            tpde = *npde;
            pde = pmap64_pdpt(grand, vaddr);
            if (0 == pde) {
                PMAP_UNLOCK_EXCLUSIVE(grand);
                pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK_EXCLUSIVE(grand);
                pde = pmap64_pdpt(grand, vaddr);
            }
            if (pde == 0) {
                panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
            }
            pmap_store_pte(pde, tpde);
            vaddr += NBPDPT;
            i += (uint32_t) NPDEPG;
        } else {
            npde = pmap_pde(subord, vaddr);
            if (npde == 0) {
                panic("pmap_nest: no npde, subord %p vaddr 0x%llx", subord, vaddr);
            }
            tpde = *npde;
            pde = pmap_pde(grand, vaddr);
            if (0 == pde) {
                PMAP_UNLOCK_EXCLUSIVE(grand);
                pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK_EXCLUSIVE(grand);
                pde = pmap_pde(grand, vaddr);
            }
            if (pde == 0) {
                panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
            }
            vaddr += NBPDE;
            pmap_store_pte(pde, tpde);
            i++;
        }
    }

    PMAP_UNLOCK_EXCLUSIVE(grand);

    PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, KERN_SUCCESS);

    return KERN_SUCCESS;
}
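/*
 * Illustrative sketch (not part of the original source, hence disabled): how a
 * hypothetical caller in the shared-region layer might nest a sub-pmap and
 * later detach it.  The pmap handles, base and size are assumptions for
 * illustration only; both ranges must be aligned to pmap_nesting_size_min
 * (NBPDE) and va_start must equal nstart.
 */
#if 0
static void
example_nest_shared_pmap(pmap_t task_pmap, pmap_t shared_pmap,
    addr64_t base, uint64_t size)
{
    kern_return_t kr;

    kr = pmap_nest(task_pmap, shared_pmap, base, base, size);
    assert(kr == KERN_SUCCESS);

    /* ... later, remove the nested range again ... */
    kr = pmap_unnest(task_pmap, base, size);
    assert(kr == KERN_SUCCESS);
}
#endif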
/*
 * kern_return_t pmap_unnest(grand, vaddr)
 *
 * grand = the pmap that we will un-nest subord from
 * vaddr = start of range in pmap to be unnested
 *
 * Removes a pmap from another. This is used to implement shared segments.
 */
kern_return_t
pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size)
{
    pd_entry_t      *pde;
    unsigned int    i;
    uint64_t        num_pde;
    addr64_t        va_start, va_end;
    uint64_t        npdpt = PMAP_INVALID_PDPTNUM;

    PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
        VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));

    if ((size & (pmap_nesting_size_min - 1)) ||
        (vaddr & (pmap_nesting_size_min - 1))) {
        panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
            grand, vaddr, size);
    }

    assert(!is_ept_pmap(grand));

    /* align everything to PDE boundaries */
    va_start = vaddr & ~(NBPDE - 1);
    va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE - 1);
    size = va_end - va_start;

    PMAP_LOCK_EXCLUSIVE(grand);

    num_pde = size >> PDESHIFT;
    vaddr = va_start;

    for (i = 0; i < num_pde;) {
        if (pdptnum(grand, vaddr) != npdpt) {
            npdpt = pdptnum(grand, vaddr);
            pde = pmap64_pdpt(grand, vaddr);
            if (pde && (*pde & INTEL_PDPTE_NESTED)) {
                pmap_store_pte(pde, (pd_entry_t)0);
                i += (uint32_t) NPDEPG;
                vaddr += NBPDPT;
                continue;
            }
        }
        pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
        if (pde == 0) {
            panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
        }
        pmap_store_pte(pde, (pd_entry_t)0);
        i++;
        vaddr += NBPDE;
    }

    PMAP_UPDATE_TLBS(grand, va_start, va_end);

    PMAP_UNLOCK_EXCLUSIVE(grand);

    PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, KERN_SUCCESS);

    return KERN_SUCCESS;
}
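/*
 * Note on the PDE-boundary rounding in pmap_unnest() above: with
 * NBPDE = 2MiB (0x200000), a request of vaddr = 0x201000 and size = 0x1000
 * rounds down to va_start = 0x200000 and up to va_end = 0x400000, so the
 * entire 2MiB PDE covering the request is unnested.
 */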
kern_return_t
pmap_unnest_options(
    pmap_t grand,
    addr64_t vaddr,
    __unused uint64_t size,
    __unused unsigned int options)
{
    return pmap_unnest(grand, vaddr, size);
}
/* Invoked by the Mach VM to determine the platform specific unnest region */

boolean_t
pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e)
{
    pd_entry_t *pdpte;
    boolean_t rval = FALSE;

    PMAP_LOCK_EXCLUSIVE(p);

    pdpte = pmap64_pdpt(p, *s);
    if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
        *s &= ~(NBPDPT - 1);
        rval = TRUE;
    }

    pdpte = pmap64_pdpt(p, *e);
    if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
        *e = ((*e + NBPDPT) & ~(NBPDPT - 1));
        rval = TRUE;
    }

    PMAP_UNLOCK_EXCLUSIVE(p);

    return rval;
}
/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
    pt_entry_t      *ptp;
    pd_entry_t      *pdep;
    ppnum_t         ppn = 0;
    pd_entry_t      pde;
    pt_entry_t      pte;
    boolean_t       is_ept, locked = FALSE;

    is_ept = is_ept_pmap(pmap);

    if ((pmap != kernel_pmap) && not_in_kdp) {
        PMAP_LOCK_EXCLUSIVE(pmap);
        locked = TRUE;
    } else {
        mp_disable_preemption();
    }

    if (!pmap->ref_count) {
        goto pfp_exit;
    }

    pdep = pmap_pde(pmap, va);

    if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
        if (pde & PTE_PS) {
            ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
            ppn += (ppnum_t) ptenum(va);
        } else {
            ptp = pmap_pte(pmap, va);
            if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
                ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
            }
        }
    }
pfp_exit:
    if (locked) {
        PMAP_UNLOCK_EXCLUSIVE(pmap);
    } else {
        mp_enable_preemption();
    }

    return ppn;
}
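/*
 * Illustrative sketch (not original source, hence disabled): translating a
 * kernel virtual address to a physical address with pmap_find_phys().  A
 * returned page number of 0 is treated as "no valid translation" here; the
 * helper name is hypothetical.
 */
#if 0
static pmap_paddr_t
example_kvtophys(vm_offset_t va)
{
    ppnum_t ppn = pmap_find_phys(kernel_pmap, (addr64_t)va);

    if (ppn == 0) {
        return 0;
    }
    /* page base plus the offset within the page */
    return ((pmap_paddr_t)ppn << PAGE_SHIFT) | (va & PAGE_MASK);
}
#endif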
/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed. We assume that this physical page may be mapped in
 * both EPT and normal Intel PTEs, so we convert the attributes
 * to the corresponding format for each pmap.
 *
 * We assert that the passed set of attributes is a subset of the
 * PHYS_CACHEABILITY_MASK.
 */
void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes)
{
    pv_rooted_entry_t   pv_h, pv_e;
    pv_hashed_entry_t   pvh_e, nexth;
    vm_map_offset_t     vaddr;
    pmap_t              pmap;
    pt_entry_t          *ptep;
    boolean_t           is_ept;
    unsigned            ept_attributes;

    assert(IS_MANAGED_PAGE(pn));
    assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);

    /* We don't support the PAT bit for EPT PTEs */
    if (attributes & INTEL_PTE_NCACHE) {
        ept_attributes = INTEL_EPT_NCACHE;
    } else {
        ept_attributes = INTEL_EPT_WB;
    }

    pv_h = pai_to_pvh(pn);
    /* TODO: translate the PHYS_* bits to PTE bits, while they're
     * currently identical, they may not remain so
     * Potential optimization (here and in page_protect),
     * parallel shootdowns, check for redundant
     * attribute modifications.
     */

    /*
     * Alter attributes on all mappings
     */
    if (pv_h->pmap != PMAP_NULL) {
        pv_e = pv_h;
        pvh_e = (pv_hashed_entry_t)pv_e;

        do {
            pmap = pv_e->pmap;
            vaddr = PVE_VA(pv_e);
            ptep = pmap_pte(pmap, vaddr);

            if (0 == ptep) {
                panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
            }

            is_ept = is_ept_pmap(pmap);

            nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
            if (!is_ept) {
                pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
            } else {
                pmap_update_pte(ptep, INTEL_EPT_CACHE_MASK, ept_attributes);
            }
            PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
            pvh_e = nexth;
        } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
    }
}
void
x86_filter_TLB_coherency_interrupts(boolean_t dofilter)
{
    assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

    if (dofilter) {
        CPU_CR3_MARK_INACTIVE();
    } else {
        CPU_CR3_MARK_ACTIVE();
        pmap_update_interrupt();
    }
}
/*
 * Insert the given physical page (p) at
 * the specified virtual address (v) in the
 * target physical map with the protection requested.
 *
 * If specified, the page will be wired down, meaning
 * that the related pte cannot be reclaimed.
 *
 * NB:  This is the only routine which MAY NOT lazy-evaluate
 *      or lose information.  That is, this routine must actually
 *      insert this page into the given map NOW.
 */
kern_return_t
pmap_enter(
    pmap_t          pmap,
    vm_map_offset_t vaddr,
    ppnum_t         pn,
    vm_prot_t       prot,
    vm_prot_t       fault_type,
    unsigned int    flags,
    boolean_t       wired)
{
    return pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired,
               PMAP_EXPAND_OPTIONS_NONE, NULL);
}
#define PTE_LOCK(EPT) INTEL_PTE_SWLOCK

static inline void PTE_LOCK_LOCK(pt_entry_t *);
static inline void PTE_LOCK_UNLOCK(pt_entry_t *);

void
PTE_LOCK_LOCK(pt_entry_t *lpte)
{
    pt_entry_t pte;
plretry:
    while ((pte = __c11_atomic_load((_Atomic pt_entry_t *)lpte, memory_order_relaxed)) & PTE_LOCK(0)) {
        __builtin_ia32_pause();
    }
    if (__c11_atomic_compare_exchange_strong((_Atomic pt_entry_t *)lpte, &pte, pte | PTE_LOCK(0), memory_order_acquire_smp, TRUE)) {
        return;
    }

    goto plretry;
}

void
PTE_LOCK_UNLOCK(pt_entry_t *lpte)
{
    __c11_atomic_fetch_and((_Atomic pt_entry_t *)lpte, ~PTE_LOCK(0), memory_order_release_smp);
}
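/*
 * Illustrative sketch (not original source, hence disabled): the intended
 * usage pattern for the per-PTE software lock bit above.  The lock serializes
 * updates to a single PTE against other PTE-level writers while the pmap
 * itself may only be held shared.  "some_pte" is a hypothetical placeholder.
 */
#if 0
static void
example_pte_swlock_usage(pt_entry_t *some_pte)
{
    PTE_LOCK_LOCK(some_pte);        /* spin until the SWLOCK bit is acquired */
    /* ... read-modify-write *some_pte ... */
    PTE_LOCK_UNLOCK(some_pte);      /* clear the SWLOCK bit with release semantics */
}
#endif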
511 vm_map_offset_t vaddr
,
514 __unused vm_prot_t fault_type
,
517 unsigned int options
,
520 pt_entry_t
*pte
= NULL
;
521 pv_rooted_entry_t pv_h
;
523 pv_hashed_entry_t pvh_e
;
524 pv_hashed_entry_t pvh_new
;
527 pmap_paddr_t pa
= (pmap_paddr_t
) i386_ptob(pn
);
528 boolean_t need_tlbflush
= FALSE
;
531 boolean_t old_pa_locked
;
532 /* 2MiB mappings are confined to x86_64 by VM */
533 boolean_t superpage
= flags
& VM_MEM_SUPERPAGE
;
534 vm_object_t delpage_pm_obj
= NULL
;
535 uint64_t delpage_pde_index
= 0;
537 kern_return_t kr
= KERN_FAILURE
;
539 boolean_t is_altacct
;
540 boolean_t ptelocked
= FALSE
;
544 if (__improbable(pmap
== PMAP_NULL
)) {
545 return KERN_INVALID_ARGUMENT
;
547 if (__improbable(pn
== vm_page_guard_addr
)) {
548 return KERN_INVALID_ARGUMENT
;
551 is_ept
= is_ept_pmap(pmap
);
553 /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
554 * unused value for that scenario.
556 assert(pn
!= vm_page_fictitious_addr
);
559 PMAP_TRACE(PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_START
,
560 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(vaddr
), pn
,
563 if ((prot
& VM_PROT_EXECUTE
)) {
569 #if DEVELOPMENT || DEBUG
570 if (__improbable(set_NX
&& (!nx_enabled
|| !pmap
->nx_enabled
))) {
574 if (__improbable(set_NX
&& (pmap
== kernel_pmap
) &&
575 ((pmap_disable_kstack_nx
&& (flags
& VM_MEM_STACK
)) ||
576 (pmap_disable_kheap_nx
&& !(flags
& VM_MEM_STACK
))))) {
581 pvh_new
= PV_HASHED_ENTRY_NULL
;
583 pvh_e
= PV_HASHED_ENTRY_NULL
;
585 PMAP_LOCK_SHARED(pmap
);
588 * Expand pmap to include this pte. Assume that
589 * pmap is always expanded to include enough hardware
590 * pages to map one VM page.
592 if (__improbable(superpage
)) {
593 while ((pte
= pmap_pde(pmap
, vaddr
)) == PD_ENTRY_NULL
) {
594 /* need room for another pde entry */
595 PMAP_UNLOCK_SHARED(pmap
);
596 kr
= pmap_expand_pdpt(pmap
, vaddr
, options
);
597 if (kr
!= KERN_SUCCESS
) {
600 PMAP_LOCK_SHARED(pmap
);
603 while ((pte
= pmap_pte(pmap
, vaddr
)) == PT_ENTRY_NULL
) {
605 * Must unlock to expand the pmap
606 * going to grow pde level page(s)
608 PMAP_UNLOCK_SHARED(pmap
);
609 kr
= pmap_expand(pmap
, vaddr
, options
);
610 if (kr
!= KERN_SUCCESS
) {
613 PMAP_LOCK_SHARED(pmap
);
617 if (__improbable(options
& PMAP_EXPAND_OPTIONS_NOENTER
)) {
618 PMAP_UNLOCK_SHARED(pmap
);
623 if (__improbable(superpage
&& *pte
&& !(*pte
& PTE_PS
))) {
625 * There is still an empty page table mapped that
626 * was used for a previous base page mapping.
627 * Remember the PDE and the PDE index, so that we
628 * can free the page at the end of this function.
630 delpage_pde_index
= pdeidx(pmap
, vaddr
);
631 delpage_pm_obj
= pmap
->pm_obj
;
632 pmap_store_pte(pte
, 0);
638 old_pa
= pte_to_pa(*pte
);
639 pai
= pa_index(old_pa
);
640 old_pa_locked
= FALSE
;
643 PTE_IS_COMPRESSED(*pte
, pte
)) {
645 * "pmap" should be locked at this point, so this should
646 * not race with another pmap_enter() or pmap_remove_range().
648 assert(pmap
!= kernel_pmap
);
650 /* one less "compressed" */
651 OSAddAtomic64(-1, &pmap
->stats
.compressed
);
652 pmap_ledger_debit(pmap
, task_ledgers
.internal_compressed
,
654 if (*pte
& PTE_COMPRESSED_ALT
) {
657 task_ledgers
.alternate_accounting_compressed
,
660 /* was part of the footprint */
661 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
,
664 /* marker will be cleared below */
668 * if we have a previous managed page, lock the pv entry now. after
669 * we lock it, check to see if someone beat us to the lock and if so
672 if ((0 != old_pa
) && IS_MANAGED_PAGE(pai
)) {
674 old_pa_locked
= TRUE
;
675 old_pa
= pte_to_pa(*pte
);
677 UNLOCK_PVH(pai
); /* another path beat us to it */
678 old_pa_locked
= FALSE
;
683 * Special case if the incoming physical page is already mapped
687 pt_entry_t old_attributes
=
688 *pte
& ~(PTE_REF(is_ept
) | PTE_MOD(is_ept
) | PTE_LOCK(is_ept
));
691 * May be changing its wired attribute or protection
694 template = pa_to_pte(pa
);
696 if (__probable(!is_ept
)) {
697 template |= INTEL_PTE_VALID
;
699 template |= INTEL_EPT_IPAT
;
702 template |= pmap_get_cache_attributes(pa_index(pa
), is_ept
);
705 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
707 if (!is_ept
&& (VM_MEM_NOT_CACHEABLE
==
708 (flags
& (VM_MEM_NOT_CACHEABLE
| VM_WIMG_USE_DEFAULT
)))) {
709 if (!(flags
& VM_MEM_GUARDED
)) {
710 template |= INTEL_PTE_PAT
;
712 template |= INTEL_PTE_NCACHE
;
714 if (pmap
!= kernel_pmap
&& !is_ept
) {
715 template |= INTEL_PTE_USER
;
718 if (prot
& VM_PROT_READ
) {
719 template |= PTE_READ(is_ept
);
722 if (prot
& VM_PROT_WRITE
) {
723 template |= PTE_WRITE(is_ept
);
724 if (is_ept
&& !pmap_ept_support_ad
) {
725 template |= PTE_MOD(is_ept
);
727 assert(IS_MANAGED_PAGE(pai
));
728 pmap_phys_attributes
[pai
] |= PHYS_MODIFIED
;
732 if (prot
& VM_PROT_EXECUTE
) {
734 template = pte_set_ex(template, is_ept
);
738 template = pte_remove_ex(template, is_ept
);
742 template |= PTE_WIRED
;
743 if (!iswired(old_attributes
)) {
744 OSAddAtomic(+1, &pmap
->stats
.wired_count
);
745 pmap_ledger_credit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
748 if (iswired(old_attributes
)) {
749 assert(pmap
->stats
.wired_count
>= 1);
750 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
751 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
        if (superpage) {            /* this path cannot be used */
            template |= PTE_PS;     /* to change the page size! */
        }
758 if (old_attributes
== template) {
759 goto dont_update_pte
;
762 /* Determine delta, PV locked */
764 ((old_attributes
^ template) != PTE_WIRED
);
766 /* Optimisation: avoid TLB flush when adding writability */
767 if (need_tlbflush
== TRUE
&& !(old_attributes
& PTE_WRITE(is_ept
))) {
768 if ((old_attributes
^ template) == PTE_WRITE(is_ept
)) {
769 need_tlbflush
= FALSE
;
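        /*
         * Annotation: skipping the shootdown when only adding writability is
         * safe because the only stale state another CPU can hold is a
         * read-only TLB entry for this VA; its next write will fault, and the
         * fault path re-walks the page table and picks up the new writable
         * PTE.  Removing or narrowing permissions, by contrast, must always
         * be propagated immediately.
         */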
773 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
774 if (__improbable(is_ept
&& !pmap_ept_support_ad
)) {
775 template |= PTE_REF(is_ept
);
777 assert(IS_MANAGED_PAGE(pai
));
778 pmap_phys_attributes
[pai
] |= PHYS_REFERENCED
;
782 /* store modified PTE and preserve RC bits */
783 pt_entry_t npte
, opte
;
785 assert((*pte
& PTE_LOCK(is_ept
)) != 0);
789 npte
= template | (opte
& (PTE_REF(is_ept
) |
790 PTE_MOD(is_ept
))) | PTE_LOCK(is_ept
);
791 } while (!pmap_cmpx_pte(pte
, opte
, npte
));
796 old_pa_locked
= FALSE
;
802 * Outline of code from here:
803 * 1) If va was mapped, update TLBs, remove the mapping
804 * and remove old pvlist entry.
805 * 2) Add pvlist entry for new mapping
806 * 3) Enter new mapping.
808 * If the old physical page is not managed step 1) is skipped
809 * (except for updating the TLBs), and the mapping is
810 * overwritten at step 3). If the new physical page is not
811 * managed, step 2) is skipped.
813 /* TODO: add opportunistic refmod collect */
814 if (old_pa
!= (pmap_paddr_t
) 0) {
815 boolean_t was_altacct
= FALSE
;
818 * Don't do anything to pages outside valid memory here.
819 * Instead convince the code that enters a new mapping
820 * to overwrite the old one.
823 /* invalidate the PTE */
824 pmap_update_pte(pte
, PTE_VALID_MASK(is_ept
), 0);
825 /* propagate invalidate everywhere */
826 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
827 /* remember reference and change */
829 oattr
= (char) (old_pte
& (PTE_MOD(is_ept
) | PTE_REF(is_ept
)));
830 /* completely invalidate the PTE */
831 pmap_store_pte(pte
, PTE_LOCK(is_ept
));
833 if (IS_MANAGED_PAGE(pai
)) {
835 * Remove the mapping from the pvlist for
836 * this physical page.
837 * We'll end up with either a rooted pv or a
840 pvh_e
= pmap_pv_remove(pmap
, vaddr
, (ppnum_t
*) &pai
, &old_pte
, &was_altacct
);
843 if (IS_MANAGED_PAGE(pai
)) {
844 pmap_assert(old_pa_locked
== TRUE
);
845 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
846 assert(pmap
->stats
.resident_count
>= 1);
847 OSAddAtomic(-1, &pmap
->stats
.resident_count
);
848 if (pmap
!= kernel_pmap
) {
849 /* update pmap stats */
850 if (IS_REUSABLE_PAGE(pai
)) {
852 (pmap
->stats
.reusable
> 0,
854 pmap
->stats
.reusable
));
855 OSAddAtomic(-1, &pmap
->stats
.reusable
);
856 } else if (IS_INTERNAL_PAGE(pai
)) {
858 (pmap
->stats
.internal
> 0,
860 pmap
->stats
.internal
));
861 OSAddAtomic(-1, &pmap
->stats
.internal
);
864 (pmap
->stats
.external
> 0,
866 pmap
->stats
.external
));
867 OSAddAtomic(-1, &pmap
->stats
.external
);
872 assert(IS_INTERNAL_PAGE(pai
));
873 pmap_ledger_debit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
874 pmap_ledger_debit(pmap
, task_ledgers
.alternate_accounting
, PAGE_SIZE
);
875 } else if (IS_REUSABLE_PAGE(pai
)) {
876 assert(!was_altacct
);
877 assert(IS_INTERNAL_PAGE(pai
));
878 /* was already not in phys_footprint */
879 } else if (IS_INTERNAL_PAGE(pai
)) {
880 assert(!was_altacct
);
881 assert(!IS_REUSABLE_PAGE(pai
));
882 pmap_ledger_debit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
883 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
885 /* not an internal page */
889 assert(pmap
->stats
.wired_count
>= 1);
890 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
891 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
,
896 pmap_phys_attributes
[pai
] |= oattr
;
898 pmap_phys_attributes
[pai
] |= ept_refmod_to_physmap(oattr
);
902 * old_pa is not managed.
903 * Do removal part of accounting.
906 if (pmap
!= kernel_pmap
) {
908 assert(pmap
->stats
.device
> 0);
909 OSAddAtomic(-1, &pmap
->stats
.device
);
913 assert(pmap
->stats
.wired_count
>= 1);
914 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
915 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
	 * if we had a previously managed page locked, unlock it now
925 old_pa_locked
= FALSE
;
928 pai
= pa_index(pa
); /* now working with new incoming phys page */
929 if (IS_MANAGED_PAGE(pai
)) {
931 * Step 2) Enter the mapping in the PV list for this
934 pv_h
= pai_to_pvh(pai
);
938 if (pv_h
->pmap
== PMAP_NULL
) {
940 * No mappings yet, use rooted pv
942 pv_h
->va_and_flags
= vaddr
;
944 queue_init(&pv_h
->qlink
);
946 if (options
& PMAP_OPTIONS_INTERNAL
) {
947 pmap_phys_attributes
[pai
] |= PHYS_INTERNAL
;
949 pmap_phys_attributes
[pai
] &= ~PHYS_INTERNAL
;
951 if (options
& PMAP_OPTIONS_REUSABLE
) {
952 pmap_phys_attributes
[pai
] |= PHYS_REUSABLE
;
954 pmap_phys_attributes
[pai
] &= ~PHYS_REUSABLE
;
956 if ((options
& PMAP_OPTIONS_ALT_ACCT
) &&
957 IS_INTERNAL_PAGE(pai
)) {
958 pv_h
->va_and_flags
|= PVE_IS_ALTACCT
;
961 pv_h
->va_and_flags
&= ~PVE_IS_ALTACCT
;
966 * Add new pv_hashed_entry after header.
968 if ((PV_HASHED_ENTRY_NULL
== pvh_e
) && pvh_new
) {
970 pvh_new
= PV_HASHED_ENTRY_NULL
;
971 } else if (PV_HASHED_ENTRY_NULL
== pvh_e
) {
972 PV_HASHED_ALLOC(&pvh_e
);
973 if (PV_HASHED_ENTRY_NULL
== pvh_e
) {
975 * the pv list is empty. if we are on
976 * the kernel pmap we'll use one of
977 * the special private kernel pv_e's,
978 * else, we need to unlock
979 * everything, zalloc a pv_e, and
980 * restart bringing in the pv_e with
983 if (kernel_pmap
== pmap
) {
984 PV_HASHED_KERN_ALLOC(&pvh_e
);
987 PTE_LOCK_UNLOCK(pte
);
988 PMAP_UNLOCK_SHARED(pmap
);
989 pmap_pv_throttle(pmap
);
990 pvh_new
= (pv_hashed_entry_t
) zalloc(pv_hashed_list_zone
);
996 if (PV_HASHED_ENTRY_NULL
== pvh_e
) {
997 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
1000 pvh_e
->va_and_flags
= vaddr
;
1003 if ((options
& PMAP_OPTIONS_ALT_ACCT
) &&
1004 IS_INTERNAL_PAGE(pai
)) {
1005 pvh_e
->va_and_flags
|= PVE_IS_ALTACCT
;
1008 pvh_e
->va_and_flags
&= ~PVE_IS_ALTACCT
;
1011 pv_hash_add(pvh_e
, pv_h
);
1014 * Remember that we used the pvlist entry.
1016 pvh_e
= PV_HASHED_ENTRY_NULL
;
1020 * only count the mapping
1021 * for 'managed memory'
1023 pmap_ledger_credit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
1024 OSAddAtomic(+1, &pmap
->stats
.resident_count
);
1025 if (pmap
->stats
.resident_count
> pmap
->stats
.resident_max
) {
1026 pmap
->stats
.resident_max
= pmap
->stats
.resident_count
;
1028 if (pmap
!= kernel_pmap
) {
1029 /* update pmap stats */
1030 if (IS_REUSABLE_PAGE(pai
)) {
1031 OSAddAtomic(+1, &pmap
->stats
.reusable
);
1032 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
1033 } else if (IS_INTERNAL_PAGE(pai
)) {
1034 OSAddAtomic(+1, &pmap
->stats
.internal
);
1035 PMAP_STATS_PEAK(pmap
->stats
.internal
);
1037 OSAddAtomic(+1, &pmap
->stats
.external
);
1038 PMAP_STATS_PEAK(pmap
->stats
.external
);
1041 /* update ledgers */
1043 /* internal but also alternate accounting */
1044 assert(IS_INTERNAL_PAGE(pai
));
1045 pmap_ledger_credit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
1046 pmap_ledger_credit(pmap
, task_ledgers
.alternate_accounting
, PAGE_SIZE
);
1047 /* alternate accounting, so not in footprint */
1048 } else if (IS_REUSABLE_PAGE(pai
)) {
1049 assert(!is_altacct
);
1050 assert(IS_INTERNAL_PAGE(pai
));
1051 /* internal but reusable: not in footprint */
1052 } else if (IS_INTERNAL_PAGE(pai
)) {
1053 assert(!is_altacct
);
1054 assert(!IS_REUSABLE_PAGE(pai
));
1055 /* internal: add to footprint */
1056 pmap_ledger_credit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
1057 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
1059 /* not internal: not in footprint */
1062 } else if (last_managed_page
== 0) {
1063 /* Account for early mappings created before "managed pages"
1064 * are determined. Consider consulting the available DRAM map.
1066 pmap_ledger_credit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
1067 OSAddAtomic(+1, &pmap
->stats
.resident_count
);
1068 if (pmap
!= kernel_pmap
) {
1070 OSAddAtomic(+1, &pmap
->stats
.device
);
1071 PMAP_STATS_PEAK(pmap
->stats
.device
);
1076 * Step 3) Enter the mapping.
1078 * Build a template to speed up entering -
1079 * only the pfn changes.
1081 template = pa_to_pte(pa
);
1084 template |= INTEL_PTE_VALID
;
1086 template |= INTEL_EPT_IPAT
;
1091 * DRK: It may be worth asserting on cache attribute flags that diverge
1092 * from the existing physical page attributes.
1095 template |= pmap_get_cache_attributes(pa_index(pa
), is_ept
);
1098 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
1100 if (!is_ept
&& (flags
& VM_MEM_NOT_CACHEABLE
)) {
1101 if (!(flags
& VM_MEM_GUARDED
)) {
1102 template |= INTEL_PTE_PAT
;
1104 template |= INTEL_PTE_NCACHE
;
1106 if (pmap
!= kernel_pmap
&& !is_ept
) {
1107 template |= INTEL_PTE_USER
;
1109 if (prot
& VM_PROT_READ
) {
1110 template |= PTE_READ(is_ept
);
1112 if (prot
& VM_PROT_WRITE
) {
1113 template |= PTE_WRITE(is_ept
);
1114 if (is_ept
&& !pmap_ept_support_ad
) {
1115 template |= PTE_MOD(is_ept
);
1116 if (IS_MANAGED_PAGE(pai
)) {
1117 pmap_phys_attributes
[pai
] |= PHYS_MODIFIED
;
1121 if (prot
& VM_PROT_EXECUTE
) {
1122 assert(set_NX
== 0);
1123 template = pte_set_ex(template, is_ept
);
1127 template = pte_remove_ex(template, is_ept
);
1130 template |= INTEL_PTE_WIRED
;
1131 OSAddAtomic(+1, &pmap
->stats
.wired_count
);
1132 pmap_ledger_credit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
1134 if (__improbable(superpage
)) {
1135 template |= INTEL_PTE_PS
;
1138 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
1139 if (__improbable(is_ept
&& !pmap_ept_support_ad
)) {
1140 template |= PTE_REF(is_ept
);
1141 if (IS_MANAGED_PAGE(pai
)) {
1142 pmap_phys_attributes
[pai
] |= PHYS_REFERENCED
;
1145 template |= PTE_LOCK(is_ept
);
1146 pmap_store_pte(pte
, template);
	/*
	 * if this was a managed page we delayed unlocking the pv until here
	 * to prevent pmap_page_protect et al from finding it until the pte
	 * has been modified
	 */
1153 if (IS_MANAGED_PAGE(pai
)) {
1157 if (need_tlbflush
== TRUE
) {
1158 if (options
& PMAP_OPTIONS_NOFLUSH
) {
1159 PMAP_UPDATE_TLBS_DELAYED(pmap
, vaddr
, vaddr
+ PAGE_SIZE
, (pmap_flush_context
*)arg
);
1161 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
1165 PTE_LOCK_UNLOCK(pte
);
1167 PMAP_UNLOCK_SHARED(pmap
);
1169 if (pvh_e
!= PV_HASHED_ENTRY_NULL
) {
1170 PV_HASHED_FREE_LIST(pvh_e
, pvh_e
, 1);
1172 if (pvh_new
!= PV_HASHED_ENTRY_NULL
) {
1173 PV_HASHED_KERN_FREE_LIST(pvh_new
, pvh_new
, 1);
1176 if (delpage_pm_obj
) {
1179 vm_object_lock(delpage_pm_obj
);
1180 m
= vm_page_lookup(delpage_pm_obj
, (delpage_pde_index
* PAGE_SIZE
));
1181 if (m
== VM_PAGE_NULL
) {
1182 panic("pmap_enter: pte page not in object");
1185 vm_object_unlock(delpage_pm_obj
);
1186 OSAddAtomic(-1, &inuse_ptepages_count
);
1187 PMAP_ZINFO_PFREE(pmap
, PAGE_SIZE
);
1192 PMAP_TRACE(PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_END
, kr
);
/*
 * Remove a range of hardware page-table entries.
 * The entries given are the first (inclusive)
 * and last (exclusive) entries for the VM pages.
 * The virtual address is the va for the first pte.
 *
 * The pmap must be locked.
 * If the pmap is not the kernel pmap, the range must lie
 * entirely within one pte-page.  This is NOT checked.
 * Assumes that the pte-page exists.
 */
void
pmap_remove_range(
    pmap_t          pmap,
    vm_map_offset_t start_vaddr,
    pt_entry_t      *spte,
    pt_entry_t      *epte)
{
    pmap_remove_range_options(pmap, start_vaddr, spte, epte,
        PMAP_OPTIONS_REMOVE);
}
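/*
 * Annotation: pmap_remove_range_options() below works in two passes.  The
 * first pass clears the valid bit in every PTE of the range ("freezing"
 * them), after which the TLBs are flushed once for the whole range; only then
 * does the second pass walk the frozen PTEs to update PV lists, pmap stats
 * and ledgers.  This bounds the window in which other CPUs can still use
 * stale translations to a single flush.
 */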
1220 pmap_remove_range_options(
1222 vm_map_offset_t start_vaddr
,
1228 pv_hashed_entry_t pvh_et
= PV_HASHED_ENTRY_NULL
;
1229 pv_hashed_entry_t pvh_eh
= PV_HASHED_ENTRY_NULL
;
1230 pv_hashed_entry_t pvh_e
;
1232 int num_removed
, num_unwired
, num_found
, num_invalid
;
1233 int stats_external
, stats_internal
, stats_reusable
;
1234 uint64_t stats_compressed
;
1235 int ledgers_internal
, ledgers_alt_internal
;
1236 uint64_t ledgers_compressed
, ledgers_alt_compressed
;
1239 vm_map_offset_t vaddr
;
1240 boolean_t is_ept
= is_ept_pmap(pmap
);
1241 boolean_t was_altacct
;
1250 stats_compressed
= 0;
1251 ledgers_internal
= 0;
1252 ledgers_compressed
= 0;
1253 ledgers_alt_internal
= 0;
1254 ledgers_alt_compressed
= 0;
1255 /* invalidate the PTEs first to "freeze" them */
1256 for (cpte
= spte
, vaddr
= start_vaddr
;
1258 cpte
++, vaddr
+= PAGE_SIZE_64
) {
1259 pt_entry_t p
= *cpte
;
1263 if ((options
& PMAP_OPTIONS_REMOVE
) &&
1264 (PTE_IS_COMPRESSED(p
, cpte
))) {
1265 assert(pmap
!= kernel_pmap
);
1266 /* one less "compressed"... */
1268 ledgers_compressed
++;
1269 if (p
& PTE_COMPRESSED_ALT
) {
1270 /* ... but it used to be "ALTACCT" */
1271 ledgers_alt_compressed
++;
1273 /* clear marker(s) */
1274 /* XXX probably does not need to be atomic! */
1275 pmap_update_pte(cpte
, INTEL_PTE_COMPRESSED_MASK
, 0);
1287 if (!IS_MANAGED_PAGE(pai
)) {
1289 * Outside range of managed physical memory.
1290 * Just remove the mappings.
1292 pmap_store_pte(cpte
, 0);
1296 if ((p
& PTE_VALID_MASK(is_ept
)) == 0) {
1300 /* invalidate the PTE */
1301 pmap_update_pte(cpte
, PTE_VALID_MASK(is_ept
), 0);
1304 if (num_found
== 0) {
1305 /* nothing was changed: we're done */
1309 /* propagate the invalidates to other CPUs */
1311 PMAP_UPDATE_TLBS(pmap
, start_vaddr
, vaddr
);
1313 for (cpte
= spte
, vaddr
= start_vaddr
;
1315 cpte
++, vaddr
+= PAGE_SIZE_64
) {
1316 pa
= pte_to_pa(*cpte
);
1318 check_pte_for_compressed_marker
:
1320 * This PTE could have been replaced with a
1321 * "compressed" marker after our first "freeze"
1322 * loop above, so check again.
1324 if ((options
& PMAP_OPTIONS_REMOVE
) &&
1325 (PTE_IS_COMPRESSED(*cpte
, cpte
))) {
1326 assert(pmap
!= kernel_pmap
);
1327 /* one less "compressed"... */
1329 ledgers_compressed
++;
1330 if (*cpte
& PTE_COMPRESSED_ALT
) {
1331 /* ... but it used to be "ALTACCT" */
1332 ledgers_alt_compressed
++;
1334 pmap_store_pte(cpte
, 0);
1343 pa
= pte_to_pa(*cpte
);
1346 goto check_pte_for_compressed_marker
;
1350 * Remove the mapping from the pvlist for this physical page.
1352 pvh_e
= pmap_pv_remove(pmap
, vaddr
, (ppnum_t
*) &pai
, cpte
, &was_altacct
);
1355 /* update pmap stats */
1356 if (IS_REUSABLE_PAGE(pai
)) {
1358 } else if (IS_INTERNAL_PAGE(pai
)) {
1363 /* update ledgers */
1365 /* internal and alternate accounting */
1366 assert(IS_INTERNAL_PAGE(pai
));
1368 ledgers_alt_internal
++;
1369 } else if (IS_REUSABLE_PAGE(pai
)) {
1370 /* internal but reusable */
1371 assert(!was_altacct
);
1372 assert(IS_INTERNAL_PAGE(pai
));
1373 } else if (IS_INTERNAL_PAGE(pai
)) {
1375 assert(!was_altacct
);
1376 assert(!IS_REUSABLE_PAGE(pai
));
1383 * Get the modify and reference bits, then
1384 * nuke the entry in the page table
1386 /* remember reference and change */
1388 pmap_phys_attributes
[pai
] |=
1389 *cpte
& (PHYS_MODIFIED
| PHYS_REFERENCED
);
1391 pmap_phys_attributes
[pai
] |=
1392 ept_refmod_to_physmap((*cpte
& (INTEL_EPT_REF
| INTEL_EPT_MOD
))) & (PHYS_MODIFIED
| PHYS_REFERENCED
);
1395 /* completely invalidate the PTE */
1396 pmap_store_pte(cpte
, 0);
1400 if (pvh_e
!= PV_HASHED_ENTRY_NULL
) {
1401 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1404 if (pvh_et
== PV_HASHED_ENTRY_NULL
) {
1411 if (pvh_eh
!= PV_HASHED_ENTRY_NULL
) {
1412 PV_HASHED_FREE_LIST(pvh_eh
, pvh_et
, pvh_cnt
);
1419 if (pmap
->stats
.resident_count
< num_removed
) {
1420 panic("pmap_remove_range: resident_count");
1423 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, machine_ptob(num_removed
));
1424 PMAP_STATS_ASSERTF((pmap
->stats
.resident_count
>= num_removed
,
1425 "pmap=%p num_removed=%d stats.resident_count=%d",
1426 pmap
, num_removed
, pmap
->stats
.resident_count
));
1427 OSAddAtomic(-num_removed
, &pmap
->stats
.resident_count
);
1429 if (pmap
!= kernel_pmap
) {
1430 PMAP_STATS_ASSERTF((pmap
->stats
.external
>= stats_external
,
1431 "pmap=%p stats_external=%d stats.external=%d",
1432 pmap
, stats_external
, pmap
->stats
.external
));
1433 PMAP_STATS_ASSERTF((pmap
->stats
.internal
>= stats_internal
,
1434 "pmap=%p stats_internal=%d stats.internal=%d",
1435 pmap
, stats_internal
, pmap
->stats
.internal
));
1436 PMAP_STATS_ASSERTF((pmap
->stats
.reusable
>= stats_reusable
,
1437 "pmap=%p stats_reusable=%d stats.reusable=%d",
1438 pmap
, stats_reusable
, pmap
->stats
.reusable
));
1439 PMAP_STATS_ASSERTF((pmap
->stats
.compressed
>= stats_compressed
,
1440 "pmap=%p stats_compressed=%lld, stats.compressed=%lld",
1441 pmap
, stats_compressed
, pmap
->stats
.compressed
));
1443 /* update pmap stats */
1444 if (stats_external
) {
1445 OSAddAtomic(-stats_external
, &pmap
->stats
.external
);
1447 if (stats_internal
) {
1448 OSAddAtomic(-stats_internal
, &pmap
->stats
.internal
);
1450 if (stats_reusable
) {
1451 OSAddAtomic(-stats_reusable
, &pmap
->stats
.reusable
);
1453 if (stats_compressed
) {
1454 OSAddAtomic64(-stats_compressed
, &pmap
->stats
.compressed
);
1456 /* update ledgers */
1457 if (ledgers_internal
) {
1458 pmap_ledger_debit(pmap
,
1459 task_ledgers
.internal
,
1460 machine_ptob(ledgers_internal
));
1462 if (ledgers_compressed
) {
1463 pmap_ledger_debit(pmap
,
1464 task_ledgers
.internal_compressed
,
1465 machine_ptob(ledgers_compressed
));
1467 if (ledgers_alt_internal
) {
1468 pmap_ledger_debit(pmap
,
1469 task_ledgers
.alternate_accounting
,
1470 machine_ptob(ledgers_alt_internal
));
1472 if (ledgers_alt_compressed
) {
1473 pmap_ledger_debit(pmap
,
1474 task_ledgers
.alternate_accounting_compressed
,
1475 machine_ptob(ledgers_alt_compressed
));
1477 pmap_ledger_debit(pmap
,
1478 task_ledgers
.phys_footprint
,
1479 machine_ptob((ledgers_internal
-
1480 ledgers_alt_internal
) +
1481 (ledgers_compressed
-
1482 ledgers_alt_compressed
)));
1486 if (pmap
->stats
.wired_count
< num_unwired
) {
1487 panic("pmap_remove_range: wired_count");
1490 PMAP_STATS_ASSERTF((pmap
->stats
.wired_count
>= num_unwired
,
1491 "pmap=%p num_unwired=%d stats.wired_count=%d",
1492 pmap
, num_unwired
, pmap
->stats
.wired_count
));
1493 OSAddAtomic(-num_unwired
, &pmap
->stats
.wired_count
);
1494 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, machine_ptob(num_unwired
));
/*
 * Remove the given range of addresses
 * from the specified map.
 *
 * It is assumed that the start and end are properly
 * rounded to the hardware page size.
 */
void
pmap_remove(
    pmap_t          map,
    addr64_t        s64,
    addr64_t        e64)
{
    pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
}

#define PLCHECK_THRESHOLD (8)
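/*
 * Annotation: PLCHECK_THRESHOLD bounds how many PDE ranges
 * pmap_remove_options() (and pmap_query_resident()) traverse before they
 * start checking rdtsc64() against a deadline derived from
 * max_preemption_latency_tsc.  Once the deadline passes, the pmap lock is
 * dropped and immediately re-taken so that a waiting thread or a pending
 * preemption can get through during very large removals.
 */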
1518 pmap_remove_options(
1525 pt_entry_t
*spte
, *epte
;
1527 uint64_t deadline
= 0;
1532 if (map
== PMAP_NULL
|| s64
== e64
) {
1536 is_ept
= is_ept_pmap(map
);
1538 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE
) | DBG_FUNC_START
,
1539 VM_KERNEL_ADDRHIDE(map
), VM_KERNEL_ADDRHIDE(s64
),
1540 VM_KERNEL_ADDRHIDE(e64
));
1542 PMAP_LOCK_EXCLUSIVE(map
);
1543 uint32_t traverse_count
= 0;
1546 pml4_entry_t
*pml4e
= pmap64_pml4(map
, s64
);
1547 if ((pml4e
== NULL
) ||
1548 ((*pml4e
& PTE_VALID_MASK(is_ept
)) == 0)) {
1549 s64
= (s64
+ NBPML4
) & ~(PML4MASK
);
1552 pdpt_entry_t
*pdpte
= pmap64_pdpt(map
, s64
);
1553 if ((pdpte
== NULL
) ||
1554 ((*pdpte
& PTE_VALID_MASK(is_ept
)) == 0)) {
1555 s64
= (s64
+ NBPDPT
) & ~(PDPTMASK
);
1559 l64
= (s64
+ PDE_MAPPED_SIZE
) & ~(PDE_MAPPED_SIZE
- 1);
1565 pde
= pmap_pde(map
, s64
);
1567 if (pde
&& (*pde
& PTE_VALID_MASK(is_ept
))) {
1568 if (*pde
& PTE_PS
) {
1570 * If we're removing a superpage, pmap_remove_range()
1571 * must work on level 2 instead of level 1; and we're
1572 * only passing a single level 2 entry instead of a
1576 epte
= spte
+ 1; /* excluded */
1578 spte
= pmap_pte(map
, (s64
& ~(PDE_MAPPED_SIZE
- 1)));
1579 spte
= &spte
[ptenum(s64
)];
1580 epte
= &spte
[intel_btop(l64
- s64
)];
1582 pmap_remove_range_options(map
, s64
, spte
, epte
,
1587 if ((s64
< e64
) && (traverse_count
++ > PLCHECK_THRESHOLD
)) {
1588 if (deadline
== 0) {
1589 deadline
= rdtsc64() + max_preemption_latency_tsc
;
1591 if (rdtsc64() > deadline
) {
1592 PMAP_UNLOCK_EXCLUSIVE(map
);
1593 __builtin_ia32_pause();
1594 PMAP_LOCK_EXCLUSIVE(map
);
1595 deadline
= rdtsc64() + max_preemption_latency_tsc
;
1601 PMAP_UNLOCK_EXCLUSIVE(map
);
1603 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE
) | DBG_FUNC_END
);
void
pmap_page_protect(
    ppnum_t         pn,
    vm_prot_t       prot)
{
    pmap_page_protect_options(pn, prot, 0, NULL);
}

/*
 * Routine:	pmap_page_protect_options
 *
 * Function:
 *	Lower the permission for all mappings to a given
 *	page.
 */
void
pmap_page_protect_options(
    ppnum_t         pn,
    vm_prot_t       prot,
    unsigned int    options,
    void            *arg)
{
    pv_hashed_entry_t       pvh_eh = PV_HASHED_ENTRY_NULL;
1629 pv_hashed_entry_t pvh_et
= PV_HASHED_ENTRY_NULL
;
1630 pv_hashed_entry_t nexth
;
1632 pv_rooted_entry_t pv_h
;
1633 pv_rooted_entry_t pv_e
;
1634 pv_hashed_entry_t pvh_e
;
1639 pt_entry_t new_pte_value
;
1643 assert(pn
!= vm_page_fictitious_addr
);
1644 if (pn
== vm_page_guard_addr
) {
1648 pai
= ppn_to_pai(pn
);
1650 if (!IS_MANAGED_PAGE(pai
)) {
1652 * Not a managed page.
1657 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT
) | DBG_FUNC_START
, pn
, prot
);
1660 * Determine the new protection.
1664 case VM_PROT_READ
| VM_PROT_EXECUTE
:
1668 return; /* nothing to do */
1674 pv_h
= pai_to_pvh(pai
);
1680 * Walk down PV list, if any, changing or removing all mappings.
1682 if (pv_h
->pmap
== PMAP_NULL
) {
1687 pvh_e
= (pv_hashed_entry_t
) pv_e
; /* cheat */
1690 vm_map_offset_t vaddr
;
1692 if ((options
& PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
) &&
1693 (pmap_phys_attributes
[pai
] & PHYS_MODIFIED
)) {
1694 /* page was modified, so it will be compressed */
1695 options
&= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
;
1696 options
|= PMAP_OPTIONS_COMPRESSOR
;
1700 is_ept
= is_ept_pmap(pmap
);
1701 vaddr
= PVE_VA(pv_e
);
1702 pte
= pmap_pte(pmap
, vaddr
);
1704 pmap_assert2((pa_index(pte_to_pa(*pte
)) == pn
),
1705 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn
, pmap
, vaddr
, *pte
);
1708 panic("pmap_page_protect() "
1709 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1712 nexth
= (pv_hashed_entry_t
) queue_next(&pvh_e
->qlink
);
1715 * Remove the mapping if new protection is NONE
1718 /* Remove per-pmap wired count */
1719 if (iswired(*pte
)) {
1720 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
1721 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
1724 if (pmap
!= kernel_pmap
&&
1725 (options
& PMAP_OPTIONS_COMPRESSOR
) &&
1726 IS_INTERNAL_PAGE(pai
)) {
1727 assert(!PTE_IS_COMPRESSED(*pte
, pte
));
1728 /* mark this PTE as having been "compressed" */
1729 new_pte_value
= PTE_COMPRESSED
;
1730 if (IS_ALTACCT_PAGE(pai
, pv_e
)) {
1731 new_pte_value
|= PTE_COMPRESSED_ALT
;
1737 if (options
& PMAP_OPTIONS_NOREFMOD
) {
1738 pmap_store_pte(pte
, new_pte_value
);
1740 if (options
& PMAP_OPTIONS_NOFLUSH
) {
1741 PMAP_UPDATE_TLBS_DELAYED(pmap
, vaddr
, vaddr
+ PAGE_SIZE
, (pmap_flush_context
*)arg
);
1743 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
1747 * Remove the mapping, collecting dirty bits.
1749 pmap_update_pte(pte
, PTE_VALID_MASK(is_ept
), 0);
1751 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
1753 pmap_phys_attributes
[pai
] |=
1754 *pte
& (PHYS_MODIFIED
| PHYS_REFERENCED
);
1756 pmap_phys_attributes
[pai
] |=
1757 ept_refmod_to_physmap((*pte
& (INTEL_EPT_REF
| INTEL_EPT_MOD
))) & (PHYS_MODIFIED
| PHYS_REFERENCED
);
1760 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
) &&
1761 IS_INTERNAL_PAGE(pai
) &&
1762 (pmap_phys_attributes
[pai
] &
1765 * Page is actually "modified" and
1766 * will be compressed. Start
1767 * accounting for it as "compressed".
1769 assert(!(options
& PMAP_OPTIONS_COMPRESSOR
));
1770 options
&= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
;
1771 options
|= PMAP_OPTIONS_COMPRESSOR
;
1772 assert(new_pte_value
== 0);
1773 if (pmap
!= kernel_pmap
) {
1774 new_pte_value
= PTE_COMPRESSED
;
1775 if (IS_ALTACCT_PAGE(pai
, pv_e
)) {
1776 new_pte_value
|= PTE_COMPRESSED_ALT
;
1780 pmap_store_pte(pte
, new_pte_value
);
1784 if (pmap
->stats
.resident_count
< 1) {
1785 panic("pmap_page_protect: resident_count");
1788 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
1789 assert(pmap
->stats
.resident_count
>= 1);
1790 OSAddAtomic(-1, &pmap
->stats
.resident_count
);
1793 * We only ever compress internal pages.
1795 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
1796 assert(IS_INTERNAL_PAGE(pai
));
1798 if (pmap
!= kernel_pmap
) {
1799 /* update pmap stats */
1800 if (IS_REUSABLE_PAGE(pai
)) {
1801 assert(pmap
->stats
.reusable
> 0);
1802 OSAddAtomic(-1, &pmap
->stats
.reusable
);
1803 } else if (IS_INTERNAL_PAGE(pai
)) {
1804 assert(pmap
->stats
.internal
> 0);
1805 OSAddAtomic(-1, &pmap
->stats
.internal
);
1807 assert(pmap
->stats
.external
> 0);
1808 OSAddAtomic(-1, &pmap
->stats
.external
);
1810 if ((options
& PMAP_OPTIONS_COMPRESSOR
) &&
1811 IS_INTERNAL_PAGE(pai
)) {
1812 /* adjust "compressed" stats */
1813 OSAddAtomic64(+1, &pmap
->stats
.compressed
);
1814 PMAP_STATS_PEAK(pmap
->stats
.compressed
);
1815 pmap
->stats
.compressed_lifetime
++;
1818 /* update ledgers */
1819 if (IS_ALTACCT_PAGE(pai
, pv_e
)) {
1820 assert(IS_INTERNAL_PAGE(pai
));
1821 pmap_ledger_debit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
1822 pmap_ledger_debit(pmap
, task_ledgers
.alternate_accounting
, PAGE_SIZE
);
1823 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
1824 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, PAGE_SIZE
);
1825 pmap_ledger_credit(pmap
, task_ledgers
.alternate_accounting_compressed
, PAGE_SIZE
);
1827 } else if (IS_REUSABLE_PAGE(pai
)) {
1828 assert(!IS_ALTACCT_PAGE(pai
, pv_e
));
1829 assert(IS_INTERNAL_PAGE(pai
));
1830 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
1831 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, PAGE_SIZE
);
1832 /* was not in footprint, but is now */
1833 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
1835 } else if (IS_INTERNAL_PAGE(pai
)) {
1836 assert(!IS_ALTACCT_PAGE(pai
, pv_e
));
1837 assert(!IS_REUSABLE_PAGE(pai
));
1838 pmap_ledger_debit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
1840 * Update all stats related to physical
1841 * footprint, which only deals with
1844 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
1846 * This removal is only being
1847 * done so we can send this page
1848 * to the compressor; therefore
1849 * it mustn't affect total task
1852 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, PAGE_SIZE
);
1855 * This internal page isn't
1856 * going to the compressor,
1857 * so adjust stats to keep
1858 * phys_footprint up to date.
1860 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
1866 * Deal with the pv_rooted_entry.
1871 * Fix up head later.
1873 pv_h
->pmap
= PMAP_NULL
;
1876 * Delete this entry.
1878 pv_hash_remove(pvh_e
);
1879 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1882 if (pvh_et
== PV_HASHED_ENTRY_NULL
) {
1889 * Write-protect, after opportunistic refmod collect
1892 pmap_phys_attributes
[pai
] |=
1893 *pte
& (PHYS_MODIFIED
| PHYS_REFERENCED
);
1895 pmap_phys_attributes
[pai
] |=
1896 ept_refmod_to_physmap((*pte
& (INTEL_EPT_REF
| INTEL_EPT_MOD
))) & (PHYS_MODIFIED
| PHYS_REFERENCED
);
1898 pmap_update_pte(pte
, PTE_WRITE(is_ept
), 0);
1900 if (options
& PMAP_OPTIONS_NOFLUSH
) {
1901 PMAP_UPDATE_TLBS_DELAYED(pmap
, vaddr
, vaddr
+ PAGE_SIZE
, (pmap_flush_context
*)arg
);
1903 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
1907 } while ((pv_e
= (pv_rooted_entry_t
) nexth
) != pv_h
);
1911 * If pv_head mapping was removed, fix it up.
1913 if (pv_h
->pmap
== PMAP_NULL
) {
1914 pvh_e
= (pv_hashed_entry_t
) queue_next(&pv_h
->qlink
);
1916 if (pvh_e
!= (pv_hashed_entry_t
) pv_h
) {
1917 pv_hash_remove(pvh_e
);
1918 pv_h
->pmap
= pvh_e
->pmap
;
1919 pv_h
->va_and_flags
= pvh_e
->va_and_flags
;
1920 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1923 if (pvh_et
== PV_HASHED_ENTRY_NULL
) {
1929 if (pvh_eh
!= PV_HASHED_ENTRY_NULL
) {
1930 PV_HASHED_FREE_LIST(pvh_eh
, pvh_et
, pvh_cnt
);
1935 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT
) | DBG_FUNC_END
);
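/*
 * Illustrative sketch (not original source, hence disabled): typical uses of
 * pmap_page_protect() on a managed page.  VM_PROT_READ (or READ | EXECUTE)
 * downgrades every mapping of the page to read-only, while VM_PROT_NONE
 * removes all mappings.  "pn" is a hypothetical managed page number.
 */
#if 0
static void
example_page_protect(ppnum_t pn)
{
    pmap_page_protect(pn, VM_PROT_READ);    /* write-protect all mappings */
    pmap_page_protect(pn, VM_PROT_NONE);    /* remove all mappings */
}
#endif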
1940 * Clear specified attribute bits.
1943 phys_attribute_clear(
1946 unsigned int options
,
1949 pv_rooted_entry_t pv_h
;
1950 pv_hashed_entry_t pv_e
;
1951 pt_entry_t
*pte
= NULL
;
1954 char attributes
= 0;
1955 boolean_t is_internal
, is_reusable
, is_altacct
, is_ept
;
1956 int ept_bits_to_clear
;
1957 boolean_t ept_keep_global_mod
= FALSE
;
1959 if ((bits
& PHYS_MODIFIED
) &&
1960 (options
& PMAP_OPTIONS_NOFLUSH
) &&
1962 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
1963 "should not clear 'modified' without flushing TLBs\n",
1964 pn
, bits
, options
, arg
);
1967 /* We only support converting MOD and REF bits for EPT PTEs in this function */
1968 assert((bits
& ~(PHYS_REFERENCED
| PHYS_MODIFIED
)) == 0);
1970 ept_bits_to_clear
= (unsigned)physmap_refmod_to_ept(bits
& (PHYS_MODIFIED
| PHYS_REFERENCED
));
1973 assert(pn
!= vm_page_fictitious_addr
);
1974 if (pn
== vm_page_guard_addr
) {
1978 pai
= ppn_to_pai(pn
);
1980 if (!IS_MANAGED_PAGE(pai
)) {
1982 * Not a managed page.
1987 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_START
, pn
, bits
);
1989 pv_h
= pai_to_pvh(pai
);
1995 * Walk down PV list, clearing all modify or reference bits.
1996 * We do not have to lock the pv_list because we have
1999 if (pv_h
->pmap
!= PMAP_NULL
) {
2001 * There are some mappings.
2004 is_internal
= IS_INTERNAL_PAGE(pai
);
2005 is_reusable
= IS_REUSABLE_PAGE(pai
);
2007 pv_e
= (pv_hashed_entry_t
)pv_h
;
2014 is_ept
= is_ept_pmap(pmap
);
2015 is_altacct
= IS_ALTACCT_PAGE(pai
, pv_e
);
2020 pte
= pmap_pte(pmap
, va
);
2021 /* grab ref/mod bits from this PTE */
2022 pte_bits
= (*pte
& (PTE_REF(is_ept
) | PTE_MOD(is_ept
)));
2023 /* propagate to page's global attributes */
2025 attributes
|= pte_bits
;
2027 attributes
|= ept_refmod_to_physmap(pte_bits
);
2028 if (!pmap_ept_support_ad
&& (pte_bits
& INTEL_EPT_MOD
)) {
2029 ept_keep_global_mod
= TRUE
;
2032 /* which bits to clear for this PTE? */
2036 pte_bits
&= ept_bits_to_clear
;
2039 if (options
& PMAP_OPTIONS_CLEAR_WRITE
) {
2040 pte_bits
|= PTE_WRITE(is_ept
);
2044 * Clear modify and/or reference bits.
2047 pmap_update_pte(pte
, pte_bits
, 0);
2049 /* Ensure all processors using this translation
2050 * invalidate this TLB entry. The invalidation
2051 * *must* follow the PTE update, to ensure that
2052 * the TLB shadow of the 'D' bit (in particular)
2053 * is synchronized with the updated PTE.
2055 if (!(options
& PMAP_OPTIONS_NOFLUSH
)) {
2056 /* flush TLBS now */
2057 PMAP_UPDATE_TLBS(pmap
,
2061 /* delayed TLB flush: add "pmap" info */
2062 PMAP_UPDATE_TLBS_DELAYED(
2066 (pmap_flush_context
*)arg
);
2068 /* no TLB flushing at all */
2072 /* update pmap "reusable" stats */
2073 if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
2075 pmap
!= kernel_pmap
) {
2076 /* one less "reusable" */
2077 assert(pmap
->stats
.reusable
> 0);
2078 OSAddAtomic(-1, &pmap
->stats
.reusable
);
2080 /* one more "internal" */
2081 OSAddAtomic(+1, &pmap
->stats
.internal
);
2082 PMAP_STATS_PEAK(pmap
->stats
.internal
);
2083 assert(pmap
->stats
.internal
> 0);
2085 /* no impact on ledgers */
2087 pmap_ledger_credit(pmap
,
2088 task_ledgers
.internal
,
2092 task_ledgers
.phys_footprint
,
2096 /* one more "external" */
2097 OSAddAtomic(+1, &pmap
->stats
.external
);
2098 PMAP_STATS_PEAK(pmap
->stats
.external
);
2099 assert(pmap
->stats
.external
> 0);
2101 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
2103 pmap
!= kernel_pmap
) {
2104 /* one more "reusable" */
2105 OSAddAtomic(+1, &pmap
->stats
.reusable
);
2106 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
2107 assert(pmap
->stats
.reusable
> 0);
2109 /* one less "internal" */
2110 assert(pmap
->stats
.internal
> 0);
2111 OSAddAtomic(-1, &pmap
->stats
.internal
);
2113 /* no impact on footprint */
2115 pmap_ledger_debit(pmap
,
2116 task_ledgers
.internal
,
2120 task_ledgers
.phys_footprint
,
2124 /* one less "external" */
2125 assert(pmap
->stats
.external
> 0);
2126 OSAddAtomic(-1, &pmap
->stats
.external
);
2130 pv_e
= (pv_hashed_entry_t
)queue_next(&pv_e
->qlink
);
2131 } while (pv_e
!= (pv_hashed_entry_t
)pv_h
);
2133 /* Opportunistic refmod collection, annulled
2134 * if both REF and MOD are being cleared.
2137 pmap_phys_attributes
[pai
] |= attributes
;
2139 if (ept_keep_global_mod
) {
2141 * If the hardware doesn't support AD bits for EPT PTEs and someone is
2142 * requesting that we clear the modified bit for a phys page, we need
2143 * to ensure that there are no EPT mappings for the page with the
2144 * modified bit set. If there are, we cannot clear the global modified bit.
2146 bits
&= ~PHYS_MODIFIED
;
2148 pmap_phys_attributes
[pai
] &= ~(bits
);
2150 /* update this page's "reusable" status */
2151 if (options
& PMAP_OPTIONS_CLEAR_REUSABLE
) {
2152 pmap_phys_attributes
[pai
] &= ~PHYS_REUSABLE
;
2153 } else if (options
& PMAP_OPTIONS_SET_REUSABLE
) {
2154 pmap_phys_attributes
[pai
] |= PHYS_REUSABLE
;
2159 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_END
);
2163 * Check specified attribute bits.
2166 phys_attribute_test(
2170 pv_rooted_entry_t pv_h
;
2171 pv_hashed_entry_t pv_e
;
2179 assert(pn
!= vm_page_fictitious_addr
);
2180 assert((bits
& ~(PHYS_MODIFIED
| PHYS_REFERENCED
)) == 0);
2181 if (pn
== vm_page_guard_addr
) {
2185 pai
= ppn_to_pai(pn
);
2187 if (!IS_MANAGED_PAGE(pai
)) {
2189 * Not a managed page.
2195 * Fast check... if bits already collected
2196 * no need to take any locks...
2197 * if not set, we need to recheck after taking
2198 * the lock in case they got pulled in while
2199 * we were waiting for the lock
2201 if ((pmap_phys_attributes
[pai
] & bits
) == bits
) {
2205 pv_h
= pai_to_pvh(pai
);
2209 attributes
= pmap_phys_attributes
[pai
] & bits
;
2213 * Walk down PV list, checking the mappings until we
2214 * reach the end or we've found the desired attributes.
2216 if (attributes
!= bits
&&
2217 pv_h
->pmap
!= PMAP_NULL
) {
2219 * There are some mappings.
2221 pv_e
= (pv_hashed_entry_t
)pv_h
;
2226 is_ept
= is_ept_pmap(pmap
);
2229 * pick up modify and/or reference bits from mapping
2232 pte
= pmap_pte(pmap
, va
);
2234 attributes
|= (int)(*pte
& bits
);
2236 attributes
|= (int)(ept_refmod_to_physmap((*pte
& (INTEL_EPT_REF
| INTEL_EPT_MOD
))) & (PHYS_MODIFIED
| PHYS_REFERENCED
));
2239 pv_e
= (pv_hashed_entry_t
)queue_next(&pv_e
->qlink
);
2240 } while ((attributes
!= bits
) &&
2241 (pv_e
!= (pv_hashed_entry_t
)pv_h
));
2243 pmap_phys_attributes
[pai
] |= attributes
;
/*
 * Routine:	pmap_change_wiring
 * Function:	Change the wiring attribute for a map/virtual-address
 *		pair.
 * In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(
    pmap_t          map,
    vm_map_offset_t vaddr,
    boolean_t       wired)
{
    pt_entry_t      *pte;

    PMAP_LOCK_SHARED(map);

    if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL) {
        panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
            map, vaddr, wired);
    }

    if (wired && !iswired(*pte)) {
        /*
         * wiring down mapping
         */
        pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
        OSAddAtomic(+1, &map->stats.wired_count);
        pmap_update_pte(pte, 0, PTE_WIRED);
    } else if (!wired && iswired(*pte)) {
        /*
         * unwiring mapping
         */
        assert(map->stats.wired_count >= 1);
        OSAddAtomic(-1, &map->stats.wired_count);
        pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
        pmap_update_pte(pte, PTE_WIRED, 0);
    }

    PMAP_UNLOCK_SHARED(map);
}
2292 * "Backdoor" direct map routine for early mappings.
2293 * Useful for mapping memory outside the range
2294 * Sets A, D and NC if requested
2300 vm_map_offset_t start_addr
,
2301 vm_map_offset_t end_addr
,
2305 pt_entry_t
template;
2308 vm_offset_t base
= virt
;
2309 boolean_t doflush
= FALSE
;
2311 template = pa_to_pte(start_addr
)
2317 if ((flags
& (VM_MEM_NOT_CACHEABLE
| VM_WIMG_USE_DEFAULT
)) == VM_MEM_NOT_CACHEABLE
) {
2318 template |= INTEL_PTE_NCACHE
;
2319 if (!(flags
& (VM_MEM_GUARDED
))) {
2320 template |= INTEL_PTE_PAT
;
2324 if ((prot
& VM_PROT_EXECUTE
) == 0) {
2325 template |= INTEL_PTE_NX
;
2328 if (prot
& VM_PROT_WRITE
) {
2329 template |= INTEL_PTE_WRITE
;
2331 vm_map_offset_t caddr
= start_addr
;
2332 while (caddr
< end_addr
) {
2333 ptep
= pmap_pte(kernel_pmap
, (vm_map_offset_t
)virt
);
2334 if (ptep
== PT_ENTRY_NULL
) {
2335 panic("pmap_map_bd: Invalid kernel address");
2337 if (pte_to_pa(*ptep
)) {
2340 pmap_store_pte(ptep
, template);
2341 pte_increment_pa(template);
2346 pmap_tlbi_range(0, ~0ULL, true, 0);
2347 PMAP_UPDATE_TLBS(kernel_pmap
, base
, base
+ end_addr
- start_addr
);
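/*
 * Illustrative sketch (not original source, hence disabled): using
 * pmap_map_bd() during early boot to map a device (MMIO) physical range
 * uncached into a pre-reserved kernel virtual window.  The addresses and the
 * helper name are hypothetical; the routine returns the first unused virtual
 * address after the mapping.
 */
#if 0
static vm_offset_t
example_map_device_window(vm_offset_t kva, vm_map_offset_t mmio_base,
    vm_map_offset_t mmio_end)
{
    return pmap_map_bd(kva, mmio_base, mmio_end,
               VM_PROT_READ | VM_PROT_WRITE, VM_MEM_NOT_CACHEABLE);
}
#endif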
/* Create a virtual alias beginning at 'ava' of the specified kernel virtual
 * range. The aliased pagetable range is expanded if
 * PMAP_EXPAND_OPTIONS_ALIASMAP is specified. Performs no synchronization,
 * assumes caller has stabilized the source and destination ranges. Currently
 * used to populate sections of the trampoline "doublemap" at CPU startup.
 */

void
pmap_alias(
    vm_offset_t     ava,
    vm_map_offset_t start_addr,
    vm_map_offset_t end_addr,
    vm_prot_t       prot,
    unsigned int    eoptions)
{
    pt_entry_t      prot_template, template;
    pt_entry_t      *aptep, *sptep;

    prot_template = INTEL_PTE_REF | INTEL_PTE_MOD | INTEL_PTE_WIRED | INTEL_PTE_VALID;
    if ((prot & VM_PROT_EXECUTE) == 0) {
        prot_template |= INTEL_PTE_NX;
    }

    if (prot & VM_PROT_WRITE) {
        prot_template |= INTEL_PTE_WRITE;
    }
    assert(((start_addr | end_addr) & PAGE_MASK) == 0);
    while (start_addr < end_addr) {
        aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
        if (aptep == PT_ENTRY_NULL) {
            if (eoptions & PMAP_EXPAND_OPTIONS_ALIASMAP) {
                pmap_expand(kernel_pmap, ava, PMAP_EXPAND_OPTIONS_ALIASMAP);
                aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
            } else {
                panic("pmap_alias: Invalid alias address");
            }
        }
        /* The aliased range should not have any active mappings */
        assert(pte_to_pa(*aptep) == 0);

        sptep = pmap_pte(kernel_pmap, start_addr);
        assert(sptep != PT_ENTRY_NULL && (pte_to_pa(*sptep) != 0));
        template = pa_to_pte(pte_to_pa(*sptep)) | prot_template;
        pmap_store_pte(aptep, template);

        ava += PAGE_SIZE;
        start_addr += PAGE_SIZE;
    }
}
2403 pmap_query_resident(
2407 mach_vm_size_t
*compressed_bytes_p
)
2410 pt_entry_t
*spte
, *epte
;
2412 uint64_t deadline
= 0;
2413 mach_vm_size_t resident_bytes
;
2414 mach_vm_size_t compressed_bytes
;
2419 if (pmap
== PMAP_NULL
|| pmap
== kernel_pmap
|| s64
== e64
) {
2420 if (compressed_bytes_p
) {
2421 *compressed_bytes_p
= 0;
2426 is_ept
= is_ept_pmap(pmap
);
2428 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT
) | DBG_FUNC_START
,
2429 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(s64
),
2430 VM_KERNEL_ADDRHIDE(e64
));
2433 compressed_bytes
= 0;
2435 PMAP_LOCK_EXCLUSIVE(pmap
);
2436 uint32_t traverse_count
= 0;
2439 l64
= (s64
+ PDE_MAPPED_SIZE
) & ~(PDE_MAPPED_SIZE
- 1);
2443 pde
= pmap_pde(pmap
, s64
);
2445 if (pde
&& (*pde
& PTE_VALID_MASK(is_ept
))) {
2446 if (*pde
& PTE_PS
) {
2447 /* superpage: not supported */
2449 spte
= pmap_pte(pmap
,
2450 (s64
& ~(PDE_MAPPED_SIZE
- 1)));
2451 spte
= &spte
[ptenum(s64
)];
2452 epte
= &spte
[intel_btop(l64
- s64
)];
2454 for (; spte
< epte
; spte
++) {
2455 if (pte_to_pa(*spte
) != 0) {
2456 resident_bytes
+= PAGE_SIZE
;
2457 } else if (*spte
& PTE_COMPRESSED
) {
2458 compressed_bytes
+= PAGE_SIZE
;
2465 if ((s64
< e64
) && (traverse_count
++ > PLCHECK_THRESHOLD
)) {
2466 if (deadline
== 0) {
2467 deadline
= rdtsc64() + max_preemption_latency_tsc
;
2469 if (rdtsc64() > deadline
) {
2470 PMAP_UNLOCK_EXCLUSIVE(pmap
);
2471 __builtin_ia32_pause();
2472 PMAP_LOCK_EXCLUSIVE(pmap
);
2473 deadline
= rdtsc64() + max_preemption_latency_tsc
;
2479 PMAP_UNLOCK_EXCLUSIVE(pmap
);
2481 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT
) | DBG_FUNC_END
,
2484 if (compressed_bytes_p
) {
2485 *compressed_bytes_p
= compressed_bytes
;
2487 return resident_bytes
;
2491 pmap_query_page_info(
2504 if (pmap
== PMAP_NULL
|| pmap
== kernel_pmap
) {
2506 return KERN_INVALID_ARGUMENT
;
2510 is_ept
= is_ept_pmap(pmap
);
2512 PMAP_LOCK_EXCLUSIVE(pmap
);
2514 pde
= pmap_pde(pmap
, va
);
2516 !(*pde
& PTE_VALID_MASK(is_ept
)) ||
2521 pte
= pmap_pte(pmap
, va
);
2522 if (pte
== PT_ENTRY_NULL
) {
2526 pa
= pte_to_pa(*pte
);
2528 if (PTE_IS_COMPRESSED(*pte
, pte
)) {
2529 disp
|= PMAP_QUERY_PAGE_COMPRESSED
;
2530 if (*pte
& PTE_COMPRESSED_ALT
) {
2531 disp
|= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT
;
2535 disp
|= PMAP_QUERY_PAGE_PRESENT
;
2537 if (!IS_MANAGED_PAGE(pai
)) {
2538 } else if (pmap_pv_is_altacct(pmap
, va
, pai
)) {
2539 assert(IS_INTERNAL_PAGE(pai
));
2540 disp
|= PMAP_QUERY_PAGE_INTERNAL
;
2541 disp
|= PMAP_QUERY_PAGE_ALTACCT
;
2542 } else if (IS_REUSABLE_PAGE(pai
)) {
2543 disp
|= PMAP_QUERY_PAGE_REUSABLE
;
2544 } else if (IS_INTERNAL_PAGE(pai
)) {
2545 disp
|= PMAP_QUERY_PAGE_INTERNAL
;
2550 PMAP_UNLOCK_EXCLUSIVE(pmap
);
2552 return KERN_SUCCESS
;
void
pmap_set_jit_entitled(__unused pmap_t pmap)
{
    /* The x86 pmap layer does not care if a map has a JIT entry. */
    return;
}

bool
pmap_has_prot_policy(__unused vm_prot_t prot)
{
    /*
     * The x86 pmap layer does not apply any policy to any protection
     * types.
     */
    return FALSE;
}

uint64_t
pmap_release_pages_fast(void)
{
    return 0;
}
void
pmap_trim(__unused pmap_t grand, __unused pmap_t subord, __unused addr64_t vstart, __unused addr64_t nstart, __unused uint64_t size)
{
    return;
}

void
pmap_ledger_alloc_init(size_t size)
{
    panic("%s: unsupported, "
        "size=%lu",
        __func__, size);
}

ledger_t
pmap_ledger_alloc(void)
{
    panic("%s: unsupported",
        __func__);

    return NULL;
}

void
pmap_ledger_free(ledger_t ledger)
{
    panic("%s: unsupported, "
        "ledger=%p",
        __func__, ledger);
}
size_t
pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)
void *
pmap_map_compressor_page(ppnum_t pn)
{
    assertf(IS_MANAGED_PAGE(ppn_to_pai(pn)), "%s called on non-managed page 0x%08x", __func__, pn);
    return PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
}

void
pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
{
}