/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_assert.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <kern/ledger.h>
#include <i386/pmap_internal.h>

void            pmap_remove_range(
	pmap_t          pmap,
	vm_map_offset_t va,
	pt_entry_t      *spte,
	pt_entry_t      *epte);

static void     pmap_remove_range_options(
	pmap_t          pmap,
	vm_map_offset_t va,
	pt_entry_t      *spte,
	pt_entry_t      *epte,
	int             options);

void            pmap_reusable_range(
	pmap_t          pmap,
	vm_map_offset_t va,
	pt_entry_t      *spte,
	pt_entry_t      *epte,
	boolean_t       reusable);

uint32_t pmap_update_clear_pte_count;
/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on a NBPDE boundary.
 */

uint64_t
pmap_shared_region_size_min(__unused pmap_t pmap)
{
	return NBPDE;
}

uint64_t
pmap_commpage_size_min(__unused pmap_t pmap)
{
	return NBPDE;
}

uint64_t
pmap_nesting_size_max(__unused pmap_t pmap)
{
	return 0llu - (uint64_t)NBPDE;
}
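
/*
 * NBPDE is the number of bytes mapped by a single PDE (2MiB on x86_64, i.e.
 * 1ULL << PDESHIFT), so the helpers above express the nesting granularity
 * used throughout this file: shared regions and the commpage are nested in
 * NBPDE units, and pmap_nesting_size_max() is simply the largest
 * NBPDE-aligned 64-bit value (0 - NBPDE wraps to 2^64 - NBPDE).
 */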
/*
 *	kern_return_t pmap_nest(grand, subord, va_start, size)
 *
 *	grand  = the pmap that we will nest subord into
 *	subord = the pmap that goes into the grand
 *	va_start  = start of range in pmap to be inserted
 *	size   = Size of nest area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *
 *	Note that we depend upon higher level VM locks to ensure that things don't change while
 *	we are doing this.  For example, VM should not be doing any pmap enters while it is nesting
 *	or doing two nests at once.
 */

/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */
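
/*
 * A note on the size check in pmap_nest() below: the guard
 * ((size >> 28) > 65536) rejects anything larger than
 * 65536 * 2^28 bytes = 2^44 bytes = 16TB, which is where the
 * "up to 16TB" limit quoted above comes from.
 */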
kern_return_t
pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, uint64_t size)
{
	vm_map_offset_t vaddr;
	pd_entry_t      *pde, *npde;
	pd_entry_t      tpde;
	unsigned int    i;
	uint64_t        num_pde;

	assert(!is_ept_pmap(grand));
	assert(!is_ept_pmap(subord));

	if ((size & (pmap_shared_region_size_min(grand) - 1)) ||
	    (va_start & (pmap_shared_region_size_min(grand) - 1)) ||
	    ((size >> 28) > 65536)) {   /* Max size we can nest is 16TB */
		return KERN_INVALID_VALUE;
	}

	if (size == 0) {
		panic("pmap_nest: size is invalid - %016llX\n", size);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
	    VM_KERNEL_ADDRHIDE(va_start));

	vaddr = (vm_map_offset_t)va_start;
	num_pde = size >> PDESHIFT;

	PMAP_LOCK_EXCLUSIVE(subord);

	subord->pm_shared = TRUE;

	for (i = 0; i < num_pde;) {
		if (((vaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG) {
			npde = pmap64_pdpt(subord, vaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK_EXCLUSIVE(subord);
				pmap_expand_pdpt(subord, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(subord);
				npde = pmap64_pdpt(subord, vaddr);
			}
			*npde |= INTEL_PDPTE_NESTED;
			vaddr += NBPDPT;
			i += (uint32_t)NPDEPG;
		} else {
			npde = pmap_pde(subord, vaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				PMAP_UNLOCK_EXCLUSIVE(subord);
				pmap_expand(subord, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(subord);
				npde = pmap_pde(subord, vaddr);
			}
			vaddr += NBPDE;
			i++;
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(subord);

	vaddr = (vm_map_offset_t)va_start;

	PMAP_LOCK_EXCLUSIVE(grand);

	for (i = 0; i < num_pde;) {
		if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG)) {
			npde = pmap64_pdpt(subord, vaddr);
			if (npde == 0) {
				panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
			}
			tpde = *npde;
			pde = pmap64_pdpt(grand, vaddr);
			if (0 == pde) {
				PMAP_UNLOCK_EXCLUSIVE(grand);
				pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(grand);
				pde = pmap64_pdpt(grand, vaddr);
			}
			if (pde == 0) {
				panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
			}
			pmap_store_pte(pde, tpde);
			vaddr += NBPDPT;
			i += (uint32_t) NPDEPG;
		} else {
			npde = pmap_pde(subord, vaddr);
			if (npde == 0) {
				panic("pmap_nest: no npde, subord %p vaddr 0x%llx", subord, vaddr);
			}
			tpde = *npde;
			pde = pmap_pde(grand, vaddr);
			if (0 == pde) {
				PMAP_UNLOCK_EXCLUSIVE(grand);
				pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
				PMAP_LOCK_EXCLUSIVE(grand);
				pde = pmap_pde(grand, vaddr);
			}
			if (pde == 0) {
				panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
			}
			vaddr += NBPDE;
			pmap_store_pte(pde, tpde);
			i++;
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, KERN_SUCCESS);

	return KERN_SUCCESS;
}
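
/*
 * pmap_nest() above works in two passes: the first pass walks "subord",
 * expanding its page tables as needed and tagging 1GiB-aligned chunks with
 * INTEL_PDPTE_NESTED; the second pass copies the corresponding PDPT or PDE
 * entries into "grand", so both pmaps end up sharing the same lower-level
 * page tables for the nested range.
 */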
/*
 *	kern_return_t pmap_unnest(grand, vaddr)
 *
 *	grand  = the pmap that we will un-nest subord from
 *	vaddr  = start of range in pmap to be unnested
 *
 *	Removes a pmap from another.  This is used to implement shared segments.
 */

kern_return_t
pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size)
{
	pd_entry_t      *pde;
	unsigned int    i;
	uint64_t        num_pde;
	addr64_t        va_start, va_end;
	uint64_t        npdpt = PMAP_INVALID_PDPTNUM;

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));

	if ((size & (pmap_shared_region_size_min(grand) - 1)) ||
	    (vaddr & (pmap_shared_region_size_min(grand) - 1))) {
		panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
		    grand, vaddr, size);
	}

	assert(!is_ept_pmap(grand));

	/* align everything to PDE boundaries */
	va_start = vaddr & ~(NBPDE - 1);
	va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE - 1);
	size = va_end - va_start;

	PMAP_LOCK_EXCLUSIVE(grand);

	num_pde = size >> PDESHIFT;
	vaddr = va_start;

	for (i = 0; i < num_pde;) {
		if (pdptnum(grand, vaddr) != npdpt) {
			npdpt = pdptnum(grand, vaddr);
			pde = pmap64_pdpt(grand, vaddr);
			if (pde && (*pde & INTEL_PDPTE_NESTED)) {
				pmap_store_pte(pde, (pd_entry_t)0);
				i += (uint32_t) NPDEPG;
				vaddr += NBPDPT;
				continue;
			}
		}
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0) {
			panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
		}
		pmap_store_pte(pde, (pd_entry_t)0);
		i++;
		vaddr += NBPDE;
	}

	PMAP_UPDATE_TLBS(grand, va_start, va_end);

	PMAP_UNLOCK_EXCLUSIVE(grand);

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, KERN_SUCCESS);

	return KERN_SUCCESS;
}
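
/*
 * Note on the loop above: a PDPT entry carrying INTEL_PDPTE_NESTED covers a
 * whole 1GiB chunk, so clearing it advances the index by NPDEPG (512 PDEs)
 * at once; ranges nested at the 2MiB level fall through to the per-PDE
 * pmap_store_pte(pde, 0) path instead.
 */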
kern_return_t
pmap_unnest_options(
	pmap_t grand,
	addr64_t vaddr,
	__unused uint64_t size,
	__unused unsigned int options)
{
	return pmap_unnest(grand, vaddr, size);
}
/* Invoked by the Mach VM to determine the platform specific unnest region */

boolean_t
pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e)
{
	pd_entry_t *pdpte;
	boolean_t rval = FALSE;

	PMAP_LOCK_EXCLUSIVE(p);

	pdpte = pmap64_pdpt(p, *s);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*s &= ~(NBPDPT - 1);
		rval = TRUE;
	}

	pdpte = pmap64_pdpt(p, *e);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*e = ((*e + NBPDPT) & ~(NBPDPT - 1));
		rval = TRUE;
	}

	PMAP_UNLOCK_EXCLUSIVE(p);

	return rval;
}
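
/*
 * In other words, if either end of the proposed unnest region falls inside a
 * 1GiB chunk that was nested at the PDPT level, the boundary is widened to
 * the enclosing NBPDPT boundary so the whole nested entry can be dropped.
 */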
pmap_paddr_t
pmap_find_pa(pmap_t pmap, addr64_t va)
{
	pt_entry_t      *ptp;
	pd_entry_t      *pdep;
	pd_entry_t      pde;
	pt_entry_t      pte;
	boolean_t       is_ept, locked = FALSE;
	pmap_paddr_t    pa = 0;

	is_ept = is_ept_pmap(pmap);

	if ((pmap != kernel_pmap) && not_in_kdp) {
		PMAP_LOCK_EXCLUSIVE(pmap);
		locked = TRUE;
	} else {
		mp_disable_preemption();
	}

	if (os_ref_get_count(&pmap->ref_count) == 0) {
		goto pfp_exit;
	}

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
		if (pde & PTE_PS) {
			pa = pte_to_pa(pde) + (va & I386_LPGMASK);
		} else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
				pa = pte_to_pa(pte) + (va & PAGE_MASK);
			}
		}
	}
pfp_exit:
	if (locked) {
		PMAP_UNLOCK_EXCLUSIVE(pmap);
	} else {
		mp_enable_preemption();
	}

	return pa;
}
/*
 *	pmap_find_phys returns the (4K) physical page number containing a
 *	given virtual address in a given pmap.
 *	Note that pmap_pte may return a pde if this virtual address is
 *	mapped by a large page and this is taken into account in order
 *	to return the correct page number in this case.
 */
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	ppnum_t         ppn = 0;
	pmap_paddr_t    pa = 0;

	pa = pmap_find_pa(pmap, va);
	ppn = (ppnum_t) i386_btop(pa);

	return ppn;
}
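
/*
 * i386_btop() is a byte-to-page conversion (pa >> PAGE_SHIFT), so a non-zero
 * physical address always yields the 4K page number that contains it;
 * callers get 0 when no valid mapping exists.
 */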
ppnum_t
pmap_find_phys_nofault(pmap_t pmap, addr64_t va)
{
	if ((pmap == kernel_pmap) ||
	    ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map)))) {
		return pmap_find_phys(pmap, va);
	}
	return 0;
}
/*
 * pmap_get_prot returns the equivalent VM page protections
 * set on a given address, 'va'. This function is used in the
 * ml_static_verify_page_protections() routine which is used
 * by the kext loading code to validate that the TEXT segment
 * of a kext is mapped executable.
 */
kern_return_t
pmap_get_prot(pmap_t pmap, addr64_t va, vm_prot_t *protp)
{
	pt_entry_t      *ptp;
	pd_entry_t      *pdep;
	pd_entry_t      pde;
	pt_entry_t      pte;
	boolean_t       is_ept, locked = FALSE;
	kern_return_t   retval = KERN_FAILURE;
	vm_prot_t       prot = 0;

	is_ept = is_ept_pmap(pmap);

	if ((pmap != kernel_pmap) && not_in_kdp) {
		PMAP_LOCK_EXCLUSIVE(pmap);
		locked = TRUE;
	} else {
		mp_disable_preemption();
	}

	if (os_ref_get_count(&pmap->ref_count) == 0) {
		goto pfp_exit;
	}

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
		if (pde & PTE_PS) {
			prot = VM_PROT_READ;

			if (pde & PTE_WRITE(is_ept)) {
				prot |= VM_PROT_WRITE;
			}
			if (PTE_IS_EXECUTABLE(is_ept, pde)) {
				prot |= VM_PROT_EXECUTE;
			}
			retval = KERN_SUCCESS;
		} else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
				prot = VM_PROT_READ;

				if (pte & PTE_WRITE(is_ept)) {
					prot |= VM_PROT_WRITE;
				}
				if (PTE_IS_EXECUTABLE(is_ept, pte)) {
					prot |= VM_PROT_EXECUTE;
				}
				retval = KERN_SUCCESS;
			}
		}
	}

pfp_exit:
	if (locked) {
		PMAP_UNLOCK_EXCLUSIVE(pmap);
	} else {
		mp_enable_preemption();
	}

	if (protp) {
		*protp = prot;
	}

	return retval;
}
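
/*
 * Typical use, sketched with a hypothetical address: callers such as
 * ml_static_verify_page_protections() check executability roughly as
 *
 *   vm_prot_t prot;
 *   if (pmap_get_prot(kernel_pmap, (addr64_t)kext_text_addr, &prot) == KERN_SUCCESS &&
 *       (prot & VM_PROT_EXECUTE)) {
 *           // mapping is executable
 *   }
 *
 * where kext_text_addr stands in for the page being validated.
 */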
/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed. We assume that this physical page may be mapped in
 * both EPT and normal Intel PTEs, so we convert the attributes
 * to the corresponding format for each pmap.
 *
 * We assert that the passed set of attributes is a subset of the
 * PHYS_CACHEABILITY_MASK.
 */
void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes)
{
	pv_rooted_entry_t  pv_h, pv_e;
	pv_hashed_entry_t  pvh_e, nexth;
	vm_map_offset_t    vaddr;
	pmap_t             pmap;
	pt_entry_t         *ptep;
	boolean_t          is_ept;
	unsigned           ept_attributes;

	assert(IS_MANAGED_PAGE(pn));
	assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);

	/* We don't support the PAT bit for EPT PTEs */
	if (attributes & INTEL_PTE_NCACHE) {
		ept_attributes = INTEL_EPT_NCACHE;
	} else {
		ept_attributes = INTEL_EPT_WB;
	}

	pv_h = pai_to_pvh(pn);
	/* TODO: translate the PHYS_* bits to PTE bits, while they're
	 * currently identical, they may not remain so
	 * Potential optimization (here and in page_protect),
	 * parallel shootdowns, check for redundant
	 * attribute modifications.
	 */

	/*
	 * Alter attributes on all mappings
	 */
	if (pv_h->pmap != PMAP_NULL) {
		pv_e = pv_h;
		pvh_e = (pv_hashed_entry_t)pv_e;

		do {
			pmap = pv_e->pmap;
			vaddr = PVE_VA(pv_e);
			ptep = pmap_pte(pmap, vaddr);

			if (0 == ptep) {
				panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);
			}

			is_ept = is_ept_pmap(pmap);

			nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
			if (!is_ept) {
				pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
			} else {
				pmap_update_pte(ptep, INTEL_EPT_CACHE_MASK, ept_attributes);
			}
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			pvh_e = nexth;
		} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
	}
}
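
/*
 * The walk above follows the standard PV list layout used throughout this
 * file: the rooted entry (pv_h) holds the first mapping in-line, and any
 * additional mappings hang off it as pv_hashed_entry_t nodes on a circular
 * queue, which is why the loop terminates when it wraps back to pv_h.
 */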
void
x86_filter_TLB_coherency_interrupts(boolean_t dofilter)
{
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	if (dofilter) {
		CPU_CR3_MARK_INACTIVE();
	} else {
		CPU_CR3_MARK_ACTIVE();
		pmap_update_interrupt();
	}
}
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */

kern_return_t
pmap_enter(
	pmap_t          pmap,
	vm_map_offset_t vaddr,
	ppnum_t         pn,
	vm_prot_t       prot,
	vm_prot_t       fault_type,
	unsigned int    flags,
	boolean_t       wired)
{
	return pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired,
	           PMAP_EXPAND_OPTIONS_NONE, NULL);
}
#define PTE_LOCK(EPT)   INTEL_PTE_SWLOCK

static inline void PTE_LOCK_LOCK(pt_entry_t *);
static inline void PTE_LOCK_UNLOCK(pt_entry_t *);

void
PTE_LOCK_LOCK(pt_entry_t *lpte)
{
	pt_entry_t pte;
plretry:
	while ((pte = __c11_atomic_load((_Atomic pt_entry_t *)lpte, memory_order_relaxed)) & PTE_LOCK(0)) {
		__builtin_ia32_pause();
	}
	if (__c11_atomic_compare_exchange_strong((_Atomic pt_entry_t *)lpte, &pte, pte | PTE_LOCK(0), memory_order_acquire_smp, TRUE)) {
		return;
	}

	goto plretry;
}

void
PTE_LOCK_UNLOCK(pt_entry_t *lpte)
{
	__c11_atomic_fetch_and((_Atomic pt_entry_t *)lpte, ~PTE_LOCK(0), memory_order_release_smp);
}
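
/*
 * PTE_LOCK()/PTE_LOCK_LOCK()/PTE_LOCK_UNLOCK() implement a one-bit spinlock
 * kept in the PTE itself (INTEL_PTE_SWLOCK): the lock path spins on relaxed
 * loads until the bit clears, then claims it with an acquire
 * compare-and-swap; the unlock path clears it with a release fetch-and.
 * A minimal usage sketch, mirroring pmap_enter_options() below:
 *
 *   PTE_LOCK_LOCK(pte);
 *   ... examine or rewrite *pte, keeping PTE_LOCK(is_ept) set ...
 *   PTE_LOCK_UNLOCK(pte);
 */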
kern_return_t
pmap_enter_options_addr(
	pmap_t          pmap,
	vm_map_address_t v,
	pmap_paddr_t    pa,
	vm_prot_t       prot,
	vm_prot_t       fault_type,
	unsigned int    flags,
	boolean_t       wired,
	unsigned int    options,
	void            *arg)
{
	return pmap_enter_options(pmap, v, intel_btop(pa), prot, fault_type, flags, wired, options, arg);
}
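
/*
 * pmap_enter_options_addr() accepts a raw physical byte address; intel_btop()
 * converts it to a page number so that pmap_enter_options() itself always
 * deals in ppnum_t units.
 */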
kern_return_t
pmap_enter_options(
	pmap_t          pmap,
	vm_map_offset_t vaddr,
	ppnum_t         pn,
	vm_prot_t       prot,
	__unused vm_prot_t fault_type,
	unsigned int    flags,
	boolean_t       wired,
	unsigned int    options,
	void            *arg)
{
	pt_entry_t              *pte = NULL;
	pv_rooted_entry_t       pv_h;
	ppnum_t                 pai;
	pv_hashed_entry_t       pvh_e;
	pv_hashed_entry_t       pvh_new;
	pt_entry_t              template;
	pmap_paddr_t            old_pa;
	pmap_paddr_t            pa = (pmap_paddr_t) i386_ptob(pn);
	boolean_t               need_tlbflush = FALSE;
	boolean_t               set_NX;
	char                    oattr;
	boolean_t               old_pa_locked;
	/* 2MiB mappings are confined to x86_64 by VM */
	boolean_t               superpage = flags & VM_MEM_SUPERPAGE;
	vm_object_t             delpage_pm_obj = NULL;
	uint64_t                delpage_pde_index = 0;
	pt_entry_t              old_pte;
	kern_return_t           kr = KERN_FAILURE;
	boolean_t               is_ept;
	boolean_t               is_altacct;
	boolean_t               ptelocked = FALSE;
662 if (__improbable(pmap
== PMAP_NULL
)) {
663 return KERN_INVALID_ARGUMENT
;
665 if (__improbable(pn
== vm_page_guard_addr
)) {
666 return KERN_INVALID_ARGUMENT
;
669 is_ept
= is_ept_pmap(pmap
);
671 /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
672 * unused value for that scenario.
674 assert(pn
!= vm_page_fictitious_addr
);
677 PMAP_TRACE(PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_START
,
678 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(vaddr
), pn
,
681 if ((prot
& VM_PROT_EXECUTE
)) {
687 #if DEVELOPMENT || DEBUG
688 if (__improbable(set_NX
&& (!nx_enabled
|| !pmap
->nx_enabled
))) {
692 if (__improbable(set_NX
&& (pmap
== kernel_pmap
) &&
693 ((pmap_disable_kstack_nx
&& (flags
& VM_MEM_STACK
)) ||
694 (pmap_disable_kheap_nx
&& !(flags
& VM_MEM_STACK
))))) {
699 pvh_new
= PV_HASHED_ENTRY_NULL
;
701 pvh_e
= PV_HASHED_ENTRY_NULL
;
703 PMAP_LOCK_SHARED(pmap
);
706 * Expand pmap to include this pte. Assume that
707 * pmap is always expanded to include enough hardware
708 * pages to map one VM page.
710 if (__improbable(superpage
)) {
711 while ((pte
= pmap_pde(pmap
, vaddr
)) == PD_ENTRY_NULL
) {
712 /* need room for another pde entry */
713 PMAP_UNLOCK_SHARED(pmap
);
714 kr
= pmap_expand_pdpt(pmap
, vaddr
, options
);
715 if (kr
!= KERN_SUCCESS
) {
718 PMAP_LOCK_SHARED(pmap
);
721 while ((pte
= pmap_pte(pmap
, vaddr
)) == PT_ENTRY_NULL
) {
723 * Must unlock to expand the pmap
724 * going to grow pde level page(s)
726 PMAP_UNLOCK_SHARED(pmap
);
727 kr
= pmap_expand(pmap
, vaddr
, options
);
728 if (kr
!= KERN_SUCCESS
) {
731 PMAP_LOCK_SHARED(pmap
);
735 if (__improbable(options
& PMAP_EXPAND_OPTIONS_NOENTER
)) {
736 PMAP_UNLOCK_SHARED(pmap
);
741 if (__improbable(superpage
&& *pte
&& !(*pte
& PTE_PS
))) {
743 * There is still an empty page table mapped that
744 * was used for a previous base page mapping.
745 * Remember the PDE and the PDE index, so that we
746 * can free the page at the end of this function.
748 delpage_pde_index
= pdeidx(pmap
, vaddr
);
749 delpage_pm_obj
= pmap
->pm_obj
;
750 pmap_store_pte(pte
, 0);
756 old_pa
= pte_to_pa(*pte
);
757 pai
= pa_index(old_pa
);
758 old_pa_locked
= FALSE
;
761 PTE_IS_COMPRESSED(*pte
, pte
, pmap
, vaddr
)) {
763 * "pmap" should be locked at this point, so this should
764 * not race with another pmap_enter() or pmap_remove_range().
766 assert(pmap
!= kernel_pmap
);
768 /* one less "compressed" */
769 OSAddAtomic64(-1, &pmap
->stats
.compressed
);
770 pmap_ledger_debit(pmap
, task_ledgers
.internal_compressed
,
772 if (*pte
& PTE_COMPRESSED_ALT
) {
775 task_ledgers
.alternate_accounting_compressed
,
778 /* was part of the footprint */
779 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
,
782 /* marker will be cleared below */
786 * if we have a previous managed page, lock the pv entry now. after
787 * we lock it, check to see if someone beat us to the lock and if so
790 if ((0 != old_pa
) && IS_MANAGED_PAGE(pai
)) {
792 old_pa_locked
= TRUE
;
793 old_pa
= pte_to_pa(*pte
);
795 UNLOCK_PVH(pai
); /* another path beat us to it */
796 old_pa_locked
= FALSE
;
801 * Special case if the incoming physical page is already mapped
805 pt_entry_t old_attributes
=
806 *pte
& ~(PTE_REF(is_ept
) | PTE_MOD(is_ept
) | PTE_LOCK(is_ept
));
809 * May be changing its wired attribute or protection
812 template = pa_to_pte(pa
);
814 if (__probable(!is_ept
)) {
815 template |= INTEL_PTE_VALID
;
817 template |= INTEL_EPT_IPAT
;
820 template |= pmap_get_cache_attributes(pa_index(pa
), is_ept
);
823 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
825 if (!is_ept
&& (VM_MEM_NOT_CACHEABLE
==
826 (flags
& (VM_MEM_NOT_CACHEABLE
| VM_WIMG_USE_DEFAULT
)))) {
827 if (!(flags
& VM_MEM_GUARDED
)) {
828 template |= INTEL_PTE_PAT
;
830 template |= INTEL_PTE_NCACHE
;
832 if (pmap
!= kernel_pmap
&& !is_ept
) {
833 template |= INTEL_PTE_USER
;
836 if (prot
& VM_PROT_READ
) {
837 template |= PTE_READ(is_ept
);
840 if (prot
& VM_PROT_WRITE
) {
841 template |= PTE_WRITE(is_ept
);
842 if (is_ept
&& !pmap_ept_support_ad
) {
843 template |= PTE_MOD(is_ept
);
845 assert(IS_MANAGED_PAGE(pai
));
846 pmap_phys_attributes
[pai
] |= PHYS_MODIFIED
;
850 if (prot
& VM_PROT_EXECUTE
) {
852 template = pte_set_ex(template, is_ept
);
856 template = pte_remove_ex(template, is_ept
);
860 template |= PTE_WIRED
;
861 if (!iswired(old_attributes
)) {
862 OSAddAtomic(+1, &pmap
->stats
.wired_count
);
863 pmap_ledger_credit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
866 if (iswired(old_attributes
)) {
867 assert(pmap
->stats
.wired_count
>= 1);
868 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
869 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
873 if (superpage
) { /* this path can not be used */
874 template |= PTE_PS
; /* to change the page size! */
876 if (old_attributes
== template) {
877 goto dont_update_pte
;
880 /* Determine delta, PV locked */
882 ((old_attributes
^ template) != PTE_WIRED
);
884 /* Optimisation: avoid TLB flush when adding writability */
885 if (need_tlbflush
== TRUE
&& !(old_attributes
& PTE_WRITE(is_ept
))) {
886 if ((old_attributes
^ template) == PTE_WRITE(is_ept
)) {
887 need_tlbflush
= FALSE
;
891 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
892 if (__improbable(is_ept
&& !pmap_ept_support_ad
)) {
893 template |= PTE_REF(is_ept
);
895 assert(IS_MANAGED_PAGE(pai
));
896 pmap_phys_attributes
[pai
] |= PHYS_REFERENCED
;
900 /* store modified PTE and preserve RC bits */
901 pt_entry_t npte
, opte
;
903 assert((*pte
& PTE_LOCK(is_ept
)) != 0);
907 npte
= template | (opte
& (PTE_REF(is_ept
) |
908 PTE_MOD(is_ept
))) | PTE_LOCK(is_ept
);
909 } while (!pmap_cmpx_pte(pte
, opte
, npte
));
914 old_pa_locked
= FALSE
;
	/*
	 * Outline of code from here:
	 *   1) If va was mapped, update TLBs, remove the mapping
	 *      and remove old pvlist entry.
	 *   2) Add pvlist entry for new mapping
	 *   3) Enter new mapping.
	 *
	 * If the old physical page is not managed step 1) is skipped
	 * (except for updating the TLBs), and the mapping is
	 * overwritten at step 3).  If the new physical page is not
	 * managed, step 2) is skipped.
	 */
931 /* TODO: add opportunistic refmod collect */
932 if (old_pa
!= (pmap_paddr_t
) 0) {
933 boolean_t was_altacct
= FALSE
;
936 * Don't do anything to pages outside valid memory here.
937 * Instead convince the code that enters a new mapping
938 * to overwrite the old one.
941 /* invalidate the PTE */
942 pmap_update_pte(pte
, PTE_VALID_MASK(is_ept
), 0);
943 /* propagate invalidate everywhere */
944 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
945 /* remember reference and change */
947 oattr
= (char) (old_pte
& (PTE_MOD(is_ept
) | PTE_REF(is_ept
)));
948 /* completely invalidate the PTE */
949 pmap_store_pte(pte
, PTE_LOCK(is_ept
));
951 if (IS_MANAGED_PAGE(pai
)) {
953 * Remove the mapping from the pvlist for
954 * this physical page.
955 * We'll end up with either a rooted pv or a
958 pvh_e
= pmap_pv_remove(pmap
, vaddr
, (ppnum_t
*) &pai
, &old_pte
, &was_altacct
);
961 if (IS_MANAGED_PAGE(pai
)) {
962 pmap_assert(old_pa_locked
== TRUE
);
963 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
964 assert(pmap
->stats
.resident_count
>= 1);
965 OSAddAtomic(-1, &pmap
->stats
.resident_count
);
966 if (pmap
!= kernel_pmap
) {
967 /* update pmap stats */
968 if (IS_REUSABLE_PAGE(pai
)) {
970 (pmap
->stats
.reusable
> 0,
972 pmap
->stats
.reusable
));
973 OSAddAtomic(-1, &pmap
->stats
.reusable
);
974 } else if (IS_INTERNAL_PAGE(pai
)) {
976 (pmap
->stats
.internal
> 0,
978 pmap
->stats
.internal
));
979 OSAddAtomic(-1, &pmap
->stats
.internal
);
982 (pmap
->stats
.external
> 0,
984 pmap
->stats
.external
));
985 OSAddAtomic(-1, &pmap
->stats
.external
);
990 assert(IS_INTERNAL_PAGE(pai
));
991 pmap_ledger_debit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
992 pmap_ledger_debit(pmap
, task_ledgers
.alternate_accounting
, PAGE_SIZE
);
993 } else if (IS_REUSABLE_PAGE(pai
)) {
994 assert(!was_altacct
);
995 assert(IS_INTERNAL_PAGE(pai
));
996 /* was already not in phys_footprint */
997 } else if (IS_INTERNAL_PAGE(pai
)) {
998 assert(!was_altacct
);
999 assert(!IS_REUSABLE_PAGE(pai
));
1000 pmap_ledger_debit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
1001 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
1003 /* not an internal page */
1006 if (iswired(*pte
)) {
1007 assert(pmap
->stats
.wired_count
>= 1);
1008 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
1009 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
,
1014 pmap_phys_attributes
[pai
] |= oattr
;
1016 pmap_phys_attributes
[pai
] |= ept_refmod_to_physmap(oattr
);
1020 * old_pa is not managed.
1021 * Do removal part of accounting.
1024 if (pmap
!= kernel_pmap
) {
1026 assert(pmap
->stats
.device
> 0);
1027 OSAddAtomic(-1, &pmap
->stats
.device
);
1030 if (iswired(*pte
)) {
1031 assert(pmap
->stats
.wired_count
>= 1);
1032 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
1033 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
1039 * if we had a previously managed paged locked, unlock it now
1041 if (old_pa_locked
) {
1043 old_pa_locked
= FALSE
;
1046 pai
= pa_index(pa
); /* now working with new incoming phys page */
1047 if (IS_MANAGED_PAGE(pai
)) {
1049 * Step 2) Enter the mapping in the PV list for this
1052 pv_h
= pai_to_pvh(pai
);
1056 if (pv_h
->pmap
== PMAP_NULL
) {
1058 * No mappings yet, use rooted pv
1060 pv_h
->va_and_flags
= vaddr
;
1062 queue_init(&pv_h
->qlink
);
1064 if (options
& PMAP_OPTIONS_INTERNAL
) {
1065 pmap_phys_attributes
[pai
] |= PHYS_INTERNAL
;
1067 pmap_phys_attributes
[pai
] &= ~PHYS_INTERNAL
;
1069 if (options
& PMAP_OPTIONS_REUSABLE
) {
1070 pmap_phys_attributes
[pai
] |= PHYS_REUSABLE
;
1072 pmap_phys_attributes
[pai
] &= ~PHYS_REUSABLE
;
1074 if ((options
& PMAP_OPTIONS_ALT_ACCT
) &&
1075 IS_INTERNAL_PAGE(pai
)) {
1076 pv_h
->va_and_flags
|= PVE_IS_ALTACCT
;
1079 pv_h
->va_and_flags
&= ~PVE_IS_ALTACCT
;
1084 * Add new pv_hashed_entry after header.
1086 if ((PV_HASHED_ENTRY_NULL
== pvh_e
) && pvh_new
) {
1088 pvh_new
= PV_HASHED_ENTRY_NULL
;
1089 } else if (PV_HASHED_ENTRY_NULL
== pvh_e
) {
1090 PV_HASHED_ALLOC(&pvh_e
);
1091 if (PV_HASHED_ENTRY_NULL
== pvh_e
) {
1093 * the pv list is empty. if we are on
1094 * the kernel pmap we'll use one of
1095 * the special private kernel pv_e's,
1096 * else, we need to unlock
1097 * everything, zalloc a pv_e, and
1098 * restart bringing in the pv_e with
1101 if (kernel_pmap
== pmap
) {
1102 PV_HASHED_KERN_ALLOC(&pvh_e
);
1105 PTE_LOCK_UNLOCK(pte
);
1106 PMAP_UNLOCK_SHARED(pmap
);
1107 pmap_pv_throttle(pmap
);
1108 pvh_new
= (pv_hashed_entry_t
) zalloc(pv_hashed_list_zone
);
1114 if (PV_HASHED_ENTRY_NULL
== pvh_e
) {
1115 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
1118 pvh_e
->va_and_flags
= vaddr
;
1121 if ((options
& PMAP_OPTIONS_ALT_ACCT
) &&
1122 IS_INTERNAL_PAGE(pai
)) {
1123 pvh_e
->va_and_flags
|= PVE_IS_ALTACCT
;
1126 pvh_e
->va_and_flags
&= ~PVE_IS_ALTACCT
;
1129 pv_hash_add(pvh_e
, pv_h
);
1132 * Remember that we used the pvlist entry.
1134 pvh_e
= PV_HASHED_ENTRY_NULL
;
1138 * only count the mapping
1139 * for 'managed memory'
1141 pmap_ledger_credit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
1142 OSAddAtomic(+1, &pmap
->stats
.resident_count
);
1143 if (pmap
->stats
.resident_count
> pmap
->stats
.resident_max
) {
1144 pmap
->stats
.resident_max
= pmap
->stats
.resident_count
;
1146 if (pmap
!= kernel_pmap
) {
1147 /* update pmap stats */
1148 if (IS_REUSABLE_PAGE(pai
)) {
1149 OSAddAtomic(+1, &pmap
->stats
.reusable
);
1150 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
1151 } else if (IS_INTERNAL_PAGE(pai
)) {
1152 OSAddAtomic(+1, &pmap
->stats
.internal
);
1153 PMAP_STATS_PEAK(pmap
->stats
.internal
);
1155 OSAddAtomic(+1, &pmap
->stats
.external
);
1156 PMAP_STATS_PEAK(pmap
->stats
.external
);
1159 /* update ledgers */
1161 /* internal but also alternate accounting */
1162 assert(IS_INTERNAL_PAGE(pai
));
1163 pmap_ledger_credit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
1164 pmap_ledger_credit(pmap
, task_ledgers
.alternate_accounting
, PAGE_SIZE
);
1165 /* alternate accounting, so not in footprint */
1166 } else if (IS_REUSABLE_PAGE(pai
)) {
1167 assert(!is_altacct
);
1168 assert(IS_INTERNAL_PAGE(pai
));
1169 /* internal but reusable: not in footprint */
1170 } else if (IS_INTERNAL_PAGE(pai
)) {
1171 assert(!is_altacct
);
1172 assert(!IS_REUSABLE_PAGE(pai
));
1173 /* internal: add to footprint */
1174 pmap_ledger_credit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
1175 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
1177 /* not internal: not in footprint */
1180 } else if (last_managed_page
== 0) {
1181 /* Account for early mappings created before "managed pages"
1182 * are determined. Consider consulting the available DRAM map.
1184 pmap_ledger_credit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
1185 OSAddAtomic(+1, &pmap
->stats
.resident_count
);
1186 if (pmap
!= kernel_pmap
) {
1188 OSAddAtomic(+1, &pmap
->stats
.device
);
1189 PMAP_STATS_PEAK(pmap
->stats
.device
);
1194 * Step 3) Enter the mapping.
1196 * Build a template to speed up entering -
1197 * only the pfn changes.
1199 template = pa_to_pte(pa
);
1202 template |= INTEL_PTE_VALID
;
1204 template |= INTEL_EPT_IPAT
;
1209 * DRK: It may be worth asserting on cache attribute flags that diverge
1210 * from the existing physical page attributes.
1213 template |= pmap_get_cache_attributes(pa_index(pa
), is_ept
);
1216 * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
1218 if (!is_ept
&& (flags
& VM_MEM_NOT_CACHEABLE
)) {
1219 if (!(flags
& VM_MEM_GUARDED
)) {
1220 template |= INTEL_PTE_PAT
;
1222 template |= INTEL_PTE_NCACHE
;
1224 if (pmap
!= kernel_pmap
&& !is_ept
) {
1225 template |= INTEL_PTE_USER
;
1227 if (prot
& VM_PROT_READ
) {
1228 template |= PTE_READ(is_ept
);
1230 if (prot
& VM_PROT_WRITE
) {
1231 template |= PTE_WRITE(is_ept
);
1232 if (is_ept
&& !pmap_ept_support_ad
) {
1233 template |= PTE_MOD(is_ept
);
1234 if (IS_MANAGED_PAGE(pai
)) {
1235 pmap_phys_attributes
[pai
] |= PHYS_MODIFIED
;
1239 if (prot
& VM_PROT_EXECUTE
) {
1240 assert(set_NX
== 0);
1241 template = pte_set_ex(template, is_ept
);
1245 template = pte_remove_ex(template, is_ept
);
1248 template |= INTEL_PTE_WIRED
;
1249 OSAddAtomic(+1, &pmap
->stats
.wired_count
);
1250 pmap_ledger_credit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
1252 if (__improbable(superpage
)) {
1253 template |= INTEL_PTE_PS
;
1256 /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
1257 if (__improbable(is_ept
&& !pmap_ept_support_ad
)) {
1258 template |= PTE_REF(is_ept
);
1259 if (IS_MANAGED_PAGE(pai
)) {
1260 pmap_phys_attributes
[pai
] |= PHYS_REFERENCED
;
1263 template |= PTE_LOCK(is_ept
);
1264 pmap_store_pte(pte
, template);
1267 * if this was a managed page we delayed unlocking the pv until here
1268 * to prevent pmap_page_protect et al from finding it until the pte
1271 if (IS_MANAGED_PAGE(pai
)) {
1275 if (need_tlbflush
== TRUE
) {
1276 if (options
& PMAP_OPTIONS_NOFLUSH
) {
1277 PMAP_UPDATE_TLBS_DELAYED(pmap
, vaddr
, vaddr
+ PAGE_SIZE
, (pmap_flush_context
*)arg
);
1279 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
1283 PTE_LOCK_UNLOCK(pte
);
1285 PMAP_UNLOCK_SHARED(pmap
);
1287 if (pvh_e
!= PV_HASHED_ENTRY_NULL
) {
1288 PV_HASHED_FREE_LIST(pvh_e
, pvh_e
, 1);
1290 if (pvh_new
!= PV_HASHED_ENTRY_NULL
) {
1291 PV_HASHED_KERN_FREE_LIST(pvh_new
, pvh_new
, 1);
1294 if (delpage_pm_obj
) {
1297 vm_object_lock(delpage_pm_obj
);
1298 m
= vm_page_lookup(delpage_pm_obj
, (delpage_pde_index
* PAGE_SIZE
));
1299 if (m
== VM_PAGE_NULL
) {
1300 panic("pmap_enter: pte page not in object");
1303 vm_object_unlock(delpage_pm_obj
);
1304 OSAddAtomic(-1, &inuse_ptepages_count
);
1305 PMAP_ZINFO_PFREE(pmap
, PAGE_SIZE
);
1310 PMAP_TRACE(PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_END
, kr
);
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */

void
pmap_remove_range(
	pmap_t          pmap,
	vm_map_offset_t start_vaddr,
	pt_entry_t      *spte,
	pt_entry_t      *epte)
{
	pmap_remove_range_options(pmap, start_vaddr, spte, epte,
	    PMAP_OPTIONS_REMOVE);
}
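
/*
 * pmap_remove_range_options() below is structured as two passes over the PTE
 * run: a first "freeze" pass that clears the valid bits (and any compressed
 * markers), followed by a single TLB shootdown and a second pass that does
 * the PV-list removal and the statistics/ledger accounting.
 */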
1338 pmap_remove_range_options(
1340 vm_map_offset_t start_vaddr
,
1346 pv_hashed_entry_t pvh_et
= PV_HASHED_ENTRY_NULL
;
1347 pv_hashed_entry_t pvh_eh
= PV_HASHED_ENTRY_NULL
;
1348 pv_hashed_entry_t pvh_e
;
1350 int num_removed
, num_unwired
, num_found
, num_invalid
;
1351 int stats_external
, stats_internal
, stats_reusable
;
1352 uint64_t stats_compressed
;
1353 int ledgers_internal
, ledgers_alt_internal
;
1354 uint64_t ledgers_compressed
, ledgers_alt_compressed
;
1357 vm_map_offset_t vaddr
;
1358 boolean_t is_ept
= is_ept_pmap(pmap
);
1359 boolean_t was_altacct
;
1368 stats_compressed
= 0;
1369 ledgers_internal
= 0;
1370 ledgers_compressed
= 0;
1371 ledgers_alt_internal
= 0;
1372 ledgers_alt_compressed
= 0;
1374 /* invalidate the PTEs first to "freeze" them */
1375 for (cpte
= spte
, vaddr
= start_vaddr
;
1377 cpte
++, vaddr
+= PAGE_SIZE_64
) {
1378 pt_entry_t p
= *cpte
;
1382 if ((options
& PMAP_OPTIONS_REMOVE
) &&
1383 (PTE_IS_COMPRESSED(p
, cpte
, pmap
, vaddr
))) {
1384 assert(pmap
!= kernel_pmap
);
1385 /* one less "compressed"... */
1387 ledgers_compressed
++;
1388 if (p
& PTE_COMPRESSED_ALT
) {
1389 /* ... but it used to be "ALTACCT" */
1390 ledgers_alt_compressed
++;
1392 /* clear marker(s) */
1393 /* XXX probably does not need to be atomic! */
1394 pmap_update_pte(cpte
, INTEL_PTE_COMPRESSED_MASK
, 0);
1406 if (!IS_MANAGED_PAGE(pai
)) {
1408 * Outside range of managed physical memory.
1409 * Just remove the mappings.
1411 pmap_store_pte(cpte
, 0);
1415 if ((p
& PTE_VALID_MASK(is_ept
)) == 0) {
1419 /* invalidate the PTE */
1420 pmap_update_pte(cpte
, PTE_VALID_MASK(is_ept
), 0);
1423 if (num_found
== 0) {
1424 /* nothing was changed: we're done */
1428 /* propagate the invalidates to other CPUs */
1430 PMAP_UPDATE_TLBS(pmap
, start_vaddr
, vaddr
);
1432 for (cpte
= spte
, vaddr
= start_vaddr
;
1434 cpte
++, vaddr
+= PAGE_SIZE_64
) {
1435 pa
= pte_to_pa(*cpte
);
1437 check_pte_for_compressed_marker
:
1439 * This PTE could have been replaced with a
1440 * "compressed" marker after our first "freeze"
1441 * loop above, so check again.
1443 if ((options
& PMAP_OPTIONS_REMOVE
) &&
1444 (PTE_IS_COMPRESSED(*cpte
, cpte
, pmap
, vaddr
))) {
1445 assert(pmap
!= kernel_pmap
);
1446 /* one less "compressed"... */
1448 ledgers_compressed
++;
1449 if (*cpte
& PTE_COMPRESSED_ALT
) {
1450 /* ... but it used to be "ALTACCT" */
1451 ledgers_alt_compressed
++;
1453 pmap_store_pte(cpte
, 0);
1462 pa
= pte_to_pa(*cpte
);
1465 goto check_pte_for_compressed_marker
;
1469 * Remove the mapping from the pvlist for this physical page.
1471 pvh_e
= pmap_pv_remove(pmap
, vaddr
, (ppnum_t
*) &pai
, cpte
, &was_altacct
);
1474 /* update pmap stats */
1475 if (IS_REUSABLE_PAGE(pai
)) {
1477 } else if (IS_INTERNAL_PAGE(pai
)) {
1482 /* update ledgers */
1484 /* internal and alternate accounting */
1485 assert(IS_INTERNAL_PAGE(pai
));
1487 ledgers_alt_internal
++;
1488 } else if (IS_REUSABLE_PAGE(pai
)) {
1489 /* internal but reusable */
1490 assert(!was_altacct
);
1491 assert(IS_INTERNAL_PAGE(pai
));
1492 } else if (IS_INTERNAL_PAGE(pai
)) {
1494 assert(!was_altacct
);
1495 assert(!IS_REUSABLE_PAGE(pai
));
1502 * Get the modify and reference bits, then
1503 * nuke the entry in the page table
1505 /* remember reference and change */
1507 pmap_phys_attributes
[pai
] |=
1508 *cpte
& (PHYS_MODIFIED
| PHYS_REFERENCED
);
1510 pmap_phys_attributes
[pai
] |=
1511 ept_refmod_to_physmap((*cpte
& (INTEL_EPT_REF
| INTEL_EPT_MOD
))) & (PHYS_MODIFIED
| PHYS_REFERENCED
);
1514 /* completely invalidate the PTE */
1515 pmap_store_pte(cpte
, 0);
1519 if (pvh_e
!= PV_HASHED_ENTRY_NULL
) {
1520 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1523 if (pvh_et
== PV_HASHED_ENTRY_NULL
) {
1528 /* We can encounter at most 'num_found' PTEs for this level
1529 * Fewer may be encountered if some were replaced by
1530 * compressed markers. No new valid PTEs can be created
1531 * since the pmap lock is held exclusively.
1533 if (num_removed
== num_found
) {
1538 if (pvh_eh
!= PV_HASHED_ENTRY_NULL
) {
1539 PV_HASHED_FREE_LIST(pvh_eh
, pvh_et
, pvh_cnt
);
1546 if (pmap
->stats
.resident_count
< num_removed
) {
1547 panic("pmap_remove_range: resident_count");
1551 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, machine_ptob(num_removed
));
1552 PMAP_STATS_ASSERTF((pmap
->stats
.resident_count
>= num_removed
,
1553 "pmap=%p num_removed=%d stats.resident_count=%d",
1554 pmap
, num_removed
, pmap
->stats
.resident_count
));
1555 OSAddAtomic(-num_removed
, &pmap
->stats
.resident_count
);
1558 if (pmap
!= kernel_pmap
) {
1559 PMAP_STATS_ASSERTF((pmap
->stats
.external
>= stats_external
,
1560 "pmap=%p stats_external=%d stats.external=%d",
1561 pmap
, stats_external
, pmap
->stats
.external
));
1562 PMAP_STATS_ASSERTF((pmap
->stats
.internal
>= stats_internal
,
1563 "pmap=%p stats_internal=%d stats.internal=%d",
1564 pmap
, stats_internal
, pmap
->stats
.internal
));
1565 PMAP_STATS_ASSERTF((pmap
->stats
.reusable
>= stats_reusable
,
1566 "pmap=%p stats_reusable=%d stats.reusable=%d",
1567 pmap
, stats_reusable
, pmap
->stats
.reusable
));
1568 PMAP_STATS_ASSERTF((pmap
->stats
.compressed
>= stats_compressed
,
1569 "pmap=%p stats_compressed=%lld, stats.compressed=%lld",
1570 pmap
, stats_compressed
, pmap
->stats
.compressed
));
1572 /* update pmap stats */
1573 if (stats_external
) {
1574 OSAddAtomic(-stats_external
, &pmap
->stats
.external
);
1576 if (stats_internal
) {
1577 OSAddAtomic(-stats_internal
, &pmap
->stats
.internal
);
1579 if (stats_reusable
) {
1580 OSAddAtomic(-stats_reusable
, &pmap
->stats
.reusable
);
1582 if (stats_compressed
) {
1583 OSAddAtomic64(-stats_compressed
, &pmap
->stats
.compressed
);
1585 /* update ledgers */
1587 if (ledgers_internal
) {
1588 pmap_ledger_debit(pmap
,
1589 task_ledgers
.internal
,
1590 machine_ptob(ledgers_internal
));
1592 if (ledgers_compressed
) {
1593 pmap_ledger_debit(pmap
,
1594 task_ledgers
.internal_compressed
,
1595 machine_ptob(ledgers_compressed
));
1597 if (ledgers_alt_internal
) {
1598 pmap_ledger_debit(pmap
,
1599 task_ledgers
.alternate_accounting
,
1600 machine_ptob(ledgers_alt_internal
));
1602 if (ledgers_alt_compressed
) {
1603 pmap_ledger_debit(pmap
,
1604 task_ledgers
.alternate_accounting_compressed
,
1605 machine_ptob(ledgers_alt_compressed
));
1608 uint64_t net_debit
= (ledgers_internal
- ledgers_alt_internal
) + (ledgers_compressed
- ledgers_alt_compressed
);
1610 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, machine_ptob(net_debit
));
1615 if (pmap
->stats
.wired_count
< num_unwired
) {
1616 panic("pmap_remove_range: wired_count");
1619 PMAP_STATS_ASSERTF((pmap
->stats
.wired_count
>= num_unwired
,
1620 "pmap=%p num_unwired=%d stats.wired_count=%d",
1621 pmap
, num_unwired
, pmap
->stats
.wired_count
));
1623 if (num_unwired
!= 0) {
1624 OSAddAtomic(-num_unwired
, &pmap
->stats
.wired_count
);
1625 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, machine_ptob(num_unwired
));
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
void
pmap_remove(pmap_t map, addr64_t s64, addr64_t e64)
{
	pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
}

#define PLCHECK_THRESHOLD (2)
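
/*
 * PLCHECK_THRESHOLD bounds how many level-2 chunks pmap_remove_options() (and
 * pmap_query_resident() further below) will process before it starts checking
 * rdtsc64() against a preemption-latency deadline; once the deadline passes,
 * the pmap lock is dropped and retaken so other CPUs are not held off for the
 * whole traversal.
 */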
1649 pmap_remove_options(
1656 pt_entry_t
*spte
, *epte
;
1658 uint64_t deadline
= 0;
1663 if (map
== PMAP_NULL
|| s64
== e64
) {
1667 is_ept
= is_ept_pmap(map
);
1669 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE
) | DBG_FUNC_START
,
1670 VM_KERNEL_ADDRHIDE(map
), VM_KERNEL_ADDRHIDE(s64
),
1671 VM_KERNEL_ADDRHIDE(e64
));
1673 PMAP_LOCK_EXCLUSIVE(map
);
1674 uint32_t traverse_count
= 0;
1677 pml4_entry_t
*pml4e
= pmap64_pml4(map
, s64
);
1678 if ((pml4e
== NULL
) ||
1679 ((*pml4e
& PTE_VALID_MASK(is_ept
)) == 0)) {
1680 s64
= (s64
+ NBPML4
) & ~(PML4MASK
);
1683 pdpt_entry_t
*pdpte
= pmap64_pdpt(map
, s64
);
1684 if ((pdpte
== NULL
) ||
1685 ((*pdpte
& PTE_VALID_MASK(is_ept
)) == 0)) {
1686 s64
= (s64
+ NBPDPT
) & ~(PDPTMASK
);
1690 l64
= (s64
+ PDE_MAPPED_SIZE
) & ~(PDE_MAPPED_SIZE
- 1);
1696 pde
= pmap_pde(map
, s64
);
1698 if (pde
&& (*pde
& PTE_VALID_MASK(is_ept
))) {
1699 if (*pde
& PTE_PS
) {
1701 * If we're removing a superpage, pmap_remove_range()
1702 * must work on level 2 instead of level 1; and we're
1703 * only passing a single level 2 entry instead of a
1707 epte
= spte
+ 1; /* excluded */
1709 spte
= pmap_pte(map
, (s64
& ~(PDE_MAPPED_SIZE
- 1)));
1710 spte
= &spte
[ptenum(s64
)];
1711 epte
= &spte
[intel_btop(l64
- s64
)];
1713 pmap_remove_range_options(map
, s64
, spte
, epte
,
1718 if ((s64
< e64
) && (traverse_count
++ > PLCHECK_THRESHOLD
)) {
1719 if (deadline
== 0) {
1720 deadline
= rdtsc64_nofence() + max_preemption_latency_tsc
;
1722 if (rdtsc64_nofence() > deadline
) {
1723 PMAP_UNLOCK_EXCLUSIVE(map
);
1724 __builtin_ia32_pause();
1725 PMAP_LOCK_EXCLUSIVE(map
);
1726 deadline
= rdtsc64_nofence() + max_preemption_latency_tsc
;
1732 PMAP_UNLOCK_EXCLUSIVE(map
);
1734 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE
) | DBG_FUNC_END
);
void
pmap_page_protect(
	ppnum_t         pn,
	vm_prot_t       prot)
{
	pmap_page_protect_options(pn, prot, 0, NULL);
}

/*
 *	Routine:	pmap_page_protect_options
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
void
pmap_page_protect_options(
	ppnum_t         pn,
	vm_prot_t       prot,
	unsigned int    options,
	void            *arg)
{
1759 pv_hashed_entry_t pvh_eh
= PV_HASHED_ENTRY_NULL
;
1760 pv_hashed_entry_t pvh_et
= PV_HASHED_ENTRY_NULL
;
1761 pv_hashed_entry_t nexth
;
1763 pv_rooted_entry_t pv_h
;
1764 pv_rooted_entry_t pv_e
;
1765 pv_hashed_entry_t pvh_e
;
1770 pt_entry_t new_pte_value
;
1774 assert(pn
!= vm_page_fictitious_addr
);
1775 if (pn
== vm_page_guard_addr
) {
1779 pai
= ppn_to_pai(pn
);
1781 if (!IS_MANAGED_PAGE(pai
)) {
1783 * Not a managed page.
1788 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT
) | DBG_FUNC_START
, pn
, prot
);
1791 * Determine the new protection.
1795 case VM_PROT_READ
| VM_PROT_EXECUTE
:
1799 return; /* nothing to do */
1805 pv_h
= pai_to_pvh(pai
);
1811 * Walk down PV list, if any, changing or removing all mappings.
1813 if (pv_h
->pmap
== PMAP_NULL
) {
1818 pvh_e
= (pv_hashed_entry_t
) pv_e
; /* cheat */
1821 vm_map_offset_t vaddr
;
1823 if ((options
& PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
) &&
1824 (pmap_phys_attributes
[pai
] & PHYS_MODIFIED
)) {
1825 /* page was modified, so it will be compressed */
1826 options
&= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
;
1827 options
|= PMAP_OPTIONS_COMPRESSOR
;
1831 is_ept
= is_ept_pmap(pmap
);
1832 vaddr
= PVE_VA(pv_e
);
1833 pte
= pmap_pte(pmap
, vaddr
);
1835 pmap_assert2((pa_index(pte_to_pa(*pte
)) == pn
),
1836 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn
, pmap
, vaddr
, *pte
);
1839 panic("pmap_page_protect() "
1840 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1843 nexth
= (pv_hashed_entry_t
) queue_next(&pvh_e
->qlink
);
1846 * Remove the mapping if new protection is NONE
1849 /* Remove per-pmap wired count */
1850 if (iswired(*pte
)) {
1851 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
1852 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
1855 if (pmap
!= kernel_pmap
&&
1856 (options
& PMAP_OPTIONS_COMPRESSOR
) &&
1857 IS_INTERNAL_PAGE(pai
)) {
1858 assert(!PTE_IS_COMPRESSED(*pte
, pte
, pmap
, vaddr
));
1859 /* mark this PTE as having been "compressed" */
1860 new_pte_value
= PTE_COMPRESSED
;
1861 if (IS_ALTACCT_PAGE(pai
, pv_e
)) {
1862 new_pte_value
|= PTE_COMPRESSED_ALT
;
1868 if (options
& PMAP_OPTIONS_NOREFMOD
) {
1869 pmap_store_pte(pte
, new_pte_value
);
1871 if (options
& PMAP_OPTIONS_NOFLUSH
) {
1872 PMAP_UPDATE_TLBS_DELAYED(pmap
, vaddr
, vaddr
+ PAGE_SIZE
, (pmap_flush_context
*)arg
);
1874 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
1878 * Remove the mapping, collecting dirty bits.
1880 pmap_update_pte(pte
, PTE_VALID_MASK(is_ept
), 0);
1882 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
1884 pmap_phys_attributes
[pai
] |=
1885 *pte
& (PHYS_MODIFIED
| PHYS_REFERENCED
);
1887 pmap_phys_attributes
[pai
] |=
1888 ept_refmod_to_physmap((*pte
& (INTEL_EPT_REF
| INTEL_EPT_MOD
))) & (PHYS_MODIFIED
| PHYS_REFERENCED
);
1891 PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
) &&
1892 IS_INTERNAL_PAGE(pai
) &&
1893 (pmap_phys_attributes
[pai
] &
1896 * Page is actually "modified" and
1897 * will be compressed. Start
1898 * accounting for it as "compressed".
1900 assert(!(options
& PMAP_OPTIONS_COMPRESSOR
));
1901 options
&= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
;
1902 options
|= PMAP_OPTIONS_COMPRESSOR
;
1903 assert(new_pte_value
== 0);
1904 if (pmap
!= kernel_pmap
) {
1905 new_pte_value
= PTE_COMPRESSED
;
1906 if (IS_ALTACCT_PAGE(pai
, pv_e
)) {
1907 new_pte_value
|= PTE_COMPRESSED_ALT
;
1911 pmap_store_pte(pte
, new_pte_value
);
1915 if (pmap
->stats
.resident_count
< 1) {
1916 panic("pmap_page_protect: resident_count");
1919 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
1920 assert(pmap
->stats
.resident_count
>= 1);
1921 OSAddAtomic(-1, &pmap
->stats
.resident_count
);
1924 * We only ever compress internal pages.
1926 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
1927 assert(IS_INTERNAL_PAGE(pai
));
1929 if (pmap
!= kernel_pmap
) {
1930 /* update pmap stats */
1931 if (IS_REUSABLE_PAGE(pai
)) {
1932 assert(pmap
->stats
.reusable
> 0);
1933 OSAddAtomic(-1, &pmap
->stats
.reusable
);
1934 } else if (IS_INTERNAL_PAGE(pai
)) {
1935 assert(pmap
->stats
.internal
> 0);
1936 OSAddAtomic(-1, &pmap
->stats
.internal
);
1938 assert(pmap
->stats
.external
> 0);
1939 OSAddAtomic(-1, &pmap
->stats
.external
);
1941 if ((options
& PMAP_OPTIONS_COMPRESSOR
) &&
1942 IS_INTERNAL_PAGE(pai
)) {
1943 /* adjust "compressed" stats */
1944 OSAddAtomic64(+1, &pmap
->stats
.compressed
);
1945 PMAP_STATS_PEAK(pmap
->stats
.compressed
);
1946 pmap
->stats
.compressed_lifetime
++;
1949 /* update ledgers */
1950 if (IS_ALTACCT_PAGE(pai
, pv_e
)) {
1951 assert(IS_INTERNAL_PAGE(pai
));
1952 pmap_ledger_debit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
1953 pmap_ledger_debit(pmap
, task_ledgers
.alternate_accounting
, PAGE_SIZE
);
1954 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
1955 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, PAGE_SIZE
);
1956 pmap_ledger_credit(pmap
, task_ledgers
.alternate_accounting_compressed
, PAGE_SIZE
);
1958 } else if (IS_REUSABLE_PAGE(pai
)) {
1959 assert(!IS_ALTACCT_PAGE(pai
, pv_e
));
1960 assert(IS_INTERNAL_PAGE(pai
));
1961 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
1962 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, PAGE_SIZE
);
1963 /* was not in footprint, but is now */
1964 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
1966 } else if (IS_INTERNAL_PAGE(pai
)) {
1967 assert(!IS_ALTACCT_PAGE(pai
, pv_e
));
1968 assert(!IS_REUSABLE_PAGE(pai
));
1969 pmap_ledger_debit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
1971 * Update all stats related to physical
1972 * footprint, which only deals with
1975 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
1977 * This removal is only being
1978 * done so we can send this page
1979 * to the compressor; therefore
1980 * it mustn't affect total task
1983 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, PAGE_SIZE
);
1986 * This internal page isn't
1987 * going to the compressor,
1988 * so adjust stats to keep
1989 * phys_footprint up to date.
1991 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
1997 * Deal with the pv_rooted_entry.
2002 * Fix up head later.
2004 pv_h
->pmap
= PMAP_NULL
;
2007 * Delete this entry.
2009 pv_hash_remove(pvh_e
);
2010 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
2013 if (pvh_et
== PV_HASHED_ENTRY_NULL
) {
2020 * Write-protect, after opportunistic refmod collect
2023 pmap_phys_attributes
[pai
] |=
2024 *pte
& (PHYS_MODIFIED
| PHYS_REFERENCED
);
2026 pmap_phys_attributes
[pai
] |=
2027 ept_refmod_to_physmap((*pte
& (INTEL_EPT_REF
| INTEL_EPT_MOD
))) & (PHYS_MODIFIED
| PHYS_REFERENCED
);
2029 pmap_update_pte(pte
, PTE_WRITE(is_ept
), 0);
2031 if (options
& PMAP_OPTIONS_NOFLUSH
) {
2032 PMAP_UPDATE_TLBS_DELAYED(pmap
, vaddr
, vaddr
+ PAGE_SIZE
, (pmap_flush_context
*)arg
);
2034 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
2038 } while ((pv_e
= (pv_rooted_entry_t
) nexth
) != pv_h
);
2042 * If pv_head mapping was removed, fix it up.
2044 if (pv_h
->pmap
== PMAP_NULL
) {
2045 pvh_e
= (pv_hashed_entry_t
) queue_next(&pv_h
->qlink
);
2047 if (pvh_e
!= (pv_hashed_entry_t
) pv_h
) {
2048 pv_hash_remove(pvh_e
);
2049 pv_h
->pmap
= pvh_e
->pmap
;
2050 pv_h
->va_and_flags
= pvh_e
->va_and_flags
;
2051 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
2054 if (pvh_et
== PV_HASHED_ENTRY_NULL
) {
2060 if (pvh_eh
!= PV_HASHED_ENTRY_NULL
) {
2061 PV_HASHED_FREE_LIST(pvh_eh
, pvh_et
, pvh_cnt
);
2066 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT
) | DBG_FUNC_END
);
/*
 *	Clear specified attribute bits.
 */
void
phys_attribute_clear(
	ppnum_t         pn,
	int             bits,
	unsigned int    options,
	void            *arg)
{
2080 pv_rooted_entry_t pv_h
;
2081 pv_hashed_entry_t pv_e
;
2082 pt_entry_t
*pte
= NULL
;
2085 char attributes
= 0;
2086 boolean_t is_internal
, is_reusable
, is_altacct
, is_ept
;
2087 int ept_bits_to_clear
;
2088 boolean_t ept_keep_global_mod
= FALSE
;
2090 if ((bits
& PHYS_MODIFIED
) &&
2091 (options
& PMAP_OPTIONS_NOFLUSH
) &&
2093 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
2094 "should not clear 'modified' without flushing TLBs\n",
2095 pn
, bits
, options
, arg
);
2098 /* We only support converting MOD and REF bits for EPT PTEs in this function */
2099 assert((bits
& ~(PHYS_REFERENCED
| PHYS_MODIFIED
)) == 0);
2101 ept_bits_to_clear
= (unsigned)physmap_refmod_to_ept(bits
& (PHYS_MODIFIED
| PHYS_REFERENCED
));
2104 assert(pn
!= vm_page_fictitious_addr
);
2105 if (pn
== vm_page_guard_addr
) {
2109 pai
= ppn_to_pai(pn
);
2111 if (!IS_MANAGED_PAGE(pai
)) {
2113 * Not a managed page.
2118 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_START
, pn
, bits
);
2120 pv_h
= pai_to_pvh(pai
);
2126 * Walk down PV list, clearing all modify or reference bits.
2127 * We do not have to lock the pv_list because we have
2130 if (pv_h
->pmap
!= PMAP_NULL
) {
2132 * There are some mappings.
2135 is_internal
= IS_INTERNAL_PAGE(pai
);
2136 is_reusable
= IS_REUSABLE_PAGE(pai
);
2138 pv_e
= (pv_hashed_entry_t
)pv_h
;
2145 is_ept
= is_ept_pmap(pmap
);
2146 is_altacct
= IS_ALTACCT_PAGE(pai
, pv_e
);
2151 pte
= pmap_pte(pmap
, va
);
2152 /* grab ref/mod bits from this PTE */
2153 pte_bits
= (*pte
& (PTE_REF(is_ept
) | PTE_MOD(is_ept
)));
2154 /* propagate to page's global attributes */
2156 attributes
|= pte_bits
;
2158 attributes
|= ept_refmod_to_physmap(pte_bits
);
2159 if (!pmap_ept_support_ad
&& (pte_bits
& INTEL_EPT_MOD
)) {
2160 ept_keep_global_mod
= TRUE
;
2163 /* which bits to clear for this PTE? */
2167 pte_bits
&= ept_bits_to_clear
;
2170 if (options
& PMAP_OPTIONS_CLEAR_WRITE
) {
2171 pte_bits
|= PTE_WRITE(is_ept
);
2175 * Clear modify and/or reference bits.
2178 pmap_update_pte(pte
, pte_bits
, 0);
2180 /* Ensure all processors using this translation
2181 * invalidate this TLB entry. The invalidation
2182 * *must* follow the PTE update, to ensure that
2183 * the TLB shadow of the 'D' bit (in particular)
2184 * is synchronized with the updated PTE.
2186 if (!(options
& PMAP_OPTIONS_NOFLUSH
)) {
2187 /* flush TLBS now */
2188 PMAP_UPDATE_TLBS(pmap
,
2192 /* delayed TLB flush: add "pmap" info */
2193 PMAP_UPDATE_TLBS_DELAYED(
2197 (pmap_flush_context
*)arg
);
2199 /* no TLB flushing at all */
2203 /* update pmap "reusable" stats */
2204 if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
2206 pmap
!= kernel_pmap
) {
2207 /* one less "reusable" */
2208 assert(pmap
->stats
.reusable
> 0);
2209 OSAddAtomic(-1, &pmap
->stats
.reusable
);
2211 /* one more "internal" */
2212 OSAddAtomic(+1, &pmap
->stats
.internal
);
2213 PMAP_STATS_PEAK(pmap
->stats
.internal
);
2214 assert(pmap
->stats
.internal
> 0);
2216 /* no impact on ledgers */
2218 pmap_ledger_credit(pmap
,
2219 task_ledgers
.internal
,
2223 task_ledgers
.phys_footprint
,
2227 /* one more "external" */
2228 OSAddAtomic(+1, &pmap
->stats
.external
);
2229 PMAP_STATS_PEAK(pmap
->stats
.external
);
2230 assert(pmap
->stats
.external
> 0);
2232 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
2234 pmap
!= kernel_pmap
) {
2235 /* one more "reusable" */
2236 OSAddAtomic(+1, &pmap
->stats
.reusable
);
2237 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
2238 assert(pmap
->stats
.reusable
> 0);
2240 /* one less "internal" */
2241 assert(pmap
->stats
.internal
> 0);
2242 OSAddAtomic(-1, &pmap
->stats
.internal
);
2244 /* no impact on footprint */
2246 pmap_ledger_debit(pmap
,
2247 task_ledgers
.internal
,
2251 task_ledgers
.phys_footprint
,
2255 /* one less "external" */
2256 assert(pmap
->stats
.external
> 0);
2257 OSAddAtomic(-1, &pmap
->stats
.external
);
2261 pv_e
= (pv_hashed_entry_t
)queue_next(&pv_e
->qlink
);
2262 } while (pv_e
!= (pv_hashed_entry_t
)pv_h
);
2264 /* Opportunistic refmod collection, annulled
2265 * if both REF and MOD are being cleared.
2268 pmap_phys_attributes
[pai
] |= attributes
;
2270 if (ept_keep_global_mod
) {
2272 * If the hardware doesn't support AD bits for EPT PTEs and someone is
2273 * requesting that we clear the modified bit for a phys page, we need
2274 * to ensure that there are no EPT mappings for the page with the
2275 * modified bit set. If there are, we cannot clear the global modified bit.
2277 bits
&= ~PHYS_MODIFIED
;
2279 pmap_phys_attributes
[pai
] &= ~(bits
);
2281 /* update this page's "reusable" status */
2282 if (options
& PMAP_OPTIONS_CLEAR_REUSABLE
) {
2283 pmap_phys_attributes
[pai
] &= ~PHYS_REUSABLE
;
2284 } else if (options
& PMAP_OPTIONS_SET_REUSABLE
) {
2285 pmap_phys_attributes
[pai
] |= PHYS_REUSABLE
;
2290 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_END
);
/*
 *	Check specified attribute bits.
 */
int
phys_attribute_test(
	ppnum_t         pn,
	int             bits)
{
	pv_rooted_entry_t       pv_h;
	pv_hashed_entry_t       pv_e;
2310 assert(pn
!= vm_page_fictitious_addr
);
2311 assert((bits
& ~(PHYS_MODIFIED
| PHYS_REFERENCED
)) == 0);
2312 if (pn
== vm_page_guard_addr
) {
2316 pai
= ppn_to_pai(pn
);
2318 if (!IS_MANAGED_PAGE(pai
)) {
2320 * Not a managed page.
2326 * Fast check... if bits already collected
2327 * no need to take any locks...
2328 * if not set, we need to recheck after taking
2329 * the lock in case they got pulled in while
2330 * we were waiting for the lock
2332 if ((pmap_phys_attributes
[pai
] & bits
) == bits
) {
2336 pv_h
= pai_to_pvh(pai
);
2340 attributes
= pmap_phys_attributes
[pai
] & bits
;
2344 * Walk down PV list, checking the mappings until we
2345 * reach the end or we've found the desired attributes.
2347 if (attributes
!= bits
&&
2348 pv_h
->pmap
!= PMAP_NULL
) {
2350 * There are some mappings.
2352 pv_e
= (pv_hashed_entry_t
)pv_h
;
2357 is_ept
= is_ept_pmap(pmap
);
2360 * pick up modify and/or reference bits from mapping
2363 pte
= pmap_pte(pmap
, va
);
2365 attributes
|= (int)(*pte
& bits
);
2367 attributes
|= (int)(ept_refmod_to_physmap((*pte
& (INTEL_EPT_REF
| INTEL_EPT_MOD
))) & (PHYS_MODIFIED
| PHYS_REFERENCED
));
2370 pv_e
= (pv_hashed_entry_t
)queue_next(&pv_e
->qlink
);
2371 } while ((attributes
!= bits
) &&
2372 (pv_e
!= (pv_hashed_entry_t
)pv_h
));
2374 pmap_phys_attributes
[pai
] |= attributes
;
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *		The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(
	pmap_t          map,
	vm_map_offset_t vaddr,
	boolean_t       wired)
{
	pt_entry_t      *pte;

	PMAP_LOCK_SHARED(map);

	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL) {
		panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
		    map, vaddr, wired);
	}

	if (wired && !iswired(*pte)) {
		/*
		 * wiring down mapping
		 */
		pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
		OSAddAtomic(+1, &map->stats.wired_count);
		pmap_update_pte(pte, 0, PTE_WIRED);
	} else if (!wired && iswired(*pte)) {
		/*
		 * unwiring mapping
		 */
		assert(map->stats.wired_count >= 1);
		OSAddAtomic(-1, &map->stats.wired_count);
		pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
		pmap_update_pte(pte, PTE_WIRED, 0);
	}

	PMAP_UNLOCK_SHARED(map);
}
/*
 * "Backdoor" direct map routine for early mappings.
 * Useful for mapping memory outside the range
 * Sets A, D and NC if requested
 */
vm_offset_t
pmap_map_bd(
	vm_offset_t     virt,
	vm_map_offset_t start_addr,
	vm_map_offset_t end_addr,
	vm_prot_t       prot,
	unsigned int    flags)
{
	pt_entry_t      template;
	pt_entry_t      *ptep;

	vm_offset_t     base = virt;
	boolean_t       doflush = FALSE;

	template = pa_to_pte(start_addr)
	    | INTEL_PTE_REF
	    | INTEL_PTE_MOD
	    | INTEL_PTE_WIRED
	    | INTEL_PTE_VALID;

	if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
		template |= INTEL_PTE_NCACHE;
		if (!(flags & (VM_MEM_GUARDED))) {
			template |= INTEL_PTE_PAT;
		}
	}

	if ((prot & VM_PROT_EXECUTE) == 0) {
		template |= INTEL_PTE_NX;
	}

	if (prot & VM_PROT_WRITE) {
		template |= INTEL_PTE_WRITE;
	}
	vm_map_offset_t caddr = start_addr;
	while (caddr < end_addr) {
		ptep = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
		if (ptep == PT_ENTRY_NULL) {
			panic("pmap_map_bd: Invalid kernel address");
		}
		if (pte_to_pa(*ptep)) {
			doflush = TRUE;
		}
		pmap_store_pte(ptep, template);
		pte_increment_pa(template);
		virt += PAGE_SIZE;
		caddr += PAGE_SIZE;
	}
	if (doflush) {
		pmap_tlbi_range(0, ~0ULL, true, 0);
		PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
	}
	return virt;
}
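
/*
 * Cacheability in the template above follows the same convention as
 * pmap_enter_options(): VM_MEM_NOT_CACHEABLE alone selects an uncached
 * mapping (INTEL_PTE_NCACHE), and adding INTEL_PTE_PAT when the region is
 * not VM_MEM_GUARDED selects an alternate PAT entry for the mapping.
 */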
/* Create a virtual alias beginning at 'ava' of the specified kernel virtual
 * range. The aliased pagetable range is expanded if
 * PMAP_EXPAND_OPTIONS_ALIASMAP is specified. Performs no synchronization,
 * assumes caller has stabilized the source and destination ranges. Currently
 * used to populate sections of the trampoline "doublemap" at CPU startup.
 */
void
pmap_alias(
	vm_offset_t     ava,
	vm_map_offset_t start_addr,
	vm_map_offset_t end_addr,
	vm_prot_t       prot,
	unsigned int    eoptions)
{
	pt_entry_t      prot_template, template;
	pt_entry_t      *aptep, *sptep;

	prot_template = INTEL_PTE_REF | INTEL_PTE_MOD | INTEL_PTE_WIRED | INTEL_PTE_VALID;
	if ((prot & VM_PROT_EXECUTE) == 0) {
		prot_template |= INTEL_PTE_NX;
	}

	if (prot & VM_PROT_WRITE) {
		prot_template |= INTEL_PTE_WRITE;
	}
	assert(((start_addr | end_addr) & PAGE_MASK) == 0);
	while (start_addr < end_addr) {
		aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
		if (aptep == PT_ENTRY_NULL) {
			if (eoptions & PMAP_EXPAND_OPTIONS_ALIASMAP) {
				pmap_expand(kernel_pmap, ava, PMAP_EXPAND_OPTIONS_ALIASMAP);
				aptep = pmap_pte(kernel_pmap, (vm_map_offset_t)ava);
			} else {
				panic("pmap_alias: Invalid alias address");
			}
		}
		/* The aliased range should not have any active mappings */
		assert(pte_to_pa(*aptep) == 0);

		sptep = pmap_pte(kernel_pmap, start_addr);
		assert(sptep != PT_ENTRY_NULL && (pte_to_pa(*sptep) != 0));
		template = pa_to_pte(pte_to_pa(*sptep)) | prot_template;
		pmap_store_pte(aptep, template);

		ava += PAGE_SIZE;
		start_addr += PAGE_SIZE;
	}
}
mach_vm_size_t
pmap_query_resident(
	pmap_t          pmap,
	addr64_t        s64,
	addr64_t        e64,
	mach_vm_size_t  *compressed_bytes_p)
{
	pt_entry_t      *pde;
	pt_entry_t      *spte, *epte;
	addr64_t        l64;
	uint64_t        deadline = 0;
	mach_vm_size_t  resident_bytes;
	mach_vm_size_t  compressed_bytes;
	boolean_t       is_ept;

	if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
		if (compressed_bytes_p) {
			*compressed_bytes_p = 0;
		}
		return 0;
	}

	is_ept = is_ept_pmap(pmap);

	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(s64),
	    VM_KERNEL_ADDRHIDE(e64));

	resident_bytes = 0;
	compressed_bytes = 0;

	PMAP_LOCK_EXCLUSIVE(pmap);
	uint32_t traverse_count = 0;

	while (s64 < e64) {
		l64 = (s64 + PDE_MAPPED_SIZE) & ~(PDE_MAPPED_SIZE - 1);
		if (l64 > e64) {
			l64 = e64;
		}
		pde = pmap_pde(pmap, s64);

		if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
			if (*pde & PTE_PS) {
				/* superpage: not supported */
			} else {
				spte = pmap_pte(pmap,
				    (s64 & ~(PDE_MAPPED_SIZE - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];

				for (; spte < epte; spte++) {
					if (pte_to_pa(*spte) != 0) {
						resident_bytes += PAGE_SIZE;
					} else if (*spte & PTE_COMPRESSED) {
						compressed_bytes += PAGE_SIZE;
					}
				}
			}
		}
		s64 = l64;

		if ((s64 < e64) && (traverse_count++ > PLCHECK_THRESHOLD)) {
			if (deadline == 0) {
				deadline = rdtsc64() + max_preemption_latency_tsc;
			} else {
				if (rdtsc64() > deadline) {
					PMAP_UNLOCK_EXCLUSIVE(pmap);
					__builtin_ia32_pause();
					PMAP_LOCK_EXCLUSIVE(pmap);
					deadline = rdtsc64() + max_preemption_latency_tsc;
				}
			}
		}
	}

	PMAP_UNLOCK_EXCLUSIVE(pmap);

	PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
	    resident_bytes);

	if (compressed_bytes_p) {
		*compressed_bytes_p = compressed_bytes;
	}
	return resident_bytes;
}
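
/*
 * Usage sketch (hypothetical variables): the VM uses this to size a region's
 * resident and compressor footprint in one traversal, e.g.
 *
 *   mach_vm_size_t compressed;
 *   mach_vm_size_t resident = pmap_query_resident(map_pmap, start, end, &compressed);
 *
 * Passing NULL for compressed_bytes_p skips reporting the compressed total.
 */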
kern_return_t
pmap_query_page_info(
	pmap_t          pmap,
	vm_map_offset_t va,
	int             *disp_p)
{
	int             disp;
	boolean_t       is_ept;
	pmap_paddr_t    pa;
	int             pai;
	pd_entry_t      *pde;
	pt_entry_t      *pte;

	if (pmap == PMAP_NULL || pmap == kernel_pmap) {
		*disp_p = 0;
		return KERN_INVALID_ARGUMENT;
	}

	disp = 0;
	is_ept = is_ept_pmap(pmap);

	PMAP_LOCK_EXCLUSIVE(pmap);

	pde = pmap_pde(pmap, va);
	if (!pde ||
	    !(*pde & PTE_VALID_MASK(is_ept)) ||
	    (*pde & PTE_PS)) {
		goto done;
	}

	pte = pmap_pte(pmap, va);
	if (pte == PT_ENTRY_NULL) {
		goto done;
	}

	pa = pte_to_pa(*pte);
	if (pa == 0) {
		if (PTE_IS_COMPRESSED(*pte, pte, pmap, va)) {
			disp |= PMAP_QUERY_PAGE_COMPRESSED;
			if (*pte & PTE_COMPRESSED_ALT) {
				disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
			}
		}
	} else {
		disp |= PMAP_QUERY_PAGE_PRESENT;
		pai = pa_index(pa);
		if (!IS_MANAGED_PAGE(pai)) {
		} else if (pmap_pv_is_altacct(pmap, va, pai)) {
			assert(IS_INTERNAL_PAGE(pai));
			disp |= PMAP_QUERY_PAGE_INTERNAL;
			disp |= PMAP_QUERY_PAGE_ALTACCT;
		} else if (IS_REUSABLE_PAGE(pai)) {
			disp |= PMAP_QUERY_PAGE_REUSABLE;
		} else if (IS_INTERNAL_PAGE(pai)) {
			disp |= PMAP_QUERY_PAGE_INTERNAL;
		}
	}

done:
	PMAP_UNLOCK_EXCLUSIVE(pmap);
	*disp_p = disp;
	return KERN_SUCCESS;
}
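
/*
 * The disposition returned above is a bitmask: PMAP_QUERY_PAGE_PRESENT or
 * PMAP_QUERY_PAGE_COMPRESSED describes where the page currently lives, and
 * the INTERNAL / REUSABLE / ALTACCT bits mirror the same per-page accounting
 * state used by the ledger updates elsewhere in this file.
 */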
void
pmap_set_vm_map_cs_enforced(
	pmap_t pmap,
	bool new_value)
{
	PMAP_LOCK_EXCLUSIVE(pmap);
	pmap->pm_vm_map_cs_enforced = new_value;
	PMAP_UNLOCK_EXCLUSIVE(pmap);
}

extern int cs_process_enforcement_enable;
bool
pmap_get_vm_map_cs_enforced(
	pmap_t pmap)
{
	if (cs_process_enforcement_enable) {
		return true;
	}
	return pmap->pm_vm_map_cs_enforced;
}

void
pmap_set_jit_entitled(__unused pmap_t pmap)
{
	/* The x86 pmap layer does not care if a map has a JIT entry. */
	return;
}

bool
pmap_get_jit_entitled(__unused pmap_t pmap)
{
	/* The x86 pmap layer does not care if a map is using JIT. */
	return false;
}

bool
pmap_has_prot_policy(__unused pmap_t pmap, __unused bool translated_allow_execute, __unused vm_prot_t prot)
{
	/*
	 * The x86 pmap layer does not apply any policy to any protection
	 * types.
	 */
	return false;
}

uint64_t
pmap_release_pages_fast(void)
{
	return 0;
}

void
pmap_trim(__unused pmap_t grand, __unused pmap_t subord, __unused addr64_t vstart, __unused uint64_t size)
{
	return;
}

void
pmap_ledger_alloc_init(size_t size)
{
	panic("%s: unsupported, "
	    "size=%lu",
	    __func__, size);
}

ledger_t
pmap_ledger_alloc(void)
{
	panic("%s: unsupported",
	    __func__);
}

void
pmap_ledger_free(ledger_t ledger)
{
	panic("%s: unsupported, "
	    "ledger=%p",
	    __func__, ledger);
}

kern_return_t
pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused,
    unsigned int level_mask __unused, size_t *bytes_copied __unused)
{
	return KERN_NOT_SUPPORTED;
}

void *
pmap_map_compressor_page(ppnum_t pn)
{
	assertf(IS_MANAGED_PAGE(ppn_to_pai(pn)), "%s called on non-managed page 0x%08x", __func__, pn);
	return PHYSMAP_PTOV((uint64_t)pn << (uint64_t)PAGE_SHIFT);
}

void
pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
{
}

bool
pmap_clear_refmod_range_options(
	pmap_t pmap __unused,
	vm_map_address_t start __unused,
	vm_map_address_t end __unused,
	unsigned int mask __unused,
	unsigned int options __unused)
{
	/*
	 * x86 doesn't have ranged tlbi instructions, and we already have
	 * the pmap_flush_context.  This operation isn't implemented.
	 */
	return false;
}