/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_assert.h>

#include <vm/vm_map.h>
#include <kern/ledger.h>
#include <i386/pmap_internal.h>
void        pmap_remove_range(
                pmap_t          pmap,
                vm_map_offset_t va,
                pt_entry_t      *spte,
                pt_entry_t      *epte);

void        pmap_remove_range_options(
                pmap_t          pmap,
                vm_map_offset_t va,
                pt_entry_t      *spte,
                pt_entry_t      *epte,
                int             options);

void        pmap_reusable_range(
                pmap_t          pmap,
                vm_map_offset_t va,
                pt_entry_t      *spte,
                pt_entry_t      *epte,
                boolean_t       reusable);

uint32_t pmap_update_clear_pte_count;
/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on a NBPDE boundary.
 */

/* These symbols may be referenced directly by VM */
uint64_t pmap_nesting_size_min = NBPDE;
uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
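
/*
 * With NBPDE = 2MB, pmap_nesting_size_min is one 2MB superpage;
 * pmap_nesting_size_max, computed as 0 - NBPDE in 64-bit arithmetic, wraps
 * around to the largest NBPDE-aligned value representable in a uint64_t.
 */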
/*
 *  kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
 *
 *  grand    = the pmap that we will nest subord into
 *  subord   = the pmap that goes into the grand
 *  va_start = start of range in pmap to be inserted
 *  nstart   = start of range in the nested (subord) pmap
 *  size     = Size of nest area (up to 16TB)
 *
 *  Inserts a pmap into another.  This is used to implement shared segments.
 *
 *  Note that we depend upon higher level VM locks to ensure that things don't change while
 *  we are doing this.  For example, VM should not be doing any pmap enters while it is nesting
 *  or do 2 nests at once.
 *
 *  This routine can nest subtrees either at the PDPT level (1GiB) or at the
 *  PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 *  container and the "grand" parent. A minor optimization to consider for the
 *  future: make the "subord" truly a container rather than a full-fledged
 *  pagetable hierarchy which can be unnecessarily sparse (DRK).
 */
kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
    vm_map_offset_t vaddr, nvaddr;
    pd_entry_t      *pde, *npde;
    uint64_t        i;
    uint64_t        num_pde;

    assert(!is_ept_pmap(grand));
    assert(!is_ept_pmap(subord));

    if ((size & (pmap_nesting_size_min-1)) ||
        (va_start & (pmap_nesting_size_min-1)) ||
        (nstart & (pmap_nesting_size_min-1)) ||
        ((size >> 28) > 65536))     /* Max size we can nest is 16TB */
        return KERN_INVALID_VALUE;
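
    /*
     * The (size >> 28) > 65536 test above bounds the nested region to
     * roughly 65536 * 2^28 bytes = 2^44 bytes = 16TB, matching the stated
     * maximum.
     */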
    if (size == 0) {
        panic("pmap_nest: size is invalid - %016llX\n", size);
    }

    if (va_start != nstart)
        panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);

    PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
               (uintptr_t) grand, (uintptr_t) subord,
               (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);

    nvaddr = (vm_map_offset_t)nstart;
    num_pde = size >> PDESHIFT;

    PMAP_LOCK(subord);

    subord->pm_shared = TRUE;
    for (i = 0; i < num_pde;) {
        if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {

            npde = pmap64_pdpt(subord, nvaddr);

            while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
                PMAP_UNLOCK(subord);
                pmap_expand_pdpt(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK(subord);
                npde = pmap64_pdpt(subord, nvaddr);
            }
            *npde |= INTEL_PDPTE_NESTED;
            nvaddr += NBPDPT;
            i += (uint32_t)NPDEPG;
        }
        else {
            npde = pmap_pde(subord, nvaddr);

            while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
                PMAP_UNLOCK(subord);
                pmap_expand(subord, nvaddr, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK(subord);
                npde = pmap_pde(subord, nvaddr);
            }
            nvaddr += NBPDE;
            i++;
        }
    }

    PMAP_UNLOCK(subord);
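
    /*
     * Second pass: with the subord hierarchy populated above, copy its
     * top-level entries (PDPTEs or PDEs) into the corresponding slots of
     * the grand pmap so both pmaps share the same lower-level page tables.
     */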
    vaddr = (vm_map_offset_t)va_start;

    PMAP_LOCK(grand);

    for (i = 0; i < num_pde;) {
        pd_entry_t tpde;

        if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
            npde = pmap64_pdpt(subord, vaddr);
            if (npde == 0)
                panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
            tpde = *npde;
            pde = pmap64_pdpt(grand, vaddr);
            if (0 == pde) {
                PMAP_UNLOCK(grand);
                pmap_expand_pml4(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK(grand);
                pde = pmap64_pdpt(grand, vaddr);
            }
            if (pde == 0)
                panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
            pmap_store_pte(pde, tpde);
            vaddr += NBPDPT;
            i += (uint32_t) NPDEPG;
        }
        else {
            npde = pmap_pde(subord, nstart);
            if (npde == 0)
                panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
            tpde = *npde;
            nstart += NBPDE;
            pde = pmap_pde(grand, vaddr);
            if ((0 == pde) && cpu_64bit) {
                PMAP_UNLOCK(grand);
                pmap_expand_pdpt(grand, vaddr, PMAP_EXPAND_OPTIONS_NONE);
                PMAP_LOCK(grand);
                pde = pmap_pde(grand, vaddr);
            }

            if (pde == 0)
                panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
            vaddr += NBPDE;
            pmap_store_pte(pde, tpde);
            i++;
        }
    }

    PMAP_UNLOCK(grand);

    PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

    return KERN_SUCCESS;
}
/*
 *  kern_return_t pmap_unnest(grand, vaddr)
 *
 *  grand = the pmap that we will un-nest subord from
 *  vaddr = start of range in pmap to be unnested
 *
 *  Removes a pmap from another.  This is used to implement shared segments.
 */
kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
    pd_entry_t  *pde;
    uint64_t    i;
    uint64_t    num_pde;
    addr64_t    va_start, va_end;
    uint64_t    npdpt = PMAP_INVALID_PDPTNUM;

    PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
               (uintptr_t) grand,
               (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);

    if ((size & (pmap_nesting_size_min-1)) ||
        (vaddr & (pmap_nesting_size_min-1))) {
        panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
              grand, vaddr, size);
    }

    assert(!is_ept_pmap(grand));

    /* align everything to PDE boundaries */
    va_start = vaddr & ~(NBPDE-1);
    va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
    size = va_end - va_start;
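
    /*
     * Example of the rounding above (NBPDE = 2MB = 0x200000): for
     * vaddr = 0x3ff000 and size = 0x2000, va_start becomes 0x200000,
     * va_end becomes 0x600000, and size is widened to 0x400000.
     */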
    PMAP_LOCK(grand);

    num_pde = size >> PDESHIFT;
    vaddr = va_start;

    for (i = 0; i < num_pde; ) {
        if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
            npdpt = pdptnum(grand, vaddr);
            pde = pmap64_pdpt(grand, vaddr);
            if (pde && (*pde & INTEL_PDPTE_NESTED)) {
                pmap_store_pte(pde, (pd_entry_t)0);
                i += (uint32_t) NPDEPG;
                vaddr += NBPDPT;
                continue;
            }
        }
        pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
        if (pde == 0)
            panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
        pmap_store_pte(pde, (pd_entry_t)0);
        i++;
        vaddr += NBPDE;
    }

    PMAP_UPDATE_TLBS(grand, va_start, va_end);

    PMAP_UNLOCK(grand);

    PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

    return KERN_SUCCESS;
}
kern_return_t
pmap_unnest_options(
    pmap_t grand,
    addr64_t vaddr,
    __unused uint64_t size,
    __unused unsigned int options) {
    return pmap_unnest(grand, vaddr, size);
}
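
/*
 * When a range was nested at PDPT granularity, the boundaries handed to
 * pmap_unnest must be widened out to NBPDPT (1GiB) alignment; the routine
 * below reports whether such an adjustment was made.
 */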
/* Invoked by the Mach VM to determine the platform specific unnest region */

boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
    pd_entry_t *pdpte;
    boolean_t rval = FALSE;

    PMAP_LOCK(p);

    pdpte = pmap64_pdpt(p, *s);
    if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
        *s &= ~(NBPDPT - 1);
        rval = TRUE;
    }

    pdpte = pmap64_pdpt(p, *e);
    if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
        *e = ((*e + NBPDPT) & ~(NBPDPT-1));
        rval = TRUE;
    }

    PMAP_UNLOCK(p);

    return rval;
}
/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
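
/*
 * For a large-page mapping the PDE supplies the physical base, and
 * ptenum(va) supplies the 4K page index within the superpage. For example,
 * a va that is five 4K pages into a 2MB mapping yields
 * ppn = i386_btop(pte_to_pa(pde)) + 5.
 */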
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
    pt_entry_t  *ptp;
    pd_entry_t  *pdep;
    ppnum_t     ppn = 0;
    pd_entry_t  pde;
    pt_entry_t  pte;
    boolean_t   is_ept;

    is_ept = is_ept_pmap(pmap);

    mp_disable_preemption();

    /* This refcount test is a band-aid--several infrastructural changes
     * are necessary to eliminate invocation of this routine from arbitrary
     * contexts.
     */

    if (!pmap->ref_count)
        goto pfp_exit;

    pdep = pmap_pde(pmap, va);

    if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & PTE_VALID_MASK(is_ept))) {
        if (pde & PTE_PS) {
            ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
            ppn += (ppnum_t) ptenum(va);
        }
        else {
            ptp = pmap_pte(pmap, va);
            if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & PTE_VALID_MASK(is_ept)) != 0)) {
                ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
            }
        }
    }
pfp_exit:
    mp_enable_preemption();

    return ppn;
}
/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed. We assume that this physical page may be mapped in
 * both EPT and normal Intel PTEs, so we convert the attributes
 * to the corresponding format for each pmap.
 *
 * We assert that the passed set of attributes is a subset of the
 * PHYS_CACHEABILITY_MASK.
 */
void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
    pv_rooted_entry_t   pv_h, pv_e;
    pv_hashed_entry_t   pvh_e, nexth;
    vm_map_offset_t     vaddr;
    pmap_t              pmap;
    pt_entry_t          *ptep;
    boolean_t           is_ept;
    unsigned            ept_attributes;

    assert(IS_MANAGED_PAGE(pn));
    assert(((~PHYS_CACHEABILITY_MASK) & attributes) == 0);

    /* We don't support the PTA bit for EPT PTEs */
    if (attributes & INTEL_PTE_NCACHE)
        ept_attributes = INTEL_EPT_NCACHE;
    else
        ept_attributes = INTEL_EPT_WB;

    pv_h = pai_to_pvh(pn);
    /* TODO: translate the PHYS_* bits to PTE bits, while they're
     * currently identical, they may not remain so
     * Potential optimization (here and in page_protect),
     * parallel shootdowns, check for redundant
     * attribute modifications.
     */

    /*
     * Alter attributes on all mappings
     */
    if (pv_h->pmap != PMAP_NULL) {
        pv_e = pv_h;
        pvh_e = (pv_hashed_entry_t)pv_e;

        do {
            pmap = pv_e->pmap;
            vaddr = pv_e->va;
            ptep = pmap_pte(pmap, vaddr);

            if (!ptep)
                panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);

            is_ept = is_ept_pmap(pmap);

            nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
            if (!is_ept) {
                pmap_update_pte(ptep, PHYS_CACHEABILITY_MASK, attributes);
            } else {
                pmap_update_pte(ptep, INTEL_EPT_CACHE_MASK, ept_attributes);
            }
            PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
            pvh_e = nexth;
        } while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
    }
}
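
/*
 * Temporarily mark this CPU's CR3 as inactive so remote TLB shootdowns can
 * skip interrupting it; when filtering is disabled again, any invalidation
 * posted in the meantime is processed locally.
 */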
void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
    assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

    if (dofilter) {
        CPU_CR3_MARK_INACTIVE();
    } else {
        CPU_CR3_MARK_ACTIVE();
        if (current_cpu_datap()->cpu_tlb_invalid)
            process_pmap_updates();
    }
}
/*
 *  Insert the given physical page (p) at
 *  the specified virtual address (v) in the
 *  target physical map with the protection requested.
 *
 *  If specified, the page will be wired down, meaning
 *  that the related pte cannot be reclaimed.
 *
 *  NB:  This is the only routine which MAY NOT lazy-evaluate
 *  or lose information.  That is, this routine must actually
 *  insert this page into the given map NOW.
 */
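
/*
 * Illustrative (hypothetical) call: wiring down a writable kernel mapping
 * of physical page pn at virtual address va might look like
 *  pmap_enter(kernel_pmap, va, pn, VM_PROT_READ | VM_PROT_WRITE,
 *             VM_PROT_NONE, 0, TRUE);
 */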
void
pmap_enter(
    register pmap_t     pmap,
    vm_map_offset_t     vaddr,
    ppnum_t             pn,
    vm_prot_t           prot,
    vm_prot_t           fault_type,
    unsigned int        flags,
    boolean_t           wired)
{
    (void) pmap_enter_options(pmap, vaddr, pn, prot, fault_type, flags, wired, PMAP_EXPAND_OPTIONS_NONE, NULL);
}
kern_return_t
pmap_enter_options(
    register pmap_t     pmap,
    vm_map_offset_t     vaddr,
    ppnum_t             pn,
    vm_prot_t           prot,
    __unused vm_prot_t  fault_type,
    unsigned int        flags,
    boolean_t           wired,
    unsigned int        options,
    void                *arg)
{
    pt_entry_t          *pte;
    int                 pai;
    pv_rooted_entry_t   pv_h;
    pv_hashed_entry_t   pvh_e;
    pv_hashed_entry_t   pvh_new;
    pt_entry_t          template;
    pmap_paddr_t        old_pa;
    pmap_paddr_t        pa = (pmap_paddr_t) i386_ptob(pn);
    boolean_t           need_tlbflush = FALSE;
    boolean_t           set_NX;
    char                oattr;
    boolean_t           old_pa_locked;
    /* 2MiB mappings are confined to x86_64 by VM */
    boolean_t           superpage = flags & VM_MEM_SUPERPAGE;
    vm_object_t         delpage_pm_obj = NULL;
    uint64_t            delpage_pde_index = 0;
    pt_entry_t          old_pte;
    kern_return_t       kr_expand;
    boolean_t           is_ept;
    if (pmap == PMAP_NULL)
        return KERN_INVALID_ARGUMENT;

    is_ept = is_ept_pmap(pmap);

    /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
     * unused value for that scenario.
     */
    assert(pn != vm_page_fictitious_addr);

    if (pn == vm_page_guard_addr)
        return KERN_INVALID_ARGUMENT;

    PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
               pmap,
               (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
               pn, prot);

    if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
        set_NX = FALSE;
    else
        set_NX = TRUE;

    if (__improbable(set_NX && (pmap == kernel_pmap) && ((pmap_disable_kstack_nx && (flags & VM_MEM_STACK)) || (pmap_disable_kheap_nx && !(flags & VM_MEM_STACK))))) {
        set_NX = FALSE;
    }

    /*
     *  Must allocate a new pvlist entry while we're unlocked;
     *  zalloc may cause pageout (which will lock the pmap system).
     *  If we determine we need a pvlist entry, we will unlock
     *  and allocate one.  Then we will retry, throwing away
     *  the allocated entry later (if we no longer need it).
     */

    pvh_new = PV_HASHED_ENTRY_NULL;
Retry:
    pvh_e = PV_HASHED_ENTRY_NULL;

    PMAP_LOCK(pmap);
    /*
     *  Expand pmap to include this pte.  Assume that
     *  pmap is always expanded to include enough hardware
     *  pages to map one VM page.
     */
    if (superpage) {
        while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
            /* need room for another pde entry */
            PMAP_UNLOCK(pmap);
            kr_expand = pmap_expand_pdpt(pmap, vaddr, options);
            if (kr_expand != KERN_SUCCESS)
                return kr_expand;
            PMAP_LOCK(pmap);
        }
    } else {
        while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
            /*
             * Must unlock to expand the pmap
             * going to grow pde level page(s)
             */
            PMAP_UNLOCK(pmap);
            kr_expand = pmap_expand(pmap, vaddr, options);
            if (kr_expand != KERN_SUCCESS)
                return kr_expand;
            PMAP_LOCK(pmap);
        }
    }
    if (options & PMAP_EXPAND_OPTIONS_NOENTER) {
        PMAP_UNLOCK(pmap);
        return KERN_SUCCESS;
    }
    if (superpage && *pte && !(*pte & PTE_PS)) {
        /*
         * There is still an empty page table mapped that
         * was used for a previous base page mapping.
         * Remember the PDE and the PDE index, so that we
         * can free the page at the end of this function.
         */
        delpage_pde_index = pdeidx(pmap, vaddr);
        delpage_pm_obj = pmap->pm_obj;
        *pte = 0;
    }

    old_pa = pte_to_pa(*pte);
    pai = pa_index(old_pa);
    old_pa_locked = FALSE;

    if (old_pa == 0 &&
        (*pte & PTE_COMPRESSED)) {
        /* one less "compressed" */
        OSAddAtomic64(-1, &pmap->stats.compressed);
        /* marker will be cleared below */
    }
    /*
     * if we have a previous managed page, lock the pv entry now. after
     * we lock it, check to see if someone beat us to the lock and if so
     * drop the lock
     */
    if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
        LOCK_PVH(pai);
        old_pa_locked = TRUE;
        old_pa = pte_to_pa(*pte);
        if (0 == old_pa) {
            UNLOCK_PVH(pai);    /* another path beat us to it */
            old_pa_locked = FALSE;
        }
    }
    /*
     *  Special case if the incoming physical page is already mapped
     *  at this address.
     */
    if (old_pa == pa) {
        pt_entry_t old_attributes =
            *pte & ~(PTE_REF(is_ept) | PTE_MOD(is_ept));

        /*
         *  May be changing its wired attribute or protection
         */

        template = pa_to_pte(pa);

        /* ?: WORTH ASSERTING THAT AT LEAST ONE RWX (implicit valid) PASSED FOR EPT? */
        if (!is_ept) {
            template |= INTEL_PTE_VALID;
        } else {
            template |= INTEL_EPT_IPTA;
        }

        template |= pmap_get_cache_attributes(pa_index(pa), is_ept);

        /*
         * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
         */
        if (!is_ept && (VM_MEM_NOT_CACHEABLE ==
            (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)))) {
            if (!(flags & VM_MEM_GUARDED))
                template |= INTEL_PTE_PTA;
            template |= INTEL_PTE_NCACHE;
        }
        if (pmap != kernel_pmap && !is_ept)
            template |= INTEL_PTE_USER;

        if (prot & VM_PROT_READ)
            template |= PTE_READ(is_ept);

        if (prot & VM_PROT_WRITE) {
            template |= PTE_WRITE(is_ept);
            if (is_ept && !pmap_ept_support_ad) {
                template |= PTE_MOD(is_ept);
                if (old_pa_locked) {
                    assert(IS_MANAGED_PAGE(pai));
                    pmap_phys_attributes[pai] |= PHYS_MODIFIED;
                }
            }
        }
        if (prot & VM_PROT_EXECUTE) {
            template = pte_set_ex(template, is_ept);
        }

        if (set_NX)
            template = pte_remove_ex(template, is_ept);

        if (wired) {
            template |= PTE_WIRED;
            if (!iswired(old_attributes)) {
                OSAddAtomic(+1, &pmap->stats.wired_count);
                pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
            }
        } else {
            if (iswired(old_attributes)) {
                assert(pmap->stats.wired_count >= 1);
                OSAddAtomic(-1, &pmap->stats.wired_count);
                pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
            }
        }

        if (superpage)              /* this path cannot be used */
            template |= PTE_PS;     /* to change the page size! */

        if (old_attributes == template)
            goto dont_update_pte;

        /* Determine delta, PV locked */
        need_tlbflush =
            ((old_attributes ^ template) != PTE_WIRED);

        if (need_tlbflush == TRUE && !(old_attributes & PTE_WRITE(is_ept))) {
            if ((old_attributes ^ template) == PTE_WRITE(is_ept))
                need_tlbflush = FALSE;
        }

        /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
        if (is_ept && !pmap_ept_support_ad) {
            template |= PTE_REF(is_ept);
            if (old_pa_locked) {
                assert(IS_MANAGED_PAGE(pai));
                pmap_phys_attributes[pai] |= PHYS_REFERENCED;
            }
        }

        /* store modified PTE and preserve RC bits */
        pt_entry_t npte, opte;
        do {
            opte = *pte;
            npte = template | (opte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
        } while (!pmap_cmpx_pte(pte, opte, npte));
dont_update_pte:
        if (old_pa_locked) {
            UNLOCK_PVH(pai);
            old_pa_locked = FALSE;
        }
        goto Done;
    }
    /*
     *  Outline of code from here:
     *     1) If va was mapped, update TLBs, remove the mapping
     *        and remove old pvlist entry.
     *     2) Add pvlist entry for new mapping
     *     3) Enter new mapping.
     *
     *  If the old physical page is not managed step 1) is skipped
     *  (except for updating the TLBs), and the mapping is
     *  overwritten at step 3).  If the new physical page is not
     *  managed, step 2) is skipped.
     */
    if (old_pa != (pmap_paddr_t) 0) {

        /*
         *  Don't do anything to pages outside valid memory here.
         *  Instead convince the code that enters a new mapping
         *  to overwrite the old one.
         */

        /* invalidate the PTE */
        pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);
        /* propagate invalidate everywhere */
        PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
        /* remember reference and change */
        old_pte = *pte;
        oattr = (char) (old_pte & (PTE_MOD(is_ept) | PTE_REF(is_ept)));
        /* completely invalidate the PTE */
        pmap_store_pte(pte, 0);

        if (IS_MANAGED_PAGE(pai)) {
            pmap_assert(old_pa_locked == TRUE);
            pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
            pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
            assert(pmap->stats.resident_count >= 1);
            OSAddAtomic(-1, &pmap->stats.resident_count);
            if (pmap != kernel_pmap) {
                if (IS_REUSABLE_PAGE(pai)) {
                    assert(pmap->stats.reusable > 0);
                    OSAddAtomic(-1, &pmap->stats.reusable);
                } else if (IS_INTERNAL_PAGE(pai)) {
                    assert(pmap->stats.internal > 0);
                    OSAddAtomic(-1, &pmap->stats.internal);
                } else {
                    assert(pmap->stats.external > 0);
                    OSAddAtomic(-1, &pmap->stats.external);
                }
            }
            if (iswired(old_pte)) {
                assert(pmap->stats.wired_count >= 1);
                OSAddAtomic(-1, &pmap->stats.wired_count);
                pmap_ledger_debit(pmap, task_ledgers.wired_mem,
                    PAGE_SIZE);
            }

            if (!is_ept) {
                pmap_phys_attributes[pai] |= oattr;
            } else {
                pmap_phys_attributes[pai] |= ept_refmod_to_physmap(oattr);
            }

            /*
             *  Remove the mapping from the pvlist for
             *  this physical page.
             *  We'll end up with either a rooted pv or a
             *  hashed pv
             */
            pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);

        } else {

            /*
             *  old_pa is not managed.
             *  Do removal part of accounting.
             */

            if (pmap != kernel_pmap) {
                assert(pmap->stats.device > 0);
                OSAddAtomic(-1, &pmap->stats.device);
            }
            if (iswired(old_pte)) {
                assert(pmap->stats.wired_count >= 1);
                OSAddAtomic(-1, &pmap->stats.wired_count);
                pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
            }
        }
    }

    /*
     * if we had a previously managed page locked, unlock it now
     */
    if (old_pa_locked) {
        UNLOCK_PVH(pai);
        old_pa_locked = FALSE;
    }
    pai = pa_index(pa);     /* now working with new incoming phys page */
    if (IS_MANAGED_PAGE(pai)) {

        /*
         *  Step 2) Enter the mapping in the PV list for this
         *  physical page.
         */
        pv_h = pai_to_pvh(pai);

        LOCK_PVH(pai);

        if (pv_h->pmap == PMAP_NULL) {
            /*
             *  No mappings yet, use rooted pv
             */
            pv_h->va = vaddr;
            pv_h->pmap = pmap;
            queue_init(&pv_h->qlink);

            if (options & PMAP_OPTIONS_INTERNAL) {
                pmap_phys_attributes[pai] |= PHYS_INTERNAL;
            } else {
                pmap_phys_attributes[pai] &= ~PHYS_INTERNAL;
            }
            if (options & PMAP_OPTIONS_REUSABLE) {
                pmap_phys_attributes[pai] |= PHYS_REUSABLE;
            } else {
                pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
            }
        } else {
            /*
             *  Add new pv_hashed_entry after header.
             */
            if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
                pvh_e = pvh_new;
                pvh_new = PV_HASHED_ENTRY_NULL;
            } else if (PV_HASHED_ENTRY_NULL == pvh_e) {
                PV_HASHED_ALLOC(&pvh_e);
                if (PV_HASHED_ENTRY_NULL == pvh_e) {
                    /*
                     * the pv list is empty. if we are on
                     * the kernel pmap we'll use one of
                     * the special private kernel pv_e's,
                     * else, we need to unlock
                     * everything, zalloc a pv_e, and
                     * restart bringing in the pv_e with
                     * us.
                     */
                    if (kernel_pmap == pmap) {
                        PV_HASHED_KERN_ALLOC(&pvh_e);
                    } else {
                        UNLOCK_PVH(pai);
                        PMAP_UNLOCK(pmap);
                        pmap_pv_throttle(pmap);
                        pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
                        goto Retry;
                    }
                }
            }

            if (PV_HASHED_ENTRY_NULL == pvh_e)
                panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");

            pvh_e->va = vaddr;
            pvh_e->pmap = pmap;
            pvh_e->ppn = pn;
            pv_hash_add(pvh_e, pv_h);

            /*
             *  Remember that we used the pvlist entry.
             */
            pvh_e = PV_HASHED_ENTRY_NULL;
        }

        /*
         * only count the mapping
         * for 'managed memory'
         */
        pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
        pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
        OSAddAtomic(+1, &pmap->stats.resident_count);
        if (pmap->stats.resident_count > pmap->stats.resident_max) {
            pmap->stats.resident_max = pmap->stats.resident_count;
        }
        if (pmap != kernel_pmap) {
            if (IS_REUSABLE_PAGE(pai)) {
                OSAddAtomic(+1, &pmap->stats.reusable);
                PMAP_STATS_PEAK(pmap->stats.reusable);
            } else if (IS_INTERNAL_PAGE(pai)) {
                OSAddAtomic(+1, &pmap->stats.internal);
                PMAP_STATS_PEAK(pmap->stats.internal);
            } else {
                OSAddAtomic(+1, &pmap->stats.external);
                PMAP_STATS_PEAK(pmap->stats.external);
            }
        }
    } else if (last_managed_page == 0) {
        /* Account for early mappings created before "managed pages"
         * are determined. Consider consulting the available DRAM map.
         */
        pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
        pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
        OSAddAtomic(+1, &pmap->stats.resident_count);
        if (pmap != kernel_pmap) {
            OSAddAtomic(+1, &pmap->stats.device);
            PMAP_STATS_PEAK(pmap->stats.device);
        }
    }
    /*
     * Step 3) Enter the mapping.
     *
     *  Build a template to speed up entering -
     *  only the pfn changes.
     */
    template = pa_to_pte(pa);

    if (!is_ept) {
        template |= INTEL_PTE_VALID;
    } else {
        template |= INTEL_EPT_IPTA;
    }

    /*
     * DRK: It may be worth asserting on cache attribute flags that diverge
     * from the existing physical page attributes.
     */

    template |= pmap_get_cache_attributes(pa_index(pa), is_ept);

    /*
     * We don't support passing VM_MEM_NOT_CACHEABLE flags for EPT PTEs
     */
    if (!is_ept && (flags & VM_MEM_NOT_CACHEABLE)) {
        if (!(flags & VM_MEM_GUARDED))
            template |= INTEL_PTE_PTA;
        template |= INTEL_PTE_NCACHE;
    }
    if (pmap != kernel_pmap && !is_ept)
        template |= INTEL_PTE_USER;
    if (prot & VM_PROT_READ)
        template |= PTE_READ(is_ept);
    if (prot & VM_PROT_WRITE) {
        template |= PTE_WRITE(is_ept);
        if (is_ept && !pmap_ept_support_ad) {
            template |= PTE_MOD(is_ept);
            if (IS_MANAGED_PAGE(pai))
                pmap_phys_attributes[pai] |= PHYS_MODIFIED;
        }
    }
    if (prot & VM_PROT_EXECUTE) {
        template = pte_set_ex(template, is_ept);
    }

    if (set_NX)
        template = pte_remove_ex(template, is_ept);

    if (wired) {
        template |= INTEL_PTE_WIRED;
        OSAddAtomic(+1, &pmap->stats.wired_count);
        pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
    }
    if (superpage)
        template |= INTEL_PTE_PS;

    /* For hardware that doesn't have EPT AD support, we always set REFMOD for EPT PTEs */
    if (is_ept && !pmap_ept_support_ad) {
        template |= PTE_REF(is_ept);
        if (IS_MANAGED_PAGE(pai))
            pmap_phys_attributes[pai] |= PHYS_REFERENCED;
    }

    pmap_store_pte(pte, template);

    /*
     * if this was a managed page we delayed unlocking the pv until here
     * to prevent pmap_page_protect et al from finding it until the pte
     * has been modified
     */
    if (IS_MANAGED_PAGE(pai)) {
        UNLOCK_PVH(pai);
    }
Done:
    if (need_tlbflush == TRUE) {
        if (options & PMAP_OPTIONS_NOFLUSH)
            PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
        else
            PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
    }
    if (pvh_e != PV_HASHED_ENTRY_NULL) {
        PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
    }
    if (pvh_new != PV_HASHED_ENTRY_NULL) {
        PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
    }
    PMAP_UNLOCK(pmap);

    if (delpage_pm_obj) {
        vm_page_t m;

        vm_object_lock(delpage_pm_obj);
        m = vm_page_lookup(delpage_pm_obj, (delpage_pde_index * PAGE_SIZE));
        if (m == VM_PAGE_NULL)
            panic("pmap_enter: pte page not in object");
        VM_PAGE_FREE(m);
        vm_object_unlock(delpage_pm_obj);
        OSAddAtomic(-1, &inuse_ptepages_count);
        PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
    }

    PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
    return KERN_SUCCESS;
}
/*
 *  Remove a range of hardware page-table entries.
 *  The entries given are the first (inclusive)
 *  and last (exclusive) entries for the VM pages.
 *  The virtual address is the va for the first pte.
 *
 *  The pmap must be locked.
 *  If the pmap is not the kernel pmap, the range must lie
 *  entirely within one pte-page.  This is NOT checked.
 *  Assumes that the pte-page exists.
 */
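
/*
 * The spte/epte arguments follow the usual half-open convention: for a run
 * of n pages starting at start_vaddr, epte == spte + n, and *epte itself is
 * never touched.
 */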
void
pmap_remove_range(
    pmap_t          pmap,
    vm_map_offset_t start_vaddr,
    pt_entry_t      *spte,
    pt_entry_t      *epte)
{
    pmap_remove_range_options(pmap, start_vaddr, spte, epte,
                              PMAP_OPTIONS_REMOVE);
}
void
pmap_remove_range_options(
    pmap_t          pmap,
    vm_map_offset_t start_vaddr,
    pt_entry_t      *spte,
    pt_entry_t      *epte,
    int             options)
{
    pt_entry_t          *cpte;
    pv_hashed_entry_t   pvh_et = PV_HASHED_ENTRY_NULL;
    pv_hashed_entry_t   pvh_eh = PV_HASHED_ENTRY_NULL;
    pv_hashed_entry_t   pvh_e;
    int                 pvh_cnt = 0;
    int                 num_removed, num_unwired, num_found, num_invalid;
    int                 num_device, num_external, num_internal, num_reusable;
    uint64_t            num_compressed;
    int                 pai;
    pmap_paddr_t        pa;
    vm_map_offset_t     vaddr;
    boolean_t           is_ept = is_ept_pmap(pmap);

    num_removed = 0;
    num_unwired = 0;
    num_found = 0;
    num_invalid = 0;
    num_device = 0;
    num_external = 0;
    num_internal = 0;
    num_reusable = 0;
    num_compressed = 0;
    /* invalidate the PTEs first to "freeze" them */
    for (cpte = spte, vaddr = start_vaddr;
         cpte < epte;
         cpte++, vaddr += PAGE_SIZE_64) {
        pt_entry_t p = *cpte;

        pa = pte_to_pa(p);
        if (pa == 0) {
            if (pmap != kernel_pmap &&
                (options & PMAP_OPTIONS_REMOVE) &&
                (p & PTE_COMPRESSED)) {
                /* one less "compressed" */
                num_compressed++;
                /* clear marker */
                /* XXX probably does not need to be atomic! */
                pmap_update_pte(cpte, PTE_COMPRESSED, 0);
            }
            continue;
        }
        num_found++;

        if (iswired(p))
            num_unwired++;

        pai = pa_index(pa);

        if (!IS_MANAGED_PAGE(pai)) {
            /*
             *  Outside range of managed physical memory.
             *  Just remove the mappings.
             */
            pmap_store_pte(cpte, 0);
            num_device++;
            continue;
        }

        if ((p & PTE_VALID_MASK(is_ept)) == 0)
            num_invalid++;

        /* invalidate the PTE */
        pmap_update_pte(cpte, PTE_VALID_MASK(is_ept), 0);
    }

    if (num_found == 0) {
        /* nothing was changed: we're done */
        goto update_counts;
    }

    /* propagate the invalidates to other CPUs */

    PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
    for (cpte = spte, vaddr = start_vaddr;
         cpte < epte;
         cpte++, vaddr += PAGE_SIZE_64) {

        pa = pte_to_pa(*cpte);
        if (pa == 0)
            continue;

        pai = pa_index(pa);

        LOCK_PVH(pai);

        pa = pte_to_pa(*cpte);
        if (pa == 0) {
            UNLOCK_PVH(pai);
            continue;
        }
        num_removed++;
        if (IS_REUSABLE_PAGE(pai)) {
            num_reusable++;
        } else if (IS_INTERNAL_PAGE(pai)) {
            num_internal++;
        } else {
            num_external++;
        }

        /*
         * Get the modify and reference bits, then
         * nuke the entry in the page table
         */
        /* remember reference and change */
        if (!is_ept) {
            pmap_phys_attributes[pai] |=
                *cpte & (PHYS_MODIFIED | PHYS_REFERENCED);
        } else {
            pmap_phys_attributes[pai] |=
                ept_refmod_to_physmap((*cpte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
        }

        /*
         * Remove the mapping from the pvlist for this physical page.
         */
        pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);

        /* completely invalidate the PTE */
        pmap_store_pte(cpte, 0);

        UNLOCK_PVH(pai);

        if (pvh_e != PV_HASHED_ENTRY_NULL) {
            pvh_e->qlink.next = (queue_entry_t) pvh_eh;
            pvh_eh = pvh_e;

            if (pvh_et == PV_HASHED_ENTRY_NULL) {
                pvh_et = pvh_e;
            }
            pvh_cnt++;
        }
    }

    if (pvh_eh != PV_HASHED_ENTRY_NULL) {
        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
    }
update_counts:
    /*
     *  Update the counts
     */
    if (pmap->stats.resident_count < num_removed)
        panic("pmap_remove_range: resident_count");
    pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
    pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(num_removed));
    assert(pmap->stats.resident_count >= num_removed);
    OSAddAtomic(-num_removed, &pmap->stats.resident_count);

    if (pmap != kernel_pmap) {
        assert(pmap->stats.device >= num_device);
        if (num_device)
            OSAddAtomic(-num_device, &pmap->stats.device);

        assert(pmap->stats.external >= num_external);
        if (num_external)
            OSAddAtomic(-num_external, &pmap->stats.external);
        assert(pmap->stats.internal >= num_internal);
        if (num_internal)
            OSAddAtomic(-num_internal, &pmap->stats.internal);
        assert(pmap->stats.reusable >= num_reusable);
        if (num_reusable)
            OSAddAtomic(-num_reusable, &pmap->stats.reusable);
        assert(pmap->stats.compressed >= num_compressed);
        if (num_compressed)
            OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
    }

    if (pmap->stats.wired_count < num_unwired)
        panic("pmap_remove_range: wired_count");
    assert(pmap->stats.wired_count >= num_unwired);
    OSAddAtomic(-num_unwired, &pmap->stats.wired_count);
    pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));

    return;
}
/*
 *  Remove the given range of addresses
 *  from the specified map.
 *
 *  It is assumed that the start and end are properly
 *  rounded to the hardware page size.
 */
void
pmap_remove(
    pmap_t      map,
    addr64_t    s64,
    addr64_t    e64)
{
    pmap_remove_options(map, s64, e64, PMAP_OPTIONS_REMOVE);
}

void
pmap_remove_options(
    pmap_t      map,
    addr64_t    s64,
    addr64_t    e64,
    int         options)
{
    pt_entry_t  *pde;
    pt_entry_t  *spte, *epte;
    addr64_t    l64;
    uint64_t    deadline;
    boolean_t   is_ept;

    if (map == PMAP_NULL || s64 == e64)
        return;

    is_ept = is_ept_pmap(map);

    PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
               map,
               (uint32_t) (s64 >> 32), s64,
               (uint32_t) (e64 >> 32), e64);
    /*
     * Check that address range in the kernel does not overlap the stacks.
     * We initialize local static min/max variables once to avoid making
     * 2 function calls for every remove. Note also that these functions
     * both return 0 before kernel stacks have been initialized, and hence
     * the panic is not triggered in this case.
     */

    if (map == kernel_pmap) {
        static vm_offset_t kernel_stack_min = 0;
        static vm_offset_t kernel_stack_max = 0;

        if (kernel_stack_min == 0) {
            kernel_stack_min = min_valid_stack_address();
            kernel_stack_max = max_valid_stack_address();
        }
        if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
            (kernel_stack_min < e64 && e64 <= kernel_stack_max))
            panic("pmap_remove() attempted in kernel stack");
    }

    /*
     * The values of kernel_stack_min and kernel_stack_max are no longer
     * relevant now that we allocate kernel stacks in the kernel map,
     * so the old code above no longer applies.  If we wanted to check that
     * we weren't removing a mapping of a page in a kernel stack we'd
     * mark the PTE with an unused bit and check that here.
     */
    PMAP_LOCK(map);

    deadline = rdtsc64() + max_preemption_latency_tsc;

    while (s64 < e64) {
        l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
        if (l64 > e64)
            l64 = e64;
        pde = pmap_pde(map, s64);

        if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
            if (*pde & PTE_PS) {
                /*
                 * If we're removing a superpage, pmap_remove_range()
                 * must work on level 2 instead of level 1; and we're
                 * only passing a single level 2 entry instead of a
                 * level 1 range.
                 */
                spte = pde;
                epte = spte+1; /* excluded */
            } else {
                spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
                spte = &spte[ptenum(s64)];
                epte = &spte[intel_btop(l64 - s64)];
            }
            pmap_remove_range_options(map, s64, spte, epte,
                                      options);
        }
        s64 = l64;

        if (s64 < e64 && rdtsc64() >= deadline) {
            PMAP_UNLOCK(map);
            /* TODO: Rapid release/reacquisition can defeat
             * the "backoff" intent here; either consider a
             * fair spinlock, or a scheme whereby each lock
             * attempt marks the processor as within a spinlock
             * acquisition, and scan CPUs here to determine
             * if a backoff is necessary, to avoid sacrificing
             * performance in the common case.
             */
            PMAP_LOCK(map);
            deadline = rdtsc64() + max_preemption_latency_tsc;
        }
    }

    PMAP_UNLOCK(map);

    PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
               0, 0, 0, 0, 0);
}
void
pmap_page_protect(
    ppnum_t     pn,
    vm_prot_t   prot)
{
    pmap_page_protect_options(pn, prot, 0, NULL);
}

/*
 *  Routine:    pmap_page_protect_options
 *
 *  Function:
 *      Lower the permission for all mappings to a given
 *      page.
 */
void
pmap_page_protect_options(
    ppnum_t         pn,
    vm_prot_t       prot,
    unsigned int    options,
    void            *arg)
{
    pv_hashed_entry_t   pvh_eh = PV_HASHED_ENTRY_NULL;
    pv_hashed_entry_t   pvh_et = PV_HASHED_ENTRY_NULL;
    pv_hashed_entry_t   nexth;
    int                 pvh_cnt = 0;
    pv_rooted_entry_t   pv_h;
    pv_rooted_entry_t   pv_e;
    pv_hashed_entry_t   pvh_e;
    pt_entry_t          *pte;
    int                 pai;
    pmap_t              pmap;
    boolean_t           remove;
    pt_entry_t          new_pte_value;
    boolean_t           is_ept;

    assert(pn != vm_page_fictitious_addr);
    if (pn == vm_page_guard_addr)
        return;

    pai = ppn_to_pai(pn);

    if (!IS_MANAGED_PAGE(pai)) {
        /*
         *  Not a managed page.
         */
        return;
    }

    PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
               pn, prot, 0, 0, 0);

    /*
     * Determine the new protection.
     */
    switch (prot) {
    case VM_PROT_READ:
    case VM_PROT_READ | VM_PROT_EXECUTE:
        remove = FALSE;
        break;
    case VM_PROT_ALL:
        return;     /* nothing to do */
    default:
        remove = TRUE;
        break;
    }

    pv_h = pai_to_pvh(pai);

    LOCK_PVH(pai);

    /*
     * Walk down PV list, if any, changing or removing all mappings.
     */
    if (pv_h->pmap == PMAP_NULL)
        goto done;

    pv_e = pv_h;
    pvh_e = (pv_hashed_entry_t) pv_e;   /* cheat */

    do {
        vm_map_offset_t vaddr;

        if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
            (pmap_phys_attributes[pai] & PHYS_MODIFIED)) {
            /* page was modified, so it will be compressed */
            options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
            options |= PMAP_OPTIONS_COMPRESSOR;
        }

        pmap = pv_e->pmap;
        is_ept = is_ept_pmap(pmap);
        vaddr = pv_e->va;
        pte = pmap_pte(pmap, vaddr);

        pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
            "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);

        if (0 == pte) {
            panic("pmap_page_protect() "
                  "pmap=%p pn=0x%x vaddr=0x%llx\n",
                  pmap, pn, vaddr);
        }

        nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);

        /*
         * Remove the mapping if new protection is NONE
         */
        if (remove) {

            /* Remove per-pmap wired count */
            if (iswired(*pte)) {
                OSAddAtomic(-1, &pmap->stats.wired_count);
                pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
            }

            if (pmap != kernel_pmap &&
                (options & PMAP_OPTIONS_COMPRESSOR) &&
                IS_INTERNAL_PAGE(pai)) {
                /* mark this PTE as having been "reclaimed" */
                new_pte_value = PTE_COMPRESSED;
            } else {
                new_pte_value = 0;
            }

            if (options & PMAP_OPTIONS_NOREFMOD) {
                pmap_store_pte(pte, new_pte_value);

                if (options & PMAP_OPTIONS_NOFLUSH)
                    PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
                else
                    PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
            } else {
                /*
                 * Remove the mapping, collecting dirty bits.
                 */
                pmap_update_pte(pte, PTE_VALID_MASK(is_ept), 0);

                PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
                if ((options &
                     PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED) &&
                    ! (pmap_phys_attributes[pai] &
                       PHYS_MODIFIED) &&
                    (*pte & PHYS_MODIFIED)) {
                    /*
                     * Page is actually "modified" and
                     * will be compressed.  Start
                     * accounting for it as "compressed".
                     */
                    options &= ~PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED;
                    options |= PMAP_OPTIONS_COMPRESSOR;
                    new_pte_value = PTE_COMPRESSED;
                }
                if (!is_ept) {
                    pmap_phys_attributes[pai] |=
                        *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
                } else {
                    pmap_phys_attributes[pai] |=
                        ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
                }
                pmap_store_pte(pte, new_pte_value);
            }

            if (new_pte_value == PTE_COMPRESSED) {
                /* one more "compressed" page */
                OSAddAtomic64(+1, &pmap->stats.compressed);
                PMAP_STATS_PEAK(pmap->stats.compressed);
                pmap->stats.compressed_lifetime++;
            }

            if (pmap->stats.resident_count < 1)
                panic("pmap_page_protect: resident_count");
            pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
            assert(pmap->stats.resident_count >= 1);
            OSAddAtomic(-1, &pmap->stats.resident_count);
            if (options & PMAP_OPTIONS_COMPRESSOR) {
                /*
                 * This removal is only being done so we can send this page to
                 * the compressor; therefore it mustn't affect total task footprint.
                 */
                pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
            } else {
                pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
            }

            if (pmap != kernel_pmap) {
                if (IS_REUSABLE_PAGE(pai)) {
                    assert(pmap->stats.reusable > 0);
                    OSAddAtomic(-1, &pmap->stats.reusable);
                } else if (IS_INTERNAL_PAGE(pai)) {
                    assert(pmap->stats.internal > 0);
                    OSAddAtomic(-1, &pmap->stats.internal);
                } else {
                    assert(pmap->stats.external > 0);
                    OSAddAtomic(-1, &pmap->stats.external);
                }
            }

            /*
             * Deal with the pv_rooted_entry.
             */

            if (pv_e == pv_h) {
                /*
                 * Fix up head later.
                 */
                pv_h->pmap = PMAP_NULL;
            } else {
                /*
                 * Delete this entry.
                 */
                pv_hash_remove(pvh_e);
                pvh_e->qlink.next = (queue_entry_t) pvh_eh;
                pvh_eh = pvh_e;

                if (pvh_et == PV_HASHED_ENTRY_NULL)
                    pvh_et = pvh_e;
                pvh_cnt++;
            }
        } else {
            /*
             * Write-protect, after opportunistic refmod collect
             */
            if (!is_ept) {
                pmap_phys_attributes[pai] |=
                    *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
            } else {
                pmap_phys_attributes[pai] |=
                    ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED);
            }
            pmap_update_pte(pte, PTE_WRITE(is_ept), 0);

            if (options & PMAP_OPTIONS_NOFLUSH)
                PMAP_UPDATE_TLBS_DELAYED(pmap, vaddr, vaddr + PAGE_SIZE, (pmap_flush_context *)arg);
            else
                PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
        }
        pvh_e = nexth;
    } while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
    /*
     * If pv_head mapping was removed, fix it up.
     */
    if (pv_h->pmap == PMAP_NULL) {
        pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);

        if (pvh_e != (pv_hashed_entry_t) pv_h) {
            pv_hash_remove(pvh_e);
            pv_h->pmap = pvh_e->pmap;
            pv_h->va = pvh_e->va;
            pvh_e->qlink.next = (queue_entry_t) pvh_eh;
            pvh_eh = pvh_e;

            if (pvh_et == PV_HASHED_ENTRY_NULL)
                pvh_et = pvh_e;
            pvh_cnt++;
        }
    }

    if (pvh_eh != PV_HASHED_ENTRY_NULL) {
        PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
    }
done:
    UNLOCK_PVH(pai);

    PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
               0, 0, 0, 0, 0);
}
/*
 *  Clear specified attribute bits.
 */
void
phys_attribute_clear(
    ppnum_t         pn,
    int             bits,
    unsigned int    options,
    void            *arg)
{
    pv_rooted_entry_t   pv_h;
    pv_hashed_entry_t   pv_e;
    pt_entry_t          *pte;
    int                 pai;
    pmap_t              pmap;
    char                attributes = 0;
    boolean_t           is_internal, is_reusable, is_ept;
    int                 ept_bits_to_clear;
    boolean_t           ept_keep_global_mod = FALSE;
    if ((bits & PHYS_MODIFIED) &&
        (options & PMAP_OPTIONS_NOFLUSH) &&
        arg == NULL) {
        panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
              "should not clear 'modified' without flushing TLBs\n",
              pn, bits, options, arg);
    }

    /* We only support converting MOD and REF bits for EPT PTEs in this function */
    assert((bits & ~(PHYS_REFERENCED | PHYS_MODIFIED)) == 0);

    ept_bits_to_clear = (unsigned)physmap_refmod_to_ept(bits & (PHYS_MODIFIED | PHYS_REFERENCED));

    assert(pn != vm_page_fictitious_addr);
    if (pn == vm_page_guard_addr)
        return;

    pai = ppn_to_pai(pn);

    if (!IS_MANAGED_PAGE(pai)) {
        /*
         *  Not a managed page.
         */
        return;
    }

    PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
               pn, bits, 0, 0, 0);

    pv_h = pai_to_pvh(pai);

    LOCK_PVH(pai);
    /*
     * Walk down PV list, clearing all modify or reference bits.
     * We do not have to lock the pv_list because we have
     * the PV lock for this physical page held.
     */
    if (pv_h->pmap != PMAP_NULL) {
        /*
         * There are some mappings.
         */

        is_internal = IS_INTERNAL_PAGE(pai);
        is_reusable = IS_REUSABLE_PAGE(pai);

        pv_e = (pv_hashed_entry_t)pv_h;

        do {
            vm_map_offset_t va;
            char            pte_bits;

            pmap = pv_e->pmap;
            is_ept = is_ept_pmap(pmap);
            va = pv_e->va;
            pte_bits = 0;

            if (bits) {
                pte = pmap_pte(pmap, va);
                /* grab ref/mod bits from this PTE */
                pte_bits = (*pte & (PTE_REF(is_ept) | PTE_MOD(is_ept)));
                /* propagate to page's global attributes */
                if (!is_ept) {
                    attributes |= pte_bits;
                } else {
                    attributes |= ept_refmod_to_physmap(pte_bits);
                    if (!pmap_ept_support_ad && (pte_bits & INTEL_EPT_MOD)) {
                        ept_keep_global_mod = TRUE;
                    }
                }
                /* which bits to clear for this PTE? */
                if (!is_ept) {
                    pte_bits &= bits;
                } else {
                    pte_bits &= ept_bits_to_clear;
                }
            }

            /*
             * Clear modify and/or reference bits.
             */
            if (pte_bits) {
                pmap_update_pte(pte, bits, 0);

                /* Ensure all processors using this translation
                 * invalidate this TLB entry. The invalidation
                 * *must* follow the PTE update, to ensure that
                 * the TLB shadow of the 'D' bit (in particular)
                 * is synchronized with the updated PTE.
                 */
                if (! (options & PMAP_OPTIONS_NOFLUSH)) {
                    /* flush TLBS now */
                    PMAP_UPDATE_TLBS(pmap,
                                     va,
                                     va + PAGE_SIZE);
                } else if (arg) {
                    /* delayed TLB flush: add "pmap" info */
                    PMAP_UPDATE_TLBS_DELAYED(
                        pmap,
                        va,
                        va + PAGE_SIZE,
                        (pmap_flush_context *)arg);
                } else {
                    /* no TLB flushing at all */
                }
            }

            /* update pmap "reusable" stats */
            if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
                is_reusable &&
                pmap != kernel_pmap) {
                /* one less "reusable" */
                assert(pmap->stats.reusable > 0);
                OSAddAtomic(-1, &pmap->stats.reusable);
                if (is_internal) {
                    /* one more "internal" */
                    OSAddAtomic(+1, &pmap->stats.internal);
                    PMAP_STATS_PEAK(pmap->stats.internal);
                } else {
                    /* one more "external" */
                    OSAddAtomic(+1, &pmap->stats.external);
                    PMAP_STATS_PEAK(pmap->stats.external);
                }
            } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
                       !is_reusable &&
                       pmap != kernel_pmap) {
                /* one more "reusable" */
                OSAddAtomic(+1, &pmap->stats.reusable);
                PMAP_STATS_PEAK(pmap->stats.reusable);
                if (is_internal) {
                    /* one less "internal" */
                    assert(pmap->stats.internal > 0);
                    OSAddAtomic(-1, &pmap->stats.internal);
                } else {
                    /* one less "external" */
                    assert(pmap->stats.external > 0);
                    OSAddAtomic(-1, &pmap->stats.external);
                }
            }

            pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

        } while (pv_e != (pv_hashed_entry_t)pv_h);
    }
    /* Opportunistic refmod collection, annulled
     * if both REF and MOD are being cleared.
     */

    pmap_phys_attributes[pai] |= attributes;

    if (ept_keep_global_mod) {
        /*
         * If the hardware doesn't support AD bits for EPT PTEs and someone is
         * requesting that we clear the modified bit for a phys page, we need
         * to ensure that there are no EPT mappings for the page with the
         * modified bit set. If there are, we cannot clear the global modified bit.
         */
        bits &= ~PHYS_MODIFIED;
    }
    pmap_phys_attributes[pai] &= ~(bits);

    /* update this page's "reusable" status */
    if (options & PMAP_OPTIONS_CLEAR_REUSABLE) {
        pmap_phys_attributes[pai] &= ~PHYS_REUSABLE;
    } else if (options & PMAP_OPTIONS_SET_REUSABLE) {
        pmap_phys_attributes[pai] |= PHYS_REUSABLE;
    }

    UNLOCK_PVH(pai);

    PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
               0, 0, 0, 0, 0);
}
/*
 *  Check specified attribute bits.
 */
int
phys_attribute_test(
    ppnum_t     pn,
    int         bits)
{
    pv_rooted_entry_t   pv_h;
    pv_hashed_entry_t   pv_e;
    pt_entry_t          *pte;
    int                 pai;
    pmap_t              pmap;
    int                 attributes;
    boolean_t           is_ept;

    assert(pn != vm_page_fictitious_addr);
    assert((bits & ~(PHYS_MODIFIED | PHYS_REFERENCED)) == 0);
    if (pn == vm_page_guard_addr)
        return 0;

    pai = ppn_to_pai(pn);
    if (!IS_MANAGED_PAGE(pai)) {
        /*
         *  Not a managed page.
         */
        return 0;
    }

    /*
     * Fast check...  if bits already collected
     * no need to take any locks...
     * if not set, we need to recheck after taking
     * the lock in case they got pulled in while
     * we were waiting for the lock
     */
    if ((pmap_phys_attributes[pai] & bits) == bits)
        return bits;

    pv_h = pai_to_pvh(pai);

    LOCK_PVH(pai);

    attributes = pmap_phys_attributes[pai] & bits;

    /*
     * Walk down PV list, checking the mappings until we
     * reach the end or we've found the desired attributes.
     */
    if (attributes != bits &&
        pv_h->pmap != PMAP_NULL) {
        /*
         * There are some mappings.
         */
        pv_e = (pv_hashed_entry_t)pv_h;
        do {
            vm_map_offset_t va;

            pmap = pv_e->pmap;
            is_ept = is_ept_pmap(pmap);
            va = pv_e->va;
            /*
             * pick up modify and/or reference bits from mapping
             */

            pte = pmap_pte(pmap, va);
            if (!is_ept) {
                attributes |= (int)(*pte & bits);
            } else {
                attributes |= (int)(ept_refmod_to_physmap((*pte & (INTEL_EPT_REF | INTEL_EPT_MOD))) & (PHYS_MODIFIED | PHYS_REFERENCED));
            }

            pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

        } while ((attributes != bits) &&
                 (pv_e != (pv_hashed_entry_t)pv_h));
    }
    pmap_phys_attributes[pai] |= attributes;

    UNLOCK_PVH(pai);
    return (attributes);
}
/*
 *  Routine:    pmap_change_wiring
 *  Function:   Change the wiring attribute for a map/virtual-address
 *              pair.
 *  In/out conditions:
 *      The mapping must already exist in the pmap.
 */
void
pmap_change_wiring(
    pmap_t          map,
    vm_map_offset_t vaddr,
    boolean_t       wired)
{
    pt_entry_t  *pte;

    PMAP_LOCK(map);

    if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
        panic("pmap_change_wiring(%p,0x%llx,%d): pte missing",
              map, vaddr, wired);

    if (wired && !iswired(*pte)) {
        /*
         * wiring down mapping
         */
        pmap_ledger_credit(map, task_ledgers.wired_mem, PAGE_SIZE);
        OSAddAtomic(+1, &map->stats.wired_count);
        pmap_update_pte(pte, 0, PTE_WIRED);
    }
    else if (!wired && iswired(*pte)) {
        /*
         * unwiring mapping
         */
        assert(map->stats.wired_count >= 1);
        OSAddAtomic(-1, &map->stats.wired_count);
        pmap_ledger_debit(map, task_ledgers.wired_mem, PAGE_SIZE);
        pmap_update_pte(pte, PTE_WIRED, 0);
    }

    PMAP_UNLOCK(map);
}
1975 * "Backdoor" direct map routine for early mappings.
1976 * Useful for mapping memory outside the range
1977 * Sets A, D and NC if requested
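
/*
 * Illustrative (hypothetical) use: an early driver could map a device
 * aperture uncached with something like
 *  pmap_map_bd(vaddr, phys_start, phys_end,
 *              VM_PROT_READ | VM_PROT_WRITE, VM_MEM_NOT_CACHEABLE);
 */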
vm_offset_t
pmap_map_bd(
    vm_offset_t     virt,
    vm_map_offset_t start_addr,
    vm_map_offset_t end_addr,
    vm_prot_t       prot,
    unsigned int    flags)
{
    pt_entry_t  template;
    pt_entry_t  *pte;
    vm_offset_t base = virt;

    template = pa_to_pte(start_addr)
        | INTEL_PTE_REF
        | INTEL_PTE_MOD
        | INTEL_PTE_WIRED
        | INTEL_PTE_VALID;

    if ((flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT)) == VM_MEM_NOT_CACHEABLE) {
        template |= INTEL_PTE_NCACHE;
        if (!(flags & (VM_MEM_GUARDED)))
            template |= INTEL_PTE_PTA;
    }

#if defined(__x86_64__)
    if ((prot & VM_PROT_EXECUTE) == 0)
        template |= INTEL_PTE_NX;
#endif

    if (prot & VM_PROT_WRITE)
        template |= INTEL_PTE_WRITE;

    while (start_addr < end_addr) {
        pte = pmap_pte(kernel_pmap, (vm_map_offset_t)virt);
        if (pte == PT_ENTRY_NULL) {
            panic("pmap_map_bd: Invalid kernel address\n");
        }
        pmap_store_pte(pte, template);
        pte_increment_pa(template);
        virt += PAGE_SIZE;
        start_addr += PAGE_SIZE;
    }

    PMAP_UPDATE_TLBS(kernel_pmap, base, base + end_addr - start_addr);
    return (virt);
}
mach_vm_size_t
pmap_query_resident(
    pmap_t          pmap,
    addr64_t        s64,
    addr64_t        e64,
    mach_vm_size_t  *compressed_bytes_p)
{
    pt_entry_t      *pde;
    pt_entry_t      *spte, *epte;
    addr64_t        l64;
    uint64_t        deadline;
    mach_vm_size_t  resident_bytes;
    mach_vm_size_t  compressed_bytes;
    boolean_t       is_ept;

    if (pmap == PMAP_NULL || pmap == kernel_pmap || s64 == e64) {
        if (compressed_bytes_p) {
            *compressed_bytes_p = 0;
        }
        return 0;
    }

    is_ept = is_ept_pmap(pmap);

    PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
               pmap,
               (uint32_t) (s64 >> 32), s64,
               (uint32_t) (e64 >> 32), e64);

    resident_bytes = 0;
    compressed_bytes = 0;

    PMAP_LOCK(pmap);

    deadline = rdtsc64() + max_preemption_latency_tsc;

    while (s64 < e64) {
        l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
        if (l64 > e64)
            l64 = e64;
        pde = pmap_pde(pmap, s64);

        if (pde && (*pde & PTE_VALID_MASK(is_ept))) {
            if (*pde & PTE_PS) {
                /* superpage: not supported */
            } else {
                spte = pmap_pte(pmap,
                                (s64 & ~(pde_mapped_size - 1)));
                spte = &spte[ptenum(s64)];
                epte = &spte[intel_btop(l64 - s64)];

                for (; spte < epte; spte++) {
                    if (pte_to_pa(*spte) != 0) {
                        resident_bytes += PAGE_SIZE;
                    } else if (*spte & PTE_COMPRESSED) {
                        compressed_bytes += PAGE_SIZE;
                    }
                }
            }
        }
        s64 = l64;

        if (s64 < e64 && rdtsc64() >= deadline) {
            PMAP_UNLOCK(pmap);
            PMAP_LOCK(pmap);
            deadline = rdtsc64() + max_preemption_latency_tsc;
        }
    }

    PMAP_UNLOCK(pmap);

    PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
               0, 0, 0, 0, 0);

    if (compressed_bytes_p) {
        *compressed_bytes_p = compressed_bytes;
    }
    return resident_bytes;
}
#if MACH_ASSERT
void
pmap_set_process(
    __unused pmap_t pmap,
    __unused int pid,
    __unused char *procname)
{
}
#endif /* MACH_ASSERT */