/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <vm/vm_map.h>
#include <i386/pmap_internal.h>
void		pmap_remove_range(
			pmap_t		pmap,
			vm_map_offset_t	start_vaddr,
			pt_entry_t	*spte,
			pt_entry_t	*epte);
/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on a NBPDE boundary.
 */

/* These symbols may be referenced directly by VM */
uint64_t pmap_nesting_size_min = NBPDE;
uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
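/*
 * Added clarification (hedged, not original source text; values follow from
 * the NBPDE == 2MiB note above): pmap_nesting_size_min is the smallest
 * nestable granule, and 0 - (uint64_t)NBPDE wraps to 0xFFFFFFFFFFE00000ULL,
 * the largest 2MiB-aligned 64-bit value, so pmap_nesting_size_max effectively
 * imposes no bound beyond alignment (pmap_nest() itself further limits a
 * request to 16TB below).  For example:
 *
 *	(0x40000000ULL & (pmap_nesting_size_min - 1)) == 0	1GiB: aligned, accepted
 *	(0x00100000ULL & (pmap_nesting_size_min - 1)) != 0	1MiB: rejected
 */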
/*
 *	kern_return_t pmap_nest(grand, subord, va_start, nstart, size)
 *
 *	grand    = the pmap into which subord will be nested
 *	subord   = the pmap that is inserted into grand
 *	va_start = start of the range in grand to be inserted
 *	nstart   = start of the corresponding range in the nested (subord) pmap
 *	size     = size of the nested area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *
 *	Note that we depend upon higher-level VM locks to ensure that things don't
 *	change while we are doing this.  For example, VM should not be doing any
 *	pmap enters while it is nesting, nor performing two nests at once.
 */
/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */
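/*
 * A minimal usage sketch (added; the caller and the base/size identifiers are
 * hypothetical -- in practice the Mach VM shared-region code drives this --
 * but the call shape and alignment rules are those enforced by pmap_nest()
 * below):
 *
 *	// both offsets must be equal and pmap_nesting_size_min aligned
 *	kern_return_t kr = pmap_nest(task_pmap,			// "grand"
 *				     shared_region_pmap,	// "subord"
 *				     shared_base,		// va_start
 *				     shared_base,		// nstart (must equal va_start)
 *				     shared_size);		// multiple of 2MiB
 *	if (kr != KERN_SUCCESS)
 *		panic("nesting shared region failed: %d", kr);
 */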
kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
	vm_map_offset_t	vaddr, nvaddr;
	pd_entry_t	*pde, *npde;
	unsigned int	i;
	uint64_t	num_pde;
	if ((size & (pmap_nesting_size_min-1)) ||
	    (va_start & (pmap_nesting_size_min-1)) ||
	    (nstart & (pmap_nesting_size_min-1)) ||
	    ((size >> 28) > 65536))	/* Max size we can nest is 16TB */
		return KERN_INVALID_VALUE;
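	/*
	 * Added note on the limit check above (a clarification, not original
	 * source text): (size >> 28) > 65536 rejects any size whose upper bits
	 * exceed 65536 << 28 = 2^44 bytes = 16TiB (the shift discards the low
	 * 28 bits), which is the "16TB" maximum noted in the comment.
	 */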
84 panic("pmap_nest: size is invalid - %016llX\n", size
);
87 if (va_start
!= nstart
)
88 panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start
, nstart
);
	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
		   (uintptr_t) grand, (uintptr_t) subord,
		   (uintptr_t) (va_start>>32), (uintptr_t) va_start, 0);

	nvaddr = (vm_map_offset_t)nstart;
	num_pde = size >> PDESHIFT;

	subord->pm_shared = TRUE;
	for (i = 0; i < num_pde;) {
		if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {

			npde = pmap64_pdpt(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				pmap_expand_pdpt(subord, nvaddr);
				npde = pmap64_pdpt(subord, nvaddr);
			}
			*npde |= INTEL_PDPTE_NESTED;
			nvaddr += NBPDPT;
			i += (uint32_t)NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nvaddr);

			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				pmap_expand(subord, nvaddr);
				npde = pmap_pde(subord, nvaddr);
			}
			nvaddr += NBPDE;
			i++;
		}
	}
	vaddr = (vm_map_offset_t)va_start;

	for (i = 0; i < num_pde;) {
		pd_entry_t tpde;

		if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
			npde = pmap64_pdpt(subord, vaddr);
			if (npde == 0)
				panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
			tpde = *npde;
			pde = pmap64_pdpt(grand, vaddr);
			if (0 == pde) {
				pmap_expand_pml4(grand, vaddr);
				pde = pmap64_pdpt(grand, vaddr);
			}
			if (0 == pde)
				panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
			pmap_store_pte(pde, tpde);
			vaddr += NBPDPT;
			i += (uint32_t) NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nstart);
			if (npde == 0)
				panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
			tpde = *npde;
			nstart += NBPDE;
			pde = pmap_pde(grand, vaddr);
			if ((0 == pde) && cpu_64bit) {
				pmap_expand_pdpt(grand, vaddr);
				pde = pmap_pde(grand, vaddr);
			}
			if (0 == pde)
				panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
			pmap_store_pte(pde, tpde);
			vaddr += NBPDE;
			i++;
		}
	}

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}
/*
 *	kern_return_t pmap_unnest(grand, vaddr, size)
 *
 *	grand = the pmap from which the nested range will be removed
 *	vaddr = start of the range in grand to be un-nested
 *	size  = size of the range to be un-nested
 *
 *	Removes a pmap from another.  This is used to implement shared segments.
 */
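/*
 * A minimal usage sketch (added; the identifiers are hypothetical, the call
 * shape is that of pmap_unnest() below): tearing down the nesting established
 * in the pmap_nest() example above would look like
 *
 *	kern_return_t kr = pmap_unnest(task_pmap, shared_base, shared_size);
 *
 * where shared_base and shared_size must be aligned to pmap_nesting_size_min,
 * exactly as the alignment check in pmap_unnest() requires.
 */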
kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
	addr64_t	va_start, va_end;
	uint64_t	npdpt = PMAP_INVALID_PDPTNUM;
	pd_entry_t	*pde;
	unsigned int	i;
	uint64_t	num_pde;

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
		   (uintptr_t) grand,
		   (uintptr_t) (vaddr>>32), (uintptr_t) vaddr, 0, 0);

	if ((size & (pmap_nesting_size_min-1)) ||
	    (vaddr & (pmap_nesting_size_min-1))) {
		panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
		      grand, vaddr, size);
	}
	/* align everything to PDE boundaries */
	va_start = vaddr & ~(NBPDE-1);
	va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
	size = va_end - va_start;

	num_pde = size >> PDESHIFT;
	for (i = 0; i < num_pde; ) {
		if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
			npdpt = pdptnum(grand, vaddr);
			pde = pmap64_pdpt(grand, vaddr);
			if (pde && (*pde & INTEL_PDPTE_NESTED)) {
				pmap_store_pte(pde, (pd_entry_t)0);
				i += (uint32_t) NPDEPG;
				vaddr += NBPDPT;
				continue;
			}
		}
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0)
			panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
		pmap_store_pte(pde, (pd_entry_t)0);
		i++;
		vaddr += NBPDE;
	}

	PMAP_UPDATE_TLBS(grand, va_start, va_end);

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}
/* Invoked by the Mach VM to determine the platform-specific unnest region */

boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
	boolean_t rval = FALSE;
	pd_entry_t *pdpte;
	pdpte = pmap64_pdpt(p, *s);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*s = ((*s) & ~(NBPDPT - 1));
		rval = TRUE;
	}
	pdpte = pmap64_pdpt(p, *e);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*e = ((*e + NBPDPT) & ~(NBPDPT-1));
		rval = TRUE;
	}

	return rval;
}
/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
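/*
 * Added worked example (not original source text): for a 2MiB large-page
 * mapping the PDE supplies the 2MiB-aligned frame and ptenum(va) supplies the
 * 4KiB page index within it.  E.g. if the PDE maps physical 0x40000000 and
 * va is the mapping base plus 0x5000, then
 *
 *	ppn = i386_btop(0x40000000) + ptenum(va) = 0x40000 + 5
 */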
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	pt_entry_t	*ptp;
	pd_entry_t	*pdep;
	ppnum_t		ppn = 0;
	pd_entry_t	pde;
	pt_entry_t	pte;

	mp_disable_preemption();

	/* This refcount test is a band-aid--several infrastructural changes
	 * are necessary to eliminate invocation of this routine from arbitrary
	 * contexts.
	 */
	if (!pmap->ref_count)
		goto pfp_exit;

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
		if (pde & INTEL_PTE_PS) {
			ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
			ppn += (ppnum_t) ptenum(va);
		} else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
				ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
			}
		}
	}
pfp_exit:
	mp_enable_preemption();

	return ppn;
}
/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed.
 */
void
pmap_update_cache_attributes_locked(ppnum_t pn, unsigned attributes) {
	pv_rooted_entry_t	pv_h, pv_e;
	pv_hashed_entry_t	pvh_e, nexth;
	vm_map_offset_t		vaddr;
	pmap_t			pmap;
	pt_entry_t		*ptep;

	assert(IS_MANAGED_PAGE(pn));

	pv_h = pai_to_pvh(pn);
	/* TODO: translate the PHYS_* bits to PTE bits, while they're
	 * currently identical, they may not remain so
	 * Potential optimization (here and in page_protect),
	 * parallel shootdowns, check for redundant
	 * attribute modifications.
	 */

	/*
	 * Alter attributes on all mappings
	 */
	if (pv_h->pmap != PMAP_NULL) {
		pv_e = pv_h;
		pvh_e = (pv_hashed_entry_t)pv_e;

		do {
			pmap = pv_e->pmap;
			vaddr = pv_e->va;
			ptep = pmap_pte(pmap, vaddr);

			if (0 == ptep)
				panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap, pn, vaddr, kernel_pmap);

			nexth = (pv_hashed_entry_t)queue_next(&pvh_e->qlink);
			pmap_update_pte(ptep, *ptep, (*ptep & ~PHYS_CACHEABILITY_MASK) | attributes);
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
			pvh_e = nexth;
		} while ((pv_e = (pv_rooted_entry_t)nexth) != pv_h);
	}
}
void x86_filter_TLB_coherency_interrupts(boolean_t dofilter) {
	assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);

	if (dofilter) {
		CPU_CR3_MARK_INACTIVE();
	} else {
		CPU_CR3_MARK_ACTIVE();
		__asm__ volatile("mfence");
		if (current_cpu_datap()->cpu_tlb_invalid)
			process_pmap_updates();
	}
}
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
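/*
 * A minimal usage sketch (added; the fault-handling caller shown here is
 * hypothetical, only the parameter order follows the declaration below): the
 * VM layer resolves a fault to a physical page and then establishes the
 * translation, e.g.
 *
 *	pmap_enter(vm_map_pmap(map),		// target pmap
 *		   vaddr,			// page-aligned VA
 *		   pn,				// physical page number
 *		   VM_PROT_READ | VM_PROT_WRITE,
 *		   0,				// flags (e.g. VM_MEM_NOT_CACHEABLE)
 *		   FALSE);			// not wired
 */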
void
pmap_enter(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr,
	ppnum_t			pn,
	vm_prot_t		prot,
	unsigned int		flags,
	boolean_t		wired)
{
	pt_entry_t		*pte;
	pv_rooted_entry_t	pv_h;
	int			pai;
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_new;
	pt_entry_t		template;
	pt_entry_t		old_pte;
	pmap_paddr_t		old_pa;
	pmap_paddr_t		pa = (pmap_paddr_t) i386_ptob(pn);
	boolean_t		need_tlbflush = FALSE;
	boolean_t		set_NX;
	char			oattr;
	boolean_t		old_pa_locked;
	/* 2MiB mappings are confined to x86_64 by VM */
	boolean_t		superpage = flags & VM_MEM_SUPERPAGE;
	vm_object_t		delpage_pm_obj = NULL;
	int			delpage_pde_index = 0;

	assert(pn != vm_page_fictitious_addr);

	if (pmap == PMAP_NULL)
		return;
	if (pn == vm_page_guard_addr)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
		   pmap,
		   (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
		   pn, prot);

	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
		set_NX = FALSE;
	else
		set_NX = TRUE;
	/*
	 *	Must allocate a new pvlist entry while we're unlocked;
	 *	zalloc may cause pageout (which will lock the pmap system).
	 *	If we determine we need a pvlist entry, we will unlock
	 *	and allocate one.  Then we will retry, throwing away
	 *	the allocated entry later (if we no longer need it).
	 */
	pvh_new = PV_HASHED_ENTRY_NULL;
Retry:
	pvh_e = PV_HASHED_ENTRY_NULL;
	/*
	 *	Expand pmap to include this pte.  Assume that
	 *	pmap is always expanded to include enough hardware
	 *	pages to map one VM page.
	 */
	if (superpage) {
		while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
			/* need room for another pde entry */
			pmap_expand_pdpt(pmap, vaddr);
		}
	} else {
		while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
			/*
			 * Must unlock to expand the pmap
			 * going to grow pde level page(s)
			 */
			pmap_expand(pmap, vaddr);
		}
	}
	if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
		/*
		 * There is still an empty page table mapped that
		 * was used for a previous base page mapping.
		 * Remember the PDE and the PDE index, so that we
		 * can free the page at the end of this function.
		 */
		delpage_pde_index = (int)pdeidx(pmap, vaddr);
		delpage_pm_obj = pmap->pm_obj;
	}

	old_pa = pte_to_pa(*pte);
	pai = pa_index(old_pa);
	old_pa_locked = FALSE;
	/*
	 * if we have a previous managed page, lock the pv entry now. after
	 * we lock it, check to see if someone beat us to the lock and if so
	 * drop the lock
	 */
	if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);
		old_pa_locked = TRUE;
		old_pa = pte_to_pa(*pte);
		if (0 == old_pa) {
			UNLOCK_PVH(pai);	/* another path beat us to it */
			old_pa_locked = FALSE;
		}
	}
	/*
	 *	Special case if the incoming physical page is already mapped
	 *	at this same virtual address.
	 */
	if (old_pa == pa) {
		pt_entry_t old_attributes =
		    *pte & ~(INTEL_PTE_REF | INTEL_PTE_MOD);

		/*
		 *	May be changing its wired attribute or protection
		 */
		template = pa_to_pte(pa) | INTEL_PTE_VALID;
		template |= pmap_get_cache_attributes(pa_index(pa));

		if (VM_MEM_NOT_CACHEABLE ==
		    (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
			if (!(flags & VM_MEM_GUARDED))
				template |= INTEL_PTE_PTA;
			template |= INTEL_PTE_NCACHE;
		}
		if (pmap != kernel_pmap)
			template |= INTEL_PTE_USER;
		if (prot & VM_PROT_WRITE)
			template |= INTEL_PTE_WRITE;

		if (set_NX)
			template |= INTEL_PTE_NX;

		if (wired) {
			template |= INTEL_PTE_WIRED;
			if (!iswired(old_attributes))
				OSAddAtomic(+1,
				    &pmap->stats.wired_count);
		} else {
			if (iswired(old_attributes)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1,
				    &pmap->stats.wired_count);
			}
		}
		if (superpage)		/* this path can not be used */
			template |= INTEL_PTE_PS;	/* to change the page size! */
		/* Determine delta, PV locked */
		need_tlbflush =
		    ((old_attributes ^ template) != INTEL_PTE_WIRED);

		/* store modified PTE and preserve RC bits */
		pmap_update_pte(pte, *pte,
		    template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
		if (old_pa_locked) {
			UNLOCK_PVH(pai);
			old_pa_locked = FALSE;
		}
		goto Done;
	}
	/*
	 * Outline of code from here:
	 *   1) If va was mapped, update TLBs, remove the mapping
	 *      and remove old pvlist entry.
	 *   2) Add pvlist entry for new mapping
	 *   3) Enter new mapping.
	 *
	 * If the old physical page is not managed step 1) is skipped
	 * (except for updating the TLBs), and the mapping is
	 * overwritten at step 3).  If the new physical page is not
	 * managed, step 2) is skipped.
	 */
	if (old_pa != (pmap_paddr_t) 0) {
		/*
		 *	Don't do anything to pages outside valid memory here.
		 *	Instead convince the code that enters a new mapping
		 *	to overwrite the old one.
		 */
		/* invalidate the PTE */
		pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
		/* propagate invalidate everywhere */
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
		/* remember reference and change */
		old_pte = *pte;
		oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
		/* completely invalidate the PTE */
		pmap_store_pte(pte, 0);

		if (IS_MANAGED_PAGE(pai)) {
			pmap_assert(old_pa_locked == TRUE);
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1,
			    &pmap->stats.resident_count);

			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1,
				    &pmap->stats.wired_count);
			}
			pmap_phys_attributes[pai] |= oattr;

			/*
			 *	Remove the mapping from the pvlist for
			 *	this physical page.
			 *	We'll end up with either a rooted pv or a
			 *	hashed pv.
			 */
			pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);

		} else {
			/*
			 *	old_pa is not managed.
			 *	Do removal part of accounting.
			 */
			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1,
				    &pmap->stats.wired_count);
			}
		}
	}
	/*
	 * if we had a previously managed page locked, unlock it now
	 */
	if (old_pa_locked) {
		UNLOCK_PVH(pai);
		old_pa_locked = FALSE;
	}

	pai = pa_index(pa);	/* now working with new incoming phys page */
	if (IS_MANAGED_PAGE(pai)) {
		/*
		 *	Step 2) Enter the mapping in the PV list for this
		 *	physical page.
		 */
		pv_h = pai_to_pvh(pai);

		LOCK_PVH(pai);

		if (pv_h->pmap == PMAP_NULL) {
			/*
			 *	No mappings yet, use rooted pv
			 */
			pv_h->va = vaddr;
			pv_h->pmap = pmap;
			queue_init(&pv_h->qlink);
		} else {
			/*
			 *	Add new pv_hashed_entry after header.
			 */
			if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
				pvh_e = pvh_new;
				pvh_new = PV_HASHED_ENTRY_NULL;
			} else if (PV_HASHED_ENTRY_NULL == pvh_e) {
				PV_HASHED_ALLOC(&pvh_e);
				if (PV_HASHED_ENTRY_NULL == pvh_e) {
					/*
					 * the pv list is empty. if we are on
					 * the kernel pmap we'll use one of
					 * the special private kernel pv_e's,
					 * else, we need to unlock
					 * everything, zalloc a pv_e, and
					 * restart bringing in the pv_e with
					 * us in the middle
					 */
					if (kernel_pmap == pmap) {
						PV_HASHED_KERN_ALLOC(&pvh_e);
					} else {
						UNLOCK_PVH(pai);
						pmap_pv_throttle(pmap);
						pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
						goto Retry;
					}
				}
			}

			if (PV_HASHED_ENTRY_NULL == pvh_e)
				panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");

			pvh_e->va = vaddr;
			pvh_e->pmap = pmap;
			pvh_e->ppn = pn;
			pv_hash_add(pvh_e, pv_h);

			/*
			 *	Remember that we used the pvlist entry.
			 */
			pvh_e = PV_HASHED_ENTRY_NULL;
		}
		/*
		 * only count the mapping
		 * for 'managed memory'
		 */
		OSAddAtomic(+1, &pmap->stats.resident_count);
		if (pmap->stats.resident_count > pmap->stats.resident_max) {
			pmap->stats.resident_max = pmap->stats.resident_count;
		}
	} else if (last_managed_page == 0) {
		/* Account for early mappings created before "managed pages"
		 * are determined. Consider consulting the available DRAM map.
		 */
		OSAddAtomic(+1, &pmap->stats.resident_count);
	}
	/*
	 * Step 3) Enter the mapping.
	 *
	 * Build a template to speed up entering -
	 * only the pfn changes.
	 */
	template = pa_to_pte(pa) | INTEL_PTE_VALID;
	/*
	 * DRK: It may be worth asserting on cache attribute flags that diverge
	 * from the existing physical page attributes.
	 */
	template |= pmap_get_cache_attributes(pa_index(pa));

	if (flags & VM_MEM_NOT_CACHEABLE) {
		if (!(flags & VM_MEM_GUARDED))
			template |= INTEL_PTE_PTA;
		template |= INTEL_PTE_NCACHE;
	}
	if (pmap != kernel_pmap)
		template |= INTEL_PTE_USER;
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;
	if (set_NX)
		template |= INTEL_PTE_NX;
	if (wired) {
		template |= INTEL_PTE_WIRED;
		OSAddAtomic(+1, &pmap->stats.wired_count);
	}
	if (superpage)
		template |= INTEL_PTE_PS;
	pmap_store_pte(pte, template);
	/*
	 * if this was a managed page we delayed unlocking the pv until here
	 * to prevent pmap_page_protect et al from finding it until the pte
	 * has been modified
	 */
	if (IS_MANAGED_PAGE(pai)) {
		UNLOCK_PVH(pai);
	}
Done:
	if (need_tlbflush == TRUE)
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);

	if (pvh_e != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
	}
	if (pvh_new != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
	}

	if (delpage_pm_obj) {
		vm_page_t m;

		vm_object_lock(delpage_pm_obj);
		m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
		if (m == VM_PAGE_NULL)
			panic("pmap_enter: pte page not in object");
		vm_object_unlock(delpage_pm_obj);

		OSAddAtomic(-1, &inuse_ptepages_count);
		PMAP_ZINFO_PFREE(PAGE_SIZE);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */
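/*
 * Added illustration (not original source text): the caller passes a
 * half-open window of PTEs within one pte-page, exactly as pmap_remove()
 * does further below:
 *
 *	spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
 *	spte = &spte[ptenum(s64)];
 *	epte = &spte[intel_btop(l64 - s64)];
 *	pmap_remove_range(map, s64, spte, epte);
 */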
void
pmap_remove_range(
	pmap_t			pmap,
	vm_map_offset_t		start_vaddr,
	pt_entry_t		*spte,
	pt_entry_t		*epte)
{
	pt_entry_t		*cpte;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_e;
	int			pvh_cnt = 0;
	int			num_removed, num_unwired, num_found, num_invalid;
	int			pai;
	pmap_paddr_t		pa;
	vm_map_offset_t		vaddr;

	num_removed = 0;
	num_unwired = 0;
	num_found   = 0;
	num_invalid = 0;
#if defined(__i386__)
	if (pmap != kernel_pmap &&
	    pmap->pm_task_map == TASK_MAP_32BIT &&
	    start_vaddr >= HIGH_MEM_BASE) {
		/*
		 * The range is in the "high_shared_pde" which is shared
		 * between the kernel and all 32-bit tasks.  It holds
		 * the 32-bit commpage but also the trampolines, GDT, etc...
		 * so we can't let user tasks remove anything from it.
		 */
		return;
	}
#endif
837 for (cpte
= spte
, vaddr
= start_vaddr
;
839 cpte
++, vaddr
+= PAGE_SIZE_64
) {
840 pt_entry_t p
= *cpte
;
852 if (!IS_MANAGED_PAGE(pai
)) {
854 * Outside range of managed physical memory.
855 * Just remove the mappings.
857 pmap_store_pte(cpte
, 0);
861 if ((p
& INTEL_PTE_VALID
) == 0)
864 /* invalidate the PTE */
865 pmap_update_pte(cpte
, *cpte
, (*cpte
& ~INTEL_PTE_VALID
));
	if (num_found == 0) {
		/* nothing was changed: we're done */
		goto update_counts;
	}

	/* propagate the invalidates to other CPUs */

	PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);
	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {

		pa = pte_to_pa(*cpte);
		if (pa == 0)
			continue;

		pai = pa_index(pa);

		LOCK_PVH(pai);

		pa = pte_to_pa(*cpte);
		if (pa == 0) {
			UNLOCK_PVH(pai);
			continue;
		}
		num_removed++;

		if (iswired(*cpte))
			num_unwired++;
		/*
		 * Get the modify and reference bits, then
		 * nuke the entry in the page table
		 */
		/* remember reference and change */
		pmap_phys_attributes[pai] |=
			(char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));

		/*
		 * Remove the mapping from the pvlist for this physical page.
		 */
		pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);

		/* completely invalidate the PTE */
		pmap_store_pte(cpte, 0);
		UNLOCK_PVH(pai);

		if (pvh_e != PV_HASHED_ENTRY_NULL) {
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL) {
				pvh_et = pvh_e;
			}
			pvh_cnt++;
		}
	}

	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
update_counts:
	/*
	 *	Update the counts
	 */
#if TESTING
	if (pmap->stats.resident_count < num_removed)
		panic("pmap_remove_range: resident_count");
#endif
	assert(pmap->stats.resident_count >= num_removed);
	OSAddAtomic(-num_removed, &pmap->stats.resident_count);

#if TESTING
	if (pmap->stats.wired_count < num_unwired)
		panic("pmap_remove_range: wired_count");
#endif
	assert(pmap->stats.wired_count >= num_unwired);
	OSAddAtomic(-num_unwired, &pmap->stats.wired_count);

	return;
}
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
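/*
 * A minimal usage sketch (added; the address values and the caller are
 * hypothetical, the rounding requirement is the one stated above):
 *
 *	addr64_t start = vm_map_trunc_page(some_va);
 *	addr64_t end   = vm_map_round_page(some_va + some_len);
 *	pmap_remove(map->pmap, start, end);
 */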
void
pmap_remove(
	pmap_t		map,
	addr64_t	s64,
	addr64_t	e64)
{
	pt_entry_t	*pde;
	pt_entry_t	*spte, *epte;
	addr64_t	l64;
	uint64_t	deadline;

	if (map == PMAP_NULL || s64 == e64)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
		   map,
		   (uint32_t) (s64 >> 32), s64,
		   (uint32_t) (e64 >> 32), e64);
	/*
	 *	Check that address range in the kernel does not overlap the stacks.
	 *	We initialize local static min/max variables once to avoid making
	 *	2 function calls for every remove.  Note also that these functions
	 *	both return 0 before kernel stacks have been initialized, and hence
	 *	the panic is not triggered in this case.
	 */
	if (map == kernel_pmap) {
		static vm_offset_t kernel_stack_min = 0;
		static vm_offset_t kernel_stack_max = 0;

		if (kernel_stack_min == 0) {
			kernel_stack_min = min_valid_stack_address();
			kernel_stack_max = max_valid_stack_address();
		}
		if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
		    (kernel_stack_min < e64 && e64 <= kernel_stack_max))
			panic("pmap_remove() attempted in kernel stack");
	}
	/*
	 * The values of kernel_stack_min and kernel_stack_max are no longer
	 * relevant now that we allocate kernel stacks in the kernel map,
	 * so the old code above no longer applies.  If we wanted to check that
	 * we weren't removing a mapping of a page in a kernel stack we'd
	 * mark the PTE with an unused bit and check that here.
	 */
	deadline = rdtsc64() + max_preemption_latency_tsc;

	while (s64 < e64) {
		l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
		if (l64 > e64)
			l64 = e64;
		pde = pmap_pde(map, s64);

		if (pde && (*pde & INTEL_PTE_VALID)) {
			if (*pde & INTEL_PTE_PS) {
				/*
				 * If we're removing a superpage, pmap_remove_range()
				 * must work on level 2 instead of level 1; and we're
				 * only passing a single level 2 entry instead of a
				 * level 1 range.
				 */
				spte = pde;
				epte = spte+1; /* excluded */
			} else {
				spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];
			}
			pmap_remove_range(map, s64, spte, epte);
		}
		s64 = l64;

		if (s64 < e64 && rdtsc64() >= deadline) {
			deadline = rdtsc64() + max_preemption_latency_tsc;
		}
	}

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
		   map, 0, 0, 0, 0);
}
/*
 *	Routine:	pmap_page_protect
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
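/*
 * Added illustration (not original source text; behavior as implemented by
 * the switch on 'prot' below): pmap_page_protect(pn, VM_PROT_READ | VM_PROT_EXECUTE)
 * leaves the mappings in place but write-protects them, while
 * pmap_page_protect(pn, VM_PROT_NONE) removes every mapping of the page.
 */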
void
pmap_page_protect(
	ppnum_t		pn,
	vm_prot_t	prot)
{
	boolean_t		remove;
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	nexth;
	int			pvh_cnt = 0;
	pv_rooted_entry_t	pv_h;
	pv_rooted_entry_t	pv_e;
	pv_hashed_entry_t	pvh_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
		   pn, prot, 0, 0, 0);

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	case VM_PROT_ALL:
		return;		/* nothing to do */
	default:
		remove = TRUE;
		break;
	}
	pv_h = pai_to_pvh(pai);

	/*
	 * Walk down PV list, if any, changing or removing all mappings.
	 */
	if (pv_h->pmap == PMAP_NULL)
		goto done;

	pv_e = pv_h;
	pvh_e = (pv_hashed_entry_t) pv_e;	/* cheat */

	do {
		vm_map_offset_t vaddr;

		pmap = pv_e->pmap;
		vaddr = pv_e->va;
		pte = pmap_pte(pmap, vaddr);

		pmap_assert2((pa_index(pte_to_pa(*pte)) == pn),
		    "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);

		if (0 == pte) {
			panic("pmap_page_protect() "
			      "pmap=%p pn=0x%x vaddr=0x%llx\n",
			      pmap, pn, vaddr);
		}
		nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);
		/*
		 * Remove the mapping if new protection is NONE
		 */
		if (remove) {
			/*
			 *	Remove the mapping, collecting dirty bits.
			 */
			pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID);

			/* Remove per-pmap wired count */
			if (iswired(*pte)) {
				OSAddAtomic(-1, &pmap->stats.wired_count);
			}

			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
			pmap_phys_attributes[pai] |=
				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_store_pte(pte, 0);

#if TESTING
			if (pmap->stats.resident_count < 1)
				panic("pmap_page_protect: resident_count");
#endif
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1, &pmap->stats.resident_count);

			/*
			 * Deal with the pv_rooted_entry.
			 */
			if (pv_e == pv_h) {
				/*
				 * Fix up head later.
				 */
				pv_h->pmap = PMAP_NULL;
			} else {
				/*
				 * Delete this entry.
				 */
				pv_hash_remove(pvh_e);
				pvh_e->qlink.next = (queue_entry_t) pvh_eh;
				pvh_eh = pvh_e;

				if (pvh_et == PV_HASHED_ENTRY_NULL)
					pvh_et = pvh_e;
				pvh_cnt++;
			}
		} else {
			/*
			 * Write-protect, after opportunistic refmod collect
			 */
			pmap_phys_attributes[pai] |=
				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE);
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
		}
		pvh_e = nexth;
	} while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);
	/*
	 * If pv_head mapping was removed, fix it up.
	 */
	if (pv_h->pmap == PMAP_NULL) {
		pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);

		if (pvh_e != (pv_hashed_entry_t) pv_h) {
			pv_hash_remove(pvh_e);
			pv_h->pmap = pvh_e->pmap;
			pv_h->va = pvh_e->va;
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL)
				pvh_et = pvh_e;
			pvh_cnt++;
		}
	}
done:
	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}
/*
 *	Clear specified attribute bits.
 */
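/*
 * Added note (hedged, not original source text): this routine is typically
 * reached through thin wrappers elsewhere in the pmap layer, e.g. something
 * of the form
 *
 *	void pmap_clear_modify(ppnum_t pn)    { phys_attribute_clear(pn, PHYS_MODIFIED); }
 *	void pmap_clear_reference(ppnum_t pn) { phys_attribute_clear(pn, PHYS_REFERENCED); }
 *
 * (the wrapper names are assumptions here; only phys_attribute_clear() itself
 * is defined below).
 */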
void
phys_attribute_clear(
	ppnum_t		pn,
	int		bits)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	char			attributes = 0;
	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START,
		   pn, bits, 0, 0, 0);

	pv_h = pai_to_pvh(pai);
	/*
	 * Walk down PV list, clearing all modify or reference bits.
	 * We do not have to lock the pv_list because we have
	 * the entire pmap system locked.
	 */
	if (pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		pv_e = (pv_hashed_entry_t)pv_h;

		do {
			vm_map_offset_t va;

			pmap = pv_e->pmap;
			va = pv_e->va;

			/*
			 * Clear modify and/or reference bits.
			 */
			pte = pmap_pte(pmap, va);
			attributes |= *pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_update_pte(pte, *pte, (*pte & ~bits));
			/* Ensure all processors using this translation
			 * invalidate this TLB entry. The invalidation *must*
			 * follow the PTE update, to ensure that the TLB
			 * shadow of the 'D' bit (in particular) is
			 * synchronized with the updated PTE.
			 */
			PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while (pv_e != (pv_hashed_entry_t)pv_h);
	}
	/* Opportunistic refmod collection, annulled
	 * if both REF and MOD are being cleared.
	 */
	pmap_phys_attributes[pai] |= attributes;
	pmap_phys_attributes[pai] &= (~bits);

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}
/*
 *	Check specified attribute bits.
 */
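/*
 * Added note (hedged, not original source text): as with the clear routine
 * above, callers usually arrive via small predicates, e.g. something like
 *
 *	boolean_t pmap_is_modified(ppnum_t pn)   { return phys_attribute_test(pn, PHYS_MODIFIED) != 0; }
 *	boolean_t pmap_is_referenced(ppnum_t pn) { return phys_attribute_test(pn, PHYS_REFERENCED) != 0; }
 *
 * (wrapper names assumed; only phys_attribute_test() itself appears below).
 */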
int
phys_attribute_test(
	ppnum_t		pn,
	int		bits)
{
	pv_rooted_entry_t	pv_h;
	pv_hashed_entry_t	pv_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	int			attributes = 0;
!= vm_page_fictitious_addr
);
1332 if (pn
== vm_page_guard_addr
)
1335 pai
= ppn_to_pai(pn
);
1337 if (!IS_MANAGED_PAGE(pai
)) {
1339 * Not a managed page.
1345 * Fast check... if bits already collected
1346 * no need to take any locks...
1347 * if not set, we need to recheck after taking
1348 * the lock in case they got pulled in while
1349 * we were waiting for the lock
1351 if ((pmap_phys_attributes
[pai
] & bits
) == bits
)
1354 pv_h
= pai_to_pvh(pai
);
	attributes = pmap_phys_attributes[pai] & bits;
	/*
	 * Walk down PV list, checking the mappings until we
	 * reach the end or we've found the desired attributes.
	 */
	if (attributes != bits &&
	    pv_h->pmap != PMAP_NULL) {
		/*
		 * There are some mappings.
		 */
		pv_e = (pv_hashed_entry_t)pv_h;

		do {
			vm_map_offset_t va;

			pmap = pv_e->pmap;
			va = pv_e->va;
			/*
			 * pick up modify and/or reference bits from mapping
			 */
			pte = pmap_pte(pmap, va);
			attributes |= (int)(*pte & bits);

			pv_e = (pv_hashed_entry_t)queue_next(&pv_e->qlink);

		} while ((attributes != bits) &&
			 (pv_e != (pv_hashed_entry_t)pv_h));
	}
	pmap_phys_attributes[pai] |= attributes;

	return (attributes);
}
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *		The mapping must already exist in the pmap.
 */
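/*
 * Added illustration (hedged, not original source text): the VM wiring path
 * would typically call this once per page after faulting the page in, e.g.
 *
 *	pmap_change_wiring(map->pmap, vaddr, TRUE);	// wire
 *	...
 *	pmap_change_wiring(map->pmap, vaddr, FALSE);	// unwire
 *
 * (the caller shown is hypothetical; the routine itself is defined below and
 * requires that a mapping for vaddr already exists).
 */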
void
pmap_change_wiring(
	pmap_t		map,
	vm_map_offset_t	vaddr,
	boolean_t	wired)
{
	pt_entry_t	*pte;

	if ((pte = pmap_pte(map, vaddr)) == PT_ENTRY_NULL)
		panic("pmap_change_wiring: pte missing");

	if (wired && !iswired(*pte)) {
		/*
		 * wiring down mapping
		 */
		OSAddAtomic(+1, &map->stats.wired_count);
		pmap_update_pte(pte, *pte, (*pte | INTEL_PTE_WIRED));
	}
	else if (!wired && iswired(*pte)) {
		/*
		 * unwiring mapping
		 */
		assert(map->stats.wired_count >= 1);
		OSAddAtomic(-1, &map->stats.wired_count);
		pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_WIRED));
	}
}