/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <vm/vm_map.h>
#include <i386/pmap_internal.h>

void		pmap_remove_range(
			pmap_t		pmap,
			vm_map_offset_t	start_vaddr,
			pt_entry_t	*spte,
			pt_entry_t	*epte);

pv_rooted_entry_t	pv_head_table;	/* array of entries, one per page */

thread_call_t		mapping_adjust_call;
static thread_call_data_t mapping_adjust_call_data;
uint32_t		mappingrecurse = 0;

pmap_pagetable_corruption_record_t pmap_pagetable_corruption_records[PMAP_PAGETABLE_CORRUPTION_MAX_LOG];
uint32_t pmap_pagetable_corruption_incidents;
uint64_t pmap_pagetable_corruption_last_abstime = (~(0ULL) >> 1);
uint64_t pmap_pagetable_corruption_interval_abstime;
thread_call_t	pmap_pagetable_corruption_log_call;
static thread_call_data_t pmap_pagetable_corruption_log_call_data;
boolean_t pmap_pagetable_corruption_timeout = FALSE;
/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on a NBPDE boundary.
 */

/* These symbols may be referenced directly by VM */
uint64_t pmap_nesting_size_min = NBPDE;
uint64_t pmap_nesting_size_max = 0 - (uint64_t)NBPDE;
/*
 *	kern_return_t pmap_nest(grand, subord, va_start, size)
 *
 *	grand    = the pmap that we will nest subord into
 *	subord   = the pmap that goes into the grand
 *	va_start = start of range in pmap to be inserted
 *	nstart   = start of range in the nested (subord) pmap
 *	size     = size of nest area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *
 *	Note that we depend upon higher level VM locks to ensure that things don't change while
 *	we are doing this.  For example, VM should not be doing any pmap enters while it is nesting
 *	or do 2 nests at once.
 */

/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */
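
/*
 * Illustrative note (not part of the original source): the sanity checks at
 * the top of pmap_nest() are simple power-of-two arithmetic. With
 * pmap_nesting_size_min == NBPDE == 2MiB, "x & (NBPDE - 1)" is non-zero
 * exactly when x is not 2MiB-aligned, and "(size >> 28) > 65536" caps the
 * nested range at 65536 * 256MiB == 16TiB. For example, nesting a 1GiB
 * shared region at a 1GiB-aligned address passes both checks and is then
 * handled either as 512 individual PDEs or as a single PDPT entry.
 */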
kern_return_t pmap_nest(pmap_t grand, pmap_t subord, addr64_t va_start, addr64_t nstart, uint64_t size) {
	vm_map_offset_t	vaddr, nvaddr;
	pd_entry_t	*pde, *npde;
	unsigned int	i;
	uint64_t	num_pde;

	if ((size & (pmap_nesting_size_min-1)) ||
	    (va_start & (pmap_nesting_size_min-1)) ||
	    (nstart & (pmap_nesting_size_min-1)) ||
	    ((size >> 28) > 65536))	/* Max size we can nest is 16TB */
		return KERN_INVALID_VALUE;

	if (size == 0) {
		panic("pmap_nest: size is invalid - %016llX\n", size);
	}

	if (va_start != nstart)
		panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start, nstart);

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
		   (int) grand, (int) subord,
		   (int) (va_start>>32), (int) va_start, 0);

	nvaddr = (vm_map_offset_t)nstart;
	num_pde = size >> PDESHIFT;

	subord->pm_shared = TRUE;

	for (i = 0; i < num_pde;) {
		if (((nvaddr & PDPTMASK) == 0) && (num_pde - i) >= NPDEPG && cpu_64bit) {
			npde = pmap64_pdpt(subord, nvaddr);
			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				pmap_expand_pdpt(subord, nvaddr);
				npde = pmap64_pdpt(subord, nvaddr);
			}
			*npde |= INTEL_PDPTE_NESTED;
			nvaddr += NBPDPT;
			i += (uint32_t)NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nvaddr);
			while (0 == npde || ((*npde & INTEL_PTE_VALID) == 0)) {
				pmap_expand(subord, nvaddr);
				npde = pmap_pde(subord, nvaddr);
			}
			nvaddr += NBPDE;
			i++;
		}
	}

	vaddr = (vm_map_offset_t)va_start;

	for (i = 0; i < num_pde;) {
		pd_entry_t tpde;

		if (((vaddr & PDPTMASK) == 0) && ((num_pde - i) >= NPDEPG) && cpu_64bit) {
			npde = pmap64_pdpt(subord, vaddr);
			if (npde == 0)
				panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord, vaddr);
			tpde = *npde;
			pde = pmap64_pdpt(grand, vaddr);
			if (0 == pde) {
				pmap_expand_pml4(grand, vaddr);
				pde = pmap64_pdpt(grand, vaddr);
			}
			if (0 == pde)
				panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand, vaddr);
			pmap_store_pte(pde, tpde);
			vaddr += NBPDPT;
			i += (uint32_t) NPDEPG;
		}
		else {
			npde = pmap_pde(subord, nstart);
			if (npde == 0)
				panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord, nstart);
			tpde = *npde;
			nstart += NBPDE;
			pde = pmap_pde(grand, vaddr);
			if ((0 == pde) && cpu_64bit) {
				pmap_expand_pdpt(grand, vaddr);
				pde = pmap_pde(grand, vaddr);
			}
			if (0 == pde)
				panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand, vaddr);
			pmap_store_pte(pde, tpde);
			vaddr += NBPDE;
			i++;
		}
	}

	PMAP_TRACE(PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}
/*
 *	kern_return_t pmap_unnest(grand, vaddr)
 *
 *	grand = the pmap that we will un-nest subord from
 *	vaddr = start of range in pmap to be unnested
 *
 *	Removes a pmap from another.  This is used to implement shared segments.
 */
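
/*
 * Illustrative sketch (not part of the original source): the PDE-boundary
 * rounding that pmap_unnest() performs below, written out as a standalone
 * helper. It assumes only that NBPDE (2MiB) is a power of two; the start is
 * rounded down and the end rounded up so that whole 2MiB windows are
 * unnested.
 */
static inline void
pmap_unnest_round_sketch(addr64_t vaddr, uint64_t size,
			 addr64_t *va_start, addr64_t *va_end)
{
	*va_start = vaddr & ~((addr64_t)NBPDE - 1);				/* round start down */
	*va_end   = (vaddr + size + NBPDE - 1) & ~((addr64_t)NBPDE - 1);	/* round end up */
}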
kern_return_t pmap_unnest(pmap_t grand, addr64_t vaddr, uint64_t size) {
	pd_entry_t *pde;
	unsigned int i;
	uint64_t num_pde;
	addr64_t va_start, va_end;
	uint64_t npdpt = PMAP_INVALID_PDPTNUM;

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
		   (int) grand,
		   (int) (vaddr>>32), (int) vaddr, 0, 0);

	if ((size & (pmap_nesting_size_min-1)) ||
	    (vaddr & (pmap_nesting_size_min-1))) {
		panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
		      grand, vaddr, size);
	}

	/* align everything to PDE boundaries */
	va_start = vaddr & ~(NBPDE-1);
	va_end = (vaddr + size + NBPDE - 1) & ~(NBPDE-1);
	size = va_end - va_start;

	num_pde = size >> PDESHIFT;

	vaddr = va_start;

	for (i = 0; i < num_pde; ) {
		if ((pdptnum(grand, vaddr) != npdpt) && cpu_64bit) {
			npdpt = pdptnum(grand, vaddr);
			pde = pmap64_pdpt(grand, vaddr);
			if (pde && (*pde & INTEL_PDPTE_NESTED)) {
				pmap_store_pte(pde, (pd_entry_t)0);
				i += (uint32_t) NPDEPG;
				vaddr += NBPDPT;
				continue;
			}
		}
		pde = pmap_pde(grand, (vm_map_offset_t)vaddr);
		if (pde == 0)
			panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand, vaddr);
		pmap_store_pte(pde, (pd_entry_t)0);
		i++;
		vaddr += NBPDE;
	}

	PMAP_UPDATE_TLBS(grand, va_start, va_end);

	PMAP_TRACE(PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, 0, 0, 0, 0, 0);

	return KERN_SUCCESS;
}
/* Invoked by the Mach VM to determine the platform specific unnest region */

boolean_t pmap_adjust_unnest_parameters(pmap_t p, vm_map_offset_t *s, vm_map_offset_t *e) {
	pd_entry_t *pdpte;
	boolean_t rval = FALSE;

	pdpte = pmap64_pdpt(p, *s);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*s &= ~(NBPDPT-1);
		rval = TRUE;
	}

	pdpte = pmap64_pdpt(p, *e);
	if (pdpte && (*pdpte & INTEL_PDPTE_NESTED)) {
		*e = ((*e + NBPDPT) & ~(NBPDPT-1));
		rval = TRUE;
	}

	return rval;
}
/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
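
/*
 * Worked example (added for illustration, not in the original source): if a
 * 2MiB page maps va and its PDE holds physical base 0x40000000, then
 * i386_btop(pte_to_pa(pde)) yields 4K page number 0x40000 and ptenum(va)
 * adds va's offset within the 2MiB page in 4K units (roughly
 * (va >> 12) & 0x1ff on this platform), giving the same 4K ppn that a
 * small-page walk would have produced.
 */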
ppnum_t
pmap_find_phys(pmap_t pmap, addr64_t va)
{
	pt_entry_t	*ptp;
	pd_entry_t	*pdep;
	ppnum_t		ppn = 0;
	pd_entry_t	pde;
	pt_entry_t	pte;

	mp_disable_preemption();

	/* This refcount test is a band-aid--several infrastructural changes
	 * are necessary to eliminate invocation of this routine from arbitrary
	 * contexts.
	 */

	if (!pmap->ref_count)
		goto pfp_exit;

	pdep = pmap_pde(pmap, va);

	if ((pdep != PD_ENTRY_NULL) && ((pde = *pdep) & INTEL_PTE_VALID)) {
		if (pde & INTEL_PTE_PS) {
			ppn = (ppnum_t) i386_btop(pte_to_pa(pde));
			ppn += (ppnum_t) ptenum(va);
		}
		else {
			ptp = pmap_pte(pmap, va);
			if ((PT_ENTRY_NULL != ptp) && (((pte = *ptp) & INTEL_PTE_VALID) != 0)) {
				ppn = (ppnum_t) i386_btop(pte_to_pa(pte));
			}
		}
	}
pfp_exit:
	mp_enable_preemption();

	return ppn;
}
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
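
/*
 * Illustrative usage sketch (not in the original source; the exact parameter
 * list is assumed from the definition that follows): a caller in the VM
 * fault path would typically map one resident page with something like
 *
 *	pmap_enter(map->pmap, vaddr, pn, VM_PROT_READ | VM_PROT_WRITE,
 *		   0, wired);
 *
 * where pn is the physical page number and a flags value of 0 requests the
 * default (cacheable) memory attributes.
 */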
void
pmap_enter(
	register pmap_t		pmap,
	vm_map_offset_t		vaddr,
	ppnum_t			pn,
	vm_prot_t		prot,
	unsigned int		flags,
	boolean_t		wired)
{
	pt_entry_t		*pte;
	pv_rooted_entry_t	pv_h;
	int			pai;
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_new;
	pt_entry_t		template;
	pmap_paddr_t		old_pa;
	pmap_paddr_t		pa = (pmap_paddr_t) i386_ptob(pn);
	boolean_t		need_tlbflush = FALSE;
	boolean_t		set_NX;
	char			oattr;
	boolean_t		old_pa_locked;
	/* 2MiB mappings are confined to x86_64 by VM */
	boolean_t		superpage = flags & VM_MEM_SUPERPAGE;
	vm_object_t		delpage_pm_obj = NULL;
	int			delpage_pde_index = 0;
	pt_entry_t		old_pte;

	assert(pn != vm_page_fictitious_addr);

	if (pmap == PMAP_NULL)
		return;
	if (pn == vm_page_guard_addr)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
		   pmap,
		   (uint32_t) (vaddr >> 32), (uint32_t) vaddr,
		   pn, prot);

	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
		set_NX = FALSE;
	else
		set_NX = TRUE;

	/*
	 *	Must allocate a new pvlist entry while we're unlocked;
	 *	zalloc may cause pageout (which will lock the pmap system).
	 *	If we determine we need a pvlist entry, we will unlock
	 *	and allocate one.  Then we will retry, throwing away
	 *	the allocated entry later (if we no longer need it).
	 */

	pvh_new = PV_HASHED_ENTRY_NULL;
Retry:
	pvh_e = PV_HASHED_ENTRY_NULL;

	/*
	 *	Expand pmap to include this pte.  Assume that
	 *	pmap is always expanded to include enough hardware
	 *	pages to map one VM page.
	 */
	if (superpage) {
		while ((pte = pmap64_pde(pmap, vaddr)) == PD_ENTRY_NULL) {
			/* need room for another pde entry */
			pmap_expand_pdpt(pmap, vaddr);
		}
	} else {
		while ((pte = pmap_pte(pmap, vaddr)) == PT_ENTRY_NULL) {
			/*
			 * Must unlock to expand the pmap
			 * going to grow pde level page(s)
			 */
			pmap_expand(pmap, vaddr);
		}
	}

	if (superpage && *pte && !(*pte & INTEL_PTE_PS)) {
		/*
		 * There is still an empty page table mapped that
		 * was used for a previous base page mapping.
		 * Remember the PDE and the PDE index, so that we
		 * can free the page at the end of this function.
		 */
		delpage_pde_index = (int)pdeidx(pmap, vaddr);
		delpage_pm_obj = pmap->pm_obj;
		*pte = 0;
	}

	old_pa = pte_to_pa(*pte);
	pai = pa_index(old_pa);
	old_pa_locked = FALSE;

	/*
	 * if we have a previous managed page, lock the pv entry now. after
	 * we lock it, check to see if someone beat us to the lock and if so
	 * drop the lock
	 */
	if ((0 != old_pa) && IS_MANAGED_PAGE(pai)) {
		LOCK_PVH(pai);
		old_pa_locked = TRUE;
		old_pa = pte_to_pa(*pte);
		if (0 == old_pa) {
			UNLOCK_PVH(pai);	/* another path beat us to it */
			old_pa_locked = FALSE;
		}
	}

	/*
	 *	Special case if the incoming physical page is already mapped
	 *	at this address.
	 */
	if (old_pa == pa) {
		/*
		 *	May be changing its wired attribute or protection
		 */
		template = pa_to_pte(pa) | INTEL_PTE_VALID;

		if (VM_MEM_NOT_CACHEABLE ==
		    (flags & (VM_MEM_NOT_CACHEABLE | VM_WIMG_USE_DEFAULT))) {
			if (!(flags & VM_MEM_GUARDED))
				template |= INTEL_PTE_PTA;
			template |= INTEL_PTE_NCACHE;
		}
		if (pmap != kernel_pmap)
			template |= INTEL_PTE_USER;
		if (prot & VM_PROT_WRITE)
			template |= INTEL_PTE_WRITE;

		if (set_NX)
			template |= INTEL_PTE_NX;

		if (wired) {
			template |= INTEL_PTE_WIRED;
			if (!iswired(*pte))
				OSAddAtomic(+1, &pmap->stats.wired_count);
		} else {
			if (iswired(*pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
			}
		}
		if (superpage)		/* this path can not be used */
			template |= INTEL_PTE_PS;	/* to change the page size! */

		/* store modified PTE and preserve RC bits */
		pmap_update_pte(pte, *pte,
			template | (*pte & (INTEL_PTE_REF | INTEL_PTE_MOD)));
		if (old_pa_locked) {
			UNLOCK_PVH(pai);
			old_pa_locked = FALSE;
		}
		need_tlbflush = TRUE;
		goto Done;
	}

	/*
	 *	Outline of code from here:
	 *	   1) If va was mapped, update TLBs, remove the mapping
	 *	      and remove old pvlist entry.
	 *	   2) Add pvlist entry for new mapping
	 *	   3) Enter new mapping.
	 *
	 *	If the old physical page is not managed step 1) is skipped
	 *	(except for updating the TLBs), and the mapping is
	 *	overwritten at step 3).  If the new physical page is not
	 *	managed, step 2) is skipped.
	 */

	if (old_pa != (pmap_paddr_t) 0) {
		/*
		 *	Don't do anything to pages outside valid memory here.
		 *	Instead convince the code that enters a new mapping
		 *	to overwrite the old one.
		 */

		/* invalidate the PTE */
		pmap_update_pte(pte, *pte, (*pte & ~INTEL_PTE_VALID));
		/* propagate invalidate everywhere */
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);
		/* remember reference and change */
		old_pte = *pte;
		oattr = (char) (old_pte & (PHYS_MODIFIED | PHYS_REFERENCED));
		/* completely invalidate the PTE */
		pmap_store_pte(pte, 0);

		if (IS_MANAGED_PAGE(pai)) {
			if (pmap->stats.resident_count < 1)
				panic("pmap_enter: resident_count");
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1, &pmap->stats.resident_count);

			if (iswired(old_pte)) {
				if (pmap->stats.wired_count < 1)
					panic("pmap_enter: wired_count");
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
			}
			pmap_phys_attributes[pai] |= oattr;

			/*
			 *	Remove the mapping from the pvlist for
			 *	this physical page.
			 *	We'll end up with either a rooted pv or a
			 *	hashed pv.
			 */
			pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, &old_pte);
		} else {
			/*
			 *	old_pa is not managed.
			 *	Do removal part of accounting.
			 */
			if (iswired(old_pte)) {
				assert(pmap->stats.wired_count >= 1);
				OSAddAtomic(-1, &pmap->stats.wired_count);
			}
		}
	}

	/*
	 * if we had a previously managed page locked, unlock it now
	 */
	if (old_pa_locked) {
		UNLOCK_PVH(pai);
		old_pa_locked = FALSE;
	}

	pai = pa_index(pa);	/* now working with new incoming phys page */
	if (IS_MANAGED_PAGE(pai)) {
		/*
		 *	Step 2) Enter the mapping in the PV list for this
		 *	physical page.
		 */
		pv_h = pai_to_pvh(pai);

		LOCK_PVH(pai);

		if (pv_h->pmap == PMAP_NULL) {
			/*
			 *	No mappings yet, use rooted pv
			 */
			pv_h->va = vaddr;
			pv_h->pmap = pmap;
			queue_init(&pv_h->qlink);
		} else {
			/*
			 *	Add new pv_hashed_entry after header.
			 */
			if ((PV_HASHED_ENTRY_NULL == pvh_e) && pvh_new) {
				pvh_e = pvh_new;
				pvh_new = PV_HASHED_ENTRY_NULL;
			} else if (PV_HASHED_ENTRY_NULL == pvh_e) {
				PV_HASHED_ALLOC(pvh_e);
				if (PV_HASHED_ENTRY_NULL == pvh_e) {
					/*
					 * the pv list is empty. if we are on
					 * the kernel pmap we'll use one of
					 * the special private kernel pv_e's,
					 * else, we need to unlock
					 * everything, zalloc a pv_e, and
					 * restart bringing in the pv_e with
					 * us.
					 */
					if (kernel_pmap == pmap) {
						PV_HASHED_KERN_ALLOC(pvh_e);
					} else {
						UNLOCK_PVH(pai);
						pvh_new = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);
						goto Retry;
					}
				}
			}

			if (PV_HASHED_ENTRY_NULL == pvh_e)
				panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");

			pvh_e->va = vaddr;
			pvh_e->pmap = pmap;
			pvh_e->ppn = pn;
			pv_hash_add(pvh_e, pv_h);

			/*
			 *	Remember that we used the pvlist entry.
			 */
			pvh_e = PV_HASHED_ENTRY_NULL;
		}

		/*
		 * only count the mapping
		 * for 'managed memory'
		 */
		OSAddAtomic(+1, &pmap->stats.resident_count);
		if (pmap->stats.resident_count > pmap->stats.resident_max) {
			pmap->stats.resident_max = pmap->stats.resident_count;
		}
	}
	/*
	 * Step 3) Enter the mapping.
	 *
	 *	Build a template to speed up entering -
	 *	only the pfn changes.
	 */
	template = pa_to_pte(pa) | INTEL_PTE_VALID;

	if (flags & VM_MEM_NOT_CACHEABLE) {
		if (!(flags & VM_MEM_GUARDED))
			template |= INTEL_PTE_PTA;
		template |= INTEL_PTE_NCACHE;
	}
	if (pmap != kernel_pmap)
		template |= INTEL_PTE_USER;
	if (prot & VM_PROT_WRITE)
		template |= INTEL_PTE_WRITE;
	if (set_NX)
		template |= INTEL_PTE_NX;
	if (wired) {
		template |= INTEL_PTE_WIRED;
		OSAddAtomic(+1, &pmap->stats.wired_count);
	}
	if (superpage)
		template |= INTEL_PTE_PS;
	pmap_store_pte(pte, template);

	/*
	 * if this was a managed page we delayed unlocking the pv until here
	 * to prevent pmap_page_protect et al from finding it until the pte
	 * has been stored
	 */
	if (IS_MANAGED_PAGE(pai)) {
		UNLOCK_PVH(pai);
	}
Done:
	if (need_tlbflush == TRUE)
		PMAP_UPDATE_TLBS(pmap, vaddr, vaddr + PAGE_SIZE);

	if (pvh_e != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_e, pvh_e, 1);
	}
	if (pvh_new != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_KERN_FREE_LIST(pvh_new, pvh_new, 1);
	}

	if (delpage_pm_obj) {
		vm_page_t m;

		vm_object_lock(delpage_pm_obj);
		m = vm_page_lookup(delpage_pm_obj, delpage_pde_index);
		if (m == VM_PAGE_NULL)
			panic("pmap_enter: pte page not in object");
		VM_PAGE_FREE(m);
		OSAddAtomic(-1, &inuse_ptepages_count);
		vm_object_unlock(delpage_pm_obj);
	}

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */
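
/*
 * Illustrative note (not in the original source): a caller such as
 * pmap_remove() below derives the [spte, epte) window for a sub-range
 * [s64, l64) lying within one pde_mapped_size-aligned region roughly as
 *
 *	spte = pmap_pte(map, s64 & ~(pde_mapped_size - 1));
 *	spte = &spte[ptenum(s64)];
 *	epte = &spte[intel_btop(l64 - s64)];
 *
 * so that exactly (l64 - s64) / PAGE_SIZE entries are passed in.
 */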
void
pmap_remove_range(
	pmap_t			pmap,
	vm_map_offset_t		start_vaddr,
	pt_entry_t		*spte,
	pt_entry_t		*epte)
{
	pt_entry_t		*cpte;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_e;
	int			pvh_cnt = 0;
	int			num_removed, num_unwired, num_found, num_invalid;
	int			pai;
	pmap_paddr_t		pa;
	vm_map_offset_t		vaddr;

	num_removed = 0;
	num_unwired = 0;
	num_found   = 0;
	num_invalid = 0;
#if defined(__i386__)
	if (pmap != kernel_pmap &&
	    pmap->pm_task_map == TASK_MAP_32BIT &&
	    start_vaddr >= HIGH_MEM_BASE) {
		/*
		 * The range is in the "high_shared_pde" which is shared
		 * between the kernel and all 32-bit tasks.  It holds
		 * the 32-bit commpage but also the trampolines, GDT, etc...
		 * so we can't let user tasks remove anything from it.
		 */
		return;
	}
#endif
	/* invalidate the PTEs first to "freeze" them */
	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {
		pt_entry_t p = *cpte;

		pa = pte_to_pa(p);
		if (pa == 0)
			continue;
		num_found++;

		if (iswired(p))
			num_unwired++;

		pai = pa_index(pa);

		if (!IS_MANAGED_PAGE(pai)) {
			/*
			 *	Outside range of managed physical memory.
			 *	Just remove the mappings.
			 */
			pmap_store_pte(cpte, 0);
			continue;
		}

		if ((p & INTEL_PTE_VALID) == 0)
			num_invalid++;

		/* invalidate the PTE */
		pmap_update_pte(cpte, *cpte, (*cpte & ~INTEL_PTE_VALID));
	}

	if (num_found == 0) {
		/* nothing was changed: we're done */
		goto update_counts;
	}

	/* propagate the invalidates to other CPUs */

	PMAP_UPDATE_TLBS(pmap, start_vaddr, vaddr);

	for (cpte = spte, vaddr = start_vaddr;
	     cpte < epte;
	     cpte++, vaddr += PAGE_SIZE_64) {

		pa = pte_to_pa(*cpte);
		if (pa == 0)
			continue;

		pai = pa_index(pa);

		LOCK_PVH(pai);

		pa = pte_to_pa(*cpte);
		if (pa == 0) {
			UNLOCK_PVH(pai);
			continue;
		}
		num_removed++;

		/*
		 * Get the modify and reference bits, then
		 * nuke the entry in the page table
		 */
		/* remember reference and change */
		pmap_phys_attributes[pai] |=
			(char) (*cpte & (PHYS_MODIFIED | PHYS_REFERENCED));

		/*
		 * Remove the mapping from the pvlist for this physical page.
		 */
		pvh_e = pmap_pv_remove(pmap, vaddr, (ppnum_t *) &pai, cpte);

		/* completely invalidate the PTE */
		pmap_store_pte(cpte, 0);

		UNLOCK_PVH(pai);

		if (pvh_e != PV_HASHED_ENTRY_NULL) {
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL) {
				pvh_et = pvh_e;
			}
			pvh_cnt++;
		}
	}

	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
update_counts:
	if (pmap->stats.resident_count < num_removed)
		panic("pmap_remove_range: resident_count");
	assert(pmap->stats.resident_count >= num_removed);
	OSAddAtomic(-num_removed, &pmap->stats.resident_count);

	if (pmap->stats.wired_count < num_unwired)
		panic("pmap_remove_range: wired_count");
	assert(pmap->stats.wired_count >= num_unwired);
	OSAddAtomic(-num_unwired, &pmap->stats.wired_count);

	return;
}
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
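
/*
 * Illustrative usage note (not in the original source; the parameter order
 * is assumed from the definition below): to tear down a single page mapping
 * the VM layer would call something like
 *
 *	pmap_remove(map, vaddr, vaddr + PAGE_SIZE);
 *
 * with both bounds page-aligned, as required by the comment above.
 */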
void
pmap_remove(
	pmap_t		map,
	addr64_t	s64,
	addr64_t	e64)
{
	pt_entry_t	*pde;
	pt_entry_t	*spte, *epte;
	addr64_t	l64;
	uint64_t	deadline;

	if (map == PMAP_NULL || s64 == e64)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
		   map,
		   (uint32_t) (s64 >> 32), s64,
		   (uint32_t) (e64 >> 32), e64);

#if 0
	/*
	 * Check that address range in the kernel does not overlap the stacks.
	 * We initialize local static min/max variables once to avoid making
	 * 2 function calls for every remove. Note also that these functions
	 * both return 0 before kernel stacks have been initialized, and hence
	 * the panic is not triggered in this case.
	 */

	if (map == kernel_pmap) {
		static vm_offset_t kernel_stack_min = 0;
		static vm_offset_t kernel_stack_max = 0;

		if (kernel_stack_min == 0) {
			kernel_stack_min = min_valid_stack_address();
			kernel_stack_max = max_valid_stack_address();
		}
		if ((kernel_stack_min <= s64 && s64 < kernel_stack_max) ||
		    (kernel_stack_min < e64 && e64 <= kernel_stack_max))
			panic("pmap_remove() attempted in kernel stack");
	}
#else
	/*
	 * The values of kernel_stack_min and kernel_stack_max are no longer
	 * relevant now that we allocate kernel stacks in the kernel map,
	 * so the old code above no longer applies.  If we wanted to check that
	 * we weren't removing a mapping of a page in a kernel stack we'd
	 * mark the PTE with an unused bit and check that here.
	 */
#endif

	deadline = rdtsc64() + max_preemption_latency_tsc;

	while (s64 < e64) {
		l64 = (s64 + pde_mapped_size) & ~(pde_mapped_size - 1);
		if (l64 > e64)
			l64 = e64;
		pde = pmap_pde(map, s64);

		if (pde && (*pde & INTEL_PTE_VALID)) {
			if (*pde & INTEL_PTE_PS) {
				/*
				 * If we're removing a superpage, pmap_remove_range()
				 * must work on level 2 instead of level 1; and we're
				 * only passing a single level 2 entry instead of a
				 * level 1 range.
				 */
				spte = pde;
				epte = spte+1; /* excluded */
			} else {
				spte = pmap_pte(map, (s64 & ~(pde_mapped_size - 1)));
				spte = &spte[ptenum(s64)];
				epte = &spte[intel_btop(l64 - s64)];
			}
			pmap_remove_range(map, s64, spte, epte);
		}
		s64 = l64;

		if (s64 < e64 && rdtsc64() >= deadline) {
			deadline = rdtsc64() + max_preemption_latency_tsc;
		}
	}

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END,
		   map, 0, 0, 0, 0);
}
/*
 *	Routine:	pmap_page_protect
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
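
/*
 * Illustrative note (not part of the original source): "lowering" here means
 * that read-only requests (VM_PROT_READ, VM_PROT_READ | VM_PROT_EXECUTE)
 * clear INTEL_PTE_WRITE in every mapping of the page, while VM_PROT_NONE
 * removes the mappings and their pv entries altogether. Kernel mappings are
 * never merely downgraded; they are removed, as the
 * "remove || pmap == kernel_pmap" test below shows.
 */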
void
pmap_page_protect(
	ppnum_t		pn,
	vm_prot_t	prot)
{
	pv_hashed_entry_t	pvh_eh = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	pvh_et = PV_HASHED_ENTRY_NULL;
	pv_hashed_entry_t	nexth;
	int			pvh_cnt = 0;
	pv_rooted_entry_t	pv_h;
	pv_rooted_entry_t	pv_e;
	pv_hashed_entry_t	pvh_e;
	pt_entry_t		*pte;
	int			pai;
	pmap_t			pmap;
	boolean_t		remove;

	assert(pn != vm_page_fictitious_addr);
	if (pn == vm_page_guard_addr)
		return;

	pai = ppn_to_pai(pn);

	if (!IS_MANAGED_PAGE(pai)) {
		/*
		 *	Not a managed page.
		 */
		return;
	}
	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START,
		   pn, prot, 0, 0, 0);

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_READ:
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	case VM_PROT_ALL:
		return;		/* nothing to do */
	default:
		remove = TRUE;
		break;
	}

	pv_h = pai_to_pvh(pai);

	LOCK_PVH(pai);

	/*
	 * Walk down PV list, if any, changing or removing all mappings.
	 */
	if (pv_h->pmap == PMAP_NULL)
		goto done;

	pv_e = pv_h;
	pvh_e = (pv_hashed_entry_t) pv_e;	/* cheat */

	do {
		vm_map_offset_t vaddr;

		pmap = pv_e->pmap;
		vaddr = pv_e->va;
		pte = pmap_pte(pmap, vaddr);

		if (pa_index(pte_to_pa(*pte)) != pn)
			panic("pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn, pmap, vaddr, *pte);

		if (0 == pte) {
			panic("pmap_page_protect() "
				"pmap=%p pn=0x%x vaddr=0x%llx\n",
				pmap, pn, vaddr);
		}
		nexth = (pv_hashed_entry_t) queue_next(&pvh_e->qlink);

		/*
		 * Remove the mapping if new protection is NONE
		 * or if write-protecting a kernel mapping.
		 */
		if (remove || pmap == kernel_pmap) {
			/*
			 * Remove the mapping, collecting dirty bits.
			 */
			pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_VALID);
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
			pmap_phys_attributes[pai] |=
				*pte & (PHYS_MODIFIED|PHYS_REFERENCED);
			pmap_store_pte(pte, 0);

			if (pmap->stats.resident_count < 1)
				panic("pmap_page_protect: resident_count");
			assert(pmap->stats.resident_count >= 1);
			OSAddAtomic(-1, &pmap->stats.resident_count);

			/*
			 * Deal with the pv_rooted_entry.
			 */
			if (pv_e == pv_h) {
				/*
				 * Fix up head later.
				 */
				pv_h->pmap = PMAP_NULL;
			} else {
				/*
				 * Delete this entry.
				 */
				pv_hash_remove(pvh_e);
				pvh_e->qlink.next = (queue_entry_t) pvh_eh;
				pvh_eh = pvh_e;

				if (pvh_et == PV_HASHED_ENTRY_NULL)
					pvh_et = pvh_e;
				pvh_cnt++;
			}
		} else {
			/*
			 * Write-protect.
			 */
			pmap_update_pte(pte, *pte, *pte & ~INTEL_PTE_WRITE);
			PMAP_UPDATE_TLBS(pmap, vaddr, vaddr+PAGE_SIZE);
		}
		pvh_e = nexth;
	} while ((pv_e = (pv_rooted_entry_t) nexth) != pv_h);

	/*
	 * If pv_head mapping was removed, fix it up.
	 */
	if (pv_h->pmap == PMAP_NULL) {
		pvh_e = (pv_hashed_entry_t) queue_next(&pv_h->qlink);

		if (pvh_e != (pv_hashed_entry_t) pv_h) {
			pv_hash_remove(pvh_e);
			pv_h->pmap = pvh_e->pmap;
			pv_h->va = pvh_e->va;
			pvh_e->qlink.next = (queue_entry_t) pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL)
				pvh_et = pvh_e;
			pvh_cnt++;
		}
	}

	if (pvh_eh != PV_HASHED_ENTRY_NULL) {
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pvh_cnt);
	}
done:
	UNLOCK_PVH(pai);

	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END,
		   0, 0, 0, 0, 0);
}
__private_extern__ void
pmap_pagetable_corruption_msg_log(int (*log_func)(const char * fmt, ...)__printflike(1,2)) {
	if (pmap_pagetable_corruption_incidents > 0) {
		int i, e = MIN(pmap_pagetable_corruption_incidents, PMAP_PAGETABLE_CORRUPTION_MAX_LOG);
		(*log_func)("%u pagetable corruption incident(s) detected, timeout: %u\n", pmap_pagetable_corruption_incidents, pmap_pagetable_corruption_timeout);
		for (i = 0; i < e; i++) {
			(*log_func)("Incident 0x%x, reason: 0x%x, action: 0x%x, time: 0x%llx\n", pmap_pagetable_corruption_records[i].incident, pmap_pagetable_corruption_records[i].reason, pmap_pagetable_corruption_records[i].action, pmap_pagetable_corruption_records[i].abstime);
		}
	}
}
void
mapping_free_prime(void)
{
	int			i;
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_eh;
	pv_hashed_entry_t	pvh_et;
	int			pv_cnt;

	pv_cnt = 0;
	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
	for (i = 0; i < (5 * PV_HASHED_ALLOC_CHUNK); i++) {
		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
		pvh_eh = pvh_e;

		if (pvh_et == PV_HASHED_ENTRY_NULL)
			pvh_et = pvh_e;
		pv_cnt++;
	}
	PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);

	pv_cnt = 0;
	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
	for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
		pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

		pvh_e->qlink.next = (queue_entry_t)pvh_eh;
		pvh_eh = pvh_e;

		if (pvh_et == PV_HASHED_ENTRY_NULL)
			pvh_et = pvh_e;
		pv_cnt++;
	}
	PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
}
void
pmap_pagetable_corruption_log_setup(void) {
	if (pmap_pagetable_corruption_log_call == NULL) {
		nanotime_to_absolutetime(PMAP_PAGETABLE_CORRUPTION_INTERVAL, 0, &pmap_pagetable_corruption_interval_abstime);
		thread_call_setup(&pmap_pagetable_corruption_log_call_data,
		    (thread_call_func_t) pmap_pagetable_corruption_msg_log,
		    (thread_call_param_t) &printf);
		pmap_pagetable_corruption_log_call = &pmap_pagetable_corruption_log_call_data;
	}
}
void
mapping_adjust(void)
{
	pv_hashed_entry_t	pvh_e;
	pv_hashed_entry_t	pvh_eh;
	pv_hashed_entry_t	pvh_et;
	int			pv_cnt;
	int			i;

	if (mapping_adjust_call == NULL) {
		thread_call_setup(&mapping_adjust_call_data,
				  (thread_call_func_t) mapping_adjust,
				  (thread_call_param_t) NULL);
		mapping_adjust_call = &mapping_adjust_call_data;
	}

	pmap_pagetable_corruption_log_setup();

	pv_cnt = 0;
	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
	if (pv_hashed_kern_free_count < PV_HASHED_KERN_LOW_WATER_MARK) {
		for (i = 0; i < PV_HASHED_KERN_ALLOC_CHUNK; i++) {
			pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

			pvh_e->qlink.next = (queue_entry_t)pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL)
				pvh_et = pvh_e;
			pv_cnt++;
		}
		PV_HASHED_KERN_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
	}

	pv_cnt = 0;
	pvh_eh = pvh_et = PV_HASHED_ENTRY_NULL;
	if (pv_hashed_free_count < PV_HASHED_LOW_WATER_MARK) {
		for (i = 0; i < PV_HASHED_ALLOC_CHUNK; i++) {
			pvh_e = (pv_hashed_entry_t) zalloc(pv_hashed_list_zone);

			pvh_e->qlink.next = (queue_entry_t)pvh_eh;
			pvh_eh = pvh_e;

			if (pvh_et == PV_HASHED_ENTRY_NULL)
				pvh_et = pvh_e;
			pv_cnt++;
		}
		PV_HASHED_FREE_LIST(pvh_eh, pvh_et, pv_cnt);
	}
	mappingrecurse = 0;
}