/*
 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
29 #include <mach_assert.h>
32 #include <vm/vm_map.h>
33 #include <kern/ledger.h>
34 #include <i386/pmap_internal.h>
36 void pmap_remove_range(
42 void pmap_remove_range_options(
49 void pmap_reusable_range(
56 uint32_t pmap_update_clear_pte_count
;
/*
 * The Intel platform can nest at the PDE level, so NBPDE (i.e. 2MB) at a time,
 * on a NBPDE boundary.
 */
63 /* These symbols may be referenced directly by VM */
64 uint64_t pmap_nesting_size_min
= NBPDE
;
65 uint64_t pmap_nesting_size_max
= 0 - (uint64_t)NBPDE
;
/*
 *	kern_return_t pmap_nest(grand, subord, va_start, size)
 *
 *	grand  = the pmap that we will nest subord into
 *	subord = the pmap that goes into the grand
 *	va_start  = start of range in pmap to be inserted
 *	nstart  = start of range in pmap nested pmap
 *	size   = Size of nest area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *
 *	Note that we depend upon higher level VM locks to insure that things don't change while
 *	we are doing this.  For example, VM should not be doing any pmap enters while it is nesting
 *	or do 2 nests at once.
 */

/*
 * This routine can nest subtrees either at the PDPT level (1GiB) or at the
 * PDE level (2MiB). We currently disallow disparate offsets for the "subord"
 * container and the "grand" parent. A minor optimization to consider for the
 * future: make the "subord" truly a container rather than a full-fledged
 * pagetable hierarchy which can be unnecessarily sparse (DRK).
 */
91 kern_return_t
pmap_nest(pmap_t grand
, pmap_t subord
, addr64_t va_start
, addr64_t nstart
, uint64_t size
) {
92 vm_map_offset_t vaddr
, nvaddr
;
93 pd_entry_t
*pde
,*npde
;
97 if ((size
& (pmap_nesting_size_min
-1)) ||
98 (va_start
& (pmap_nesting_size_min
-1)) ||
99 (nstart
& (pmap_nesting_size_min
-1)) ||
100 ((size
>> 28) > 65536)) /* Max size we can nest is 16TB */
101 return KERN_INVALID_VALUE
;
104 panic("pmap_nest: size is invalid - %016llX\n", size
);
107 if (va_start
!= nstart
)
108 panic("pmap_nest: va_start(0x%llx) != nstart(0x%llx)\n", va_start
, nstart
);
110 PMAP_TRACE(PMAP_CODE(PMAP__NEST
) | DBG_FUNC_START
,
111 (uintptr_t) grand
, (uintptr_t) subord
,
112 (uintptr_t) (va_start
>>32), (uintptr_t) va_start
, 0);
114 nvaddr
= (vm_map_offset_t
)nstart
;
115 num_pde
= size
>> PDESHIFT
;
119 subord
->pm_shared
= TRUE
;
121 for (i
= 0; i
< num_pde
;) {
122 if (((nvaddr
& PDPTMASK
) == 0) && (num_pde
- i
) >= NPDEPG
&& cpu_64bit
) {
124 npde
= pmap64_pdpt(subord
, nvaddr
);
126 while (0 == npde
|| ((*npde
& INTEL_PTE_VALID
) == 0)) {
128 pmap_expand_pdpt(subord
, nvaddr
, PMAP_EXPAND_OPTIONS_NONE
);
130 npde
= pmap64_pdpt(subord
, nvaddr
);
132 *npde
|= INTEL_PDPTE_NESTED
;
134 i
+= (uint32_t)NPDEPG
;
137 npde
= pmap_pde(subord
, nvaddr
);
139 while (0 == npde
|| ((*npde
& INTEL_PTE_VALID
) == 0)) {
141 pmap_expand(subord
, nvaddr
, PMAP_EXPAND_OPTIONS_NONE
);
143 npde
= pmap_pde(subord
, nvaddr
);
152 vaddr
= (vm_map_offset_t
)va_start
;
156 for (i
= 0;i
< num_pde
;) {
159 if (((vaddr
& PDPTMASK
) == 0) && ((num_pde
- i
) >= NPDEPG
) && cpu_64bit
) {
160 npde
= pmap64_pdpt(subord
, vaddr
);
162 panic("pmap_nest: no PDPT, subord %p nstart 0x%llx", subord
, vaddr
);
164 pde
= pmap64_pdpt(grand
, vaddr
);
167 pmap_expand_pml4(grand
, vaddr
, PMAP_EXPAND_OPTIONS_NONE
);
169 pde
= pmap64_pdpt(grand
, vaddr
);
172 panic("pmap_nest: no PDPT, grand %p vaddr 0x%llx", grand
, vaddr
);
173 pmap_store_pte(pde
, tpde
);
175 i
+= (uint32_t) NPDEPG
;
178 npde
= pmap_pde(subord
, nstart
);
180 panic("pmap_nest: no npde, subord %p nstart 0x%llx", subord
, nstart
);
183 pde
= pmap_pde(grand
, vaddr
);
184 if ((0 == pde
) && cpu_64bit
) {
186 pmap_expand_pdpt(grand
, vaddr
, PMAP_EXPAND_OPTIONS_NONE
);
188 pde
= pmap_pde(grand
, vaddr
);
192 panic("pmap_nest: no pde, grand %p vaddr 0x%llx", grand
, vaddr
);
194 pmap_store_pte(pde
, tpde
);
201 PMAP_TRACE(PMAP_CODE(PMAP__NEST
) | DBG_FUNC_END
, 0, 0, 0, 0, 0);
/*
 *	kern_return_t pmap_unnest(grand, vaddr)
 *
 *	grand  = the pmap that we will un-nest subord from
 *	vaddr  = start of range in pmap to be unnested
 *
 *	Removes a pmap from another. This is used to implement shared segments.
 */
215 kern_return_t
pmap_unnest(pmap_t grand
, addr64_t vaddr
, uint64_t size
) {
220 addr64_t va_start
, va_end
;
221 uint64_t npdpt
= PMAP_INVALID_PDPTNUM
;
223 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST
) | DBG_FUNC_START
,
225 (uintptr_t) (vaddr
>>32), (uintptr_t) vaddr
, 0, 0);
227 if ((size
& (pmap_nesting_size_min
-1)) ||
228 (vaddr
& (pmap_nesting_size_min
-1))) {
229 panic("pmap_unnest(%p,0x%llx,0x%llx): unaligned...\n",
233 /* align everything to PDE boundaries */
234 va_start
= vaddr
& ~(NBPDE
-1);
235 va_end
= (vaddr
+ size
+ NBPDE
- 1) & ~(NBPDE
-1);
236 size
= va_end
- va_start
;
240 num_pde
= size
>> PDESHIFT
;
243 for (i
= 0; i
< num_pde
; ) {
244 if ((pdptnum(grand
, vaddr
) != npdpt
) && cpu_64bit
) {
245 npdpt
= pdptnum(grand
, vaddr
);
246 pde
= pmap64_pdpt(grand
, vaddr
);
247 if (pde
&& (*pde
& INTEL_PDPTE_NESTED
)) {
248 pmap_store_pte(pde
, (pd_entry_t
)0);
249 i
+= (uint32_t) NPDEPG
;
254 pde
= pmap_pde(grand
, (vm_map_offset_t
)vaddr
);
256 panic("pmap_unnest: no pde, grand %p vaddr 0x%llx\n", grand
, vaddr
);
257 pmap_store_pte(pde
, (pd_entry_t
)0);
262 PMAP_UPDATE_TLBS(grand
, va_start
, va_end
);
266 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST
) | DBG_FUNC_END
, 0, 0, 0, 0, 0);
/* Invoked by the Mach VM to determine the platform specific unnest region */
273 boolean_t
pmap_adjust_unnest_parameters(pmap_t p
, vm_map_offset_t
*s
, vm_map_offset_t
*e
) {
275 boolean_t rval
= FALSE
;
282 pdpte
= pmap64_pdpt(p
, *s
);
283 if (pdpte
&& (*pdpte
& INTEL_PDPTE_NESTED
)) {
288 pdpte
= pmap64_pdpt(p
, *e
);
289 if (pdpte
&& (*pdpte
& INTEL_PDPTE_NESTED
)) {
290 *e
= ((*e
+ NBPDPT
) & ~(NBPDPT
-1));
/*
 * pmap_find_phys returns the (4K) physical page number containing a
 * given virtual address in a given pmap.
 * Note that pmap_pte may return a pde if this virtual address is
 * mapped by a large page and this is taken into account in order
 * to return the correct page number in this case.
 */
307 pmap_find_phys(pmap_t pmap
, addr64_t va
)
315 mp_disable_preemption();
317 /* This refcount test is a band-aid--several infrastructural changes
318 * are necessary to eliminate invocation of this routine from arbitrary
322 if (!pmap
->ref_count
)
325 pdep
= pmap_pde(pmap
, va
);
327 if ((pdep
!= PD_ENTRY_NULL
) && ((pde
= *pdep
) & INTEL_PTE_VALID
)) {
328 if (pde
& INTEL_PTE_PS
) {
329 ppn
= (ppnum_t
) i386_btop(pte_to_pa(pde
));
330 ppn
+= (ppnum_t
) ptenum(va
);
333 ptp
= pmap_pte(pmap
, va
);
334 if ((PT_ENTRY_NULL
!= ptp
) && (((pte
= *ptp
) & INTEL_PTE_VALID
) != 0)) {
335 ppn
= (ppnum_t
) i386_btop(pte_to_pa(pte
));
340 mp_enable_preemption();
/*
 * Update cache attributes for all extant managed mappings.
 * Assumes PV for this page is locked, and that the page
 * is managed.
 */
352 pmap_update_cache_attributes_locked(ppnum_t pn
, unsigned attributes
) {
353 pv_rooted_entry_t pv_h
, pv_e
;
354 pv_hashed_entry_t pvh_e
, nexth
;
355 vm_map_offset_t vaddr
;
359 assert(IS_MANAGED_PAGE(pn
));
361 pv_h
= pai_to_pvh(pn
);
362 /* TODO: translate the PHYS_* bits to PTE bits, while they're
363 * currently identical, they may not remain so
364 * Potential optimization (here and in page_protect),
365 * parallel shootdowns, check for redundant
366 * attribute modifications.
370 * Alter attributes on all mappings
372 if (pv_h
->pmap
!= PMAP_NULL
) {
374 pvh_e
= (pv_hashed_entry_t
)pv_e
;
379 ptep
= pmap_pte(pmap
, vaddr
);
382 panic("pmap_update_cache_attributes_locked: Missing PTE, pmap: %p, pn: 0x%x vaddr: 0x%llx kernel_pmap: %p", pmap
, pn
, vaddr
, kernel_pmap
);
384 nexth
= (pv_hashed_entry_t
)queue_next(&pvh_e
->qlink
);
385 pmap_update_pte(ptep
, PHYS_CACHEABILITY_MASK
, attributes
);
386 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
388 } while ((pv_e
= (pv_rooted_entry_t
)nexth
) != pv_h
);
392 void x86_filter_TLB_coherency_interrupts(boolean_t dofilter
) {
393 assert(ml_get_interrupts_enabled() == 0 || get_preemption_level() != 0);
396 CPU_CR3_MARK_INACTIVE();
398 CPU_CR3_MARK_ACTIVE();
400 if (current_cpu_datap()->cpu_tlb_invalid
)
401 process_pmap_updates();
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte cannot be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map NOW.
 */
421 register pmap_t pmap
,
422 vm_map_offset_t vaddr
,
425 vm_prot_t fault_type
,
429 (void) pmap_enter_options(pmap
, vaddr
, pn
, prot
, fault_type
, flags
, wired
, PMAP_EXPAND_OPTIONS_NONE
, NULL
);
435 register pmap_t pmap
,
436 vm_map_offset_t vaddr
,
439 __unused vm_prot_t fault_type
,
442 unsigned int options
,
446 pv_rooted_entry_t pv_h
;
448 pv_hashed_entry_t pvh_e
;
449 pv_hashed_entry_t pvh_new
;
452 pmap_paddr_t pa
= (pmap_paddr_t
) i386_ptob(pn
);
453 boolean_t need_tlbflush
= FALSE
;
456 boolean_t old_pa_locked
;
457 /* 2MiB mappings are confined to x86_64 by VM */
458 boolean_t superpage
= flags
& VM_MEM_SUPERPAGE
;
459 vm_object_t delpage_pm_obj
= NULL
;
460 uint64_t delpage_pde_index
= 0;
462 kern_return_t kr_expand
;
466 if (pmap
== PMAP_NULL
)
467 return KERN_INVALID_ARGUMENT
;
469 /* N.B. We can be supplied a zero page frame in the NOENTER case, it's an
470 * unused value for that scenario.
472 assert(pn
!= vm_page_fictitious_addr
);
474 if (pn
== vm_page_guard_addr
)
475 return KERN_INVALID_ARGUMENT
;
477 PMAP_TRACE(PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_START
,
479 (uint32_t) (vaddr
>> 32), (uint32_t) vaddr
,
482 if ((prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !pmap
->nx_enabled
)
487 if (__improbable(set_NX
&& (pmap
== kernel_pmap
) && ((pmap_disable_kstack_nx
&& (flags
& VM_MEM_STACK
)) || (pmap_disable_kheap_nx
&& !(flags
& VM_MEM_STACK
))))) {
492 * Must allocate a new pvlist entry while we're unlocked;
493 * zalloc may cause pageout (which will lock the pmap system).
494 * If we determine we need a pvlist entry, we will unlock
495 * and allocate one. Then we will retry, throughing away
496 * the allocated entry later (if we no longer need it).
499 pvh_new
= PV_HASHED_ENTRY_NULL
;
501 pvh_e
= PV_HASHED_ENTRY_NULL
;
506 * Expand pmap to include this pte. Assume that
507 * pmap is always expanded to include enough hardware
508 * pages to map one VM page.
511 while ((pte
= pmap64_pde(pmap
, vaddr
)) == PD_ENTRY_NULL
) {
512 /* need room for another pde entry */
514 kr_expand
= pmap_expand_pdpt(pmap
, vaddr
, options
);
515 if (kr_expand
!= KERN_SUCCESS
)
520 while ((pte
= pmap_pte(pmap
, vaddr
)) == PT_ENTRY_NULL
) {
522 * Must unlock to expand the pmap
523 * going to grow pde level page(s)
526 kr_expand
= pmap_expand(pmap
, vaddr
, options
);
527 if (kr_expand
!= KERN_SUCCESS
)
532 if (options
& PMAP_EXPAND_OPTIONS_NOENTER
) {
537 if (superpage
&& *pte
&& !(*pte
& INTEL_PTE_PS
)) {
539 * There is still an empty page table mapped that
540 * was used for a previous base page mapping.
541 * Remember the PDE and the PDE index, so that we
542 * can free the page at the end of this function.
544 delpage_pde_index
= pdeidx(pmap
, vaddr
);
545 delpage_pm_obj
= pmap
->pm_obj
;
549 old_pa
= pte_to_pa(*pte
);
550 pai
= pa_index(old_pa
);
551 old_pa_locked
= FALSE
;
554 (*pte
& INTEL_PTE_COMPRESSED
)) {
555 /* one less "compressed" */
556 OSAddAtomic64(-1, &pmap
->stats
.compressed
);
557 /* marker will be cleared below */
561 * if we have a previous managed page, lock the pv entry now. after
562 * we lock it, check to see if someone beat us to the lock and if so
565 if ((0 != old_pa
) && IS_MANAGED_PAGE(pai
)) {
567 old_pa_locked
= TRUE
;
568 old_pa
= pte_to_pa(*pte
);
570 UNLOCK_PVH(pai
); /* another path beat us to it */
571 old_pa_locked
= FALSE
;
576 * Special case if the incoming physical page is already mapped
580 pt_entry_t old_attributes
=
581 *pte
& ~(INTEL_PTE_REF
| INTEL_PTE_MOD
);
584 * May be changing its wired attribute or protection
587 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
588 template |= pmap_get_cache_attributes(pa_index(pa
));
590 if (VM_MEM_NOT_CACHEABLE
==
591 (flags
& (VM_MEM_NOT_CACHEABLE
| VM_WIMG_USE_DEFAULT
))) {
592 if (!(flags
& VM_MEM_GUARDED
))
593 template |= INTEL_PTE_PTA
;
594 template |= INTEL_PTE_NCACHE
;
596 if (pmap
!= kernel_pmap
)
597 template |= INTEL_PTE_USER
;
598 if (prot
& VM_PROT_WRITE
) {
599 template |= INTEL_PTE_WRITE
;
603 template |= INTEL_PTE_NX
;
606 template |= INTEL_PTE_WIRED
;
607 if (!iswired(old_attributes
)) {
608 OSAddAtomic(+1, &pmap
->stats
.wired_count
);
609 pmap_ledger_credit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
612 if (iswired(old_attributes
)) {
613 assert(pmap
->stats
.wired_count
>= 1);
614 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
615 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
618 if (superpage
) /* this path can not be used */
619 template |= INTEL_PTE_PS
; /* to change the page size! */
621 if (old_attributes
== template)
622 goto dont_update_pte
;
624 /* Determine delta, PV locked */
626 ((old_attributes
^ template) != INTEL_PTE_WIRED
);
628 if (need_tlbflush
== TRUE
&& !(old_attributes
& INTEL_PTE_WRITE
)) {
629 if ((old_attributes
^ template) == INTEL_PTE_WRITE
)
630 need_tlbflush
= FALSE
;
633 /* store modified PTE and preserve RC bits */
634 pt_entry_t npte
, opte
;;
637 npte
= template | (opte
& (INTEL_PTE_REF
| INTEL_PTE_MOD
));
638 } while (!pmap_cmpx_pte(pte
, opte
, npte
));
642 old_pa_locked
= FALSE
;
648 * Outline of code from here:
649 * 1) If va was mapped, update TLBs, remove the mapping
650 * and remove old pvlist entry.
651 * 2) Add pvlist entry for new mapping
652 * 3) Enter new mapping.
654 * If the old physical page is not managed step 1) is skipped
655 * (except for updating the TLBs), and the mapping is
656 * overwritten at step 3). If the new physical page is not
657 * managed, step 2) is skipped.
660 if (old_pa
!= (pmap_paddr_t
) 0) {
663 * Don't do anything to pages outside valid memory here.
664 * Instead convince the code that enters a new mapping
665 * to overwrite the old one.
668 /* invalidate the PTE */
669 pmap_update_pte(pte
, INTEL_PTE_VALID
, 0);
670 /* propagate invalidate everywhere */
671 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
672 /* remember reference and change */
674 oattr
= (char) (old_pte
& (PHYS_MODIFIED
| PHYS_REFERENCED
));
675 /* completely invalidate the PTE */
676 pmap_store_pte(pte
, 0);
678 if (IS_MANAGED_PAGE(pai
)) {
679 pmap_assert(old_pa_locked
== TRUE
);
680 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
681 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
682 assert(pmap
->stats
.resident_count
>= 1);
683 OSAddAtomic(-1, &pmap
->stats
.resident_count
);
684 if (pmap
!= kernel_pmap
) {
685 if (IS_REUSABLE_PAGE(pai
)) {
686 assert(pmap
->stats
.reusable
> 0);
687 OSAddAtomic(-1, &pmap
->stats
.reusable
);
688 } else if (IS_INTERNAL_PAGE(pai
)) {
689 assert(pmap
->stats
.internal
> 0);
690 OSAddAtomic(-1, &pmap
->stats
.internal
);
692 assert(pmap
->stats
.external
> 0);
693 OSAddAtomic(-1, &pmap
->stats
.external
);
697 assert(pmap
->stats
.wired_count
>= 1);
698 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
699 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
,
702 pmap_phys_attributes
[pai
] |= oattr
;
705 * Remove the mapping from the pvlist for
706 * this physical page.
707 * We'll end up with either a rooted pv or a
710 pvh_e
= pmap_pv_remove(pmap
, vaddr
, (ppnum_t
*) &pai
, &old_pte
);
715 * old_pa is not managed.
716 * Do removal part of accounting.
719 if (pmap
!= kernel_pmap
) {
721 assert(pmap
->stats
.device
> 0);
722 OSAddAtomic(-1, &pmap
->stats
.device
);
726 assert(pmap
->stats
.wired_count
>= 1);
727 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
728 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
734 * if we had a previously managed paged locked, unlock it now
738 old_pa_locked
= FALSE
;
741 pai
= pa_index(pa
); /* now working with new incoming phys page */
742 if (IS_MANAGED_PAGE(pai
)) {
745 * Step 2) Enter the mapping in the PV list for this
748 pv_h
= pai_to_pvh(pai
);
752 if (pv_h
->pmap
== PMAP_NULL
) {
754 * No mappings yet, use rooted pv
758 queue_init(&pv_h
->qlink
);
760 if (options
& PMAP_OPTIONS_INTERNAL
) {
761 pmap_phys_attributes
[pai
] |= PHYS_INTERNAL
;
763 pmap_phys_attributes
[pai
] &= ~PHYS_INTERNAL
;
765 if (options
& PMAP_OPTIONS_REUSABLE
) {
766 pmap_phys_attributes
[pai
] |= PHYS_REUSABLE
;
768 pmap_phys_attributes
[pai
] &= ~PHYS_REUSABLE
;
772 * Add new pv_hashed_entry after header.
774 if ((PV_HASHED_ENTRY_NULL
== pvh_e
) && pvh_new
) {
776 pvh_new
= PV_HASHED_ENTRY_NULL
;
777 } else if (PV_HASHED_ENTRY_NULL
== pvh_e
) {
778 PV_HASHED_ALLOC(&pvh_e
);
779 if (PV_HASHED_ENTRY_NULL
== pvh_e
) {
781 * the pv list is empty. if we are on
782 * the kernel pmap we'll use one of
783 * the special private kernel pv_e's,
784 * else, we need to unlock
785 * everything, zalloc a pv_e, and
786 * restart bringing in the pv_e with
789 if (kernel_pmap
== pmap
) {
790 PV_HASHED_KERN_ALLOC(&pvh_e
);
794 pmap_pv_throttle(pmap
);
795 pvh_new
= (pv_hashed_entry_t
) zalloc(pv_hashed_list_zone
);
801 if (PV_HASHED_ENTRY_NULL
== pvh_e
)
802 panic("Mapping alias chain exhaustion, possibly induced by numerous kernel virtual double mappings");
807 pv_hash_add(pvh_e
, pv_h
);
810 * Remember that we used the pvlist entry.
812 pvh_e
= PV_HASHED_ENTRY_NULL
;
816 * only count the mapping
817 * for 'managed memory'
819 pmap_ledger_credit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
820 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
821 OSAddAtomic(+1, &pmap
->stats
.resident_count
);
822 if (pmap
->stats
.resident_count
> pmap
->stats
.resident_max
) {
823 pmap
->stats
.resident_max
= pmap
->stats
.resident_count
;
825 if (pmap
!= kernel_pmap
) {
826 if (IS_REUSABLE_PAGE(pai
)) {
827 OSAddAtomic(+1, &pmap
->stats
.reusable
);
828 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
829 } else if (IS_INTERNAL_PAGE(pai
)) {
830 OSAddAtomic(+1, &pmap
->stats
.internal
);
831 PMAP_STATS_PEAK(pmap
->stats
.internal
);
833 OSAddAtomic(+1, &pmap
->stats
.external
);
834 PMAP_STATS_PEAK(pmap
->stats
.external
);
837 } else if (last_managed_page
== 0) {
838 /* Account for early mappings created before "managed pages"
839 * are determined. Consider consulting the available DRAM map.
841 pmap_ledger_credit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
842 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
843 OSAddAtomic(+1, &pmap
->stats
.resident_count
);
844 if (pmap
!= kernel_pmap
) {
846 OSAddAtomic(+1, &pmap
->stats
.device
);
847 PMAP_STATS_PEAK(pmap
->stats
.device
);
852 * Step 3) Enter the mapping.
854 * Build a template to speed up entering -
855 * only the pfn changes.
857 template = pa_to_pte(pa
) | INTEL_PTE_VALID
;
859 * DRK: It may be worth asserting on cache attribute flags that diverge
860 * from the existing physical page attributes.
863 template |= pmap_get_cache_attributes(pa_index(pa
));
865 if (flags
& VM_MEM_NOT_CACHEABLE
) {
866 if (!(flags
& VM_MEM_GUARDED
))
867 template |= INTEL_PTE_PTA
;
868 template |= INTEL_PTE_NCACHE
;
870 if (pmap
!= kernel_pmap
)
871 template |= INTEL_PTE_USER
;
872 if (prot
& VM_PROT_WRITE
)
873 template |= INTEL_PTE_WRITE
;
875 template |= INTEL_PTE_NX
;
877 template |= INTEL_PTE_WIRED
;
878 OSAddAtomic(+1, & pmap
->stats
.wired_count
);
879 pmap_ledger_credit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
882 template |= INTEL_PTE_PS
;
883 pmap_store_pte(pte
, template);
886 * if this was a managed page we delayed unlocking the pv until here
887 * to prevent pmap_page_protect et al from finding it until the pte
890 if (IS_MANAGED_PAGE(pai
)) {
894 if (need_tlbflush
== TRUE
) {
895 if (options
& PMAP_OPTIONS_NOFLUSH
)
896 PMAP_UPDATE_TLBS_DELAYED(pmap
, vaddr
, vaddr
+ PAGE_SIZE
, (pmap_flush_context
*)arg
);
898 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
900 if (pvh_e
!= PV_HASHED_ENTRY_NULL
) {
901 PV_HASHED_FREE_LIST(pvh_e
, pvh_e
, 1);
903 if (pvh_new
!= PV_HASHED_ENTRY_NULL
) {
904 PV_HASHED_KERN_FREE_LIST(pvh_new
, pvh_new
, 1);
908 if (delpage_pm_obj
) {
911 vm_object_lock(delpage_pm_obj
);
912 m
= vm_page_lookup(delpage_pm_obj
, (delpage_pde_index
* PAGE_SIZE
));
913 if (m
== VM_PAGE_NULL
)
914 panic("pmap_enter: pte page not in object");
915 vm_object_unlock(delpage_pm_obj
);
917 OSAddAtomic(-1, &inuse_ptepages_count
);
918 PMAP_ZINFO_PFREE(pmap
, PAGE_SIZE
);
921 PMAP_TRACE(PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_END
, 0, 0, 0, 0, 0);
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 */
940 vm_map_offset_t start_vaddr
,
944 pmap_remove_range_options(pmap
, start_vaddr
, spte
, epte
, 0);
948 pmap_remove_range_options(
950 vm_map_offset_t start_vaddr
,
956 pv_hashed_entry_t pvh_et
= PV_HASHED_ENTRY_NULL
;
957 pv_hashed_entry_t pvh_eh
= PV_HASHED_ENTRY_NULL
;
958 pv_hashed_entry_t pvh_e
;
960 int num_removed
, num_unwired
, num_found
, num_invalid
;
961 int num_device
, num_external
, num_internal
, num_reusable
;
962 uint64_t num_compressed
;
965 vm_map_offset_t vaddr
;
976 /* invalidate the PTEs first to "freeze" them */
977 for (cpte
= spte
, vaddr
= start_vaddr
;
979 cpte
++, vaddr
+= PAGE_SIZE_64
) {
980 pt_entry_t p
= *cpte
;
984 if (pmap
!= kernel_pmap
&&
985 (options
& PMAP_OPTIONS_REMOVE
) &&
986 (p
& INTEL_PTE_COMPRESSED
)) {
987 /* one less "compressed" */
990 /* XXX probably does not need to be atomic! */
991 pmap_update_pte(cpte
, INTEL_PTE_COMPRESSED
, 0);
1002 if (!IS_MANAGED_PAGE(pai
)) {
1004 * Outside range of managed physical memory.
1005 * Just remove the mappings.
1007 pmap_store_pte(cpte
, 0);
1012 if ((p
& INTEL_PTE_VALID
) == 0)
1015 /* invalidate the PTE */
1016 pmap_update_pte(cpte
, INTEL_PTE_VALID
, 0);
1019 if (num_found
== 0) {
1020 /* nothing was changed: we're done */
1024 /* propagate the invalidates to other CPUs */
1026 PMAP_UPDATE_TLBS(pmap
, start_vaddr
, vaddr
);
1028 for (cpte
= spte
, vaddr
= start_vaddr
;
1030 cpte
++, vaddr
+= PAGE_SIZE_64
) {
1032 pa
= pte_to_pa(*cpte
);
1040 pa
= pte_to_pa(*cpte
);
1046 if (IS_REUSABLE_PAGE(pai
)) {
1048 } else if (IS_INTERNAL_PAGE(pai
)) {
1055 * Get the modify and reference bits, then
1056 * nuke the entry in the page table
1058 /* remember reference and change */
1059 pmap_phys_attributes
[pai
] |=
1060 (char) (*cpte
& (PHYS_MODIFIED
| PHYS_REFERENCED
));
1063 * Remove the mapping from the pvlist for this physical page.
1065 pvh_e
= pmap_pv_remove(pmap
, vaddr
, (ppnum_t
*) &pai
, cpte
);
1067 /* completely invalidate the PTE */
1068 pmap_store_pte(cpte
, 0);
1072 if (pvh_e
!= PV_HASHED_ENTRY_NULL
) {
1073 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1076 if (pvh_et
== PV_HASHED_ENTRY_NULL
) {
1083 if (pvh_eh
!= PV_HASHED_ENTRY_NULL
) {
1084 PV_HASHED_FREE_LIST(pvh_eh
, pvh_et
, pvh_cnt
);
1091 if (pmap
->stats
.resident_count
< num_removed
)
1092 panic("pmap_remove_range: resident_count");
1094 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, machine_ptob(num_removed
));
1095 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, machine_ptob(num_removed
));
1096 assert(pmap
->stats
.resident_count
>= num_removed
);
1097 OSAddAtomic(-num_removed
, &pmap
->stats
.resident_count
);
1099 if (pmap
!= kernel_pmap
) {
1101 assert(pmap
->stats
.device
>= num_device
);
1103 OSAddAtomic(-num_device
, &pmap
->stats
.device
);
1105 assert(pmap
->stats
.external
>= num_external
);
1107 OSAddAtomic(-num_external
, &pmap
->stats
.external
);
1108 assert(pmap
->stats
.internal
>= num_internal
);
1110 OSAddAtomic(-num_internal
, &pmap
->stats
.internal
);
1111 assert(pmap
->stats
.reusable
>= num_reusable
);
1113 OSAddAtomic(-num_reusable
, &pmap
->stats
.reusable
);
1114 assert(pmap
->stats
.compressed
>= num_compressed
);
1116 OSAddAtomic64(-num_compressed
, &pmap
->stats
.compressed
);
1120 if (pmap
->stats
.wired_count
< num_unwired
)
1121 panic("pmap_remove_range: wired_count");
1123 assert(pmap
->stats
.wired_count
>= num_unwired
);
1124 OSAddAtomic(-num_unwired
, &pmap
->stats
.wired_count
);
1125 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, machine_ptob(num_unwired
));
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
1144 pmap_remove_options(map
, s64
, e64
, 0);
1148 pmap_remove_options(
1155 pt_entry_t
*spte
, *epte
;
1161 if (map
== PMAP_NULL
|| s64
== e64
)
1164 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE
) | DBG_FUNC_START
,
1166 (uint32_t) (s64
>> 32), s64
,
1167 (uint32_t) (e64
>> 32), e64
);
1174 * Check that address range in the kernel does not overlap the stacks.
1175 * We initialize local static min/max variables once to avoid making
1176 * 2 function calls for every remove. Note also that these functions
1177 * both return 0 before kernel stacks have been initialized, and hence
1178 * the panic is not triggered in this case.
1180 if (map
== kernel_pmap
) {
1181 static vm_offset_t kernel_stack_min
= 0;
1182 static vm_offset_t kernel_stack_max
= 0;
1184 if (kernel_stack_min
== 0) {
1185 kernel_stack_min
= min_valid_stack_address();
1186 kernel_stack_max
= max_valid_stack_address();
1188 if ((kernel_stack_min
<= s64
&& s64
< kernel_stack_max
) ||
1189 (kernel_stack_min
< e64
&& e64
<= kernel_stack_max
))
1190 panic("pmap_remove() attempted in kernel stack");
1195 * The values of kernel_stack_min and kernel_stack_max are no longer
1196 * relevant now that we allocate kernel stacks in the kernel map,
1197 * so the old code above no longer applies. If we wanted to check that
1198 * we weren't removing a mapping of a page in a kernel stack we'd
1199 * mark the PTE with an unused bit and check that here.
1204 deadline
= rdtsc64() + max_preemption_latency_tsc
;
1207 l64
= (s64
+ pde_mapped_size
) & ~(pde_mapped_size
- 1);
1210 pde
= pmap_pde(map
, s64
);
1212 if (pde
&& (*pde
& INTEL_PTE_VALID
)) {
1213 if (*pde
& INTEL_PTE_PS
) {
1215 * If we're removing a superpage, pmap_remove_range()
1216 * must work on level 2 instead of level 1; and we're
1217 * only passing a single level 2 entry instead of a
1221 epte
= spte
+1; /* excluded */
1223 spte
= pmap_pte(map
, (s64
& ~(pde_mapped_size
- 1)));
1224 spte
= &spte
[ptenum(s64
)];
1225 epte
= &spte
[intel_btop(l64
- s64
)];
1227 pmap_remove_range_options(map
, s64
, spte
, epte
,
1232 if (s64
< e64
&& rdtsc64() >= deadline
) {
1234 /* TODO: Rapid release/reacquisition can defeat
1235 * the "backoff" intent here; either consider a
1236 * fair spinlock, or a scheme whereby each lock
1237 * attempt marks the processor as within a spinlock
1238 * acquisition, and scan CPUs here to determine
1239 * if a backoff is necessary, to avoid sacrificing
1240 * performance in the common case.
1243 deadline
= rdtsc64() + max_preemption_latency_tsc
;
1249 PMAP_TRACE(PMAP_CODE(PMAP__REMOVE
) | DBG_FUNC_END
,
1259 pmap_page_protect_options(pn
, prot
, 0, NULL
);
/*
 *	Routine:	pmap_page_protect_options
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
1270 pmap_page_protect_options(
1273 unsigned int options
,
1276 pv_hashed_entry_t pvh_eh
= PV_HASHED_ENTRY_NULL
;
1277 pv_hashed_entry_t pvh_et
= PV_HASHED_ENTRY_NULL
;
1278 pv_hashed_entry_t nexth
;
1280 pv_rooted_entry_t pv_h
;
1281 pv_rooted_entry_t pv_e
;
1282 pv_hashed_entry_t pvh_e
;
1287 pt_entry_t new_pte_value
;
1290 assert(pn
!= vm_page_fictitious_addr
);
1291 if (pn
== vm_page_guard_addr
)
1294 pai
= ppn_to_pai(pn
);
1296 if (!IS_MANAGED_PAGE(pai
)) {
1298 * Not a managed page.
1302 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT
) | DBG_FUNC_START
,
1306 * Determine the new protection.
1310 case VM_PROT_READ
| VM_PROT_EXECUTE
:
1314 return; /* nothing to do */
1320 pv_h
= pai_to_pvh(pai
);
1326 * Walk down PV list, if any, changing or removing all mappings.
1328 if (pv_h
->pmap
== PMAP_NULL
)
1332 pvh_e
= (pv_hashed_entry_t
) pv_e
; /* cheat */
1335 vm_map_offset_t vaddr
;
1339 pte
= pmap_pte(pmap
, vaddr
);
1341 pmap_assert2((pa_index(pte_to_pa(*pte
)) == pn
),
1342 "pmap_page_protect: PTE mismatch, pn: 0x%x, pmap: %p, vaddr: 0x%llx, pte: 0x%llx", pn
, pmap
, vaddr
, *pte
);
1345 panic("pmap_page_protect() "
1346 "pmap=%p pn=0x%x vaddr=0x%llx\n",
1349 nexth
= (pv_hashed_entry_t
) queue_next(&pvh_e
->qlink
);
1352 * Remove the mapping if new protection is NONE
1356 /* Remove per-pmap wired count */
1357 if (iswired(*pte
)) {
1358 OSAddAtomic(-1, &pmap
->stats
.wired_count
);
1359 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
1362 if (pmap
!= kernel_pmap
&&
1363 (options
& PMAP_OPTIONS_COMPRESSOR
) &&
1364 IS_INTERNAL_PAGE(pai
)) {
1365 /* adjust "reclaimed" stats */
1366 OSAddAtomic64(+1, &pmap
->stats
.compressed
);
1367 PMAP_STATS_PEAK(pmap
->stats
.compressed
);
1368 pmap
->stats
.compressed_lifetime
++;
1369 /* mark this PTE as having been "reclaimed" */
1370 new_pte_value
= INTEL_PTE_COMPRESSED
;
1375 if (options
& PMAP_OPTIONS_NOREFMOD
) {
1376 pmap_store_pte(pte
, new_pte_value
);
1378 if (options
& PMAP_OPTIONS_NOFLUSH
)
1379 PMAP_UPDATE_TLBS_DELAYED(pmap
, vaddr
, vaddr
+ PAGE_SIZE
, (pmap_flush_context
*)arg
);
1381 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+ PAGE_SIZE
);
1384 * Remove the mapping, collecting dirty bits.
1386 pmap_update_pte(pte
, INTEL_PTE_VALID
, 0);
1388 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+PAGE_SIZE
);
1389 pmap_phys_attributes
[pai
] |=
1390 *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1391 pmap_store_pte(pte
, new_pte_value
);
1394 if (pmap
->stats
.resident_count
< 1)
1395 panic("pmap_page_protect: resident_count");
1397 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
1398 assert(pmap
->stats
.resident_count
>= 1);
1399 OSAddAtomic(-1, &pmap
->stats
.resident_count
);
1400 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
1402 * This removal is only being done so we can send this page to
1403 * the compressor; therefore it mustn't affect total task footprint.
1405 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, PAGE_SIZE
);
1407 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
1410 if (pmap
!= kernel_pmap
) {
1411 if (IS_REUSABLE_PAGE(pai
)) {
1412 assert(pmap
->stats
.reusable
> 0);
1413 OSAddAtomic(-1, &pmap
->stats
.reusable
);
1414 } else if (IS_INTERNAL_PAGE(pai
)) {
1415 assert(pmap
->stats
.internal
> 0);
1416 OSAddAtomic(-1, &pmap
->stats
.internal
);
1418 assert(pmap
->stats
.external
> 0);
1419 OSAddAtomic(-1, &pmap
->stats
.external
);
1424 * Deal with the pv_rooted_entry.
1429 * Fix up head later.
1431 pv_h
->pmap
= PMAP_NULL
;
1434 * Delete this entry.
1436 pv_hash_remove(pvh_e
);
1437 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1440 if (pvh_et
== PV_HASHED_ENTRY_NULL
)
1446 * Write-protect, after opportunistic refmod collect
1448 pmap_phys_attributes
[pai
] |=
1449 *pte
& (PHYS_MODIFIED
|PHYS_REFERENCED
);
1450 pmap_update_pte(pte
, INTEL_PTE_WRITE
, 0);
1452 if (options
& PMAP_OPTIONS_NOFLUSH
)
1453 PMAP_UPDATE_TLBS_DELAYED(pmap
, vaddr
, vaddr
+ PAGE_SIZE
, (pmap_flush_context
*)arg
);
1455 PMAP_UPDATE_TLBS(pmap
, vaddr
, vaddr
+PAGE_SIZE
);
1458 } while ((pv_e
= (pv_rooted_entry_t
) nexth
) != pv_h
);
1462 * If pv_head mapping was removed, fix it up.
1464 if (pv_h
->pmap
== PMAP_NULL
) {
1465 pvh_e
= (pv_hashed_entry_t
) queue_next(&pv_h
->qlink
);
1467 if (pvh_e
!= (pv_hashed_entry_t
) pv_h
) {
1468 pv_hash_remove(pvh_e
);
1469 pv_h
->pmap
= pvh_e
->pmap
;
1470 pv_h
->va
= pvh_e
->va
;
1471 pvh_e
->qlink
.next
= (queue_entry_t
) pvh_eh
;
1474 if (pvh_et
== PV_HASHED_ENTRY_NULL
)
1479 if (pvh_eh
!= PV_HASHED_ENTRY_NULL
) {
1480 PV_HASHED_FREE_LIST(pvh_eh
, pvh_et
, pvh_cnt
);
1485 PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT
) | DBG_FUNC_END
,
1491 * Clear specified attribute bits.
1494 phys_attribute_clear(
1497 unsigned int options
,
1500 pv_rooted_entry_t pv_h
;
1501 pv_hashed_entry_t pv_e
;
1505 char attributes
= 0;
1506 boolean_t is_internal
, is_reusable
;
1508 if ((bits
& PHYS_MODIFIED
) &&
1509 (options
& PMAP_OPTIONS_NOFLUSH
) &&
1511 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
1512 "should not clear 'modified' without flushing TLBs\n",
1513 pn
, bits
, options
, arg
);
1517 assert(pn
!= vm_page_fictitious_addr
);
1518 if (pn
== vm_page_guard_addr
)
1521 pai
= ppn_to_pai(pn
);
1523 if (!IS_MANAGED_PAGE(pai
)) {
1525 * Not a managed page.
1530 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_START
,
1533 pv_h
= pai_to_pvh(pai
);
1538 * Walk down PV list, clearing all modify or reference bits.
1539 * We do not have to lock the pv_list because we have
1542 if (pv_h
->pmap
!= PMAP_NULL
) {
1544 * There are some mappings.
1547 is_internal
= IS_INTERNAL_PAGE(pai
);
1548 is_reusable
= IS_REUSABLE_PAGE(pai
);
1550 pv_e
= (pv_hashed_entry_t
)pv_h
;
1561 pte
= pmap_pte(pmap
, va
);
1562 /* grab ref/mod bits from this PTE */
1563 pte_bits
= (*pte
& (PHYS_MODIFIED
|
1565 /* propagate to page's global attributes */
1566 attributes
|= pte_bits
;
1567 /* which bits to clear for this PTE? */
1572 * Clear modify and/or reference bits.
1575 pmap_update_pte(pte
, bits
, 0);
1577 /* Ensure all processors using this translation
1578 * invalidate this TLB entry. The invalidation
1579 * *must* follow the PTE update, to ensure that
1580 * the TLB shadow of the 'D' bit (in particular)
1581 * is synchronized with the updated PTE.
1583 if (! (options
& PMAP_OPTIONS_NOFLUSH
)) {
1584 /* flush TLBS now */
1585 PMAP_UPDATE_TLBS(pmap
,
1589 /* delayed TLB flush: add "pmap" info */
1590 PMAP_UPDATE_TLBS_DELAYED(
1594 (pmap_flush_context
*)arg
);
1596 /* no TLB flushing at all */
1600 /* update pmap "reusable" stats */
1601 if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
1603 pmap
!= kernel_pmap
) {
1604 /* one less "reusable" */
1605 assert(pmap
->stats
.reusable
> 0);
1606 OSAddAtomic(-1, &pmap
->stats
.reusable
);
1608 /* one more "internal" */
1609 OSAddAtomic(+1, &pmap
->stats
.internal
);
1610 PMAP_STATS_PEAK(pmap
->stats
.internal
);
1612 /* one more "external" */
1613 OSAddAtomic(+1, &pmap
->stats
.external
);
1614 PMAP_STATS_PEAK(pmap
->stats
.external
);
1616 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
1618 pmap
!= kernel_pmap
) {
1619 /* one more "reusable" */
1620 OSAddAtomic(+1, &pmap
->stats
.reusable
);
1621 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
1623 /* one less "internal" */
1624 assert(pmap
->stats
.internal
> 0);
1625 OSAddAtomic(-1, &pmap
->stats
.internal
);
1627 /* one less "external" */
1628 assert(pmap
->stats
.external
> 0);
1629 OSAddAtomic(-1, &pmap
->stats
.external
);
1633 pv_e
= (pv_hashed_entry_t
)queue_next(&pv_e
->qlink
);
1635 } while (pv_e
!= (pv_hashed_entry_t
)pv_h
);
1637 /* Opportunistic refmod collection, annulled
1638 * if both REF and MOD are being cleared.
1641 pmap_phys_attributes
[pai
] |= attributes
;
1642 pmap_phys_attributes
[pai
] &= (~bits
);
1644 /* update this page's "reusable" status */
1645 if (options
& PMAP_OPTIONS_CLEAR_REUSABLE
) {
1646 pmap_phys_attributes
[pai
] &= ~PHYS_REUSABLE
;
1647 } else if (options
& PMAP_OPTIONS_SET_REUSABLE
) {
1648 pmap_phys_attributes
[pai
] |= PHYS_REUSABLE
;
1653 PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_END
,
1658 * Check specified attribute bits.
/*
 * phys_attribute_test:
 * Returns the subset of 'bits' (ref/mod attribute bits) that are set for
 * physical page 'pn'.  First tries a lock-free fast check of the cached
 * pmap_phys_attributes[] entry; if not all requested bits are already
 * collected there, walks the page's PV list and ORs in the live bits from
 * each mapping's PTE, stopping early once all requested bits are found.
 * Collected bits are written back to pmap_phys_attributes[pai].
 *
 * NOTE(review): damaged extraction -- the function signature's parameter
 * lines, some early returns, the 'do {' opening the PV walk, and the
 * lines extracting 'pmap'/'va' per PV entry are missing here.
 */
1661 phys_attribute_test(
1665 pv_rooted_entry_t pv_h
;
1666 pv_hashed_entry_t pv_e
;
1673 assert(pn
!= vm_page_fictitious_addr
);
1674 if (pn
== vm_page_guard_addr
)
1677 pai
= ppn_to_pai(pn
);
1679 if (!IS_MANAGED_PAGE(pai
)) {
1681 * Not a managed page.
1687 * Fast check... if bits already collected
1688 * no need to take any locks...
1689 * if not set, we need to recheck after taking
1690 * the lock in case they got pulled in while
1691 * we were waiting for the lock
1693 if ((pmap_phys_attributes
[pai
] & bits
) == bits
)
1696 pv_h
= pai_to_pvh(pai
);
/* Re-read under the lock: bits may have been collected meanwhile. */
1700 attributes
= pmap_phys_attributes
[pai
] & bits
;
1704 * Walk down PV list, checking the mappings until we
1705 * reach the end or we've found the desired attributes.
1707 if (attributes
!= bits
&&
1708 pv_h
->pmap
!= PMAP_NULL
) {
1710 * There are some mappings.
1712 pv_e
= (pv_hashed_entry_t
)pv_h
;
1719 * pick up modify and/or reference bits from mapping
1722 pte
= pmap_pte(pmap
, va
);
1723 attributes
|= (int)(*pte
& bits
);
1725 pv_e
= (pv_hashed_entry_t
)queue_next(&pv_e
->qlink
);
/* Stop early once every requested bit has been observed. */
1727 } while ((attributes
!= bits
) &&
1728 (pv_e
!= (pv_hashed_entry_t
)pv_h
));
/* Cache what we found so the next caller can take the fast path. */
1730 pmap_phys_attributes
[pai
] |= attributes
;
1733 return (attributes
);
1737 * Routine: pmap_change_wiring
1738 * Function: Change the wiring attribute for a map/virtual-address
1740 * In/out conditions:
1741 * The mapping must already exist in the pmap.
/*
 * Toggles INTEL_PTE_WIRED on the PTE for (map, vaddr) and keeps the
 * per-pmap wired_count stat and the wired_mem ledger in sync.  Panics if
 * no PTE exists -- per the header comment the mapping must already exist.
 *
 * NOTE(review): damaged extraction -- the signature lines for 'map' and
 * 'wired', the local 'pte' declaration, and the PMAP lock/unlock calls
 * one would expect around the PTE access are not visible here; confirm
 * against the pristine file.
 */
1746 vm_map_offset_t vaddr
,
1753 if ((pte
= pmap_pte(map
, vaddr
)) == PT_ENTRY_NULL
)
1754 panic("pmap_change_wiring: pte missing");
1756 if (wired
&& !iswired(*pte
)) {
1758 * wiring down mapping
/* Account first (ledger + stat), then mark the PTE wired. */
1760 pmap_ledger_credit(map
, task_ledgers
.wired_mem
, PAGE_SIZE
);
1761 OSAddAtomic(+1, &map
->stats
.wired_count
);
1762 pmap_update_pte(pte
, 0, INTEL_PTE_WIRED
);
1764 else if (!wired
&& iswired(*pte
)) {
/* Unwiring: reverse the accounting and clear the wired bit. */
1768 assert(map
->stats
.wired_count
>= 1);
1769 OSAddAtomic(-1, &map
->stats
.wired_count
);
1770 pmap_ledger_debit(map
, task_ledgers
.wired_mem
, PAGE_SIZE
);
1771 pmap_update_pte(pte
, INTEL_PTE_WIRED
, 0);
1778 * "Backdoor" direct map routine for early mappings.
1779 * Useful for mapping memory outside the range
1780 * Sets A, D and NC if requested
/*
 * pmap_map_bd:
 * Maps the physical range [start_addr, end_addr) at kernel virtual
 * address 'virt' by writing PTEs directly into the kernel pmap.  Builds a
 * template PTE from start_addr plus cacheability (VM_MEM_NOT_CACHEABLE /
 * PTA), NX (when VM_PROT_EXECUTE absent, x86_64 only) and write bits,
 * then steps both the template's physical address and start_addr one
 * page at a time.  Finishes with a TLB flush over the mapped span.
 *
 * NOTE(review): damaged extraction -- the signature lines for 'virt',
 * 'prot' and 'flags', the 'pte' declaration, and the 'virt += PAGE_SIZE'
 * style loop-advance line are not visible here.  'template' is an
 * ordinary identifier in C (only a keyword in C++).
 */
1786 vm_map_offset_t start_addr
,
1787 vm_map_offset_t end_addr
,
1791 pt_entry_t
template;
1794 vm_offset_t base
= virt
;
1795 template = pa_to_pte(start_addr
)
/* Uncached mapping requested (and not "use default"): set NCACHE,
 * and PTA additionally unless the mapping is guarded. */
1801 if ((flags
& (VM_MEM_NOT_CACHEABLE
| VM_WIMG_USE_DEFAULT
)) == VM_MEM_NOT_CACHEABLE
) {
1802 template |= INTEL_PTE_NCACHE
;
1803 if (!(flags
& (VM_MEM_GUARDED
)))
1804 template |= INTEL_PTE_PTA
;
1807 #if defined(__x86_64__)
1808 if ((prot
& VM_PROT_EXECUTE
) == 0)
1809 template |= INTEL_PTE_NX
;
1812 if (prot
& VM_PROT_WRITE
)
1813 template |= INTEL_PTE_WRITE
;
/* One PTE per page: store template, then advance its PA and the cursor. */
1815 while (start_addr
< end_addr
) {
1817 pte
= pmap_pte(kernel_pmap
, (vm_map_offset_t
)virt
);
1818 if (pte
== PT_ENTRY_NULL
) {
1819 panic("pmap_map_bd: Invalid kernel address\n");
1821 pmap_store_pte(pte
, template);
1823 pte_increment_pa(template);
1825 start_addr
+= PAGE_SIZE
;
/* start_addr == end_addr after the loop, so this flushes [base, base). 
 * NOTE(review): presumably the intent is the full mapped span -- the
 * extraction may have dropped a saved-length variable; confirm. */
1828 PMAP_UPDATE_TLBS(kernel_pmap
, base
, base
+ end_addr
- start_addr
);
/*
 * pmap_query_resident:
 * Scans the virtual range [s64, e64) of 'pmap' one page-directory span at
 * a time, testing each PTE with pte_to_pa() != 0 to detect resident
 * pages.  Bails out for the null/kernel pmap or an empty range, skips
 * superpage PDEs (unsupported), and periodically drops out when the TSC
 * deadline passes -- presumably to release the pmap lock and allow
 * preemption (the lock/unlock lines are not visible here; TODO confirm).
 *
 * NOTE(review): damaged extraction -- the return type, parameter lines,
 * the accumulation of the resident count into 'result', the l64 clamp to
 * e64, and the 's64 = l64' loop advance are missing from this view.
 */
1833 pmap_query_resident(
1839 pt_entry_t
*spte
, *epte
;
1842 unsigned int result
;
1846 if (pmap
== PMAP_NULL
|| pmap
== kernel_pmap
|| s64
== e64
)
1849 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT
) | DBG_FUNC_START
,
1851 (uint32_t) (s64
>> 32), s64
,
1852 (uint32_t) (e64
>> 32), e64
);
1858 deadline
= rdtsc64() + max_preemption_latency_tsc
;
/* l64: end of the current PDE-aligned span containing s64. */
1861 l64
= (s64
+ pde_mapped_size
) & ~(pde_mapped_size
- 1);
1864 pde
= pmap_pde(pmap
, s64
);
1866 if (pde
&& (*pde
& INTEL_PTE_VALID
)) {
1867 if (*pde
& INTEL_PTE_PS
) {
1868 /* superpage: not supported */
1870 spte
= pmap_pte(pmap
,
1871 (s64
& ~(pde_mapped_size
- 1)));
1872 spte
= &spte
[ptenum(s64
)];
1873 epte
= &spte
[intel_btop(l64
- s64
)];
/* Walk the PTEs of this span; a nonzero PA means a resident page. */
1875 for (; spte
< epte
; spte
++) {
1876 if (pte_to_pa(*spte
) != 0) {
/* Preemption check between spans: re-arm the TSC deadline. */
1885 if (s64
< e64
&& rdtsc64() >= deadline
) {
1888 deadline
= rdtsc64() + max_preemption_latency_tsc
;
1894 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT
) | DBG_FUNC_END
,
1903 __unused pmap_t pmap
,
1905 __unused
char *procname
)
1908 #endif /* MACH_ASSERT */