/*
 * Copyright (c) 2011-2018 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_assert.h>
#include <mach_ldebug.h>

#include <mach/shared_region.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/vm_map.h>
#include <mach/machine/vm_param.h>
#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/sched.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>
#include <kern/misc_protos.h>
#include <kern/trustcache.h>

#include <os/overflow.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#include <libkern/img4/interface.h>
#include <libkern/section_keywords.h>

#include <machine/atomic.h>
#include <machine/thread.h>
#include <machine/lowglobals.h>

#include <arm/caches_internal.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/cpu_capabilities.h>
#include <arm/cpu_number.h>
#include <arm/machine_cpu.h>
#include <arm/misc_protos.h>

#if (__ARM_VMSA__ > 7)
#include <arm64/proc_reg.h>
#include <pexpert/arm64/boot.h>

#include <arm64/pgtrace.h>
#if CONFIG_PGTRACE_NONKEXT
#include <arm64/pgtrace_decoder.h>
#endif // CONFIG_PGTRACE_NONKEXT
#endif /* (__ARM_VMSA__ > 7) */

#include <pexpert/device_tree.h>

#include <san/kasan.h>
#include <sys/cdefs.h>
#if MACH_ASSERT
int vm_footprint_suspend_allowed = 1;

extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

int pmap_stats_assert = 1;
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)			\
	if (pmap_stats_assert && (pmap)->pmap_stats_assert)		\
	        assertf(cond, fmt, ##__VA_ARGS__);
#else /* MACH_ASSERT */
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
#endif /* MACH_ASSERT */

#if DEVELOPMENT || DEBUG
#define PMAP_FOOTPRINT_SUSPENDED(pmap) \
	(current_thread()->pmap_footprint_suspended)
#else /* DEVELOPMENT || DEBUG */
#define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
#endif /* DEVELOPMENT || DEBUG */

#define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)

#if DEVELOPMENT || DEBUG
int panic_on_unsigned_execute = 0;
#endif /* DEVELOPMENT || DEBUG */
/* Virtual memory region for early allocation */
#if (__ARM_VMSA__ == 7)
#define VREGION1_START		(VM_HIGH_KERNEL_WINDOW & ~ARM_TT_L1_PT_OFFMASK)
#else
#define VREGION1_HIGH_WINDOW	(PE_EARLY_BOOT_VA)
#define VREGION1_START		((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
#endif
#define VREGION1_SIZE		(trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
extern unsigned int not_in_kdp;

extern vm_offset_t first_avail;

extern pmap_paddr_t avail_start;
extern pmap_paddr_t avail_end;

extern vm_offset_t	virtual_space_start;	/* Next available kernel VA */
extern vm_offset_t	virtual_space_end;	/* End of kernel address space */
extern vm_offset_t	static_memory_end;

extern int hard_maxproc;
#if (__ARM_VMSA__ > 7)
/* The number of address bits one TTBR can cover. */
#define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)

/*
 * The bounds on our TTBRs.  These are for sanity checking that
 * an address is accessible by a TTBR before we attempt to map it.
 */
#define ARM64_TTBR0_MIN_ADDR (0ULL)
#define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
#define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
#define ARM64_TTBR1_MAX_ADDR (~0ULL)
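/*
 * Illustrative sketch (not part of the original source): a requested VA can
 * be sanity-checked against these bounds before any table walk is attempted,
 * e.g.:
 *
 *	if ((va > ARM64_TTBR0_MAX_ADDR) && (va < ARM64_TTBR1_MIN_ADDR)) {
 *		panic("va 0x%llx is covered by neither TTBR0 nor TTBR1", (uint64_t)va);
 *	}
 *
 * As a worked example, if T0SZ_BOOT were 25, PGTABLE_ADDR_BITS would be 39,
 * so TTBR0 would cover [0, 2^39 - 1] and TTBR1 would cover [-2^39, ~0ULL].
 */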
/* The level of the root of a page table. */
const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));

/* The number of entries in the root TT of a page table. */
const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
#else
const uint64_t arm64_root_pgtable_level = 0;
const uint64_t arm64_root_pgtable_num_ttes = 0;
#endif
struct pmap	kernel_pmap_store MARK_AS_PMAP_DATA;
SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;

struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));	/* store pt pages */
vm_object_t	pmap_object = &pmap_object_store;

static struct zone *pmap_zone;	/* zone of pmap structures */

decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA)
unsigned int	pmap_stamp MARK_AS_PMAP_DATA;
queue_head_t	map_pmap_list MARK_AS_PMAP_DATA;

decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA)
queue_head_t	pt_page_list MARK_AS_PMAP_DATA;	/* pt page ptd entries list */

decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA)
typedef struct page_free_entry {
	struct page_free_entry	*next;
} page_free_entry_t;

#define PAGE_FREE_ENTRY_NULL	((page_free_entry_t *) 0)

page_free_entry_t	*pmap_pages_reclaim_list MARK_AS_PMAP_DATA;	/* Reclaimed pt page list */
unsigned int		pmap_pages_request_count MARK_AS_PMAP_DATA;	/* Pending requests to reclaim pt page */
unsigned long long	pmap_pages_request_acum MARK_AS_PMAP_DATA;

typedef struct tt_free_entry {
	struct tt_free_entry	*next;
} tt_free_entry_t;

#define TT_FREE_ENTRY_NULL	((tt_free_entry_t *) 0)

tt_free_entry_t	*free_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int	free_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int	free_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_PAGE_SIZE_TT_MAX	4
tt_free_entry_t	*free_two_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int	free_two_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int	free_two_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_TWO_PAGE_SIZE_TT_MAX	4
tt_free_entry_t	*free_tt_list MARK_AS_PMAP_DATA;
unsigned int	free_tt_count MARK_AS_PMAP_DATA;
unsigned int	free_tt_max MARK_AS_PMAP_DATA;

#define TT_FREE_ENTRY_NULL	((tt_free_entry_t *) 0)
boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
boolean_t pmap_gc_allowed_by_time_throttle = TRUE;

unsigned int	inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0;	/* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int	inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0;	/* leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int	inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0;		/* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int	inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0;	/* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int	inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0;	/* leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int	inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0;	/* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int	inuse_pmap_pages_count = 0;	/* debugging */

SECURITY_READ_ONLY_LATE(tt_entry_t *)	invalid_tte  = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t)	invalid_ttep = 0;

SECURITY_READ_ONLY_LATE(tt_entry_t *)	cpu_tte  = 0;	/* set by arm_vm_init() - keep out of bss */
SECURITY_READ_ONLY_LATE(pmap_paddr_t)	cpu_ttep = 0;	/* set by arm_vm_init() - phys tte addr */
#if DEVELOPMENT || DEBUG
int nx_enabled = 1;					/* enable no-execute protection */
int allow_data_exec  = 0;				/* No apps may execute data */
int allow_stack_exec = 0;				/* No apps may execute from the stack */
#else /* DEVELOPMENT || DEBUG */
const int nx_enabled = 1;				/* enable no-execute protection */
const int allow_data_exec  = 0;				/* No apps may execute data */
const int allow_stack_exec = 0;				/* No apps may execute from the stack */
#endif /* DEVELOPMENT || DEBUG */
/*
 *	pv_entry_t - structure to track the active mappings for a given page
 */
typedef struct pv_entry {
	struct pv_entry	*pve_next;	/* next alias */
	pt_entry_t	*pve_ptep;	/* page table entry */
#if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
/* For the newer ARMv7k ABI where 64-bit types are 64-bit aligned, but pointers
 * are 32-bit:
 * Since pt_desc is 64-bit aligned and we cast often from pv_entry to
 * pt_desc.
 */
} __attribute__ ((aligned(8))) pv_entry_t;
#else
} pv_entry_t;
#endif

#define PV_ENTRY_NULL	((pv_entry_t *) 0)
/*
 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
 * as a marker for pages mapped through an "alternate accounting" mapping.
 * These macros set, clear and test for this marker and extract the actual
 * value of the "pve_next" pointer.
 */
#define PVE_NEXT_ALTACCT	((uintptr_t) 0x1)
#define PVE_NEXT_SET_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
	    PVE_NEXT_ALTACCT)
#define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
	    ~PVE_NEXT_ALTACCT)
#define PVE_NEXT_IS_ALTACCT(pve_next)	\
	((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
#define PVE_NEXT_PTR(pve_next) \
	((struct pv_entry *)(((uintptr_t) (pve_next)) & \
	    ~PVE_NEXT_ALTACCT))
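/*
 * Illustrative sketch (not from the original source): because the low bit of
 * "pve_next" is borrowed as a flag, the real pointer must always be recovered
 * with PVE_NEXT_PTR() before it is followed, e.g.:
 *
 *	pv_entry_t *next = PVE_NEXT_PTR(pve->pve_next);
 *	if (PVE_NEXT_IS_ALTACCT(pve->pve_next)) {
 *		// this mapping is charged to "alternate accounting"
 *	}
 */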
#if MACH_ASSERT
static void pmap_check_ledgers(pmap_t pmap);
#else
static inline void
pmap_check_ledgers(__unused pmap_t pmap)
{
}
#endif /* MACH_ASSERT */

SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table;	/* array of pv entry pointers */

pv_entry_t	*pv_free_list MARK_AS_PMAP_DATA;
pv_entry_t	*pv_kern_free_list MARK_AS_PMAP_DATA;
decl_simple_lock_data(, pv_free_list_lock MARK_AS_PMAP_DATA)
decl_simple_lock_data(, pv_kern_free_list_lock MARK_AS_PMAP_DATA)

decl_simple_lock_data(, phys_backup_lock)
/*
 *	pt_desc - structure to keep info on page assigned to page tables
 */
#if (__ARM_VMSA__ == 7)
#define PT_INDEX_MAX	1
#else
#if (ARM_PGSHIFT == 14)
#define PT_INDEX_MAX	1
#else
#define PT_INDEX_MAX	4
#endif
#endif

#define PT_DESC_REFCOUNT	0x4000U
#define PT_DESC_IOMMU_REFCOUNT	0x8000U

typedef struct pt_desc {
	queue_chain_t	pt_page;
	struct {
		/*
		 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
		 * For leaf pagetables, should reflect the number of non-empty PTEs
		 * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
		 */
		unsigned short	refcnt;
		/*
		 * For non-leaf pagetables, should be 0
		 * For leaf pagetables, should reflect the number of wired entries
		 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
		 */
		unsigned short	wiredcnt;
	} pt_cnt[PT_INDEX_MAX];
	struct pmap	*pmap;
	struct {
		vm_offset_t	va;
	} pt_map[PT_INDEX_MAX];
} pt_desc_t;
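/*
 * Illustrative sketch (not from the original source): refcnt is overloaded by
 * page type.  A leaf pagetable page counts its valid PTEs, so it becomes a
 * deallocation candidate again once the count drops back to 0; non-leaf and
 * IOMMU pages carry sentinel values instead, e.g.:
 *
 *	if (ptdp->pt_cnt[i].refcnt == PT_DESC_REFCOUNT) {
 *		// non-leaf table: never reclaimed via the PTE-count path
 *	} else if (ptdp->pt_cnt[i].refcnt == 0) {
 *		// empty leaf table: candidate for deallocation
 *	}
 */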
#define PTD_ENTRY_NULL	((pt_desc_t *) 0)

SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;

pt_desc_t	*ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
unsigned int	ptd_free_count MARK_AS_PMAP_DATA = 0;
decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA)
/*
 *	physical page attribute
 */
typedef u_int16_t pp_attr_t;

#define PP_ATTR_WIMG_MASK	0x003F
#define PP_ATTR_WIMG(x)		((x) & PP_ATTR_WIMG_MASK)

#define PP_ATTR_REFERENCED	0x0040
#define PP_ATTR_MODIFIED	0x0080

#define PP_ATTR_INTERNAL	0x0100
#define PP_ATTR_REUSABLE	0x0200
#define PP_ATTR_ALTACCT		0x0400
#define PP_ATTR_NOENCRYPT	0x0800

#define PP_ATTR_REFFAULT	0x1000
#define PP_ATTR_MODFAULT	0x2000

SECURITY_READ_ONLY_LATE(pp_attr_t*)	pp_attr_table;
typedef struct pmap_io_range {
	uint64_t addr;
	uint32_t len;
	uint32_t wimg;		// treated as pp_attr_t
} __attribute__((packed)) pmap_io_range_t;

SECURITY_READ_ONLY_LATE(pmap_io_range_t*)	io_attr_table;
SECURITY_READ_ONLY_LATE(pmap_paddr_t)	vm_first_phys = (pmap_paddr_t) 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t)	vm_last_phys = (pmap_paddr_t) 0;

SECURITY_READ_ONLY_LATE(pmap_paddr_t)	io_rgn_start = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t)	io_rgn_end = 0;
SECURITY_READ_ONLY_LATE(unsigned int)	num_io_rgns = 0;

SECURITY_READ_ONLY_LATE(boolean_t)	pmap_initialized = FALSE;	/* Has pmap_init completed? */

SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;

SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default  = 0x0;
#if defined(__arm64__)
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
#endif

/* free address spaces (1 means free) */
static uint32_t asid_bitmap[MAX_ASID / (sizeof(uint32_t) * NBBY)] MARK_AS_PMAP_DATA;

#if (__ARM_VMSA__ > 7)
SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
#endif
#define pa_index(pa)		\
	(atop((pa) - vm_first_phys))

#define pai_to_pvh(pai)		\
	(&pv_head_table[pai])

#define pa_valid(x)		\
	((x) >= vm_first_phys && (x) < vm_last_phys)
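/*
 * Illustrative sketch (not from the original source): these three macros are
 * the bridge from a physical address to its per-page metadata, e.g.:
 *
 *	if (pa_valid(pa)) {
 *		int pai = (int)pa_index(pa);		// index of the managed page
 *		pv_entry_t **pv_h = pai_to_pvh(pai);	// head of its mapping list
 *		pp_attr_t attr = pp_attr_table[pai];	// its attribute bits
 *	}
 *
 * Addresses outside [vm_first_phys, vm_last_phys) have no pv_head_table or
 * pp_attr_table entry and must not be indexed.
 */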
/* PTE Define Macros */

#define pte_is_wired(pte)						\
	(((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)

#define pte_set_wired(ptep, wired)					\
	do {								\
		SInt16	*ptd_wiredcnt_ptr;				\
		ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->pt_cnt[ARM_PT_DESC_INDEX(ptep)].wiredcnt); \
		if (wired) {						\
			*ptep |= ARM_PTE_WIRED;				\
			OSAddAtomic16(1, ptd_wiredcnt_ptr);		\
		} else {						\
			*ptep &= ~ARM_PTE_WIRED;			\
			OSAddAtomic16(-1, ptd_wiredcnt_ptr);		\
		}							\
	} while (0)

#define pte_was_writeable(pte)						\
	(((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)

#define pte_set_was_writeable(pte, was_writeable)			\
	do {								\
		if ((was_writeable)) {					\
			(pte) |= ARM_PTE_WRITEABLE;			\
		} else {						\
			(pte) &= ~ARM_PTE_WRITEABLE;			\
		}							\
	} while (0)
/* PVE Define Macros */

#define pve_next(pve)							\
	((pve)->pve_next)

#define pve_link_field(pve)						\
	(&pve_next(pve))

#define pve_link(pp, e)							\
	((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))

#define pve_unlink(pp, e)						\
	(pve_next(pp) = pve_next(e))

/* bits held in the ptep pointer field */

#define pve_get_ptep(pve)						\
	((pve)->pve_ptep)

#define pve_set_ptep(pve, ptep_new)					\
	do {								\
		(pve)->pve_ptep = (ptep_new);				\
	} while (0)
476 #if (__ARM_VMSA__ == 7)
478 #define ARM_PT_DESC_INDEX_MASK 0x00000
479 #define ARM_PT_DESC_INDEX_SHIFT 0
482 * mask for page descriptor index: 4MB per page table
484 #define ARM_TT_PT_INDEX_MASK 0xfffU /* mask for page descriptor index: 4MB per page table */
487 * Shift value used for reconstructing the virtual address for a PTE.
489 #define ARM_TT_PT_ADDR_SHIFT (10U)
491 #define ptep_get_va(ptep) \
492 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~0xFFF))))))))->pt_map[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
494 #define ptep_get_pmap(ptep) \
495 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~0xFFF))))))))->pmap))
499 #if (ARM_PGSHIFT == 12)
500 #define ARM_PT_DESC_INDEX_MASK ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
501 #define ARM_PT_DESC_INDEX_SHIFT ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
503 * mask for page descriptor index: 2MB per page table
505 #define ARM_TT_PT_INDEX_MASK (0x0fffULL)
507 * Shift value used for reconstructing the virtual address for a PTE.
509 #define ARM_TT_PT_ADDR_SHIFT (9ULL)
511 /* TODO: Give this a better name/documentation than "other" */
512 #define ARM_TT_PT_OTHER_MASK (0x0fffULL)
516 #define ARM_PT_DESC_INDEX_MASK (0x00000)
517 #define ARM_PT_DESC_INDEX_SHIFT (0)
519 * mask for page descriptor index: 32MB per page table
521 #define ARM_TT_PT_INDEX_MASK (0x3fffULL)
523 * Shift value used for reconstructing the virtual address for a PTE.
525 #define ARM_TT_PT_ADDR_SHIFT (11ULL)
527 /* TODO: Give this a better name/documentation than "other" */
528 #define ARM_TT_PT_OTHER_MASK (0x3fffULL)
531 #define ARM_PT_DESC_INDEX(ptep) \
532 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
534 #define ptep_get_va(ptep) \
535 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_TT_PT_OTHER_MASK))))))))->pt_map[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
537 #define ptep_get_pmap(ptep) \
538 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_TT_PT_OTHER_MASK))))))))->pmap))
542 #define ARM_PT_DESC_INDEX(ptep) \
543 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
545 #define ptep_get_ptd(ptep) \
546 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
/* PVH Define Macros */

#define PVH_TYPE_NULL	0x0UL
#define PVH_TYPE_PVEP	0x1UL
#define PVH_TYPE_PTEP	0x2UL
#define PVH_TYPE_PTDP	0x3UL

#define PVH_TYPE_MASK	(0x3UL)

#if defined(__arm64__)

#define PVH_FLAG_IOMMU		0x4UL
#define PVH_FLAG_IOMMU_TABLE	(1ULL << 63)
#define PVH_FLAG_CPU		(1ULL << 62)
#define PVH_LOCK_BIT		61
#define PVH_FLAG_LOCK		(1ULL << PVH_LOCK_BIT)
#define PVH_FLAG_EXEC		(1ULL << 60)
#define PVH_FLAG_LOCKDOWN	(1ULL << 59)
#define PVH_HIGH_FLAGS		(PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN)

#else /* !__arm64__ */

#define PVH_LOCK_BIT	31
#define PVH_FLAG_LOCK	(1UL << PVH_LOCK_BIT)
#define PVH_HIGH_FLAGS	PVH_FLAG_LOCK

#endif

#define PVH_LIST_MASK	(~PVH_TYPE_MASK)

#define pvh_test_type(h, b)						\
	((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))

#define pvh_ptep(h)							\
	((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))

#define pvh_list(h)							\
	((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))

#define pvh_get_flags(h)						\
	(*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)
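/*
 * Illustrative sketch (not from the original source): each pv_head_table
 * entry is a tagged word.  The low two bits select how the rest of the word
 * is interpreted, and the high bits carry orthogonal flags, e.g.:
 *
 *	pv_entry_t **pv_h = pai_to_pvh(pai);
 *	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
 *		pt_entry_t *ptep = pvh_ptep(pv_h);	// single mapping
 *	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
 *		pv_entry_t *pvep = pvh_list(pv_h);	// list of mappings
 *	}
 *	vm_offset_t flags = pvh_get_flags(pv_h);	// PVH_FLAG_* bits
 */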
#define pvh_set_flags(h, f)						\
	do {								\
		__c11_atomic_store((_Atomic vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
		    memory_order_relaxed);				\
	} while (0)

#define pvh_update_head(h, e, t)					\
	do {								\
		assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK);		\
		__c11_atomic_store((_Atomic vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
		    memory_order_relaxed);				\
	} while (0)

#define pvh_update_head_unlocked(h, e, t)				\
	do {								\
		assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK));		\
		*(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK; \
	} while (0)

#define pvh_add(h, e)							\
	do {								\
		assert(!pvh_test_type((h), PVH_TYPE_PTEP));		\
		pve_next(e) = pvh_list(h);				\
		pvh_update_head((h), (e), PVH_TYPE_PVEP);		\
	} while (0)

#define pvh_remove(h, p, e)						\
	do {								\
		assert(!PVE_NEXT_IS_ALTACCT(pve_next((e))));		\
		if ((p) == (h)) {					\
			if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
				pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
			} else {					\
				pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
			}						\
		} else {						\
			/*						\
			 * preserve the "alternate accounting" bit	\
			 * when updating "p" (the previous entry's	\
			 * "pve_next").					\
			 */						\
			boolean_t __is_altacct;				\
			__is_altacct = PVE_NEXT_IS_ALTACCT(*(p));	\
			*(p) = PVE_NEXT_PTR(pve_next((e)));		\
			if (__is_altacct) {				\
				PVE_NEXT_SET_ALTACCT((p));		\
			} else {					\
				PVE_NEXT_CLR_ALTACCT((p));		\
			}						\
		}							\
	} while (0)
/* PPATTR Define Macros */

#define ppattr_set_bits(h, b)						\
	do {								\
		while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
	} while (0)

#define ppattr_clear_bits(h, b)						\
	do {								\
		while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
	} while (0)

#define ppattr_test_bits(h, b)						\
	((*(pp_attr_t *)(h) & (b)) == (b))

#define pa_set_bits(x, b)						\
	do {								\
		if (pa_valid(x))					\
			ppattr_set_bits(&pp_attr_table[pa_index(x)],	\
			    (b));					\
	} while (0)

#define pa_test_bits(x, b)						\
	(pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
	    (b)) : FALSE)

#define pa_clear_bits(x, b)						\
	do {								\
		if (pa_valid(x))					\
			ppattr_clear_bits(&pp_attr_table[pa_index(x)],	\
			    (b));					\
	} while (0)

#define pa_set_modify(x)						\
	pa_set_bits(x, PP_ATTR_MODIFIED)

#define pa_clear_modify(x)						\
	pa_clear_bits(x, PP_ATTR_MODIFIED)

#define pa_set_reference(x)						\
	pa_set_bits(x, PP_ATTR_REFERENCED)

#define pa_clear_reference(x)						\
	pa_clear_bits(x, PP_ATTR_REFERENCED)
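/*
 * Illustrative sketch (not from the original source): fault paths record
 * per-page state through these wrappers, which quietly ignore physical
 * addresses the pmap does not manage, e.g.:
 *
 *	pa_set_modify(pa);			// mark the page dirty
 *	if (pa_test_bits(pa, PP_ATTR_MODIFIED)) {
 *		// the page has been written since the bit was last cleared
 *	}
 *	pa_clear_reference(pa);			// reset for working-set sampling
 */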
#define IS_INTERNAL_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
#define SET_INTERNAL_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
#define CLR_INTERNAL_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)

#define IS_REUSABLE_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
#define SET_REUSABLE_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
#define CLR_REUSABLE_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)

#define IS_ALTACCT_PAGE(pai, pve_p)					\
	(((pve_p) == NULL)						\
	? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT)	\
	: PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
#define SET_ALTACCT_PAGE(pai, pve_p)					\
	if ((pve_p) == NULL) {						\
		ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT);	\
	} else {							\
		PVE_NEXT_SET_ALTACCT(&pve_next((pve_p)));		\
	}
#define CLR_ALTACCT_PAGE(pai, pve_p)					\
	if ((pve_p) == NULL) {						\
		ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
	} else {							\
		PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p)));		\
	}

#define IS_REFFAULT_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
#define SET_REFFAULT_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
#define CLR_REFFAULT_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)

#define IS_MODFAULT_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
#define SET_MODFAULT_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
#define CLR_MODFAULT_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
#define tte_get_ptd(tte)						\
	((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))

#if (__ARM_VMSA__ == 7)

#define tte_index(pmap, addr)						\
	ttenum((addr))

#else

#define tt0_index(pmap, addr)						\
	(((addr) & ARM_TT_L0_INDEX_MASK) >> ARM_TT_L0_SHIFT)

#define tt1_index(pmap, addr)						\
	(((addr) & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT)

#define tt2_index(pmap, addr)						\
	(((addr) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)

#define tt3_index(pmap, addr)						\
	(((addr) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)

#define tte_index(pmap, addr)						\
	(((addr) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)

#endif
/*
 *	Lock on pmap system
 */

lck_grp_t pmap_lck_grp;

#define PMAP_LOCK_INIT(pmap) {						\
	simple_lock_init(&(pmap)->lock, 0);				\
}

#define PMAP_LOCK(pmap) {						\
	pmap_simple_lock(&(pmap)->lock);				\
}

#define PMAP_UNLOCK(pmap) {						\
	pmap_simple_unlock(&(pmap)->lock);				\
}

#if MACH_ASSERT
#define PMAP_ASSERT_LOCKED(pmap) {					\
	simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED);		\
}
#else
#define PMAP_ASSERT_LOCKED(pmap)
#endif
#if defined(__arm64__)
#define PVH_LOCK_WORD 1 /* Assumes little-endian */
#else
#define PVH_LOCK_WORD 0
#endif

#define ASSERT_PVH_LOCKED(index)					\
	do {								\
		assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK); \
	} while (0)

#define LOCK_PVH(index)							\
	do {								\
		pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
	} while (0)

#define UNLOCK_PVH(index)						\
	do {								\
		ASSERT_PVH_LOCKED(index);				\
		pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
	} while (0)

#define PMAP_UPDATE_TLBS(pmap, s, e) {					\
	flush_mmu_tlb_region_asid_async(s, (unsigned)(e - s), pmap);	\
	sync_tlb_flush();						\
}
#ifdef __ARM_L1_PTW__

#define FLUSH_PTE_RANGE(spte, epte)					\
	__builtin_arm_dmb(DMB_ISH);

#define FLUSH_PTE(pte_p)						\
	__builtin_arm_dmb(DMB_ISH);

#define FLUSH_PTE_STRONG(pte_p)						\
	__builtin_arm_dsb(DSB_ISH);

#define FLUSH_PTE_RANGE_STRONG(spte, epte)				\
	__builtin_arm_dsb(DSB_ISH);

#else /* __ARM_L1_PTW */

#define FLUSH_PTE_RANGE(spte, epte)					\
	CleanPoU_DcacheRegion((vm_offset_t)spte,			\
	    (vm_offset_t)epte - (vm_offset_t)spte);

#define FLUSH_PTE(pte_p)						\
	__unreachable_ok_push						\
	if (TEST_PAGE_RATIO_4)						\
	        FLUSH_PTE_RANGE((pte_p), (pte_p) + 4);			\
	else								\
	        FLUSH_PTE_RANGE((pte_p), (pte_p) + 1);			\
	CleanPoU_DcacheRegion((vm_offset_t)pte_p, sizeof(pt_entry_t));	\
	__unreachable_ok_pop

#define FLUSH_PTE_STRONG(pte_p) FLUSH_PTE(pte_p)

#define FLUSH_PTE_RANGE_STRONG(spte, epte) FLUSH_PTE_RANGE(spte, epte)

#endif /* !defined(__ARM_L1_PTW) */
#define WRITE_PTE_FAST(pte_p, pte_entry)				\
	__unreachable_ok_push						\
	if (TEST_PAGE_RATIO_4) {					\
	        if (((unsigned)(pte_p)) & 0x1f)				\
	                panic("WRITE_PTE\n");				\
	        if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
	                *(pte_p) = (pte_entry);				\
	                *((pte_p)+1) = (pte_entry);			\
	                *((pte_p)+2) = (pte_entry);			\
	                *((pte_p)+3) = (pte_entry);			\
	        } else {						\
	                *(pte_p) = (pte_entry);				\
	                *((pte_p)+1) = (pte_entry) | 0x1000;		\
	                *((pte_p)+2) = (pte_entry) | 0x2000;		\
	                *((pte_p)+3) = (pte_entry) | 0x3000;		\
	        }							\
	} else {							\
	        *(pte_p) = (pte_entry);					\
	}								\
	__unreachable_ok_pop

#define WRITE_PTE(pte_p, pte_entry)					\
	WRITE_PTE_FAST(pte_p, pte_entry);				\
	FLUSH_PTE(pte_p);

#define WRITE_PTE_STRONG(pte_p, pte_entry)				\
	WRITE_PTE_FAST(pte_p, pte_entry);				\
	FLUSH_PTE_STRONG(pte_p);
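/*
 * Illustrative sketch (not from the original source): callers pair a PTE
 * store with the matching barrier strength, and only then invalidate the TLB
 * for the affected range, e.g.:
 *
 *	WRITE_PTE_STRONG(pte_p, new_pte);		// store + DSB-strength flush
 *	PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);	// then shoot down stale entries
 *
 * WRITE_PTE_FAST on its own is only safe when a later FLUSH_PTE or
 * FLUSH_PTE_RANGE covers the whole batch of updates.
 */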
/*
 * Other useful macros.
 */
#define current_pmap()							\
	(vm_map_pmap(current_thread()->map))

#define VALIDATE_USER_PMAP(x)
#define VALIDATE_PMAP(x)
#define VALIDATE_LEDGER(x)
#if DEVELOPMENT || DEBUG

/*
 * Trace levels are controlled by a bitmask in which each
 * level can be enabled/disabled by the (1<<level) position
 * in the boot arg.
 * Level 1: pmap lifecycle (create/destroy/switch)
 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
 * Level 3: internal state management (tte/attributes/fast-fault)
 */

SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;

#define PMAP_TRACE(level, ...) \
	if (__improbable((1 << (level)) & pmap_trace_mask)) { \
		KDBG_RELEASE(__VA_ARGS__); \
	}
#else /* DEVELOPMENT || DEBUG */
#define PMAP_TRACE(level, ...)
#endif /* DEVELOPMENT || DEBUG */
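/*
 * Illustrative sketch (not from the original source): with the bitmask
 * convention above, enabling pmap-lifecycle and mapping-lifecycle tracing
 * means setting bits (1 << 1) | (1 << 2) == 0x6 in pmap_trace_mask on a
 * DEVELOPMENT/DEBUG kernel.  A call site then only emits a kdebug event when
 * its level's bit is set, e.g.:
 *
 *	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START);
 *
 * and compiles to nothing on RELEASE kernels.
 */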
/*
 * Internal function prototypes (forward declarations).
 */

static boolean_t pv_alloc(
	pmap_t pmap,
	unsigned int pai,
	pv_entry_t **pvepp);

static void pv_list_free(
	pv_entry_t *pv_eh,
	pv_entry_t *pv_et,
	int pv_cnt);

static void ptd_bootstrap(
	pt_desc_t *ptdp, unsigned int ptd_cnt);

static inline pt_desc_t *ptd_alloc_unlinked(bool reclaim);

static pt_desc_t *ptd_alloc(pmap_t pmap, bool reclaim);

static void ptd_deallocate(pt_desc_t *ptdp);

static void ptd_init(
	pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);

static void pmap_zone_init(
	void);

static void pmap_set_reference(
	ppnum_t pn);

ppnum_t pmap_vtophys(
	pmap_t pmap, addr64_t va);

void pmap_switch_user_ttb(
	pmap_t pmap);

static void flush_mmu_tlb_region_asid_async(
	vm_offset_t va, unsigned length, pmap_t pmap);

static kern_return_t pmap_expand(
	pmap_t, vm_map_address_t, unsigned int options, unsigned int level);

static int pmap_remove_range(
	pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);

static int pmap_remove_range_options(
	pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, int);

static tt_entry_t *pmap_tt1_allocate(
	pmap_t, vm_size_t, unsigned int);

#define PMAP_TT_ALLOCATE_NOWAIT		0x1

static void pmap_tt1_deallocate(
	pmap_t, tt_entry_t *, vm_size_t, unsigned int);

#define PMAP_TT_DEALLOCATE_NOBLOCK	0x1

static kern_return_t pmap_tt_allocate(
	pmap_t, tt_entry_t **, unsigned int, unsigned int);

#define PMAP_TT_ALLOCATE_NOWAIT		0x1

static void pmap_tte_deallocate(
	pmap_t, tt_entry_t *, unsigned int);

#define PMAP_TT_L1_LEVEL	0x1
#define PMAP_TT_L2_LEVEL	0x2
#define PMAP_TT_L3_LEVEL	0x3
#if (__ARM_VMSA__ == 7)
#define PMAP_TT_MAX_LEVEL	PMAP_TT_L2_LEVEL
#else
#define PMAP_TT_MAX_LEVEL	PMAP_TT_L3_LEVEL
#endif

#ifdef __ARM64_PMAP_SUBPAGE_L1__
#if (__ARM_VMSA__ <= 7)
#error This is not supported for old-style page tables
#endif
#define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
#else
#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
#endif

const unsigned int arm_hardware_page_size = ARM_PGBYTES;
const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;

#define PMAP_TT_DEALLOCATE_NOBLOCK	0x1

#if (__ARM_VMSA__ > 7)

static inline tt_entry_t *pmap_tt1e(
	pmap_t, vm_map_address_t);

static inline tt_entry_t *pmap_tt2e(
	pmap_t, vm_map_address_t);

static inline pt_entry_t *pmap_tt3e(
	pmap_t, vm_map_address_t);

static void pmap_unmap_sharedpage(
	pmap_t pmap);

static boolean_t pmap_is_64bit(pmap_t);

#endif /* (__ARM_VMSA__ > 7) */

static inline tt_entry_t *pmap_tte(
	pmap_t, vm_map_address_t);

static inline pt_entry_t *pmap_pte(
	pmap_t, vm_map_address_t);

static void pmap_update_cache_attributes_locked(
	ppnum_t, unsigned);

boolean_t arm_clear_fast_fault(
	ppnum_t ppnum,
	vm_prot_t fault_type);

static pmap_paddr_t pmap_pages_reclaim(
	void);

static kern_return_t pmap_pages_alloc(
	pmap_paddr_t *pa,
	unsigned size,
	unsigned option);

#define PMAP_PAGES_ALLOCATE_NOWAIT	0x1
#define PMAP_PAGES_RECLAIM_NOWAIT	0x2

static void pmap_pages_free(
	pmap_paddr_t pa,
	unsigned size);

static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);

static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);

static void pmap_trim_self(pmap_t pmap);
static void pmap_trim_subord(pmap_t subord);
#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
	static __return_type __function_name##_internal __function_args;
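/*
 * Illustrative sketch (not from the original source): each invocation below
 * declares the "_internal" flavor of a pmap entry point, so an invocation
 * such as
 *
 *	PMAP_SUPPORT_PROTOTYPES(
 *		boolean_t,
 *		pmap_is_empty, (pmap_t pmap,
 *		vm_map_offset_t va_start,
 *		vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
 *
 * expands to:
 *
 *	static boolean_t pmap_is_empty_internal(pmap_t pmap,
 *	    vm_map_offset_t va_start, vm_map_offset_t va_end);
 *
 * The index argument is unused by this particular expansion; presumably it
 * exists so the same table of invocations can be reused where the entry
 * points are dispatched by index.
 */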
PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	arm_fast_fault, (pmap_t pmap,
	vm_map_address_t va,
	vm_prot_t fault_type,
	boolean_t from_user), ARM_FAST_FAULT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	boolean_t,
	arm_force_fast_fault, (ppnum_t ppnum,
	vm_prot_t allow_mode,
	int options), ARM_FORCE_FAST_FAULT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	mapping_replenish, (void), MAPPING_REPLENISH_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	boolean_t,
	pmap_batch_set_cache_attributes, (ppnum_t pn,
	unsigned int cacheattr,
	unsigned int page_cnt,
	unsigned int page_index,
	boolean_t doit,
	unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_change_wiring, (pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired), PMAP_CHANGE_WIRING_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_t,
	pmap_create, (ledger_t ledger,
	vm_map_size_t size,
	boolean_t is_64bit), PMAP_CREATE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	pmap_enter_options, (pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options), PMAP_ENTER_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	vm_offset_t,
	pmap_extract, (pmap_t pmap,
	vm_map_address_t va), PMAP_EXTRACT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	ppnum_t,
	pmap_find_phys, (pmap_t pmap,
	addr64_t va), PMAP_FIND_PHYS_INDEX);

#if (__ARM_VMSA__ > 7)
PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
#endif

PMAP_SUPPORT_PROTOTYPES(
	boolean_t,
	pmap_is_empty, (pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	unsigned int,
	pmap_map_cpu_windows_copy, (ppnum_t pn,
	vm_prot_t prot,
	unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	pmap_nest, (pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	addr64_t nstart,
	uint64_t size), PMAP_NEST_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_page_protect_options, (ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_protect_options, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	vm_prot_t prot,
	unsigned int options,
	void *args), PMAP_PROTECT_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	pmap_query_page_info, (pmap_t pmap,
	vm_map_offset_t va,
	int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	mach_vm_size_t,
	pmap_query_resident, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	mach_vm_size_t *compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	int,
	pmap_remove_options, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options), PMAP_REMOVE_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	pmap_return, (boolean_t do_panic,
	boolean_t do_recurse), PMAP_RETURN_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_set_cache_attributes, (ppnum_t pn,
	unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_update_compressor_page, (ppnum_t pn,
	unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_set_process, (pmap_t pmap,
	int pid,
	char *procname), PMAP_SET_PROCESS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	pmap_unnest_options, (pmap_t grand,
	addr64_t vaddr,
	uint64_t size,
	unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	phys_attribute_set, (ppnum_t pn,
	unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	phys_attribute_clear, (ppnum_t pn,
	unsigned int bits,
	int options,
	void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_trim, (pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	addr64_t nstart,
	uint64_t size), PMAP_TRIM_INDEX);

void pmap_footprint_suspend(vm_map_t map,
	boolean_t suspend);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_footprint_suspend, (vm_map_t map,
	boolean_t suspend),
	PMAP_FOOTPRINT_SUSPEND_INDEX);
boolean_t pgtrace_enabled = 0;

typedef struct {
	queue_chain_t	chain;

	/*
	 *   pmap        - pmap for below addresses
	 *   ova         - original va page address
	 *   cva         - clone va addresses for pre, target and post pages
	 *   cva_spte    - clone saved ptes
	 *   range       - trace range in this map
	 *   cloned      - has been cloned or not
	 */
	pmap_t		pmap;
	vm_map_offset_t	ova;
	vm_map_offset_t	cva[3];
	pt_entry_t	cva_spte[3];
} pmap_pgtrace_map_t;

static void pmap_pgtrace_init(void);
static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
#if (__ARM_VMSA__ > 7)
/*
 * The low global vector page is mapped at a fixed alias.
 * Since the page size is 16k for H8 and newer we map the globals to a 16k
 * aligned address. Readers of the globals (e.g. lldb, panic server) need
 * to check both addresses anyway for backward compatibility. So for now
 * we leave H6 and H7 where they were.
 */
#if (ARM_PGSHIFT == 14)
#define LOWGLOBAL_ALIAS		(LOW_GLOBAL_BASE_ADDRESS + 0x4000)
#else
#define LOWGLOBAL_ALIAS		(LOW_GLOBAL_BASE_ADDRESS + 0x2000)
#endif
#else
#define LOWGLOBAL_ALIAS		(0xFFFF1000)
#endif

long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;

int pt_fake_zone_index = -1;	/* index of pmap fake zone */
/*
 * Allocates and initializes a per-CPU data structure for the pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_cpu_data_init_internal(unsigned int cpu_number)
{
	pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();

	pmap_cpu_data->cpu_number = cpu_number;
}

void
pmap_cpu_data_init(void)
{
	pmap_cpu_data_init_internal(cpu_number());
}

static void
pmap_cpu_data_array_init(void)
{
	pmap_cpu_data_init();
}

pmap_cpu_data_t *
pmap_get_cpu_data(void)
{
	pmap_cpu_data_t * pmap_cpu_data = NULL;

	pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;

	return pmap_cpu_data;
}
static pmap_paddr_t
pmap_pages_reclaim(
	void)
{
	boolean_t	found_page;
	unsigned	i;
	pt_desc_t	*ptdp;

	/*
	 * pmap_pages_reclaim() is returning a page by freeing an active pt page.
	 * To be eligible, a pt page is assigned to a user pmap. It doesn't have any wired pte
	 * entry and it contains at least one valid pte entry.
	 *
	 * In a loop, check for a page in the reclaimed pt page list.
	 * if one is present, unlink that page and return the physical page address.
	 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
	 * If found, invoke pmap_remove_range() on its pmap and address range then
	 * deallocates that pt page. This will end up adding the pt page to the
	 * reclaimed pt page list.
	 * If no eligible page were found in the pt page list, panic.
	 */

	pmap_simple_lock(&pmap_pages_lock);
	pmap_pages_request_count++;
	pmap_pages_request_acum++;

	while (1) {
		if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
			page_free_entry_t	*page_entry;

			page_entry = pmap_pages_reclaim_list;
			pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
			pmap_simple_unlock(&pmap_pages_lock);

			return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
		}

		pmap_simple_unlock(&pmap_pages_lock);

		pmap_simple_lock(&pt_pages_lock);
		ptdp = (pt_desc_t *)queue_first(&pt_page_list);
		found_page = FALSE;

		while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
			if ((ptdp->pmap->nested == FALSE)
			    && (pmap_simple_lock_try(&ptdp->pmap->lock))) {
				assert(ptdp->pmap != kernel_pmap);
				unsigned refcnt_acc = 0;
				unsigned wiredcnt_acc = 0;

				for (i = 0; i < PT_INDEX_MAX; i++) {
					if (ptdp->pt_cnt[i].refcnt == PT_DESC_REFCOUNT) {
						/* Do not attempt to free a page that contains an L2 table */
						refcnt_acc = 0;
						break;
					}
					refcnt_acc += ptdp->pt_cnt[i].refcnt;
					wiredcnt_acc += ptdp->pt_cnt[i].wiredcnt;
				}
				if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
					found_page = TRUE;
					/* Leave ptdp->pmap locked here.  We're about to reclaim
					 * a tt page from it, so we don't want anyone else messing
					 * with it while we do that. */
					break;
				}
				pmap_simple_unlock(&ptdp->pmap->lock);
			}
			ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
		}
		if (!found_page) {
			panic("pmap_pages_reclaim(): No eligible page in pt_page_list\n");
		} else {
			int			remove_count = 0;
			vm_map_address_t	va;
			pmap_t			pmap;
			pt_entry_t		*bpte, *epte;
			pt_entry_t		*pte_p;
			tt_entry_t		*tte_p;
			uint32_t		rmv_spte = 0;

			pmap_simple_unlock(&pt_pages_lock);
			pmap = ptdp->pmap;
			PMAP_ASSERT_LOCKED(pmap); // pmap lock should be held from loop above
			for (i = 0; i < PT_INDEX_MAX; i++) {
				va = ptdp->pt_map[i].va;

				/* If the VA is bogus, this may represent an unallocated region
				 * or one which is in transition (already being freed or expanded).
				 * Don't try to remove mappings here. */
				if (va == (vm_offset_t)-1)
					continue;

				tte_p = pmap_tte(pmap, va);
				if ((tte_p != (tt_entry_t *) NULL)
				    && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
#if (__ARM_VMSA__ == 7)
					pte_p = (pt_entry_t *) ttetokv(*tte_p);
					bpte = &pte_p[ptenum(va)];
					epte = bpte + PAGE_SIZE / sizeof(pt_entry_t);
#else
					pte_p = (pt_entry_t *) ttetokv(*tte_p);
					bpte = &pte_p[tt3_index(pmap, va)];
					epte = bpte + PAGE_SIZE / sizeof(pt_entry_t);
#endif
					/*
					 * Use PMAP_OPTIONS_REMOVE to clear any
					 * "compressed" markers and update the
					 * "compressed" counter in pmap->stats.
					 * This means that we lose accounting for
					 * any compressed pages in this range
					 * but the alternative is to not be able
					 * to account for their future decompression,
					 * which could cause the counter to drift
					 * more and more.
					 */
					remove_count += pmap_remove_range_options(
						pmap, va, bpte, epte,
						&rmv_spte, PMAP_OPTIONS_REMOVE);
					if (ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt != 0)
						panic("pmap_pages_reclaim(): ptdp %p, count %d\n", ptdp, ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt);
#if (__ARM_VMSA__ == 7)
					pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L1_LEVEL);
					flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->asid & 0xff));
					flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->asid & 0xff));
					flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->asid & 0xff));
					flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->asid & 0xff));
#else
					pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L2_LEVEL);
					flush_mmu_tlb_entry_async(tlbi_addr(va & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
#endif

					if (remove_count > 0) {
#if (__ARM_VMSA__ == 7)
						flush_mmu_tlb_region_asid_async(va, 4 * ARM_TT_L1_SIZE, pmap);
#else
						flush_mmu_tlb_region_asid_async(va, ARM_TT_L2_SIZE, pmap);
#endif
					}
				}
			}
			sync_tlb_flush();
			// Undo the lock we grabbed when we found ptdp above
			PMAP_UNLOCK(pmap);
		}
		pmap_simple_lock(&pmap_pages_lock);
	}
}
static kern_return_t
pmap_pages_alloc(
	pmap_paddr_t	*pa,
	unsigned	size,
	unsigned	option)
{
	vm_page_t	m = VM_PAGE_NULL, m_prev;

	if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
		assert(size == PAGE_SIZE);
		*pa = pmap_pages_reclaim();
		return KERN_SUCCESS;
	}
	if (size == PAGE_SIZE) {
		while ((m = vm_page_grab()) == VM_PAGE_NULL) {
			if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}

			VM_PAGE_WAIT();
		}
		vm_page_lock_queues();
		vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
		vm_page_unlock_queues();
	}
	if (size == 2 * PAGE_SIZE) {
		while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
			if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}

			VM_PAGE_WAIT();
		}
	}

	*pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));

	vm_object_lock(pmap_object);
	while (m != VM_PAGE_NULL) {
		vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
		m_prev = m;
		m = NEXT_PAGE(m_prev);
		*(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
	}
	vm_object_unlock(pmap_object);

	OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
	OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);

	return KERN_SUCCESS;
}
static void
pmap_pages_free(
	pmap_paddr_t	pa,
	unsigned	size)
{
	pmap_simple_lock(&pmap_pages_lock);

	if (pmap_pages_request_count != 0) {
		page_free_entry_t	*page_entry;

		pmap_pages_request_count--;
		page_entry = (page_free_entry_t *)phystokv(pa);
		page_entry->next = pmap_pages_reclaim_list;
		pmap_pages_reclaim_list = page_entry;
		pmap_simple_unlock(&pmap_pages_lock);

		return;
	}

	pmap_simple_unlock(&pmap_pages_lock);

	vm_page_t	m;
	pmap_paddr_t	pa_max;

	OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);

	for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
		vm_object_lock(pmap_object);
		m = vm_page_lookup(pmap_object, (pa - gPhysBase));
		assert(m != VM_PAGE_NULL);
		assert(VM_PAGE_WIRED(m));
		vm_page_lock_queues();
		vm_page_free(m);
		vm_page_unlock_queues();
		vm_object_unlock(pmap_object);
	}
}
static inline void
PMAP_ZINFO_PALLOC(
	pmap_t pmap, int bytes)
{
	pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
}

static inline void
PMAP_ZINFO_PFREE(
	pmap_t pmap, int bytes)
{
	pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
}

static inline void
pmap_tt_ledger_credit(
	pmap_t		pmap,
	vm_size_t	size)
{
	if (pmap != kernel_pmap) {
		pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
		pmap_ledger_credit(pmap, task_ledgers.page_table, size);
	}
}

static inline void
pmap_tt_ledger_debit(
	pmap_t		pmap,
	vm_size_t	size)
{
	if (pmap != kernel_pmap) {
		pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
		pmap_ledger_debit(pmap, task_ledgers.page_table, size);
	}
}
static unsigned int
alloc_asid(
	void)
{
	unsigned int asid_bitmap_index;

	pmap_simple_lock(&pmaps_lock);
	for (asid_bitmap_index = 0; asid_bitmap_index < (MAX_ASID / (sizeof(uint32_t) * NBBY)); asid_bitmap_index++) {
		unsigned int temp = ffs(asid_bitmap[asid_bitmap_index]);
		if (temp > 0) {
			temp -= 1;
			asid_bitmap[asid_bitmap_index] &= ~(1 << temp);
#if __ARM_KERNEL_PROTECT__
			/*
			 * We need two ASIDs: n and (n | 1).  n is used for EL0,
			 * (n | 1) for EL1.
			 */
			unsigned int temp2 = temp | 1;
			assert(temp2 < MAX_ASID);
			assert(temp2 != temp);
			assert(asid_bitmap[asid_bitmap_index] & (1 << temp2));

			/* Grab the second ASID. */
			asid_bitmap[asid_bitmap_index] &= ~(1 << temp2);
#endif /* __ARM_KERNEL_PROTECT__ */
			pmap_simple_unlock(&pmaps_lock);

			/*
			 * We should never vend out physical ASID 0 through this
			 * method, as it belongs to the kernel.
			 */
			assert(((asid_bitmap_index * sizeof(uint32_t) * NBBY + temp) % ARM_MAX_ASID) != 0);

#if __ARM_KERNEL_PROTECT__
			/* Or the kernel EL1 ASID. */
			assert(((asid_bitmap_index * sizeof(uint32_t) * NBBY + temp) % ARM_MAX_ASID) != 1);
#endif /* __ARM_KERNEL_PROTECT__ */

			return asid_bitmap_index * sizeof(uint32_t) * NBBY + temp;
		}
	}
	pmap_simple_unlock(&pmaps_lock);
	/*
	 * ToDo: Add code to deal with pmap with no asid panic for now. Not
	 * an issue with the small config process hard limit
	 */
	panic("alloc_asid(): out of ASID number");
}

static void
free_asid(
	int asid)
{
	/* Don't free up any alias of physical ASID 0. */
	assert((asid % ARM_MAX_ASID) != 0);

	pmap_simple_lock(&pmaps_lock);
	setbit(asid, (int *) asid_bitmap);

#if __ARM_KERNEL_PROTECT__
	assert((asid | 1) < MAX_ASID);
	assert((asid | 1) != asid);
	setbit(asid | 1, (int *) asid_bitmap);
#endif /* __ARM_KERNEL_PROTECT__ */

	pmap_simple_unlock(&pmaps_lock);
}
#ifndef PMAP_PV_LOAD_FACTOR
#define PMAP_PV_LOAD_FACTOR		1
#endif

#define PV_LOW_WATER_MARK_DEFAULT	(0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_KERN_LOW_WATER_MARK_DEFAULT	(0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_ALLOC_CHUNK_INITIAL		(0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_KERN_ALLOC_CHUNK_INITIAL	(0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_ALLOC_INITIAL_TARGET		(PV_ALLOC_CHUNK_INITIAL * 5)
#define PV_KERN_ALLOC_INITIAL_TARGET	(PV_KERN_ALLOC_CHUNK_INITIAL)
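/*
 * Illustrative arithmetic (not from the original source): with
 * PMAP_PV_LOAD_FACTOR == 1, each allocation chunk is 0x200 == 512 pv_entry_t
 * objects, so the general free list is primed to
 * PV_ALLOC_INITIAL_TARGET == 512 * 5 == 2560 entries while the kernel
 * reserve is primed with a single 512-entry chunk, and both lists are
 * refilled once they fall below their 512-entry low-water marks.
 */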
uint32_t pv_free_count MARK_AS_PMAP_DATA = 0;
uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
uint32_t pv_kern_free_count MARK_AS_PMAP_DATA = 0;

uint32_t pv_low_water_mark MARK_AS_PMAP_DATA;
uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA;
uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA;
uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA;

thread_t mapping_replenish_thread;
event_t mapping_replenish_event;
event_t pmap_user_pv_throttle_event;
volatile uint32_t mappingrecurse = 0;

uint64_t pmap_pv_throttle_stat;
uint64_t pmap_pv_throttled_waiters;

unsigned pmap_mapping_thread_wakeups;
unsigned pmap_kernel_reserve_replenish_stat MARK_AS_PMAP_DATA;
unsigned pmap_user_reserve_replenish_stat MARK_AS_PMAP_DATA;
unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
static void
pv_init(
	void)
{
	simple_lock_init(&pv_free_list_lock, 0);
	simple_lock_init(&pv_kern_free_list_lock, 0);
	pv_free_list = PV_ENTRY_NULL;
	pv_free_count = 0x0U;
	pv_kern_free_list = PV_ENTRY_NULL;
	pv_kern_free_count = 0x0U;
}

static inline void PV_ALLOC(pv_entry_t **pv_ep);
static inline void PV_KERN_ALLOC(pv_entry_t **pv_e);
static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);
static inline void PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);

static inline void pmap_pv_throttle(pmap_t p);
static boolean_t
pv_alloc(
	pmap_t pmap,
	unsigned int pai,
	pv_entry_t **pvepp)
{
	if (pmap != NULL)
		PMAP_ASSERT_LOCKED(pmap);
	ASSERT_PVH_LOCKED(pai);
	PV_ALLOC(pvepp);
	if (PV_ENTRY_NULL == *pvepp) {
		if ((pmap == NULL) || (kernel_pmap == pmap)) {
			PV_KERN_ALLOC(pvepp);

			if (PV_ENTRY_NULL == *pvepp) {
				pv_entry_t	*pv_e;
				pv_entry_t	*pv_eh;
				pv_entry_t	*pv_et;
				int		pv_cnt;
				unsigned	j;
				pmap_paddr_t	pa;
				kern_return_t	ret;

				UNLOCK_PVH(pai);
				if (pmap != NULL)
					PMAP_UNLOCK(pmap);

				ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);

				if (ret == KERN_RESOURCE_SHORTAGE) {
					ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
				}

				if (ret != KERN_SUCCESS) {
					panic("%s: failed to alloc page for kernel, ret=%d, "
					    "pmap=%p, pai=%u, pvepp=%p",
					    __FUNCTION__, ret, pmap, pai, pvepp);
				}

				pv_page_count++;

				pv_e = (pv_entry_t *)phystokv(pa);
				pv_cnt = 0;
				pv_eh = pv_et = PV_ENTRY_NULL;
				*pvepp = pv_e;
				pv_e++;

				for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
					pv_e->pve_next = pv_eh;
					pv_eh = pv_e;

					if (pv_et == PV_ENTRY_NULL) {
						pv_et = pv_e;
					}
					pv_cnt++;
					pv_e++;
				}
				PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
				if (pmap != NULL)
					PMAP_LOCK(pmap);
				LOCK_PVH(pai);
				return FALSE;
			}
		} else {
			UNLOCK_PVH(pai);
			PMAP_UNLOCK(pmap);
			pmap_pv_throttle(pmap);
			{
				pv_entry_t	*pv_e;
				pv_entry_t	*pv_eh;
				pv_entry_t	*pv_et;
				int		pv_cnt;
				unsigned	j;
				pmap_paddr_t	pa;
				kern_return_t	ret;

				ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);

				if (ret != KERN_SUCCESS) {
					panic("%s: failed to alloc page, ret=%d, "
					    "pmap=%p, pai=%u, pvepp=%p",
					    __FUNCTION__, ret, pmap, pai, pvepp);
				}

				pv_page_count++;

				pv_e = (pv_entry_t *)phystokv(pa);
				pv_cnt = 0;
				pv_eh = pv_et = PV_ENTRY_NULL;
				*pvepp = pv_e;
				pv_e++;

				for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
					pv_e->pve_next = pv_eh;
					pv_eh = pv_e;

					if (pv_et == PV_ENTRY_NULL) {
						pv_et = pv_e;
					}
					pv_cnt++;
					pv_e++;
				}
				PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
			}
			PMAP_LOCK(pmap);
			LOCK_PVH(pai);
			return FALSE;
		}
	}
	assert(PV_ENTRY_NULL != *pvepp);
	return TRUE;
}

static void
pv_free(
	pv_entry_t *pvep)
{
	PV_FREE_LIST(pvep, pvep, 1);
}

static void
pv_list_free(
	pv_entry_t *pvehp,
	pv_entry_t *pvetp,
	int cnt)
{
	PV_FREE_LIST(pvehp, pvetp, cnt);
}
static inline void
pv_water_mark_check(void)
{
	if ((pv_free_count < pv_low_water_mark) || (pv_kern_free_count < pv_kern_low_water_mark)) {
		if (!mappingrecurse && hw_compare_and_store(0, 1, &mappingrecurse)) {
			thread_wakeup(&mapping_replenish_event);
		}
	}
}

static inline void
PV_ALLOC(pv_entry_t **pv_ep)
{
	assert(*pv_ep == PV_ENTRY_NULL);
	pmap_simple_lock(&pv_free_list_lock);
	/*
	 * If the kernel reserved pool is low, let non-kernel mappings allocate
	 * synchronously, possibly subject to a throttle.
	 */
	if ((pv_kern_free_count >= pv_kern_low_water_mark) && ((*pv_ep = pv_free_list) != 0)) {
		pv_free_list = (pv_entry_t *)(*pv_ep)->pve_next;
		(*pv_ep)->pve_next = PV_ENTRY_NULL;
		pv_free_count--;
	}

	pmap_simple_unlock(&pv_free_list_lock);
}

static inline void
PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt)
{
	pmap_simple_lock(&pv_free_list_lock);
	pv_et->pve_next = (pv_entry_t *)pv_free_list;
	pv_free_list = pv_eh;
	pv_free_count += pv_cnt;
	pmap_simple_unlock(&pv_free_list_lock);
}

static inline void
PV_KERN_ALLOC(pv_entry_t **pv_e)
{
	assert(*pv_e == PV_ENTRY_NULL);
	pmap_simple_lock(&pv_kern_free_list_lock);

	if ((*pv_e = pv_kern_free_list) != 0) {
		pv_kern_free_list = (pv_entry_t *)(*pv_e)->pve_next;
		(*pv_e)->pve_next = PV_ENTRY_NULL;
		pv_kern_free_count--;
		pmap_kern_reserve_alloc_stat++;
	}

	pmap_simple_unlock(&pv_kern_free_list_lock);
}

static inline void
PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt)
{
	pmap_simple_lock(&pv_kern_free_list_lock);
	pv_et->pve_next = pv_kern_free_list;
	pv_kern_free_list = pv_eh;
	pv_kern_free_count += pv_cnt;
	pmap_simple_unlock(&pv_kern_free_list_lock);
}
static inline void
pmap_pv_throttle(__unused pmap_t p)
{
	assert(p != kernel_pmap);
	/* Apply throttle on non-kernel mappings */
	if (pv_kern_free_count < (pv_kern_low_water_mark / 2)) {
		pmap_pv_throttle_stat++;
		/* This doesn't need to be strictly accurate, merely a hint
		 * to eliminate the timeout when the reserve is replenished.
		 */
		pmap_pv_throttled_waiters++;
		assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
		thread_block(THREAD_CONTINUE_NULL);
	}
}
/*
 * Creates a target number of free pv_entry_t objects for the kernel free list
 * and the general free list.
 */
MARK_AS_PMAP_TEXT static kern_return_t
mapping_free_prime_internal(void)
{
	pv_entry_t	*pv_e;
	pv_entry_t	*pv_eh;
	pv_entry_t	*pv_et;
	int		pv_cnt;
	unsigned	j;
	pmap_paddr_t	pa;
	kern_return_t	ret;
	int alloc_options = 0;
	int needed_pv_cnt = 0;
	int target_pv_free_cnt = 0;

	SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_called = FALSE;
	SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_done = FALSE;

	if (mapping_free_prime_internal_done) {
		return KERN_FAILURE;
	}

	if (!mapping_free_prime_internal_called) {
		mapping_free_prime_internal_called = TRUE;

		pv_low_water_mark = PV_LOW_WATER_MARK_DEFAULT;

		/* Alterable via sysctl */
		pv_kern_low_water_mark = PV_KERN_LOW_WATER_MARK_DEFAULT;

		pv_kern_alloc_chunk = PV_KERN_ALLOC_CHUNK_INITIAL;
		pv_alloc_chunk = PV_ALLOC_CHUNK_INITIAL;
	}

	pv_cnt = 0;
	pv_eh = pv_et = PV_ENTRY_NULL;
	target_pv_free_cnt = PV_ALLOC_INITIAL_TARGET;

	/*
	 * We don't take the lock to read pv_free_count, as we should not be
	 * invoking this from a multithreaded context.
	 */
	needed_pv_cnt = target_pv_free_cnt - pv_free_count;

	if (needed_pv_cnt > target_pv_free_cnt) {
		needed_pv_cnt = 0;
	}

	while (pv_cnt < needed_pv_cnt) {
		ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);

		assert(ret == KERN_SUCCESS);

		pv_page_count++;

		pv_e = (pv_entry_t *)phystokv(pa);

		for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
			pv_e->pve_next = pv_eh;
			pv_eh = pv_e;

			if (pv_et == PV_ENTRY_NULL) {
				pv_et = pv_e;
			}
			pv_cnt++;
			pv_e++;
		}
	}

	if (pv_cnt) {
		PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
	}

	pv_cnt = 0;
	pv_eh = pv_et = PV_ENTRY_NULL;
	target_pv_free_cnt = PV_KERN_ALLOC_INITIAL_TARGET;

	/*
	 * We don't take the lock to read pv_kern_free_count, as we should not
	 * be invoking this from a multithreaded context.
	 */
	needed_pv_cnt = target_pv_free_cnt - pv_kern_free_count;

	if (needed_pv_cnt > target_pv_free_cnt) {
		needed_pv_cnt = 0;
	}

	while (pv_cnt < needed_pv_cnt) {
		ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);

		assert(ret == KERN_SUCCESS);

		pv_page_count++;

		pv_e = (pv_entry_t *)phystokv(pa);

		for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
			pv_e->pve_next = pv_eh;
			pv_eh = pv_e;

			if (pv_et == PV_ENTRY_NULL) {
				pv_et = pv_e;
			}
			pv_cnt++;
			pv_e++;
		}
	}

	if (pv_cnt) {
		PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
	}

	mapping_free_prime_internal_done = TRUE;
	return KERN_SUCCESS;
}

void
mapping_free_prime(void)
{
	kern_return_t kr = KERN_FAILURE;

	kr = mapping_free_prime_internal();

	if (kr != KERN_SUCCESS) {
		panic("%s: failed, kr=%d", __FUNCTION__, kr);
	}
}
void mapping_replenish(void);

void
mapping_adjust(void)
{
	kern_return_t mres;

	mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
	if (mres != KERN_SUCCESS) {
		panic("pmap: mapping_replenish thread creation failed");
	}
	thread_deallocate(mapping_replenish_thread);
}
/*
 * Fills the kernel and general PV free lists back up to their low watermarks.
 */
MARK_AS_PMAP_TEXT static kern_return_t
mapping_replenish_internal(void)
{
	pv_entry_t	*pv_e;
	pv_entry_t	*pv_eh;
	pv_entry_t	*pv_et;
	int		pv_cnt;
	unsigned	j;
	pmap_paddr_t	pa;
	kern_return_t	ret = KERN_SUCCESS;

	while (pv_kern_free_count < pv_kern_low_water_mark) {
		pv_cnt = 0;
		pv_eh = pv_et = PV_ENTRY_NULL;

		ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
		assert(ret == KERN_SUCCESS);

		pv_page_count++;

		pv_e = (pv_entry_t *)phystokv(pa);

		for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
			pv_e->pve_next = pv_eh;
			pv_eh = pv_e;

			if (pv_et == PV_ENTRY_NULL) {
				pv_et = pv_e;
			}
			pv_cnt++;
			pv_e++;
		}
		pmap_kernel_reserve_replenish_stat += pv_cnt;
		PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
	}

	while (pv_free_count < pv_low_water_mark) {
		pv_cnt = 0;
		pv_eh = pv_et = PV_ENTRY_NULL;

		ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
		assert(ret == KERN_SUCCESS);

		pv_page_count++;

		pv_e = (pv_entry_t *)phystokv(pa);

		for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
			pv_e->pve_next = pv_eh;
			pv_eh = pv_e;

			if (pv_et == PV_ENTRY_NULL) {
				pv_et = pv_e;
			}
			pv_cnt++;
			pv_e++;
		}
		pmap_user_reserve_replenish_stat += pv_cnt;
		PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
	}

	return ret;
}
/*
 * Continuation function that keeps the PV free lists from running out of free
 * elements.
 */
__attribute__((noreturn))
void
mapping_replenish(void)
{
	kern_return_t kr;

	/* We qualify for VM privileges...*/
	current_thread()->options |= TH_OPT_VMPRIV;

	for (;;) {
		kr = mapping_replenish_internal();

		if (kr != KERN_SUCCESS) {
			panic("%s: failed, kr=%d", __FUNCTION__, kr);
		}

		/*
		 * Wake threads throttled while the kernel reserve was being replenished.
		 */
		if (pmap_pv_throttled_waiters) {
			pmap_pv_throttled_waiters = 0;
			thread_wakeup(&pmap_user_pv_throttle_event);
		}

		/* Check if the kernel pool has been depleted since the
		 * first pass, to reduce refill latency.
		 */
		if (pv_kern_free_count < pv_kern_low_water_mark) {
			continue;
		}

		/* Block sans continuation to avoid yielding kernel stack */
		assert_wait(&mapping_replenish_event, THREAD_UNINT);
		thread_block(THREAD_CONTINUE_NULL);
		pmap_mapping_thread_wakeups++;
	}
}
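/*
 * Descriptive summary of the replenish path above: mapping_adjust() creates
 * the mapping_replenish thread at MAXPRI_KERNEL; mapping_replenish() loops
 * forever, calling mapping_replenish_internal() to refill the kernel-reserved
 * and general PV entry free lists whenever they drop below
 * pv_kern_low_water_mark / pv_low_water_mark, waking any throttled waiters,
 * and then blocking on mapping_replenish_event until woken again.
 */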
static void
ptd_bootstrap(
	pt_desc_t *ptdp,
	unsigned int ptd_cnt)
{
	simple_lock_init(&ptd_free_list_lock, 0);
	while (ptd_cnt != 0) {
		(*(void **)ptdp) = (void *)ptd_free_list;
		ptd_free_list = ptdp;
		ptdp++;
		ptd_cnt--;
		ptd_free_count++;
	}
	ptd_preboot = FALSE;
}
static pt_desc_t *
ptd_alloc_unlinked(bool reclaim)
{
	pt_desc_t   *ptdp;
	unsigned     i;

	if (!ptd_preboot) {
		pmap_simple_lock(&ptd_free_list_lock);
	}

	if (ptd_free_count == 0) {
		unsigned int    ptd_cnt;
		pt_desc_t      *ptdp_next;

		if (ptd_preboot) {
			ptdp = (pt_desc_t *)avail_start;
			avail_start += ARM_PGBYTES;
			ptdp_next = ptdp;
			ptd_cnt = ARM_PGBYTES / sizeof(pt_desc_t);
		} else {
			pmap_paddr_t    pa;
			kern_return_t   ret;

			pmap_simple_unlock(&ptd_free_list_lock);

			if (pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
				if (reclaim) {
					ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
					assert(ret == KERN_SUCCESS);
				} else {
					return NULL;
				}
			}
			ptdp = (pt_desc_t *)phystokv(pa);

			pmap_simple_lock(&ptd_free_list_lock);
			ptdp_next = ptdp;
			ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
		}

		while (ptd_cnt != 0) {
			(*(void **)ptdp_next) = (void *)ptd_free_list;
			ptd_free_list = ptdp_next;
			ptdp_next++;
			ptd_cnt--;
			ptd_free_count++;
		}
	}

	if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
		ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
		ptd_free_count--;
	} else {
		panic("out of ptd entry\n");
	}

	if (!ptd_preboot) {
		pmap_simple_unlock(&ptd_free_list_lock);
	}

	ptdp->pt_page.next = NULL;
	ptdp->pt_page.prev = NULL;

	for (i = 0; i < PT_INDEX_MAX; i++) {
		ptdp->pt_map[i].va = (vm_offset_t)-1;
		ptdp->pt_cnt[i].refcnt = 0;
		ptdp->pt_cnt[i].wiredcnt = 0;
	}

	return ptdp;
}
static inline pt_desc_t *
ptd_alloc(pmap_t pmap, bool reclaim)
{
	pt_desc_t *ptdp = ptd_alloc_unlinked(reclaim);

	if (ptdp == NULL) {
		return NULL;
	}

	ptdp->pmap = pmap;
	if (pmap != kernel_pmap) {
		/* We should never try to reclaim kernel pagetable pages in
		 * pmap_pages_reclaim(), so don't enter them into the list. */
		pmap_simple_lock(&pt_pages_lock);
		queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
		pmap_simple_unlock(&pt_pages_lock);
	}

	pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
	return ptdp;
}
static void
ptd_deallocate(pt_desc_t *ptdp)
{
	pmap_t pmap = ptdp->pmap;

	if (ptd_preboot) {
		panic("ptd_deallocate(): early boot\n");
	}

	if (ptdp->pt_page.next != NULL) {
		pmap_simple_lock(&pt_pages_lock);
		queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
		pmap_simple_unlock(&pt_pages_lock);
	}
	pmap_simple_lock(&ptd_free_list_lock);
	(*(void **)ptdp) = (void *)ptd_free_list;
	ptd_free_list = (pt_desc_t *)ptdp;
	ptd_free_count++;
	pmap_simple_unlock(&ptd_free_list_lock);
	pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
}
static void
ptd_init(
	pt_desc_t *ptdp,
	pmap_t pmap,
	vm_map_address_t va,
	unsigned int level,
	pt_entry_t *pte_p)
{
	if (ptdp->pmap != pmap) {
		panic("ptd_init(): pmap mismatch\n");
	}

#if (__ARM_VMSA__ == 7)
	ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(ARM_TT_L1_PT_OFFMASK);
#else
	if (level == 3) {
		ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~ARM_TT_L2_OFFMASK;
	} else if (level == 2) {
		ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~ARM_TT_L1_OFFMASK;
	}
#endif
	if (level < PMAP_TT_MAX_LEVEL) {
		ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt = PT_DESC_REFCOUNT;
	}
}
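/*
 * Descriptive note: every physical page used as a page table is tracked by a
 * pt_desc_t.  ptd_alloc()/ptd_deallocate() manage the descriptors themselves
 * (carving new ones out of pages from pmap_pages_alloc() when the free list
 * runs dry), while ptd_init() records the base VA and seeds the per-table
 * refcnt for the table that the descriptor now describes.
 */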
	return pa_valid(addr);
}
#if (__ARM_VMSA__ == 7)

/*
 *	Given an offset and a map, compute the address of the
 *	corresponding translation table entry.
 */
static inline tt_entry_t *
pmap_tte(pmap_t pmap,
	vm_map_address_t addr)
{
	if (!(tte_index(pmap, addr) < pmap->tte_index_max)) {
		return (tt_entry_t *)NULL;
	}
	return &pmap->tte[tte_index(pmap, addr)];
}
/*
 *	Given an offset and a map, compute the address of the
 *	pte.  If the address is invalid with respect to the map
 *	then PT_ENTRY_NULL is returned (and the map may need to grow).
 *
 *	This is only used internally.
 */
static inline pt_entry_t *
pmap_pte(
	pmap_t pmap,
	vm_map_address_t addr)
{
	pt_entry_t     *ptp;
	tt_entry_t     *ttp;
	tt_entry_t      tte;

	ttp = pmap_tte(pmap, addr);
	if (ttp == (tt_entry_t *)NULL) {
		return PT_ENTRY_NULL;
	}
	tte = *ttp;
	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
		panic("Attempt to demote L1 block: pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
	}
	if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
		return PT_ENTRY_NULL;
	}
	ptp = (pt_entry_t *) ttetokv(tte) + ptenum(addr);
	return ptp;
}
#else

/*
 *	Given an offset and a map, compute the address of level 1 translation table entry.
 *	If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline tt_entry_t *
pmap_tt1e(pmap_t pmap,
	vm_map_address_t addr)
{
	/* Level 0 currently unused */
#if __ARM64_TWO_LEVEL_PMAP__
#pragma unused(pmap, addr)
	panic("pmap_tt1e called on a two level pmap");
	return NULL;
#else
	return &pmap->tte[tt1_index(pmap, addr)];
#endif
}
/*
 *	Given an offset and a map, compute the address of level 2 translation table entry.
 *	If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline tt_entry_t *
pmap_tt2e(pmap_t pmap,
	vm_map_address_t addr)
{
#if __ARM64_TWO_LEVEL_PMAP__
	return &pmap->tte[tt2_index(pmap, addr)];
#else
	tt_entry_t     *ttp;
	tt_entry_t      tte;

	ttp = pmap_tt1e(pmap, addr);
	tte = *ttp;
	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
		panic("Attempt to demote L1 block (?!): pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
	}
	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
		return PT_ENTRY_NULL;
	}

	ttp = &((tt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, addr)];
	return (tt_entry_t *)ttp;
#endif
}
/*
 *	Given an offset and a map, compute the address of level 3 translation table entry.
 *	If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline pt_entry_t *
pmap_tt3e(
	pmap_t pmap,
	vm_map_address_t addr)
{
	pt_entry_t     *ptp;
	tt_entry_t     *ttp;
	tt_entry_t      tte;

	ttp = pmap_tt2e(pmap, addr);
	if (ttp == PT_ENTRY_NULL) {
		return PT_ENTRY_NULL;
	}

	tte = *ttp;

	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
		panic("Attempt to demote L2 block: pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
	}
	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
		return PT_ENTRY_NULL;
	}

	/* Get third-level (4KB) entry */
	ptp = &(((pt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, addr)]);
	return ptp;
}
static inline tt_entry_t *
pmap_tte(
	pmap_t pmap,
	vm_map_address_t addr)
{
	return pmap_tt2e(pmap, addr);
}

static inline pt_entry_t *
pmap_pte(
	pmap_t pmap,
	vm_map_address_t addr)
{
	return pmap_tt3e(pmap, addr);
}

#endif
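/*
 * Descriptive note: on __arm64__ the table-walk helpers mirror the VMSA
 * hierarchy.  pmap_tt1e/pmap_tt2e/pmap_tt3e return pointers to the L1/L2/L3
 * entries for a VA, panicking if a block mapping is found where a table is
 * expected, and pmap_tte/pmap_pte are thin aliases for the L2 and L3 lookups.
 */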
/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_map_address_t
pmap_map(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot,
	unsigned int flags)
{
	kern_return_t kr;

	while (start < end) {
		kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
		    prot, VM_PROT_NONE, flags, FALSE);

		if (kr != KERN_SUCCESS) {
			panic("%s: failed pmap_enter, "
			    "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
			    __FUNCTION__,
			    (void *) virt, (void *) start, (void *) end, prot, flags);
		}

		virt += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	return virt;
}
vm_map_address_t
pmap_map_bd_with_options(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot,
	int32_t options)
{
	pt_entry_t       tmplate;
	pt_entry_t      *ptep;
	vm_map_address_t vaddr;
	vm_offset_t      paddr;
	pt_entry_t       mem_attr;

	switch (options & PMAP_MAP_BD_MASK) {
	case PMAP_MAP_BD_WCOMB:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
#if (__ARM_VMSA__ > 7)
		mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		mem_attr |= ARM_PTE_SH;
#endif
		break;
	case PMAP_MAP_BD_POSTED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
		break;
	default:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
		break;
	}

	tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
	    mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
#if __ARM_KERNEL_PROTECT__
	tmplate |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	vaddr = virt;
	paddr = start;
	while (paddr < end) {
		ptep = pmap_pte(kernel_pmap, vaddr);
		if (ptep == PT_ENTRY_NULL) {
			panic("pmap_map_bd");
		}
		assert(!ARM_PTE_IS_COMPRESSED(*ptep));
		WRITE_PTE_STRONG(ptep, tmplate);

		pte_increment_pa(tmplate);
		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	flush_mmu_tlb_region(virt, (unsigned)(end - start));

	return vaddr;
}
/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory outside the range
 *	[vm_first_phys, vm_last_phys] (i.e., devices).
 *	Otherwise like pmap_map.
 */
vm_map_address_t
pmap_map_bd(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot)
{
	pt_entry_t       tmplate;
	pt_entry_t      *ptep;
	vm_map_address_t vaddr;
	vm_offset_t      paddr;

	/* not cacheable and not buffered */
	tmplate = pa_to_pte(start)
	    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
	    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
	    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
#if __ARM_KERNEL_PROTECT__
	tmplate |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	vaddr = virt;
	paddr = start;
	while (paddr < end) {
		ptep = pmap_pte(kernel_pmap, vaddr);
		if (ptep == PT_ENTRY_NULL) {
			panic("pmap_map_bd");
		}
		assert(!ARM_PTE_IS_COMPRESSED(*ptep));
		WRITE_PTE_STRONG(ptep, tmplate);

		pte_increment_pa(tmplate);
		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	flush_mmu_tlb_region(virt, (unsigned)(end - start));

	return vaddr;
}
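/*
 * Usage sketch (illustrative only; the variable and addresses below are
 * hypothetical):
 *
 *	vm_map_address_t next_va;
 *	next_va = pmap_map_bd(device_window_va,      // VA to start mapping at
 *	                      0x20000000,            // device register PA start
 *	                      0x20001000,            // device register PA end
 *	                      VM_PROT_READ | VM_PROT_WRITE);
 *
 * The range is entered uncached (CACHE_ATTRINDX_DISABLE) with NX/PNX set, and
 * the return value is the first virtual address past the mapped range.
 */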
/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory specific physical addresses in early
 *	boot (i.e., before kernel_map is initialized).
 *
 *	Maps are in the VM_HIGH_KERNEL_WINDOW area.
 */
vm_map_address_t
pmap_map_high_window_bd(
	vm_offset_t pa_start,
	vm_size_t len,
	vm_prot_t prot)
{
	pt_entry_t              *ptep, pte;
#if (__ARM_VMSA__ == 7)
	vm_map_address_t        va_start = VM_HIGH_KERNEL_WINDOW;
	vm_map_address_t        va_max = VM_MAX_KERNEL_ADDRESS;
#else
	vm_map_address_t        va_start = VREGION1_START;
	vm_map_address_t        va_max = VREGION1_START + VREGION1_SIZE;
#endif
	vm_map_address_t        va_end;
	vm_map_address_t        va;
	vm_size_t               offset;

	offset = pa_start & PAGE_MASK;
	pa_start -= offset;
	len += offset;

	if (len > (va_max - va_start)) {
		panic("pmap_map_high_window_bd: area too large\n");
	}

scan:
	for (; va_start < va_max; va_start += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va_start);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep));
		if (*ptep == ARM_PTE_TYPE_FAULT) {
			break;
		}
	}
	if (va_start > va_max) {
		panic("pmap_map_high_window_bd: insufficient pages\n");
	}

	for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va_end);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep));
		if (*ptep != ARM_PTE_TYPE_FAULT) {
			va_start = va_end + PAGE_SIZE;
			goto scan;
		}
	}

	for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va);
		pte = pa_to_pte(pa_start)
		    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
		    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
		    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
#if __ARM_KERNEL_PROTECT__
		pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
		WRITE_PTE_STRONG(ptep, pte);
	}
	PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len);

	kasan_notify_address(va_start, len);

	return va_start + offset;
}
#define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))
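/*
 * Illustrative note: PMAP_ALIGN rounds 'addr' up to the next 'align' boundary,
 * relying on '+' binding tighter than '&', and assumes 'align' is a power of
 * two.  For example:
 *	PMAP_ALIGN(0x1001, 0x40) == 0x1040
 *	PMAP_ALIGN(0x1040, 0x40) == 0x1040
 */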
static vm_size_t
pmap_compute_io_rgns(void)
{
	DTEntry          entry;
	pmap_io_range_t *ranges;
	uint64_t         rgn_end;
	void            *prop = NULL;
	int              err;
	unsigned int     prop_size;

	err = DTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	if (kSuccess != DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
		return 0;
	}

	ranges = prop;
	for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
		if (ranges[i].addr & PAGE_MASK) {
			panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
		}
		if (ranges[i].len & PAGE_MASK) {
			panic("pmap I/O region %u length 0x%x is not page-aligned", i, ranges[i].len);
		}
		if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end)) {
			panic("pmap I/O region %u addr 0x%llx length 0x%x wraps around", i, ranges[i].addr, ranges[i].len);
		}
		if ((i == 0) || (ranges[i].addr < io_rgn_start)) {
			io_rgn_start = ranges[i].addr;
		}
		if ((i == 0) || (rgn_end > io_rgn_end)) {
			io_rgn_end = rgn_end;
		}
		++num_io_rgns;
	}

	if (io_rgn_start & PAGE_MASK) {
		panic("pmap I/O region start is not page-aligned!\n");
	}

	if (io_rgn_end & PAGE_MASK) {
		panic("pmap I/O region end is not page-aligned!\n");
	}

	if (((io_rgn_start <= gPhysBase) && (io_rgn_end > gPhysBase)) ||
	    ((io_rgn_start < avail_end) && (io_rgn_end >= avail_end)) ||
	    ((io_rgn_start > gPhysBase) && (io_rgn_end < avail_end))) {
		panic("pmap I/O region overlaps physical memory!\n");
	}

	return num_io_rgns * sizeof(*ranges);
}
/*
 * return < 0 for a < b
 *          0 for a == b
 *        > 0 for a > b
 */
typedef int (*cmpfunc_t)(const void *a, const void *b);

extern void
qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

static int
cmp_io_rgns(const void *a, const void *b)
{
	const pmap_io_range_t *range_a = a;
	const pmap_io_range_t *range_b = b;
	if ((range_b->addr + range_b->len) <= range_a->addr) {
		return 1;
	} else if ((range_a->addr + range_a->len) <= range_b->addr) {
		return -1;
	} else {
		return 0;
	}
}
static void
pmap_load_io_rgns(void)
{
	DTEntry          entry;
	pmap_io_range_t *ranges;
	void            *prop = NULL;
	int              err;
	unsigned int     prop_size;

	if (num_io_rgns == 0) {
		return;
	}

	err = DTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	err = DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
	assert(err == kSuccess);

	ranges = prop;
	for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
		io_attr_table[i] = ranges[i];
	}

	qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
}
/*
 * pmap_get_arm64_prot
 *
 * return effective armv8 VMSA block protections including
 * table AP/PXN/XN overrides of a pmap entry
 *
 */
uint64_t
pmap_get_arm64_prot(
	pmap_t pmap,
	vm_offset_t addr)
{
	uint64_t tte;
	uint64_t tt_type, table_ap, table_xn, table_pxn;
	uint64_t prot = 0;

	tte = *pmap_tt1e(pmap, addr);

	if (!(tte & ARM_TTE_VALID)) {
		return 0;
	}

	tt_type = tte & ARM_TTE_TYPE_MASK;

	if (tt_type == ARM_TTE_TYPE_BLOCK) {
		return prot | (tte & ARM_TTE_BLOCK_NX) | (tte & ARM_TTE_BLOCK_PNX) | (tte & ARM_TTE_BLOCK_APMASK) | ARM_TTE_VALID;
	}

	table_ap = (tte >> ARM_TTE_TABLE_APSHIFT) & 0x3;
	table_xn = tte & ARM_TTE_TABLE_XN;
	table_pxn = tte & ARM_TTE_TABLE_PXN;

	prot |= (table_ap << ARM_TTE_BLOCK_APSHIFT) | (table_xn ? ARM_TTE_BLOCK_NX : 0) | (table_pxn ? ARM_TTE_BLOCK_PNX : 0);

	tte = *pmap_tt2e(pmap, addr);
	if (!(tte & ARM_TTE_VALID)) {
		return 0;
	}

	tt_type = tte & ARM_TTE_TYPE_MASK;

	if (tt_type == ARM_TTE_TYPE_BLOCK) {
		return prot | (tte & ARM_TTE_BLOCK_NX) | (tte & ARM_TTE_BLOCK_PNX) | (tte & ARM_TTE_BLOCK_APMASK) | ARM_TTE_VALID;
	}

	table_ap = (tte >> ARM_TTE_TABLE_APSHIFT) & 0x3;
	table_xn = tte & ARM_TTE_TABLE_XN;
	table_pxn = tte & ARM_TTE_TABLE_PXN;

	prot |= (table_ap << ARM_TTE_BLOCK_APSHIFT) | (table_xn ? ARM_TTE_BLOCK_NX : 0) | (table_pxn ? ARM_TTE_BLOCK_PNX : 0);

	tte = *pmap_tt3e(pmap, addr);
	if (!(tte & ARM_TTE_VALID)) {
		return 0;
	}

	return prot | (tte & ARM_TTE_BLOCK_NX) | (tte & ARM_TTE_BLOCK_PNX) | (tte & ARM_TTE_BLOCK_APMASK) | ARM_TTE_VALID;
}
#endif /* __arm64__ */
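/*
 * Descriptive note: pmap_get_arm64_prot() walks L1 -> L2 -> L3 and, at each
 * table level, ORs the hierarchical APTable/XN/PXN overrides into 'prot' in
 * their block-level AP/NX/PNX positions, so the value returned for a block or
 * page entry reflects both the leaf attributes and every table-level override
 * above it.
 */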
/*
 *	Bootstrap the system enough to run with virtual memory.
 *
 *	The early VM initialization code has already allocated
 *	the first CPU's translation table and made entries for
 *	all the one-to-one mappings to be found there.
 *
 *	We must set up the kernel pmap structures, the
 *	physical-to-virtual translation lookup tables for the
 *	physical memory to be managed (between avail_start and
 *	avail_end).
 *
 *	Map the kernel's code and data, and allocate the system page table.
 *	Page_size must already be set.
 *
 *	Parameters:
 *	first_avail	first available physical page -
 *			after kernel page tables
 *	avail_start	PA of first managed physical page
 *	avail_end	PA of last managed physical page
 */
void
pmap_bootstrap(
	vm_offset_t vstart)
{
	pmap_paddr_t    pmap_struct_start;
	vm_size_t       pv_head_size;
	vm_size_t       ptd_root_table_size;
	vm_size_t       pp_attr_table_size;
	vm_size_t       io_attr_table_size;
	unsigned int    npages;
	unsigned int    i;
	vm_map_offset_t maxoffset;

	lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);

#if DEVELOPMENT || DEBUG
	if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
		kprintf("Kernel traces for pmap operations enabled\n");
	}
#endif

	/*
	 *	Initialize the kernel pmap.
	 */
	kernel_pmap->tte = cpu_tte;
	kernel_pmap->ttep = cpu_ttep;
#if (__ARM_VMSA__ > 7)
	kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
#else
	kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
#endif
	kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
	kernel_pmap->ref_count = 1;
	kernel_pmap->gc_status = 0;
	kernel_pmap->nx_enabled = TRUE;
#if defined(__arm64__)
	kernel_pmap->is_64bit = TRUE;
#else
	kernel_pmap->is_64bit = FALSE;
#endif
	kernel_pmap->stamp = hw_atomic_add(&pmap_stamp, 1);

	kernel_pmap->nested_region_grand_addr = 0x0ULL;
	kernel_pmap->nested_region_subord_addr = 0x0ULL;
	kernel_pmap->nested_region_size = 0x0ULL;
	kernel_pmap->nested_region_asid_bitmap = NULL;
	kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;

#if (__ARM_VMSA__ == 7)
	kernel_pmap->tte_index_max = 4 * NTTES;
#else
	kernel_pmap->tte_index_max = (ARM_PGBYTES / sizeof(tt_entry_t));
#endif
	kernel_pmap->prev_tte = (tt_entry_t *) NULL;

	PMAP_LOCK_INIT(kernel_pmap);
#if (__ARM_VMSA__ == 7)
	simple_lock_init(&kernel_pmap->tt1_lock, 0);
	kernel_pmap->cpu_ref = 0;
#endif
	memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));

	/* allocate space for and initialize the bookkeeping structures */
	io_attr_table_size = pmap_compute_io_rgns();
	npages = (unsigned int)atop(mem_size);
	pp_attr_table_size = npages * sizeof(pp_attr_t);
	pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
	// allocate enough initial PTDs to map twice the available physical memory
	ptd_root_table_size = sizeof(pt_desc_t) * (mem_size / ((PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES)) * 2;

	pmap_struct_start = avail_start;

	pp_attr_table = (pp_attr_t *) phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
	io_attr_table = (pmap_io_range_t *) phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + io_attr_table_size, __alignof(pv_entry_t *));
	pv_head_table = (pv_entry_t **) phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
	ptd_root_table = (pt_desc_t *)phystokv(avail_start);
	avail_start = round_page(avail_start + ptd_root_table_size);

	memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);

	pmap_load_io_rgns();
	ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));

	pmap_cpu_data_array_init();

	vm_first_phys = gPhysBase;
	vm_last_phys = trunc_page(avail_end);

	simple_lock_init(&pmaps_lock, 0);
	queue_init(&map_pmap_list);
	queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
	free_page_size_tt_list = TT_FREE_ENTRY_NULL;
	free_page_size_tt_count = 0;
	free_page_size_tt_max = 0;
	free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
	free_two_page_size_tt_count = 0;
	free_two_page_size_tt_max = 0;
	free_tt_list = TT_FREE_ENTRY_NULL;

	simple_lock_init(&pt_pages_lock, 0);
	queue_init(&pt_page_list);

	simple_lock_init(&pmap_pages_lock, 0);
	pmap_pages_request_count = 0;
	pmap_pages_request_acum = 0;
	pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;

	virtual_space_start = vstart;
	virtual_space_end = VM_MAX_KERNEL_ADDRESS;

	/* mark all the address spaces in use */
	for (i = 0; i < MAX_ASID / (sizeof(uint32_t) * NBBY); i++) {
		asid_bitmap[i] = 0xffffffff;
	}

	/*
	 * The kernel gets ASID 0, and all aliases of it.  This is
	 * important because ASID 0 is global; if we vend ASID 0
	 * out to a user pmap, those translations will show up in
	 * other processes through the TLB.
	 */
	for (i = 0; i < MAX_ASID; i += ARM_MAX_ASID) {
		asid_bitmap[i / (sizeof(uint32_t) * NBBY)] &= ~(1 << (i % (sizeof(uint32_t) * NBBY)));

#if __ARM_KERNEL_PROTECT__
		assert((i + 1) < MAX_ASID);
		asid_bitmap[(i + 1) / (sizeof(uint32_t) * NBBY)] &= ~(1 << ((i + 1) % (sizeof(uint32_t) * NBBY)));
#endif /* __ARM_KERNEL_PROTECT__ */
	}

	kernel_pmap->asid = 0;
	kernel_pmap->vasid = 0;

	if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
		maxoffset = trunc_page(maxoffset);
		if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
		    && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
			arm_pmap_max_offset_default = maxoffset;
		}
	}
#if defined(__arm64__)
	if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
		maxoffset = trunc_page(maxoffset);
		if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
		    && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
			arm64_pmap_max_offset_default = maxoffset;
		}
	}
#endif

#if DEVELOPMENT || DEBUG
	PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute, sizeof(panic_on_unsigned_execute));
#endif /* DEVELOPMENT || DEBUG */

	pmap_nesting_size_min = ARM_NESTING_SIZE_MIN;
	pmap_nesting_size_max = ARM_NESTING_SIZE_MAX;

	simple_lock_init(&phys_backup_lock, 0);

#if MACH_ASSERT
	PE_parse_boot_argn("pmap_stats_assert",
	    &pmap_stats_assert,
	    sizeof(pmap_stats_assert));
	PE_parse_boot_argn("vm_footprint_suspend_allowed",
	    &vm_footprint_suspend_allowed,
	    sizeof(vm_footprint_suspend_allowed));
#endif /* MACH_ASSERT */

#if KASAN
	/* Shadow the CPU copy windows, as they fall outside of the physical aperture */
	kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
#endif /* KASAN */
}
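/*
 * Descriptive note on the boot-time carve-out above: pp_attr_table,
 * io_attr_table, pv_head_table and ptd_root_table are allocated back-to-back
 * from avail_start (each aligned with PMAP_ALIGN), and the whole region from
 * pmap_struct_start up to the new avail_start is zeroed with a single memset.
 * With NBBY == 8, each uint32_t of asid_bitmap covers 32 ASIDs; clearing bit
 * (i % 32) of word (i / 32) reserves ASID i (and, under
 * __ARM_KERNEL_PROTECT__, also ASID i + 1) for the kernel.
 */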
void
pmap_virtual_space(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
boolean_t
pmap_virtual_region(
	unsigned int region_select,
	vm_map_offset_t *startp,
	vm_map_size_t *size)
{
	boolean_t ret = FALSE;
#if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
	if (region_select == 0) {
		/*
		 * In this config, the bootstrap mappings should occupy their own L2
		 * TTs, as they should be immutable after boot.  Having the associated
		 * TTEs and PTEs in their own pages allows us to lock down those pages,
		 * while allowing the rest of the kernel address range to be remapped.
		 */
#if (__ARM_VMSA__ > 7)
		*startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
#else
#error Unsupported configuration
#endif
		*size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
		ret = TRUE;
	}
#else
#if (__ARM_VMSA__ > 7)
	unsigned long low_global_vr_mask = 0;
	vm_map_size_t low_global_vr_size = 0;
#endif

	if (region_select == 0) {
#if (__ARM_VMSA__ == 7)
		*startp = gVirtBase & 0xFFC00000;
		*size = ((virtual_space_start - (gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
#else
		/* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
		if (!TEST_PAGE_SIZE_4K) {
			*startp = gVirtBase & 0xFFFFFFFFFE000000;
			*size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
		} else {
			*startp = gVirtBase & 0xFFFFFFFFFF800000;
			*size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
		}
#endif
		ret = TRUE;
	}
	if (region_select == 1) {
		*startp = VREGION1_START;
		*size = VREGION1_SIZE;
		ret = TRUE;
	}
#if (__ARM_VMSA__ > 7)
	/* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
	if (!TEST_PAGE_SIZE_4K) {
		low_global_vr_mask = 0xFFFFFFFFFE000000;
		low_global_vr_size = 0x2000000;
	} else {
		low_global_vr_mask = 0xFFFFFFFFFF800000;
		low_global_vr_size = 0x800000;
	}

	if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
		*startp = LOW_GLOBAL_BASE_ADDRESS;
		*size = low_global_vr_size;
		ret = TRUE;
	}

	if (region_select == 3) {
		/* In this config, we allow the bootstrap mappings to occupy the same
		 * page table pages as the heap.
		 */
		*startp = VM_MIN_KERNEL_ADDRESS;
		*size = LOW_GLOBAL_BASE_ADDRESS - *startp;
		ret = TRUE;
	}
#endif
#endif
	return ret;
}

unsigned int
pmap_free_pages(void)
{
	return (unsigned int)atop(avail_end - first_avail);
}
boolean_t
pmap_next_page_hi(
	ppnum_t *pnum)
{
	return pmap_next_page(pnum);
}

boolean_t
pmap_next_page(
	ppnum_t *pnum)
{
	if (first_avail != avail_end) {
		*pnum = (ppnum_t)atop(first_avail);
		first_avail += PAGE_SIZE;
		return TRUE;
	}
	return FALSE;
}
/*
 *	Initialize the pmap module.
 *
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(void)
{
	/*
	 *	Protect page zero in the kernel map.
	 *	(can be overruled by permanent translation
	 *	table entries at page zero - see arm_vm_init).
	 */
	vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);

	pmap_initialized = TRUE;

	/*
	 *	Initialize the pmap object (for tracking the vm_page_t
	 *	structures for pages we allocate to be page tables in
	 *	pmap_expand()).
	 */
	_vm_object_allocate(mem_size, pmap_object);
	pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 * The value of hard_maxproc may have been scaled, make sure
	 * it is still less than the value of MAX_ASID.
	 */
	assert(hard_maxproc < MAX_ASID);

	pmap_pgtrace_init();
}
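/*
 * Descriptive note: pmap_init() runs once the VM system is up.  It revokes
 * access to page zero in kernel_map, marks the pmap layer initialized, points
 * pmap_object at a VM object sized to cover physical memory (used to track
 * page-table pages), and asserts that hard_maxproc still fits under MAX_ASID
 * so that every process can be given a distinct address-space identifier.
 */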
void
pmap_pv_fixup(__unused vm_offset_t start, __unused vm_size_t length)
{
}

boolean_t
pmap_verify_free(
	ppnum_t ppnum)
{
	pv_entry_t     **pv_h;
	int              pai;
	pmap_paddr_t     phys = ptoa(ppnum);

	assert(phys != vm_page_fictitious_addr);

	if (!pa_valid(phys)) {
		return FALSE;
	}

	pai = (int)pa_index(phys);
	pv_h = pai_to_pvh(pai);

	return pvh_test_type(pv_h, PVH_TYPE_NULL);
}

void
pmap_assert_free(ppnum_t ppnum)
{
	assertf(pmap_verify_free(ppnum), "page = 0x%x", ppnum);
}
/*
 *	Initialize zones used by pmap.
 */
static void
pmap_zone_init(void)
{
	/*
	 *	Create the zone of physical maps
	 *	and the physical-to-virtual entries.
	 */
	pmap_zone = zinit((vm_size_t) sizeof(struct pmap), (vm_size_t) sizeof(struct pmap) * 256,
	    PAGE_SIZE, "pmap");
}
void
pmap_ledger_alloc_init(size_t size)
{
	panic("%s: unsupported, "
	    "size=%lu",
	    __func__, size);
}

ledger_t
pmap_ledger_alloc(void)
{
	ledger_t retval = NULL;

	panic("%s: unsupported",
	    __func__);

	return retval;
}

void
pmap_ledger_free(ledger_t ledger)
{
	panic("%s: unsupported, "
	    "ledger=%p",
	    __func__, ledger);
}
/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
MARK_AS_PMAP_TEXT static pmap_t
pmap_create_internal(
	ledger_t ledger,
	vm_map_size_t size,
	boolean_t is_64bit)
{
	unsigned i;
	pmap_t   p;

	/*
	 *	A software use-only map doesn't even need a pmap.
	 */
	if (size != 0) {
		return PMAP_NULL;
	}

	/*
	 *	Allocate a pmap struct from the pmap_zone.  Then allocate
	 *	the translation table of the right size for the pmap.
	 */
	if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
		return PMAP_NULL;
	}

	if (is_64bit) {
		p->min = MACH_VM_MIN_ADDRESS;
		p->max = MACH_VM_MAX_ADDRESS;
	} else {
		p->min = VM_MIN_ADDRESS;
		p->max = VM_MAX_ADDRESS;
	}

	p->nested_region_true_start = 0;
	p->nested_region_true_end = ~0;

	p->ledger = ledger;
	p->ref_count = 1;
	p->stamp = hw_atomic_add(&pmap_stamp, 1);
	p->nx_enabled = TRUE;
	p->is_64bit = is_64bit;
	p->nested_pmap = PMAP_NULL;

	PMAP_LOCK_INIT(p);
#if (__ARM_VMSA__ == 7)
	simple_lock_init(&p->tt1_lock, 0);
#endif
	memset((void *) &p->stats, 0, sizeof(p->stats));

	p->tt_entry_free = (tt_entry_t *)0;

	p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
	p->ttep = ml_static_vtop((vm_offset_t)p->tte);
	PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);

#if (__ARM_VMSA__ == 7)
	p->tte_index_max = NTTES;
#else
	p->tte_index_max = (PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t));
#endif
	p->prev_tte = (tt_entry_t *) NULL;

	/* nullify the translation table */
	for (i = 0; i < p->tte_index_max; i++) {
		p->tte[i] = ARM_TTE_TYPE_FAULT;
	}

	FLUSH_PTE_RANGE(p->tte, p->tte + p->tte_index_max);

	p->vasid = alloc_asid();
	p->asid = p->vasid % ARM_MAX_ASID;

	/*
	 *  initialize the rest of the structure
	 */
	p->nested_region_grand_addr = 0x0ULL;
	p->nested_region_subord_addr = 0x0ULL;
	p->nested_region_size = 0x0ULL;
	p->nested_region_asid_bitmap = NULL;
	p->nested_region_asid_bitmap_size = 0x0UL;

	p->nested_has_no_bounds_ref = false;
	p->nested_no_bounds_refcnt = 0;
	p->nested_bounds_set = false;

#if MACH_ASSERT
	p->pmap_stats_assert = TRUE;
	strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
#endif /* MACH_ASSERT */
#if DEVELOPMENT || DEBUG
	p->footprint_was_suspended = FALSE;
#endif /* DEVELOPMENT || DEBUG */

	pmap_simple_lock(&pmaps_lock);
	queue_enter(&map_pmap_list, p, pmap_t, pmaps);
	pmap_simple_unlock(&pmaps_lock);

	return p;
}
pmap_t
pmap_create(
	ledger_t ledger,
	vm_map_size_t size,
	boolean_t is_64bit)
{
	pmap_t pmap;

	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, is_64bit);

	ledger_reference(ledger);

	pmap = pmap_create_internal(ledger, size, is_64bit);

	if (pmap == PMAP_NULL) {
		ledger_dereference(ledger);
	}

	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), pmap->vasid, pmap->asid);

	return pmap;
}
#if MACH_ASSERT
MARK_AS_PMAP_TEXT static void
pmap_set_process_internal(
	__unused pmap_t pmap,
	__unused int pid,
	__unused char *procname)
{
#if MACH_ASSERT
	if (pmap == NULL) {
		return;
	}

	VALIDATE_PMAP(pmap);

	pmap->pmap_pid = pid;
	strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
	if (pmap_ledgers_panic_leeway) {
		/*
		 * Some processes somehow trigger some issues that make
		 * the pmap stats and ledgers go off track, causing
		 * some assertion failures and ledger panics.
		 * Turn off the sanity checks if we allow some ledger leeway
		 * because of that.  We'll still do a final check in
		 * pmap_check_ledgers() for discrepancies larger than the
		 * allowed leeway after the address space has been fully
		 * cleaned up.
		 */
		pmap->pmap_stats_assert = FALSE;
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.phys_footprint);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.internal);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.internal_compressed);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.iokit_mapped);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.alternate_accounting);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.alternate_accounting_compressed);
	}
#endif /* MACH_ASSERT */
}
#endif /* MACH_ASSERT*/

void
pmap_set_process(
	pmap_t pmap,
	int pid,
	char *procname)
{
	pmap_set_process_internal(pmap, pid, procname);
}
#if MACH_ASSERT
/*
 * We maintain stats and ledgers so that a task's physical footprint is:
 * phys_footprint = ((internal - alternate_accounting)
 *                   + (internal_compressed - alternate_accounting_compressed)
 *                   + iokit_mapped
 *                   + purgeable_nonvolatile
 *                   + purgeable_nonvolatile_compressed
 *                   + page_table)
 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
 */

struct {
	uint64_t        num_pmaps_checked;

	int             phys_footprint_over;
	ledger_amount_t phys_footprint_over_total;
	ledger_amount_t phys_footprint_over_max;
	int             phys_footprint_under;
	ledger_amount_t phys_footprint_under_total;
	ledger_amount_t phys_footprint_under_max;

	int             internal_over;
	ledger_amount_t internal_over_total;
	ledger_amount_t internal_over_max;
	int             internal_under;
	ledger_amount_t internal_under_total;
	ledger_amount_t internal_under_max;

	int             internal_compressed_over;
	ledger_amount_t internal_compressed_over_total;
	ledger_amount_t internal_compressed_over_max;
	int             internal_compressed_under;
	ledger_amount_t internal_compressed_under_total;
	ledger_amount_t internal_compressed_under_max;

	int             iokit_mapped_over;
	ledger_amount_t iokit_mapped_over_total;
	ledger_amount_t iokit_mapped_over_max;
	int             iokit_mapped_under;
	ledger_amount_t iokit_mapped_under_total;
	ledger_amount_t iokit_mapped_under_max;

	int             alternate_accounting_over;
	ledger_amount_t alternate_accounting_over_total;
	ledger_amount_t alternate_accounting_over_max;
	int             alternate_accounting_under;
	ledger_amount_t alternate_accounting_under_total;
	ledger_amount_t alternate_accounting_under_max;

	int             alternate_accounting_compressed_over;
	ledger_amount_t alternate_accounting_compressed_over_total;
	ledger_amount_t alternate_accounting_compressed_over_max;
	int             alternate_accounting_compressed_under;
	ledger_amount_t alternate_accounting_compressed_under_total;
	ledger_amount_t alternate_accounting_compressed_under_max;

	int             page_table_over;
	ledger_amount_t page_table_over_total;
	ledger_amount_t page_table_over_max;
	int             page_table_under;
	ledger_amount_t page_table_under_total;
	ledger_amount_t page_table_under_max;

	int             purgeable_volatile_over;
	ledger_amount_t purgeable_volatile_over_total;
	ledger_amount_t purgeable_volatile_over_max;
	int             purgeable_volatile_under;
	ledger_amount_t purgeable_volatile_under_total;
	ledger_amount_t purgeable_volatile_under_max;

	int             purgeable_nonvolatile_over;
	ledger_amount_t purgeable_nonvolatile_over_total;
	ledger_amount_t purgeable_nonvolatile_over_max;
	int             purgeable_nonvolatile_under;
	ledger_amount_t purgeable_nonvolatile_under_total;
	ledger_amount_t purgeable_nonvolatile_under_max;

	int             purgeable_volatile_compressed_over;
	ledger_amount_t purgeable_volatile_compressed_over_total;
	ledger_amount_t purgeable_volatile_compressed_over_max;
	int             purgeable_volatile_compressed_under;
	ledger_amount_t purgeable_volatile_compressed_under_total;
	ledger_amount_t purgeable_volatile_compressed_under_max;

	int             purgeable_nonvolatile_compressed_over;
	ledger_amount_t purgeable_nonvolatile_compressed_over_total;
	ledger_amount_t purgeable_nonvolatile_compressed_over_max;
	int             purgeable_nonvolatile_compressed_under;
	ledger_amount_t purgeable_nonvolatile_compressed_under_total;
	ledger_amount_t purgeable_nonvolatile_compressed_under_max;

	int             network_volatile_over;
	ledger_amount_t network_volatile_over_total;
	ledger_amount_t network_volatile_over_max;
	int             network_volatile_under;
	ledger_amount_t network_volatile_under_total;
	ledger_amount_t network_volatile_under_max;

	int             network_nonvolatile_over;
	ledger_amount_t network_nonvolatile_over_total;
	ledger_amount_t network_nonvolatile_over_max;
	int             network_nonvolatile_under;
	ledger_amount_t network_nonvolatile_under_total;
	ledger_amount_t network_nonvolatile_under_max;

	int             network_volatile_compressed_over;
	ledger_amount_t network_volatile_compressed_over_total;
	ledger_amount_t network_volatile_compressed_over_max;
	int             network_volatile_compressed_under;
	ledger_amount_t network_volatile_compressed_under_total;
	ledger_amount_t network_volatile_compressed_under_max;

	int             network_nonvolatile_compressed_over;
	ledger_amount_t network_nonvolatile_compressed_over_total;
	ledger_amount_t network_nonvolatile_compressed_over_max;
	int             network_nonvolatile_compressed_under;
	ledger_amount_t network_nonvolatile_compressed_under_total;
	ledger_amount_t network_nonvolatile_compressed_under_max;
} pmap_ledgers_drift;
#endif /* MACH_ASSERT */
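/*
 * Descriptive note: pmap_ledgers_drift accumulates, for each ledger that feeds
 * phys_footprint, how many pmaps were over or under at teardown time and by
 * how much (running total and worst case), e.g. phys_footprint_over /
 * phys_footprint_over_total / phys_footprint_over_max.  It exists only under
 * MACH_ASSERT and is presumably filled in by pmap_check_ledgers() when a pmap
 * is destroyed.
 */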
/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */
MARK_AS_PMAP_TEXT static void
pmap_destroy_internal(
	pmap_t pmap)
{
	if (pmap == PMAP_NULL) {
		return;
	}

	VALIDATE_PMAP(pmap);

	int32_t ref_count = __c11_atomic_fetch_sub(&pmap->ref_count, 1, memory_order_relaxed) - 1;
	if (ref_count > 0) {
		return;
	} else if (ref_count < 0) {
		panic("pmap %p: refcount underflow", pmap);
	} else if (pmap == kernel_pmap) {
		panic("pmap %p: attempt to destroy kernel pmap", pmap);
	}

#if (__ARM_VMSA__ == 7)
	tt_entry_t     *ttep;
	unsigned int    i;

	pmap_simple_lock(&pmaps_lock);
	while (pmap->gc_status & PMAP_GC_INFLIGHT) {
		pmap->gc_status |= PMAP_GC_WAIT;
		assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
		pmap_simple_unlock(&pmaps_lock);
		(void) thread_block(THREAD_CONTINUE_NULL);
		pmap_simple_lock(&pmaps_lock);
	}
	queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
	pmap_simple_unlock(&pmaps_lock);

	if (pmap->cpu_ref != 0) {
		panic("pmap_destroy(%p): cpu_ref = %u", pmap, pmap->cpu_ref);
	}

	pmap_trim_self(pmap);

	/*
	 *	Free the memory maps, then the
	 *	pmap structure.
	 */
	for (i = 0; i < pmap->tte_index_max; i++) {
		ttep = &pmap->tte[i];
		if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
			pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
		}
	}

	pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
	pmap->tte = (tt_entry_t *) NULL;
	pmap->tte_index_max = 0;

	if (pmap->prev_tte) {
		pmap_tt1_deallocate(pmap, pmap->prev_tte, PMAP_ROOT_ALLOC_SIZE, 0);
		pmap->prev_tte = (tt_entry_t *) NULL;
	}
	assert((tt_free_entry_t *)pmap->tt_entry_free == NULL);

	flush_mmu_tlb_asid(pmap->asid);
	/* return its asid to the pool */
	free_asid(pmap->vasid);
	pmap_check_ledgers(pmap);

	if (pmap->nested_region_asid_bitmap) {
		kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
	}
	zfree(pmap_zone, pmap);
#else /* __ARM_VMSA__ == 7 */
	tt_entry_t      *ttep;
	pmap_paddr_t     pa;
	vm_map_address_t c;

	pmap_unmap_sharedpage(pmap);

	pmap_simple_lock(&pmaps_lock);
	while (pmap->gc_status & PMAP_GC_INFLIGHT) {
		pmap->gc_status |= PMAP_GC_WAIT;
		assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
		pmap_simple_unlock(&pmaps_lock);
		(void) thread_block(THREAD_CONTINUE_NULL);
		pmap_simple_lock(&pmaps_lock);
	}
	queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
	pmap_simple_unlock(&pmaps_lock);

	pmap_trim_self(pmap);

	/*
	 *	Free the memory maps, then the
	 *	pmap structure.
	 */
	for (c = pmap->min; c < pmap->max; c += ARM_TT_L2_SIZE) {
		ttep = pmap_tt2e(pmap, c);
		if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
			pmap_tte_deallocate(pmap, ttep, PMAP_TT_L2_LEVEL);
		}
	}
#if !__ARM64_TWO_LEVEL_PMAP__
	for (c = pmap->min; c < pmap->max; c += ARM_TT_L1_SIZE) {
		ttep = pmap_tt1e(pmap, c);
		if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
			pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
		}
	}
#endif

	pa = pmap->ttep;
	pmap_tt1_deallocate(pmap, (tt_entry_t *)phystokv(pa), PMAP_ROOT_ALLOC_SIZE, 0);

	assert((tt_free_entry_t *)pmap->tt_entry_free == NULL);
	flush_mmu_tlb_asid((uint64_t)(pmap->asid) << TLBI_ASID_SHIFT);
	free_asid(pmap->vasid);

	if (pmap->nested_region_asid_bitmap) {
		kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
	}

	pmap_check_ledgers(pmap);

	zfree(pmap_zone, pmap);

#endif /* __ARM_VMSA__ == 7 */
}

void
pmap_destroy(
	pmap_t pmap)
{
	ledger_t ledger;

	PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), pmap->vasid, pmap->asid);

	ledger = pmap->ledger;

	pmap_destroy_internal(pmap);

	ledger_dereference(ledger);

	PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
}
/*
 *	Add a reference to the specified pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_reference_internal(
	pmap_t pmap)
{
	if (pmap != PMAP_NULL) {
		VALIDATE_PMAP(pmap);
		__c11_atomic_fetch_add(&pmap->ref_count, 1, memory_order_relaxed);
	}
}

void
pmap_reference(
	pmap_t pmap)
{
	pmap_reference_internal(pmap);
}
static tt_entry_t *
pmap_tt1_allocate(
	pmap_t pmap,
	vm_size_t size,
	unsigned option)
{
	tt_entry_t      *tt1;
	tt_free_entry_t *tt1_free;
	pmap_paddr_t     pa;
	vm_address_t     va;
	vm_address_t     va_end;
	kern_return_t    ret;

	pmap_simple_lock(&pmaps_lock);
	if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
		free_page_size_tt_count--;
		tt1 = (tt_entry_t *)free_page_size_tt_list;
		free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
		pmap_simple_unlock(&pmaps_lock);
		pmap_tt_ledger_credit(pmap, size);
		return (tt_entry_t *)tt1;
	}
	if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
		free_two_page_size_tt_count--;
		tt1 = (tt_entry_t *)free_two_page_size_tt_list;
		free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
		pmap_simple_unlock(&pmaps_lock);
		pmap_tt_ledger_credit(pmap, size);
		return (tt_entry_t *)tt1;
	}
	if (free_tt_count != 0) {
		free_tt_count--;
		tt1 = (tt_entry_t *)free_tt_list;
		free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
		pmap_simple_unlock(&pmaps_lock);
		pmap_tt_ledger_credit(pmap, size);
		return (tt_entry_t *)tt1;
	}

	pmap_simple_unlock(&pmaps_lock);

	ret = pmap_pages_alloc(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));

	if (ret == KERN_RESOURCE_SHORTAGE) {
		return (tt_entry_t *)0;
	}

	if (size < PAGE_SIZE) {
		pmap_simple_lock(&pmaps_lock);

		for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + size; va < va_end; va = va + size) {
			tt1_free = (tt_free_entry_t *)va;
			tt1_free->next = free_tt_list;
			free_tt_list = tt1_free;
			free_tt_count++;
		}
		if (free_tt_count > free_tt_max) {
			free_tt_max = free_tt_count;
		}

		pmap_simple_unlock(&pmaps_lock);
	}

	/* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
	 * Depending on the device, this can vary between 512b and 16K. */
	OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
	OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
	pmap_tt_ledger_credit(pmap, size);

	return (tt_entry_t *) phystokv(pa);
}
static void
pmap_tt1_deallocate(
	pmap_t pmap,
	tt_entry_t *tt,
	vm_size_t size,
	unsigned option)
{
	tt_free_entry_t *tt_entry;

	tt_entry = (tt_free_entry_t *)tt;

	pmap_simple_lock(&pmaps_lock);

	if (size < PAGE_SIZE) {
		free_tt_count++;
		if (free_tt_count > free_tt_max) {
			free_tt_max = free_tt_count;
		}
		tt_entry->next = free_tt_list;
		free_tt_list = tt_entry;
	}

	if (size == PAGE_SIZE) {
		free_page_size_tt_count++;
		if (free_page_size_tt_count > free_page_size_tt_max) {
			free_page_size_tt_max = free_page_size_tt_count;
		}
		tt_entry->next = free_page_size_tt_list;
		free_page_size_tt_list = tt_entry;
	}

	if (size == 2 * PAGE_SIZE) {
		free_two_page_size_tt_count++;
		if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
			free_two_page_size_tt_max = free_two_page_size_tt_count;
		}
		tt_entry->next = free_two_page_size_tt_list;
		free_two_page_size_tt_list = tt_entry;
	}

	if ((option & PMAP_TT_DEALLOCATE_NOBLOCK) || (!not_in_kdp)) {
		pmap_simple_unlock(&pmaps_lock);
		pmap_tt_ledger_debit(pmap, size);
		return;
	}

	while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
		free_page_size_tt_count--;
		tt = (tt_entry_t *)free_page_size_tt_list;
		free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;

		pmap_simple_unlock(&pmaps_lock);

		pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);

		OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));

		pmap_simple_lock(&pmaps_lock);
	}

	while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
		free_two_page_size_tt_count--;
		tt = (tt_entry_t *)free_two_page_size_tt_list;
		free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;

		pmap_simple_unlock(&pmaps_lock);

		pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);

		OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));

		pmap_simple_lock(&pmaps_lock);
	}

	pmap_simple_unlock(&pmaps_lock);
	pmap_tt_ledger_debit(pmap, size);
}
static kern_return_t
pmap_tt_allocate(
	pmap_t pmap,
	tt_entry_t **ttp,
	unsigned int level,
	unsigned int options)
{
	pmap_paddr_t pa;

	*ttp = NULL;

	if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
		tt_free_entry_t *tt_free_next;

		tt_free_next = ((tt_free_entry_t *)pmap->tt_entry_free)->next;
		*ttp = (tt_entry_t *)pmap->tt_entry_free;
		pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
	}

	if (*ttp == NULL) {
		pt_desc_t *ptdp;

		/*
		 *  Allocate a VM page for the level x page table entries.
		 */
		while (pmap_pages_alloc(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
			if (options & PMAP_OPTIONS_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}
			VM_PAGE_WAIT();
		}

		while ((ptdp = ptd_alloc(pmap, false)) == NULL) {
			if (options & PMAP_OPTIONS_NOWAIT) {
				pmap_pages_free(pa, PAGE_SIZE);
				return KERN_RESOURCE_SHORTAGE;
			}
			VM_PAGE_WAIT();
		}

		if (level < PMAP_TT_MAX_LEVEL) {
			OSAddAtomic64(1, &alloc_ttepages_count);
			OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
		} else {
			OSAddAtomic64(1, &alloc_ptepages_count);
			OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
		}

		pmap_tt_ledger_credit(pmap, PAGE_SIZE);

		PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);

		pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);

		__unreachable_ok_push
		if (TEST_PAGE_RATIO_4) {
			vm_address_t va;
			vm_address_t va_end;

			for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + ARM_PGBYTES; va < va_end; va = va + ARM_PGBYTES) {
				((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
				pmap->tt_entry_free = (tt_entry_t *)va;
			}
		}
		__unreachable_ok_pop

		*ttp = (tt_entry_t *)phystokv(pa);
	}

	return KERN_SUCCESS;
}
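/*
 * Descriptive note: pmap_tt_allocate() first tries to pop a table from the
 * pmap's private tt_entry_free list; otherwise it takes a full VM page,
 * attaches a pt_desc_t to it via pvh_update_head_unlocked(), and, when
 * TEST_PAGE_RATIO_4 (four ARM_PGBYTES-sized hardware tables per VM page),
 * strings the remaining chunks of the page back onto tt_entry_free for later
 * allocations.
 */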
static void
pmap_tt_deallocate(
	pmap_t pmap,
	tt_entry_t *ttp,
	unsigned int level)
{
	pt_desc_t  *ptdp;
	unsigned    pt_acc_cnt;
	unsigned    i, max_pt_index = PAGE_RATIO;
	vm_offset_t free_page = 0;

	ptdp = ptep_get_ptd((vm_offset_t)ttp);

	ptdp->pt_map[ARM_PT_DESC_INDEX(ttp)].va = (vm_offset_t)-1;

	if ((level < PMAP_TT_MAX_LEVEL) && (ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt == PT_DESC_REFCOUNT)) {
		ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
	}

	if (ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt != 0) {
		panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt);
	}

	ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;

	for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
		pt_acc_cnt += ptdp->pt_cnt[i].refcnt;
	}

	if (pt_acc_cnt == 0) {
		tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
		unsigned pt_free_entry_cnt = 1;

		while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
			tt_free_entry_t *tt_free_list_next;

			tt_free_list_next = tt_free_list->next;
			if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
				pt_free_entry_cnt++;
			}
			tt_free_list = tt_free_list_next;
		}
		if (pt_free_entry_cnt == max_pt_index) {
			tt_free_entry_t *tt_free_list_cur;

			free_page = (vm_offset_t)ttp & ~PAGE_MASK;
			tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
			tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;

			while (tt_free_list_cur) {
				tt_free_entry_t *tt_free_list_next;

				tt_free_list_next = tt_free_list_cur->next;
				if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
					tt_free_list->next = tt_free_list_next->next;
				} else {
					tt_free_list = tt_free_list_next;
				}

				tt_free_list_cur = tt_free_list_next;
			}
		} else {
			((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
			pmap->tt_entry_free = ttp;
		}
	} else {
		((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
		pmap->tt_entry_free = ttp;
	}

	if (free_page != 0) {
		ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
		*(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
		pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
		if (level < PMAP_TT_MAX_LEVEL) {
			OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
		} else {
			OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
		}
		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
		pmap_tt_ledger_debit(pmap, PAGE_SIZE);
	}
}
static void
pmap_tte_remove(
	pmap_t pmap,
	tt_entry_t *ttep,
	unsigned int level)
{
	tt_entry_t tte = *ttep;

	if (tte == 0) {
		panic("pmap_tte_deallocate(): null tt_entry ttep==%p\n", ttep);
	}

	if (((level + 1) == PMAP_TT_MAX_LEVEL) && (tte_get_ptd(tte)->pt_cnt[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt != 0)) {
		panic("pmap_tte_deallocate(): pmap=%p ttep=%p ptd=%p refcnt=0x%x \n", pmap, ttep,
		    tte_get_ptd(tte), (tte_get_ptd(tte)->pt_cnt[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt));
	}

#if (__ARM_VMSA__ == 7)
	{
		tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
		unsigned i;

		for (i = 0; i < 4; i++, ttep_4M++) {
			*ttep_4M = (tt_entry_t) 0;
		}
		FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
	}
#else
	*ttep = (tt_entry_t) 0;
	FLUSH_PTE_STRONG(ttep);
#endif /* (__ARM_VMSA__ == 7) */
}
static void
pmap_tte_deallocate(
	pmap_t pmap,
	tt_entry_t *ttep,
	unsigned int level)
{
	pmap_paddr_t pa;
	tt_entry_t   tte;

	PMAP_ASSERT_LOCKED(pmap);

	tte = *ttep;

	if (tte_get_ptd(tte)->pmap != pmap) {
		panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
		    tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
	}

	pmap_tte_remove(pmap, ttep, level);

	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~ARM_PGMASK));
		unsigned    i;

		for (i = 0; i < (ARM_PGBYTES / sizeof(*pte_p)); i++, pte_p++) {
			if (ARM_PTE_IS_COMPRESSED(*pte_p)) {
				panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
				    (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
			} else if (((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT) {
				panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
				    (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
			}
		}

		/* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
		 * aligned on 1K boundaries.  We clear the surrounding "chunk" of 4 TTEs above. */
		pa = tte_to_pa(tte) & ~ARM_PGMASK;
		pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
	}
}
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 *
 *	Returns the number of PTE changed, and sets *rmv_cnt
 *	to the number of SPTE changed.
 */
static int
pmap_remove_range(
	pmap_t pmap,
	vm_map_address_t va,
	pt_entry_t *bpte,
	pt_entry_t *epte,
	uint32_t *rmv_cnt)
{
	return pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
	    PMAP_OPTIONS_REMOVE);
}
#ifdef PVH_FLAG_EXEC

/*
 *	Update the access protection bits of the physical aperture mapping for a page.
 *	This is useful, for example, in guaranteeing that a verified executable page
 *	has no writable mappings anywhere in the system, including the physical
 *	aperture.  flush_tlb_async can be set to true to avoid unnecessary TLB
 *	synchronization overhead in cases where the call to this function is
 *	guaranteed to be followed by other TLB operations.
 */
static void
pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
{
#if __ARM_PTE_PHYSMAP__
	ASSERT_PVH_LOCKED(pai);
	vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
	pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);

	pt_entry_t tmplate = *pte_p;
	if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
		return;
	}
	tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
#if (__ARM_VMSA__ > 7)
	if (tmplate & ARM_PTE_HINT_MASK) {
		panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
		    __func__, pte_p, (void *)kva, tmplate);
	}
#endif
	WRITE_PTE_STRONG(pte_p, tmplate);
	flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
	if (!flush_tlb_async) {
		sync_tlb_flush();
	}
#endif
}

#endif /* defined(PVH_FLAG_EXEC) */
static void
pmap_remove_pv(
	pmap_t pmap,
	pt_entry_t *cpte,
	int pai,
	int *num_internal,
	int *num_alt_internal,
	int *num_reusable,
	int *num_external)
{
	pv_entry_t **pv_h, **pve_pp;
	pv_entry_t  *pve_p;

	ASSERT_PVH_LOCKED(pai);
	pv_h = pai_to_pvh(pai);
	vm_offset_t pvh_flags = pvh_get_flags(pv_h);

	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0)) {
			panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
		}
		if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
			assert(IS_INTERNAL_PAGE(pai));
			(*num_internal)++;
			(*num_alt_internal)++;
			CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
		} else if (IS_INTERNAL_PAGE(pai)) {
			if (IS_REUSABLE_PAGE(pai)) {
				(*num_reusable)++;
			} else {
				(*num_internal)++;
			}
		} else {
			(*num_external)++;
		}
		pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_pp = pv_h;
		pve_p = pvh_list(pv_h);

		while (pve_p != PV_ENTRY_NULL &&
		    (pve_get_ptep(pve_p) != cpte)) {
			pve_pp = pve_link_field(pve_p);
			pve_p = PVE_NEXT_PTR(pve_next(pve_p));
		}

		if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0)) {
			panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
		}

#if MACH_ASSERT
		if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
			pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
			while (check_pve_p != PV_ENTRY_NULL) {
				if (pve_get_ptep(check_pve_p) == cpte) {
					panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
					    __func__, cpte, pmap, pv_h, pve_p, pai);
				}
				check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
			}
		}
#endif

		if (IS_ALTACCT_PAGE(pai, pve_p)) {
			assert(IS_INTERNAL_PAGE(pai));
			(*num_internal)++;
			(*num_alt_internal)++;
			CLR_ALTACCT_PAGE(pai, pve_p);
		} else if (IS_INTERNAL_PAGE(pai)) {
			if (IS_REUSABLE_PAGE(pai)) {
				(*num_reusable)++;
			} else {
				(*num_internal)++;
			}
		} else {
			(*num_external)++;
		}

		pvh_remove(pv_h, pve_pp, pve_p);
		pv_free(pve_p);
		if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
			pvh_set_flags(pv_h, pvh_flags);
		}
	} else {
		panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
		    __func__, *pv_h, cpte, pmap, pv_h, pai);
	}

#ifdef PVH_FLAG_EXEC
	if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
		pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
	}
#endif
}
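/*
 * Descriptive note: the PV head for a physical page is either a single PTE
 * pointer (PVH_TYPE_PTEP) or a chain of pv_entry_t (PVH_TYPE_PVEP).
 * pmap_remove_pv() handles both shapes, bumps the caller's internal /
 * reusable / external / alternate-accounting counters accordingly, and, once
 * the last mapping of a formerly executable page is gone, restores that
 * page's physical-aperture mapping to kernel-writable (AP_RWNA).
 */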
static int
pmap_remove_range_options(
	pmap_t pmap,
	vm_map_address_t va,
	pt_entry_t *bpte,
	pt_entry_t *epte,
	uint32_t *rmv_cnt,
	int options)
{
	pt_entry_t     *cpte;
	pmap_paddr_t    pa;
	int             pai = 0;
	int             num_removed, num_unwired;
	int             num_pte_changed;
	int             num_external, num_internal, num_reusable;
	int             num_alt_internal;
	uint64_t        num_compressed, num_alt_compressed;

	PMAP_ASSERT_LOCKED(pmap);

	num_removed = 0;
	num_unwired = 0;
	num_pte_changed = 0;
	num_external = 0;
	num_internal = 0;
	num_reusable = 0;
	num_compressed = 0;
	num_alt_internal = 0;
	num_alt_compressed = 0;

	for (cpte = bpte; cpte < epte;
	    cpte += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
		pt_entry_t      spte;
		boolean_t       managed = FALSE;

		spte = *cpte;

		if (pgtrace_enabled) {
			pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
		}

		while (!managed) {
			if (pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_REMOVE) &&
			    (ARM_PTE_IS_COMPRESSED(spte))) {
				/*
				 * "pmap" must be locked at this point,
				 * so this should not race with another
				 * pmap_remove_range() or pmap_enter().
				 */

				/* one less "compressed"... */
				num_compressed++;
				if (spte & ARM_PTE_COMPRESSED_ALT) {
					/* ... but it used to be "ALTACCT" */
					num_alt_compressed++;
				}

				/* clear marker(s) */
				WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
				num_pte_changed++;
				/*
				 * "refcnt" also accounts for
				 * our "compressed" markers,
				 * so let's update it here.
				 */
				if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->pt_cnt[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
					panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
				}
				spte = *cpte;
			}
			/*
			 * It may be possible for the pte to transition from managed
			 * to unmanaged in this timeframe; for now, elide the assert.
			 * We should break out as a consequence of checking pa_valid.
			 */
			//assert(!ARM_PTE_IS_COMPRESSED(spte));
			pa = pte_to_pa(spte);
			if (!pa_valid(pa)) {
				break;
			}
			pai = (int)pa_index(pa);
			LOCK_PVH(pai);
			spte = *cpte;
			pa = pte_to_pa(spte);
			if (pai == (int)pa_index(pa)) {
				managed = TRUE;
				break; // Leave pai locked as we will unlock it after we free the PV entry
			}
			UNLOCK_PVH(pai);
		}

		if (ARM_PTE_IS_COMPRESSED(*cpte)) {
			/*
			 * There used to be a valid mapping here but it
			 * has already been removed when the page was
			 * sent to the VM compressor, so nothing left to
			 * remove now...
			 */
			continue;
		}

		/* remove the translation, do not flush the TLB */
		if (*cpte != ARM_PTE_TYPE_FAULT) {
			assert(!ARM_PTE_IS_COMPRESSED(*cpte));
			if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
				panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
				    cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
			}
			WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
			num_pte_changed++;
		}

		if ((spte != ARM_PTE_TYPE_FAULT) &&
		    (pmap != kernel_pmap)) {
			assert(!ARM_PTE_IS_COMPRESSED(spte));
			if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->pt_cnt[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
				panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
			}
			if (rmv_cnt) {
				(*rmv_cnt)++;
			}
		}

		if (pte_is_wired(spte)) {
			pte_set_wired(cpte, 0);
			num_unwired++;
		}
		/*
		 * if not managed, we're done
		 */
		if (!managed) {
			continue;
		}
		/*
		 * find and remove the mapping from the chain for this
		 * physical address.
		 */
		pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);

		num_removed++;
		UNLOCK_PVH(pai);
	}

	OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
	pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));

	if (pmap != kernel_pmap) {
		/* sanity checks... */
#if MACH_ASSERT
		if (pmap->stats.internal < num_internal) {
			if ((!pmap_stats_assert ||
			    !pmap->pmap_stats_assert)) {
				printf("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
				    pmap->pmap_pid,
				    pmap->pmap_procname,
				    pmap, (uint64_t)va, bpte, epte, options,
				    num_internal, num_removed, num_unwired,
				    num_external, num_reusable, num_compressed,
				    num_alt_internal, num_alt_compressed,
				    num_pte_changed,
				    pmap->stats.internal,
				    pmap->stats.reusable);
			} else {
				panic("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
				    pmap->pmap_pid,
				    pmap->pmap_procname,
				    pmap, (uint64_t)va, bpte, epte, options,
				    num_internal, num_removed, num_unwired,
				    num_external, num_reusable, num_compressed,
				    num_alt_internal, num_alt_compressed,
				    num_pte_changed,
				    pmap->stats.internal,
				    pmap->stats.reusable);
			}
		}
#endif /* MACH_ASSERT */
		PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
		    pmap,
		    "pmap=%p num_external=%d stats.external=%d",
		    pmap, num_external, pmap->stats.external);
		PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
		    pmap,
		    "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
		    pmap,
		    num_internal, pmap->stats.internal,
		    num_reusable, pmap->stats.reusable);
		PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
		    pmap,
		    "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
		    pmap,
		    num_internal, pmap->stats.internal,
		    num_reusable, pmap->stats.reusable);
		PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
		    pmap,
		    "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
		    pmap, num_compressed, num_alt_compressed,
		    pmap->stats.compressed);

		/* update pmap stats... */
		OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
		if (num_external) {
			OSAddAtomic(-num_external, &pmap->stats.external);
		}
		if (num_internal) {
			OSAddAtomic(-num_internal, &pmap->stats.internal);
		}
		if (num_reusable) {
			OSAddAtomic(-num_reusable, &pmap->stats.reusable);
		}
		if (num_compressed) {
			OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
		}
		/* ... and ledgers */
		pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
		pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
		pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
		pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
		pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
		/* make needed adjustments to phys_footprint */
		pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
		    machine_ptob((num_internal -
		    num_alt_internal) +
		    (num_compressed -
		    num_alt_compressed)));
	}

	/* flush the ptable entries we have written */
	if (num_pte_changed > 0) {
		FLUSH_PTE_RANGE_STRONG(bpte, epte);
	}

	return num_pte_changed;
}
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
void
pmap_remove(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end)
{
	pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
}
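
/*
 *	Routine:	pmap_remove_options_internal
 *	Function:
 *		Remove the mappings for one leaf page table's worth of the
 *		range [start, end) from "pmap", deallocating the page table
 *		itself when its refcount drops to zero and the pmap is
 *		neither nested nor the kernel pmap.
 */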
MARK_AS_PMAP_TEXT static int
pmap_remove_options_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options)
{
	int             remove_count = 0;
	pt_entry_t     *bpte, *epte;
	pt_entry_t     *pte_p;
	tt_entry_t     *tte_p;
	uint32_t        rmv_spte = 0;

	if (__improbable(end < start)) {
		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
	}

	VALIDATE_PMAP(pmap);

	PMAP_LOCK(pmap);

	tte_p = pmap_tte(pmap, start);

	if (tte_p == (tt_entry_t *) NULL) {
		goto done;
	}

	if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		pte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte = &pte_p[ptenum(start)];
		epte = bpte + ((end - start) >> ARM_TT_LEAF_SHIFT);

		remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
		    &rmv_spte, options);

#if (__ARM_VMSA__ == 7)
		if (rmv_spte && (ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
		    (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
			pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L1_LEVEL);
			flush_mmu_tlb_entry((start & ~ARM_TT_L1_OFFMASK) | (pmap->asid & 0xff));
		}
#else
		if (rmv_spte && (ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
		    (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
			pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L2_LEVEL);
			flush_mmu_tlb_entry(tlbi_addr(start & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
		}
#endif
	}

done:
	PMAP_UNLOCK(pmap);
	return remove_count;
}
void
pmap_remove_options(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options)
{
	int             remove_count = 0;
	vm_map_address_t va;

	if (pmap == PMAP_NULL) {
		return;
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
	    VM_KERNEL_ADDRHIDE(end));

	if ((start | end) & PAGE_MASK) {
		panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
		    pmap, (uint64_t)start, (uint64_t)end);
	}
	if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
		panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
		    pmap, (uint64_t)start, (uint64_t)end);
	}

	/*
	 *      Invalidate the translation buffer first
	 */
	va = start;
	while (va < end) {
		vm_map_address_t l;

#if (__ARM_VMSA__ == 7)
		l = ((va + ARM_TT_L1_SIZE) & ~ARM_TT_L1_OFFMASK);
#else
		l = ((va + ARM_TT_L2_SIZE) & ~ARM_TT_L2_OFFMASK);
#endif
		if (l > end) {
			l = end;
		}

		remove_count += pmap_remove_options_internal(pmap, va, l, options);

		va = l;
	}

	if (remove_count > 0) {
		PMAP_UPDATE_TLBS(pmap, start, end);
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
}
/*
 *	Remove phys addr if mapped in specified map
 */
void
pmap_remove_some_phys(
	__unused pmap_t map,
	__unused ppnum_t pn)
{
	/* Implement to support working set code */
}
void
pmap_set_pmap(
	pmap_t pmap,
#if !__ARM_USER_PROTECT__
	__unused
#endif
	thread_t thread)
{
	pmap_switch(pmap);
#if __ARM_USER_PROTECT__
	if (pmap->tte_index_max == NTTES) {
		thread->machine.uptw_ttc = 2;
		thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
	} else {
		thread->machine.uptw_ttc = 1;
		thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
	}
	thread->machine.asid = pmap->asid;
#endif
}
void
pmap_flush_core_tlb_asid(pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	flush_core_tlb_asid(pmap->asid);
#else
	flush_core_tlb_asid(((uint64_t) pmap->asid) << TLBI_ASID_SHIFT);
#endif
}
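
/*
 *	Routine:	pmap_switch_internal
 *	Function:
 *		Switch the current CPU's user translation table to the given
 *		pmap.  The per-CPU table of "virtual" ASID high bits is
 *		consulted to detect possible aliasing of the hardware ASID;
 *		when aliasing is possible, the core TLB entries for that
 *		ASID are flushed.
 */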
MARK_AS_PMAP_TEXT static void
pmap_switch_internal(
	pmap_t pmap)
{
	VALIDATE_PMAP(pmap);
	pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
	uint32_t        last_asid_high_bits, asid_high_bits;
	boolean_t       do_asid_flush = FALSE;

#if (__ARM_VMSA__ == 7)
	pmap_simple_lock(&pmap->tt1_lock);
#else
	pmap_t          last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
#endif

	assert(pmap->asid < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));

	/* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
	asid_high_bits = pmap->vasid >> ARM_ASID_SHIFT;
	last_asid_high_bits = (uint32_t) cpu_data_ptr->cpu_asid_high_bits[pmap->asid];

	if (asid_high_bits != last_asid_high_bits) {
		/*
		 * If the virtual ASID of the new pmap does not match the virtual ASID
		 * last seen on this CPU for the physical ASID (that was a mouthful),
		 * then this switch runs the risk of aliasing.  We need to flush the
		 * TLB for this physical ASID in this case.
		 */
		cpu_data_ptr->cpu_asid_high_bits[pmap->asid] = (uint8_t) asid_high_bits;
		do_asid_flush = TRUE;
	}

	pmap_switch_user_ttb_internal(pmap);

#if (__ARM_VMSA__ > 7)
	/* If we're switching to a different nested pmap (i.e. shared region), we'll need
	 * to flush the userspace mappings for that region.  Those mappings are global
	 * and will not be protected by the ASID.  It should also be cheaper to flush the
	 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
	if ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap)) {
		flush_core_tlb();
	} else
#endif
	if (do_asid_flush) {
		pmap_flush_core_tlb_asid(pmap);
	}

#if (__ARM_VMSA__ == 7)
	pmap_simple_unlock(&pmap->tt1_lock);
#endif
}

void
pmap_switch(
	pmap_t pmap)
{
	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), pmap->vasid, pmap->asid);
	pmap_switch_internal(pmap);
	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
}
void
pmap_page_protect(
	ppnum_t ppnum,
	vm_prot_t prot)
{
	pmap_page_protect_options(ppnum, prot, 0, NULL);
}
/*
 *	Routine:	pmap_page_protect_options
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
MARK_AS_PMAP_TEXT static void
pmap_page_protect_options_internal(
	ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options)
{
	pmap_paddr_t    phys = ptoa(ppnum);
	pv_entry_t    **pv_h;
	pv_entry_t    **pve_pp;
	pv_entry_t     *pve_p;
	pv_entry_t     *pveh_p;
	pv_entry_t     *pvet_p;
	pt_entry_t     *pte_p;
	pv_entry_t     *new_pve_p;
	pt_entry_t     *new_pte_p;
	vm_offset_t     pvh_flags;
	int             pai;
	boolean_t       remove;
	boolean_t       set_NX;
	boolean_t       tlb_flush_needed = FALSE;
	unsigned int    pvh_cnt = 0;

	assert(ppnum != vm_page_fictitious_addr);

	/* Only work with managed pages. */
	if (!pa_valid(phys)) {
		return;
	}

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_ALL:
		return;		/* nothing to do */
	case VM_PROT_READ:
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	default:
		remove = TRUE;
		break;
	}

	pai = (int)pa_index(phys);
	LOCK_PVH(pai);
	pv_h = pai_to_pvh(pai);
	pvh_flags = pvh_get_flags(pv_h);

	pte_p = PT_ENTRY_NULL;
	pve_p = PV_ENTRY_NULL;
	pve_pp = pv_h;
	pveh_p = PV_ENTRY_NULL;
	pvet_p = PV_ENTRY_NULL;
	new_pve_p = PV_ENTRY_NULL;
	new_pte_p = PT_ENTRY_NULL;
	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		pte_p = pvh_ptep(pv_h);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_p = pvh_list(pv_h);
		pveh_p = pve_p;
	}

	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		vm_map_address_t va;
		pmap_t          pmap;
		pt_entry_t      tmplate;
		boolean_t       update = FALSE;

		if (pve_p != PV_ENTRY_NULL) {
			pte_p = pve_get_ptep(pve_p);
		}

#ifdef PVH_FLAG_IOMMU
		if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
			if (options & PMAP_OPTIONS_COMPRESSOR) {
				panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
				    ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
			}
			if (pve_p != PV_ENTRY_NULL) {
				pv_entry_t *temp_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
				pvh_remove(pv_h, pve_pp, pve_p);
				pveh_p = pvh_list(pv_h);
				pve_next(pve_p) = new_pve_p;
				new_pve_p = pve_p;
				pve_p = temp_pve_p;
			} else {
				new_pte_p = pte_p;
				pte_p = PT_ENTRY_NULL;
			}
			goto protect_skip_pve;
		}
#endif
		pmap = ptep_get_pmap(pte_p);
		va = ptep_get_va(pte_p);

		if (pte_p == PT_ENTRY_NULL) {
			panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
			    pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
		} else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
			if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
				pv_entry_t *check_pve_p = pveh_p;
				while (check_pve_p != PV_ENTRY_NULL) {
					if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
						panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
						    pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
					}
					check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
				}
			}
			panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
			    pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
		}
#if DEVELOPMENT || DEBUG
		if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
		if ((prot & VM_PROT_EXECUTE))
#endif
		{ set_NX = FALSE;} else {
			set_NX = TRUE;
		}

		/* Remove the mapping if new protection is NONE */
		if (remove) {
			boolean_t is_altacct = FALSE;

			if (IS_ALTACCT_PAGE(pai, pve_p)) {
				is_altacct = TRUE;
			} else {
				is_altacct = FALSE;
			}

			if (pte_is_wired(*pte_p)) {
				pte_set_wired(pte_p, 0);
				if (pmap != kernel_pmap) {
					pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
					OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
				}
			}

			if (*pte_p != ARM_PTE_TYPE_FAULT &&
			    pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_COMPRESSOR) &&
			    IS_INTERNAL_PAGE(pai)) {
				assert(!ARM_PTE_IS_COMPRESSED(*pte_p));
				/* mark this PTE as having been "compressed" */
				tmplate = ARM_PTE_COMPRESSED;
				if (is_altacct) {
					tmplate |= ARM_PTE_COMPRESSED_ALT;
				}
			} else {
				tmplate = ARM_PTE_TYPE_FAULT;
			}

			if ((*pte_p != ARM_PTE_TYPE_FAULT) &&
			    tmplate == ARM_PTE_TYPE_FAULT &&
			    (pmap != kernel_pmap)) {
				if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt)) <= 0) {
					panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
				}
			}

			if (*pte_p != tmplate) {
				WRITE_PTE_STRONG(pte_p, tmplate);
				update = TRUE;
			}
			pvh_cnt++;
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);

			/*
			 * We only ever compress internal pages.
			 */
			if (options & PMAP_OPTIONS_COMPRESSOR) {
				assert(IS_INTERNAL_PAGE(pai));
			}

			if (pmap != kernel_pmap) {
				if (IS_REUSABLE_PAGE(pai) &&
				    IS_INTERNAL_PAGE(pai) &&
				    !is_altacct) {
					PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
					OSAddAtomic(-1, &pmap->stats.reusable);
				} else if (IS_INTERNAL_PAGE(pai)) {
					PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
					OSAddAtomic(-1, &pmap->stats.internal);
				} else {
					PMAP_STATS_ASSERTF(pmap->stats.external > 0, pmap, "stats.external %d", pmap->stats.external);
					OSAddAtomic(-1, &pmap->stats.external);
				}
				if ((options & PMAP_OPTIONS_COMPRESSOR) &&
				    IS_INTERNAL_PAGE(pai)) {
					/* adjust "compressed" stats */
					OSAddAtomic64(+1, &pmap->stats.compressed);
					PMAP_STATS_PEAK(pmap->stats.compressed);
					pmap->stats.compressed_lifetime++;
				}

				if (IS_ALTACCT_PAGE(pai, pve_p)) {
					assert(IS_INTERNAL_PAGE(pai));
					pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
					pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
						pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
					}

					/*
					 * Cleanup our marker before
					 * we free this pv_entry.
					 */
					CLR_ALTACCT_PAGE(pai, pve_p);
				} else if (IS_REUSABLE_PAGE(pai)) {
					assert(IS_INTERNAL_PAGE(pai));
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
						/* was not in footprint, but is now */
						pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
					}
				} else if (IS_INTERNAL_PAGE(pai)) {
					pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);

					/*
					 * Update all stats related to physical footprint, which only
					 * deals with internal pages.
					 */
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						/*
						 * This removal is only being done so we can send this page to
						 * the compressor; therefore it mustn't affect total task footprint.
						 */
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
					} else {
						/*
						 * This internal page isn't going to the compressor, so adjust stats to keep
						 * phys_footprint up to date.
						 */
						pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
					}
				} else {
					/* external page: no impact on ledgers */
				}
			}

			if (pve_p != PV_ENTRY_NULL) {
				assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
			}
		} else {
			pt_entry_t      spte;

			spte = *pte_p;

			if (pmap == kernel_pmap) {
				tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
			} else {
				tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));
			}

			pte_set_was_writeable(tmplate, false);

#if (__ARM_VMSA__ == 7)
			if (set_NX) {
				tmplate |= ARM_PTE_NX;
			} else {
				/*
				 * While the naive implementation of this would serve to add execute
				 * permission, this is not how the VM uses this interface, or how
				 * x86_64 implements it.  So ignore requests to add execute permissions.
				 */
#if 0
				tmplate &= ~ARM_PTE_NX;
#endif
			}
#else
			if (set_NX) {
				tmplate |= ARM_PTE_NX | ARM_PTE_PNX;
			} else {
				/*
				 * While the naive implementation of this would serve to add execute
				 * permission, this is not how the VM uses this interface, or how
				 * x86_64 implements it.  So ignore requests to add execute permissions.
				 */
#if 0
				if (pmap == kernel_pmap) {
					tmplate &= ~ARM_PTE_PNX;
					tmplate |= ARM_PTE_NX;
				} else {
					tmplate &= ~ARM_PTE_NX;
					tmplate |= ARM_PTE_PNX;
				}
#endif
			}
#endif

			if (*pte_p != ARM_PTE_TYPE_FAULT &&
			    !ARM_PTE_IS_COMPRESSED(*pte_p) &&
			    *pte_p != tmplate) {
				WRITE_PTE_STRONG(pte_p, tmplate);
				update = TRUE;
			}
		}

		/* Invalidate TLBs for all CPUs using it */
		if (update) {
			tlb_flush_needed = TRUE;
			flush_mmu_tlb_region_asid_async(va, PAGE_SIZE, pmap);
		}

#ifdef PVH_FLAG_IOMMU
protect_skip_pve:
#endif
		pte_p = PT_ENTRY_NULL;
		pvet_p = pve_p;
		if (pve_p != PV_ENTRY_NULL) {
			if (remove) {
				assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
			}
			pve_pp = pve_link_field(pve_p);
			pve_p = PVE_NEXT_PTR(pve_next(pve_p));
		}
	}

#ifdef PVH_FLAG_EXEC
	if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
		pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
	}
#endif
	if (tlb_flush_needed) {
		sync_tlb_flush();
	}

	/* if we removed a bunch of entries, take care of them now */
	if (remove) {
		if (new_pve_p != PV_ENTRY_NULL) {
			pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
			pvh_set_flags(pv_h, pvh_flags);
		} else if (new_pte_p != PT_ENTRY_NULL) {
			pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
			pvh_set_flags(pv_h, pvh_flags);
		} else {
			pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
		}
	}

	UNLOCK_PVH(pai);

	if (remove && (pvet_p != PV_ENTRY_NULL)) {
		pv_list_free(pveh_p, pvet_p, pvh_cnt);
	}
}
void
pmap_page_protect_options(
	ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options,
	__unused void *arg)
{
	pmap_paddr_t    phys = ptoa(ppnum);

	assert(ppnum != vm_page_fictitious_addr);

	/* Only work with managed pages. */
	if (!pa_valid(phys)) {
		return;
	}

	/*
	 * Determine the new protection.
	 */
	if (prot == VM_PROT_ALL) {
		return;		/* nothing to do */
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);

	pmap_page_protect_options_internal(ppnum, prot, options);

	PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
}
/*
 * Indicates if the pmap layer enforces some additional restrictions on the
 * given set of protections.
 */
bool
pmap_has_prot_policy(__unused vm_prot_t prot)
{
	return FALSE;
}
/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *	VERY IMPORTANT: Will not increase permissions.
 *	VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
 */
void
pmap_protect(
	pmap_t pmap,
	vm_map_address_t b,
	vm_map_address_t e,
	vm_prot_t prot)
{
	pmap_protect_options(pmap, b, e, prot, 0, NULL);
}
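
/*
 *	Routine:	pmap_protect_options_internal
 *	Function:
 *		Lower the protection of the PTEs covering [start, end)
 *		within a single leaf page table.  Requests that would remove
 *		all access are expected to go through pmap_remove_options()
 *		instead, and requests that would add execute permission are
 *		ignored.
 */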
MARK_AS_PMAP_TEXT static void
pmap_protect_options_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	vm_prot_t prot,
	unsigned int options,
	__unused void *args)
{
	tt_entry_t     *tte_p;
	pt_entry_t     *bpte_p, *epte_p;
	pt_entry_t     *pte_p;
	boolean_t       set_NX = TRUE;
#if (__ARM_VMSA__ > 7)
	boolean_t       set_XO = FALSE;
#endif
	boolean_t       should_have_removed = FALSE;

#ifndef __ARM_IC_NOALIAS_ICACHE__
	boolean_t       InvalidatePoU_Icache_Done = FALSE;
#endif

	if (__improbable(end < start)) {
		panic("%s called with bogus range: %p, %p", __func__, (void*)start, (void*)end);
	}

#if DEVELOPMENT || DEBUG
	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
			should_have_removed = TRUE;
		}
	} else
#endif
	{
		/* Determine the new protection. */
		switch (prot) {
#if (__ARM_VMSA__ > 7)
		case VM_PROT_EXECUTE:
			set_XO = TRUE;
			/* fall through */
#endif
		case VM_PROT_READ:
		case VM_PROT_READ | VM_PROT_EXECUTE:
			break;
		case VM_PROT_READ | VM_PROT_WRITE:
		case VM_PROT_ALL:
			return;		/* nothing to do */
		default:
			should_have_removed = TRUE;
		}
	}

	if (should_have_removed) {
		panic("%s: should have been a remove operation, "
		    "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
		    __func__,
		    pmap, (void *)start, (void *)end, prot, options, args);
	}

#if DEVELOPMENT || DEBUG
	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
	if ((prot & VM_PROT_EXECUTE))
#endif
	{
		set_NX = FALSE;
	} else {
		set_NX = TRUE;
	}

	VALIDATE_PMAP(pmap);
	PMAP_LOCK(pmap);
	tte_p = pmap_tte(pmap, start);

	if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		bpte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte_p = &bpte_p[ptenum(start)];
		epte_p = bpte_p + arm_atop(end - start);

		for (pte_p = bpte_p;
		    pte_p < epte_p;
		    pte_p += PAGE_SIZE / ARM_PGBYTES) {
			pt_entry_t      spte;
			pt_entry_t      tmplate;
			pmap_paddr_t    pa;
			int             pai = 0;
			boolean_t       managed = FALSE;
#if DEVELOPMENT || DEBUG
			boolean_t       force_write = FALSE;
#endif

			spte = *pte_p;

			if ((spte == ARM_PTE_TYPE_FAULT) ||
			    ARM_PTE_IS_COMPRESSED(spte)) {
				continue;
			}

			while (!managed) {
				/*
				 * It may be possible for the pte to transition from managed
				 * to unmanaged in this timeframe; for now, elide the assert.
				 * We should break out as a consequence of checking pa_valid.
				 */
				// assert(!ARM_PTE_IS_COMPRESSED(spte));
				pa = pte_to_pa(spte);
				if (!pa_valid(pa)) {
					break;
				}
				pai = (int)pa_index(pa);
				LOCK_PVH(pai);
				spte = *pte_p;
				pa = pte_to_pa(spte);
				if (pai == (int)pa_index(pa)) {
					managed = TRUE;
					break; // Leave the PVH locked as we will unlock it after we free the PTE
				}
				UNLOCK_PVH(pai);
			}
			if ((spte == ARM_PTE_TYPE_FAULT) ||
			    ARM_PTE_IS_COMPRESSED(spte)) {
				if (managed) {
					UNLOCK_PVH(pai);
				}
				continue;
			}

			if (pmap == kernel_pmap) {
#if DEVELOPMENT || DEBUG
				if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
					force_write = TRUE;
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
				} else
#endif
				{
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
				}
			} else {
#if DEVELOPMENT || DEBUG
				if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
					force_write = TRUE;
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWRW));
				} else
#endif
				{
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));
				}
			}

			/*
			 * XXX Removing "NX" would
			 * grant "execute" access
			 * immediately, bypassing any
			 * checks VM might want to do
			 * in its soft fault path.
			 * pmap_protect() and co. are
			 * not allowed to increase
			 * access permissions.
			 */
#if (__ARM_VMSA__ == 7)
			if (set_NX) {
				tmplate |= ARM_PTE_NX;
			} else {
				/* do NOT clear "NX"! */
			}
#else
			if (set_NX) {
				tmplate |= ARM_PTE_NX | ARM_PTE_PNX;
			} else {
				if (pmap == kernel_pmap) {
					/*
					 * TODO: Run CS/Monitor checks here;
					 * should we be clearing PNX here?  Is
					 * this just for dtrace?
					 */
					tmplate &= ~ARM_PTE_PNX;
					tmplate |= ARM_PTE_NX;
				} else {
					/* do NOT clear "NX"! */
					tmplate |= ARM_PTE_PNX;
					if (set_XO) {
						tmplate &= ~ARM_PTE_APMASK;
						tmplate |= ARM_PTE_AP(AP_RONA);
					}
				}
			}
#endif

#if DEVELOPMENT || DEBUG
			if (force_write) {
				/*
				 * TODO: Run CS/Monitor checks here.
				 */
				if (managed) {
					/*
					 * We are marking the page as writable,
					 * so we consider it to be modified and
					 * referenced.
					 */
					pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
					tmplate |= ARM_PTE_AF;

					if (IS_REFFAULT_PAGE(pai)) {
						CLR_REFFAULT_PAGE(pai);
					}

					if (IS_MODFAULT_PAGE(pai)) {
						CLR_MODFAULT_PAGE(pai);
					}
				}
			} else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
				/*
				 * An immediate request for anything other than
				 * write should still mark the page as
				 * referenced if managed.
				 */
				if (managed) {
					pa_set_bits(pa, PP_ATTR_REFERENCED);
					tmplate |= ARM_PTE_AF;

					if (IS_REFFAULT_PAGE(pai)) {
						CLR_REFFAULT_PAGE(pai);
					}
				}
			}
#endif

			/* We do not expect to write fast fault the entry. */
			pte_set_was_writeable(tmplate, false);

			/* TODO: Doesn't this need to worry about PNX? */
			if (((spte & ARM_PTE_NX) == ARM_PTE_NX) && (prot & VM_PROT_EXECUTE)) {
				CleanPoU_DcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
#ifdef __ARM_IC_NOALIAS_ICACHE__
				InvalidatePoU_IcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
#else
				if (!InvalidatePoU_Icache_Done) {
					InvalidatePoU_Icache();
					InvalidatePoU_Icache_Done = TRUE;
				}
#endif
			}

			WRITE_PTE_FAST(pte_p, tmplate);

			if (managed) {
				ASSERT_PVH_LOCKED(pai);
				UNLOCK_PVH(pai);
			}
		}

		FLUSH_PTE_RANGE_STRONG(bpte_p, epte_p);
		PMAP_UPDATE_TLBS(pmap, start, end);
	}

	PMAP_UNLOCK(pmap);
}
void
pmap_protect_options(
	pmap_t pmap,
	vm_map_address_t b,
	vm_map_address_t e,
	vm_prot_t prot,
	unsigned int options,
	__unused void *args)
{
	vm_map_address_t l, beg;

	if ((b | e) & PAGE_MASK) {
		panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
		    pmap, (uint64_t)b, (uint64_t)e);
	}

#if DEVELOPMENT || DEBUG
	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
			pmap_remove_options(pmap, b, e, options);
			return;
		}
	} else
#endif
	{
		/* Determine the new protection. */
		switch (prot) {
		case VM_PROT_EXECUTE:
		case VM_PROT_READ:
		case VM_PROT_READ | VM_PROT_EXECUTE:
			break;
		case VM_PROT_READ | VM_PROT_WRITE:
		case VM_PROT_ALL:
			return;		/* nothing to do */
		default:
			pmap_remove_options(pmap, b, e, options);
			return;
		}
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
	    VM_KERNEL_ADDRHIDE(e));

	beg = b;

	while (beg < e) {
		l = ((beg + ARM_TT_TWIG_SIZE) & ~ARM_TT_TWIG_OFFMASK);

		if (l > e) {
			l = e;
		}

		pmap_protect_options_internal(pmap, beg, l, prot, options, args);

		beg = l;
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
}
/* Map a (possibly) autogenned block */
kern_return_t
pmap_map_block(
	pmap_t pmap,
	addr64_t va,
	ppnum_t pa,
	uint32_t size,
	vm_prot_t prot,
	int attr,
	__unused unsigned int flags)
{
	kern_return_t   kr;
	addr64_t        original_va = va;
	uint32_t        page;

	for (page = 0; page < size; page++) {
		kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);

		if (kr != KERN_SUCCESS) {
			/*
			 * This will panic for now, as it is unclear that
			 * removing the mappings is correct.
			 */
			panic("%s: failed pmap_enter, "
			    "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
			    __func__,
			    pmap, va, pa, size, prot, flags);

			pmap_remove(pmap, original_va, va - original_va);
			return kr;
		}

		va += PAGE_SIZE;
		pa++;
	}

	return KERN_SUCCESS;
}
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map eventually (must make
 *	forward progress eventually).
 */
kern_return_t
pmap_enter(
	pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired)
{
	return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
}
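
/*
 *	Routine:	pmap_enter_pte
 *	Function:
 *		Install "pte" at "pte_p" for virtual address "v".  For user
 *		pmaps, the page table descriptor's wired count, the wired
 *		ledger and the wired stats are adjusted when the wired bit
 *		changes.  A strong write plus a TLB update is used when a
 *		valid, non-compressed entry is being replaced.
 */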
static inline void
pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
{
	if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
		SInt16  *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
		if (pte & ARM_PTE_WIRED) {
			OSAddAtomic16(1, ptd_wiredcnt_ptr);
			pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
		} else {
			OSAddAtomic16(-1, ptd_wiredcnt_ptr);
			pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
		}
	}
	if (*pte_p != ARM_PTE_TYPE_FAULT &&
	    !ARM_PTE_IS_COMPRESSED(*pte_p)) {
		WRITE_PTE_STRONG(pte_p, pte);
		PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
	} else {
		WRITE_PTE(pte_p, pte);
		__builtin_arm_isb(ISB_SY);
	}

	PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + PAGE_SIZE), pte);
}
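
/*
 *	Routine:	wimg_to_pte
 *	Function:
 *		Translate the VM WIMG cache attribute for a mapping into the
 *		corresponding PTE attribute index (plus shareability bits on
 *		__ARM_VMSA__ > 7).  Device and write-combined memory is also
 *		marked non-executable.
 */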
static pt_entry_t
wimg_to_pte(unsigned int wimg)
{
	pt_entry_t pte;

	switch (wimg & (VM_WIMG_MASK)) {
	case VM_WIMG_IO:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_POSTED:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_WCOMB:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_WTHRU:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#endif
		break;
	case VM_WIMG_COPYBACK:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#endif
		break;
	case VM_WIMG_INNERWBACK:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_INNER_MEMORY);
#endif
		break;
	default:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#endif
		break;
	}

	return pte;
}
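
/*
 *	Routine:	pmap_enter_pv
 *	Function:
 *		Record a new mapping of the physical page "pai" in its PV
 *		list.  The first CPU mapping is stored directly in the PV
 *		head; a second mapping converts the head into a list of
 *		pv_entry_t.  Returns FALSE if a needed pv_entry_t could not
 *		be allocated, so the caller can retry.
 */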
static boolean_t
pmap_enter_pv(
	pmap_t pmap,
	pt_entry_t *pte_p,
	int pai,
	unsigned int options,
	pv_entry_t **pve_p,
	boolean_t *is_altacct)
{
	pv_entry_t    **pv_h;
	pv_h = pai_to_pvh(pai);
	boolean_t first_cpu_mapping;

	ASSERT_PVH_LOCKED(pai);

	vm_offset_t pvh_flags = pvh_get_flags(pv_h);

#ifdef PVH_FLAG_CPU
	/* An IOMMU mapping may already be present for a page that hasn't yet
	 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
	 * if this is the first CPU mapping.  We base internal/reusable
	 * accounting on the options specified for the first CPU mapping.
	 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
	 * as there are *any* mappings of the page.  The accounting for a
	 * page should not need to change until the page is recycled by the
	 * VM layer, and we assert that there are no mappings when a page
	 * is recycled.   An IOMMU mapping of a freed/recycled page is
	 * considered a security violation & potential DMA corruption path.*/
	first_cpu_mapping = ((pmap != NULL) && !(pvh_flags & PVH_FLAG_CPU));
	if (first_cpu_mapping) {
		pvh_flags |= PVH_FLAG_CPU;
	}
#else
	first_cpu_mapping = pvh_test_type(pv_h, PVH_TYPE_NULL);
#endif

	if (first_cpu_mapping) {
		if (options & PMAP_OPTIONS_INTERNAL) {
			SET_INTERNAL_PAGE(pai);
		} else {
			CLR_INTERNAL_PAGE(pai);
		}
		if ((options & PMAP_OPTIONS_INTERNAL) &&
		    (options & PMAP_OPTIONS_REUSABLE)) {
			SET_REUSABLE_PAGE(pai);
		} else {
			CLR_REUSABLE_PAGE(pai);
		}
	}
	if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
		pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
		if (pmap != NULL && pmap != kernel_pmap &&
		    ((options & PMAP_OPTIONS_ALT_ACCT) ||
		    PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
		    IS_INTERNAL_PAGE(pai)) {
			/*
			 * Make a note to ourselves that this mapping is using alternative
			 * accounting. We'll need this in order to know which ledger to
			 * debit when the mapping is removed.
			 *
			 * The altacct bit must be set while the pv head is locked. Defer
			 * the ledger accounting until after we've dropped the lock.
			 */
			SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
			*is_altacct = TRUE;
		} else {
			CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
		}
	} else {
		if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
			pt_entry_t      *pte1_p;

			/*
			 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
			 */
			pte1_p = pvh_ptep(pv_h);
			pvh_set_flags(pv_h, pvh_flags);
			if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
				return FALSE;
			}

			pve_set_ptep(*pve_p, pte1_p);
			(*pve_p)->pve_next = PV_ENTRY_NULL;

			if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
				/*
				 * transfer "altacct" from
				 * pp_attr to this pve
				 */
				CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
				SET_ALTACCT_PAGE(pai, *pve_p);
			}
			pvh_update_head(pv_h, *pve_p, PVH_TYPE_PVEP);
			*pve_p = PV_ENTRY_NULL;
		} else if (!pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
			panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
			    __func__, *pv_h, pte_p, pmap, pv_h);
		}
		/*
		 * Set up pv_entry for this new mapping and then
		 * add it to the list for this physical page.
		 */
		pvh_set_flags(pv_h, pvh_flags);
		if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
			return FALSE;
		}

		pve_set_ptep(*pve_p, pte_p);
		(*pve_p)->pve_next = PV_ENTRY_NULL;

		pvh_add(pv_h, *pve_p);

		if (pmap != NULL && pmap != kernel_pmap &&
		    ((options & PMAP_OPTIONS_ALT_ACCT) ||
		    PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
		    IS_INTERNAL_PAGE(pai)) {
			/*
			 * Make a note to ourselves that this
			 * mapping is using alternative
			 * accounting. We'll need this in order
			 * to know which ledger to debit when
			 * the mapping is removed.
			 *
			 * The altacct bit must be set while
			 * the pv head is locked. Defer the
			 * ledger accounting until after we've
			 * dropped the lock.
			 */
			SET_ALTACCT_PAGE(pai, *pve_p);
			*is_altacct = TRUE;
		}

		*pve_p = PV_ENTRY_NULL;
	}

	pvh_set_flags(pv_h, pvh_flags);

	return TRUE;
}
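
/*
 *	Routine:	pmap_enter_options_internal
 *	Function:
 *		Create a translation for virtual address (v) to physical
 *		page (pn) in the given pmap with the requested protection,
 *		expanding the pmap and allocating PV entries as needed.
 *		Compressed markers found at the PTE are converted back with
 *		the corresponding ledger adjustments.
 */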
MARK_AS_PMAP_TEXT static kern_return_t
pmap_enter_options_internal(
	pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options)
{
	pmap_paddr_t    pa = ptoa(pn);
	pt_entry_t      pte;
	pt_entry_t      spte;
	pt_entry_t     *pte_p;
	pv_entry_t     *pve_p;
	boolean_t       set_NX;
	boolean_t       set_XO = FALSE;
	boolean_t       refcnt_updated;
	boolean_t       wiredcnt_updated;
	unsigned int    wimg_bits;
	boolean_t       was_compressed, was_alt_compressed;
	kern_return_t   kr = KERN_SUCCESS;

	VALIDATE_PMAP(pmap);

	if ((v) & PAGE_MASK) {
		panic("pmap_enter_options() pmap %p v 0x%llx\n",
		    pmap, (uint64_t)v);
	}

	if ((prot & VM_PROT_EXECUTE) && (prot & VM_PROT_WRITE) && (pmap == kernel_pmap)) {
		panic("pmap_enter_options(): WX request on kernel_pmap");
	}

#if DEVELOPMENT || DEBUG
	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
	if ((prot & VM_PROT_EXECUTE))
#endif
	{ set_NX = FALSE;} else {
		set_NX = TRUE;
	}

#if (__ARM_VMSA__ > 7)
	if (prot == VM_PROT_EXECUTE) {
		set_XO = TRUE;
	}
#endif

	assert(pn != vm_page_fictitious_addr);

	refcnt_updated = FALSE;
	wiredcnt_updated = FALSE;
	pve_p = PV_ENTRY_NULL;
	was_compressed = FALSE;
	was_alt_compressed = FALSE;

	PMAP_LOCK(pmap);

	/*
	 *	Expand pmap to include this pte.  Assume that
	 *	pmap is always expanded to include enough hardware
	 *	pages to map one VM page.
	 */
	while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
		/* Must unlock to expand the pmap. */
		PMAP_UNLOCK(pmap);

		kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		PMAP_LOCK(pmap);
	}

	if (options & PMAP_OPTIONS_NOENTER) {
		PMAP_UNLOCK(pmap);
		return KERN_SUCCESS;
	}

Pmap_enter_retry:

	spte = *pte_p;

	if (ARM_PTE_IS_COMPRESSED(spte)) {
		/*
		 * "pmap" should be locked at this point, so this should
		 * not race with another pmap_enter() or pmap_remove_range().
		 */
		assert(pmap != kernel_pmap);

		/* one less "compressed" */
		OSAddAtomic64(-1, &pmap->stats.compressed);
		pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
		    PAGE_SIZE);

		was_compressed = TRUE;
		if (spte & ARM_PTE_COMPRESSED_ALT) {
			was_alt_compressed = TRUE;
			pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
		} else {
			/* was part of the footprint */
			pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
		}

		/* clear "compressed" marker */
		/* XXX is it necessary since we're about to overwrite it ? */
		WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
		spte = ARM_PTE_TYPE_FAULT;

		/*
		 * We're replacing a "compressed" marker with a valid PTE,
		 * so no change for "refcnt".
		 */
		refcnt_updated = TRUE;
	}
	if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
		pmap_remove_range(pmap, v, pte_p, pte_p + 1, 0);
		PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
	}

	pte = pa_to_pte(pa) | ARM_PTE_TYPE;

	/* Don't bother tracking wiring for kernel PTEs.  We use ARM_PTE_WIRED to track
	 * wired memory statistics for user pmaps, but kernel PTEs are assumed
	 * to be wired in nearly all cases.  For VM layer functionality, the wired
	 * count in vm_page_t is sufficient. */
	if (wired && pmap != kernel_pmap) {
		pte |= ARM_PTE_WIRED;
	}

#if (__ARM_VMSA__ == 7)
	if (set_NX) {
		pte |= ARM_PTE_NX;
	}
#else
	if (set_NX) {
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
	} else {
		if (pmap == kernel_pmap) {
			pte |= ARM_PTE_NX;
		} else {
			pte |= ARM_PTE_PNX;
		}
	}
#endif

	if (pmap == kernel_pmap) {
#if __ARM_KERNEL_PROTECT__
		pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
		if (prot & VM_PROT_WRITE) {
			pte |= ARM_PTE_AP(AP_RWNA);
			pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
		} else {
			pte |= ARM_PTE_AP(AP_RONA);
			pa_set_bits(pa, PP_ATTR_REFERENCED);
		}
#if (__ARM_VMSA__ == 7)
		if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH)) {
			pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
		}
#endif
	} else {
		if (!(pmap->nested)) {
			pte |= ARM_PTE_NG;
		} else if ((pmap->nested_region_asid_bitmap)
		    && (v >= pmap->nested_region_subord_addr)
		    && (v < (pmap->nested_region_subord_addr + pmap->nested_region_size))) {
			unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> ARM_TT_TWIG_SHIFT);

			if ((pmap->nested_region_asid_bitmap)
			    && testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
				pte |= ARM_PTE_NG;
			}
		}

		if (pmap->nested_pmap != NULL) {
			vm_map_address_t nest_vaddr;
			pt_entry_t      *nest_pte_p;

			nest_vaddr = v - pmap->nested_region_grand_addr + pmap->nested_region_subord_addr;

			if ((nest_vaddr >= pmap->nested_region_subord_addr)
			    && (nest_vaddr < (pmap->nested_region_subord_addr + pmap->nested_region_size))
			    && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
			    && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
			    && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p))
			    && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
				unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> ARM_TT_TWIG_SHIFT);

				if ((pmap->nested_pmap->nested_region_asid_bitmap)
				    && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
					panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
					    nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
				}
			}
		}

		if (prot & VM_PROT_WRITE) {
			if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
				if (fault_type & VM_PROT_WRITE) {
					if (set_XO) {
						pte |= ARM_PTE_AP(AP_RWNA);
					} else {
						pte |= ARM_PTE_AP(AP_RWRW);
					}
					pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
				} else {
					if (set_XO) {
						pte |= ARM_PTE_AP(AP_RONA);
					} else {
						pte |= ARM_PTE_AP(AP_RORO);
					}
					pa_set_bits(pa, PP_ATTR_REFERENCED);
					pte_set_was_writeable(pte, true);
				}
			} else {
				if (set_XO) {
					pte |= ARM_PTE_AP(AP_RWNA);
				} else {
					pte |= ARM_PTE_AP(AP_RWRW);
				}
				pa_set_bits(pa, PP_ATTR_REFERENCED);
			}
		} else {
			if (set_XO) {
				pte |= ARM_PTE_AP(AP_RONA);
			} else {
				pte |= ARM_PTE_AP(AP_RORO);
			}
			pa_set_bits(pa, PP_ATTR_REFERENCED);
		}
	}
	volatile uint16_t *refcnt = NULL;
	volatile uint16_t *wiredcnt = NULL;
	if (pmap != kernel_pmap) {
		refcnt = &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt);
		wiredcnt = &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
		/* Bump the wired count to keep the PTE page from being reclaimed.  We need this because
		 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
		 * a new PV entry. */
		if (!wiredcnt_updated) {
			OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
			wiredcnt_updated = TRUE;
		}
		if (!refcnt_updated) {
			OSAddAtomic16(1, (volatile int16_t*)refcnt);
			refcnt_updated = TRUE;
		}
	}

	if (pa_valid(pa)) {
		int             pai;
		boolean_t       is_altacct, is_internal;

		is_internal = FALSE;
		is_altacct = FALSE;

		pai = (int)pa_index(pa);

		LOCK_PVH(pai);

Pmap_enter_loop:

		if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
			wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
		} else {
			wimg_bits = pmap_cache_attributes(pn);
		}

		/* We may be retrying this operation after dropping the PVH lock.
		 * Cache attributes for the physical page may have changed while the lock
		 * was dropped, so clear any cache attributes we may have previously set
		 * in the PTE template. */
		pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
		pte |= wimg_to_pte(wimg_bits);

		if (pte == *pte_p) {
			/*
			 * This pmap_enter operation has been completed by another thread
			 * undo refcnt on pt and return
			 */
			UNLOCK_PVH(pai);
			goto Pmap_enter_cleanup;
		} else if (pte_to_pa(*pte_p) == pa) {
			pmap_enter_pte(pmap, pte_p, pte, v);
			UNLOCK_PVH(pai);
			goto Pmap_enter_cleanup;
		} else if (*pte_p != ARM_PTE_TYPE_FAULT) {
			/*
			 * pte has been modified by another thread
			 * hold refcnt on pt and retry pmap_enter operation
			 */
			UNLOCK_PVH(pai);
			goto Pmap_enter_retry;
		}
		if (!pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct)) {
			goto Pmap_enter_loop;
		}

		pmap_enter_pte(pmap, pte_p, pte, v);

		if (pmap != kernel_pmap) {
			if (IS_REUSABLE_PAGE(pai) &&
			    !is_altacct) {
				assert(IS_INTERNAL_PAGE(pai));
				OSAddAtomic(+1, &pmap->stats.reusable);
				PMAP_STATS_PEAK(pmap->stats.reusable);
			} else if (IS_INTERNAL_PAGE(pai)) {
				OSAddAtomic(+1, &pmap->stats.internal);
				PMAP_STATS_PEAK(pmap->stats.internal);
				is_internal = TRUE;
			} else {
				OSAddAtomic(+1, &pmap->stats.external);
				PMAP_STATS_PEAK(pmap->stats.external);
			}
		}

		UNLOCK_PVH(pai);

		if (pmap != kernel_pmap) {
			pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);

			if (is_internal) {
				/*
				 * Make corresponding adjustments to
				 * phys_footprint statistics.
				 */
				pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
				if (is_altacct) {
					/*
					 * If this page is internal and
					 * in an IOKit region, credit
					 * the task's total count of
					 * dirty, internal IOKit pages.
					 * It should *not* count towards
					 * the task's total physical
					 * memory footprint, because
					 * this entire region was
					 * already billed to the task
					 * at the time the mapping was
					 * created.
					 *
					 * Put another way, this is
					 * internal++ and
					 * alternate_accounting++, so
					 * net effect on phys_footprint
					 * is 0. That means: don't
					 * touch phys_footprint here.
					 */
					pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
				} else {
					pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
				}
			}
		}

		OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
		if (pmap->stats.resident_count > pmap->stats.resident_max) {
			pmap->stats.resident_max = pmap->stats.resident_count;
		}
	} else {
		if (prot & VM_PROT_EXECUTE) {
			kr = KERN_FAILURE;
			goto Pmap_enter_cleanup;
		}

		wimg_bits = pmap_cache_attributes(pn);
		if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
			wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
		}

		pte |= wimg_to_pte(wimg_bits);

		pmap_enter_pte(pmap, pte_p, pte, v);
	}

	goto Pmap_enter_return;

Pmap_enter_cleanup:

	if (refcnt != NULL) {
		assert(refcnt_updated);
		if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
			panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
		}
	}

Pmap_enter_return:

	if (pgtrace_enabled) {
		// Clone and invalidate original mapping if eligible
		for (int i = 0; i < PAGE_RATIO; i++) {
			pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES * i, 0, 0);
		}
	}

	if (pve_p != PV_ENTRY_NULL) {
		pv_free(pve_p);
	}

	if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
		panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
	}

	PMAP_UNLOCK(pmap);

	return kr;
}
kern_return_t
pmap_enter_options(
	pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options,
	__unused void *arg)
{
	kern_return_t kr = KERN_FAILURE;

	PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);

	kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);
	pv_water_mark_check();

	PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);

	return kr;
}
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *		The mapping must already exist in the pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_change_wiring_internal(
	pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired)
{
	pt_entry_t     *pte_p;
	pmap_paddr_t    pa;

	/* Don't bother tracking wiring for kernel PTEs.  We use ARM_PTE_WIRED to track
	 * wired memory statistics for user pmaps, but kernel PTEs are assumed
	 * to be wired in nearly all cases.  For VM layer functionality, the wired
	 * count in vm_page_t is sufficient. */
	if (pmap == kernel_pmap) {
		return;
	}
	VALIDATE_USER_PMAP(pmap);

	PMAP_LOCK(pmap);
	pte_p = pmap_pte(pmap, v);
	assert(pte_p != PT_ENTRY_NULL);
	pa = pte_to_pa(*pte_p);
	LOCK_PVH((int)pa_index(pa));

	if (wired && !pte_is_wired(*pte_p)) {
		pte_set_wired(pte_p, wired);
		OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
		pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
	} else if (!wired && pte_is_wired(*pte_p)) {
		PMAP_STATS_ASSERTF(pmap->stats.wired_count >= 1, pmap, "stats.wired_count %d", pmap->stats.wired_count);
		pte_set_wired(pte_p, wired);
		OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
		pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
	}

	UNLOCK_PVH((int)pa_index(pa));
	PMAP_UNLOCK(pmap);
}

void
pmap_change_wiring(
	pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired)
{
	pmap_change_wiring_internal(pmap, v, wired);
}
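
/*
 *	Routine:	pmap_find_phys
 *	Function:
 *		Return the physical page number backing the given virtual
 *		address, or 0 if there is no valid mapping.
 *		pmap_find_phys_internal is the locked helper.
 */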
MARK_AS_PMAP_TEXT static ppnum_t
pmap_find_phys_internal(
	pmap_t pmap,
	addr64_t va)
{
	ppnum_t ppn = 0;

	VALIDATE_PMAP(pmap);

	if (pmap != kernel_pmap) {
		PMAP_LOCK(pmap);
	}

	ppn = pmap_vtophys(pmap, va);

	if (pmap != kernel_pmap) {
		PMAP_UNLOCK(pmap);
	}

	return ppn;
}

ppnum_t
pmap_find_phys(
	pmap_t pmap,
	addr64_t va)
{
	pmap_paddr_t    pa = 0;

	if (pmap == kernel_pmap) {
		pa = mmu_kvtop(va);
	} else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
		pa = mmu_uvtop(va);
	}

	if (pa) {
		return (ppnum_t)(pa >> PAGE_SHIFT);
	}

	if (not_in_kdp) {
		return pmap_find_phys_internal(pmap, va);
	} else {
		return pmap_vtophys(pmap, va);
	}
}

pmap_paddr_t
kvtophys(
	vm_offset_t va)
{
	pmap_paddr_t pa;

	pa = mmu_kvtop(va);
	if (pa) {
		return pa;
	}
	pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va)) << PAGE_SHIFT;
	if (pa) {
		pa |= (va & PAGE_MASK);
	}

	return (pmap_paddr_t)pa;
}
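
/*
 *	Routine:	pmap_vtophys
 *	Function:
 *		Walk the translation tables of "pmap" for "va" and return
 *		the physical page number, handling both table and block
 *		mappings.  Returns 0 when no valid translation exists.
 */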
ppnum_t
pmap_vtophys(
	pmap_t pmap,
	addr64_t va)
{
	if ((va < pmap->min) || (va >= pmap->max)) {
		return 0;
	}

#if (__ARM_VMSA__ == 7)
	tt_entry_t     *tte_p, tte;
	pt_entry_t     *pte_p;
	ppnum_t         ppn;

	tte_p = pmap_tte(pmap, va);
	if (tte_p == (tt_entry_t *) NULL) {
		return (ppnum_t) 0;
	}

	tte = *tte_p;
	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		pte_p = (pt_entry_t *) ttetokv(tte) + ptenum(va);
		ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
#if DEVELOPMENT || DEBUG
		if (ppn != 0 &&
		    ARM_PTE_IS_COMPRESSED(*pte_p)) {
			panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
			    pmap, va, pte_p, (uint64_t) (*pte_p), ppn);
		}
#endif /* DEVELOPMENT || DEBUG */
	} else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
		if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER) {
			ppn = (ppnum_t) atop(suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK));
		} else {
			ppn = (ppnum_t) atop(sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK));
		}
	} else {
		ppn = 0;
	}
#else
	tt_entry_t     *ttp;
	tt_entry_t      tte;
	ppnum_t         ppn = 0;

	/* Level 0 currently unused */

#if __ARM64_TWO_LEVEL_PMAP__
	/* We have no L1 entry; go straight to the L2 entry */
	ttp = pmap_tt2e(pmap, va);
	tte = *ttp;
#else
	/* Get first-level (1GB) entry */
	ttp = pmap_tt1e(pmap, va);
	tte = *ttp;
	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
		return ppn;
	}

	tte = ((tt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, va)];
#endif
	if ((tte & ARM_TTE_VALID) != (ARM_TTE_VALID)) {
		return ppn;
	}

	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
		ppn = (ppnum_t) atop((tte & ARM_TTE_BLOCK_L2_MASK) | (va & ARM_TT_L2_OFFMASK));
		return ppn;
	}
	tte = ((tt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, va)];
	ppn = (ppnum_t) atop((tte & ARM_PTE_MASK) | (va & ARM_TT_L3_OFFMASK));
#endif

	return ppn;
}
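
/*
 *	Routine:	pmap_extract_internal
 *	Function:
 *		Locked helper for pmap_extract(); translates "va" through
 *		pmap_vtophys() and rebuilds the full physical address by
 *		adding back the page offset.
 */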
MARK_AS_PMAP_TEXT static vm_offset_t
pmap_extract_internal(
	pmap_t pmap,
	vm_map_address_t va)
{
	pmap_paddr_t    pa = 0;
	ppnum_t         ppn = 0;

	if (pmap == NULL) {
		return 0;
	}

	VALIDATE_PMAP(pmap);

	PMAP_LOCK(pmap);

	ppn = pmap_vtophys(pmap, va);

	if (ppn != 0) {
		pa = ptoa(ppn) | ((va) & PAGE_MASK);
	}

	PMAP_UNLOCK(pmap);

	return pa;
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_offset_t
pmap_extract(
	pmap_t pmap,
	vm_map_address_t va)
{
	pmap_paddr_t    pa = 0;

	if (pmap == kernel_pmap) {
		pa = mmu_kvtop(va);
	} else if (pmap == vm_map_pmap(current_thread()->map)) {
		pa = mmu_uvtop(va);
	}

	if (pa) {
		return pa;
	}

	return pmap_extract_internal(pmap, va);
}
/*
 *	pmap_init_pte_page - Initialize a page table page.
 */
void
pmap_init_pte_page(
	pmap_t pmap,
	pt_entry_t *pte_p,
	vm_offset_t va,
	unsigned int ttlevel,
	boolean_t alloc_ptd)
{
	pt_desc_t      *ptdp = NULL;
	vm_offset_t    *pvh;

	pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));

	if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
		if (alloc_ptd) {
			/*
			 * This path should only be invoked from arm_vm_init.  If we are emulating 16KB pages
			 * on 4KB hardware, we may already have allocated a page table descriptor for a
			 * bootstrap request, so we check for an existing PTD here.
			 */
			ptdp = ptd_alloc(pmap, true);
			pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
		} else {
			panic("pmap_init_pte_page(): pte_p %p", pte_p);
		}
	} else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
		ptdp = (pt_desc_t *)(pvh_list(pvh));
	} else {
		panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
	}

	bzero(pte_p, ARM_PGBYTES);
	// below barrier ensures the page zeroing is visible to PTW before
	// it is linked to the PTE of previous level
	__builtin_arm_dmb(DMB_ISHST);
	ptd_init(ptdp, pmap, va, ttlevel, pte_p);
}
/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new memory for the default (COARSE) translation table
 *	entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
 *	also allocates space for the corresponding pv entries.
 *
 *	Nothing should be locked.
 */
static kern_return_t
pmap_expand(
	pmap_t pmap,
	vm_map_address_t v,
	unsigned int options,
	unsigned int level)
{
#if (__ARM_VMSA__ == 7)
	vm_offset_t     pa;
	tt_entry_t     *tte_p;
	tt_entry_t     *tt_p;
	unsigned int    i;

	while (tte_index(pmap, v) >= pmap->tte_index_max) {
		tte_p = pmap_tt1_allocate(pmap, 2 * ARM_PGBYTES, ((options & PMAP_OPTIONS_NOWAIT)? PMAP_TT_ALLOCATE_NOWAIT : 0));
		if (tte_p == (tt_entry_t *)0) {
			return KERN_RESOURCE_SHORTAGE;
		}

		PMAP_LOCK(pmap);
		if (pmap->tte_index_max > NTTES) {
			pmap_tt1_deallocate(pmap, tte_p, 2 * ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
			PMAP_UNLOCK(pmap);
			break;
		}

		pmap_simple_lock(&pmap->tt1_lock);
		for (i = 0; i < pmap->tte_index_max; i++) {
			tte_p[i] = pmap->tte[i];
		}
		for (i = NTTES; i < 2 * NTTES; i++) {
			tte_p[i] = ARM_TTE_TYPE_FAULT;
		}

		pmap->prev_tte = pmap->tte;
		pmap->tte = tte_p;
		pmap->ttep = ml_static_vtop((vm_offset_t)pmap->tte);

		FLUSH_PTE_RANGE(pmap->tte, pmap->tte + (2 * NTTES));

		pmap->tte_index_max = 2 * NTTES;
		pmap->stamp = hw_atomic_add(&pmap_stamp, 1);

		for (i = 0; i < NTTES; i++) {
			pmap->prev_tte[i] = ARM_TTE_TYPE_FAULT;
		}

		FLUSH_PTE_RANGE(pmap->prev_tte, pmap->prev_tte + NTTES);

		pmap_simple_unlock(&pmap->tt1_lock);
		PMAP_UNLOCK(pmap);
		pmap_set_pmap(pmap, current_thread());
	}

	if (level == 1) {
		return KERN_SUCCESS;
	}

	{
		tt_entry_t     *tte_next_p;

		PMAP_LOCK(pmap);
		pa = 0;
		if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
			PMAP_UNLOCK(pmap);
			return KERN_SUCCESS;
		}
		tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
		for (i = 0, tte_next_p = tte_p; i < 4; i++) {
			if (tte_to_pa(*tte_next_p)) {
				pa = tte_to_pa(*tte_next_p);
				break;
			}
			tte_next_p++;
		}
		pa = pa & ~PAGE_MASK;
		if (pa) {
			tte_p = &pmap->tte[ttenum(v)];
			*tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;

			PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
			    VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
			PMAP_UNLOCK(pmap);
			return KERN_SUCCESS;
		}
		PMAP_UNLOCK(pmap);
	}
	v = v & ~ARM_TT_L1_PT_OFFMASK;

	while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
		/*
		 *	Allocate a VM page for the level 2 page table entries.
		 */
		while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
			if (options & PMAP_OPTIONS_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}
			VM_PAGE_WAIT();
		}

		PMAP_LOCK(pmap);
		/*
		 *	See if someone else expanded us first
		 */
		if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
			tt_entry_t     *tte_next_p;

			pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
			pa = kvtophys((vm_offset_t)tt_p);
#ifndef __ARM_L1_PTW__
			CleanPoU_DcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
#endif
			tte_p = &pmap->tte[ttenum(v)];
			for (i = 0, tte_next_p = tte_p; i < 4; i++) {
				*tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
				PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
				    VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
				tte_next_p++;
				pa = pa + 0x400;
			}
			FLUSH_PTE_RANGE(tte_p, tte_p + 4);

			pa = 0x0ULL;
			tt_p = (tt_entry_t *)NULL;
		}
		PMAP_UNLOCK(pmap);
		if (tt_p != (tt_entry_t *)NULL) {
			pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
			tt_p = (tt_entry_t *)NULL;
		}
	}

	return KERN_SUCCESS;
#else
	pmap_paddr_t    pa;
#if __ARM64_TWO_LEVEL_PMAP__
	/* If we are using a two level page table, we'll start at L2. */
	unsigned int    ttlevel = 2;
#else
	/* Otherwise, we start at L1 (we use 3 levels by default). */
	unsigned int    ttlevel = 1;
#endif
	tt_entry_t     *tte_p;
	tt_entry_t     *tt_p;

	pa = 0x0ULL;
	tt_p = (tt_entry_t *)NULL;

	for (; ttlevel < level; ttlevel++) {
		PMAP_LOCK(pmap);

		if (ttlevel == 1) {
			if ((pmap_tt2e(pmap, v) == PT_ENTRY_NULL)) {
				PMAP_UNLOCK(pmap);
				while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
					if (options & PMAP_OPTIONS_NOWAIT) {
						return KERN_RESOURCE_SHORTAGE;
					}
					VM_PAGE_WAIT();
				}
				PMAP_LOCK(pmap);
				if ((pmap_tt2e(pmap, v) == PT_ENTRY_NULL)) {
					pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
					pa = kvtophys((vm_offset_t)tt_p);
					tte_p = pmap_tt1e( pmap, v);
					*tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
					PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
					    VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
					pa = 0x0ULL;
					tt_p = (tt_entry_t *)NULL;
					if ((pmap == kernel_pmap) && (VM_MIN_KERNEL_ADDRESS < 0x00000000FFFFFFFFULL)) {
						current_pmap()->tte[v >> ARM_TT_L1_SHIFT] = kernel_pmap->tte[v >> ARM_TT_L1_SHIFT];
					}
				}
			}
		} else if (ttlevel == 2) {
			if (pmap_tt3e(pmap, v) == PT_ENTRY_NULL) {
				PMAP_UNLOCK(pmap);
				while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L3_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
					if (options & PMAP_OPTIONS_NOWAIT) {
						return KERN_RESOURCE_SHORTAGE;
					}
					VM_PAGE_WAIT();
				}
				PMAP_LOCK(pmap);
				if ((pmap_tt3e(pmap, v) == PT_ENTRY_NULL)) {
					pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L3_LEVEL, FALSE);
					pa = kvtophys((vm_offset_t)tt_p);
					tte_p = pmap_tt2e( pmap, v);
					*tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
					PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L2_OFFMASK),
					    VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L2_OFFMASK) + ARM_TT_L2_SIZE), *tte_p);
					pa = 0x0ULL;
					tt_p = (tt_entry_t *)NULL;
				}
			}
		}

		PMAP_UNLOCK(pmap);

		if (tt_p != (tt_entry_t *)NULL) {
			pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
			tt_p = (tt_entry_t *)NULL;
		}
	}

	return KERN_SUCCESS;
#endif
}
/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 */
void
pmap_collect(pmap_t pmap)
{
	if (pmap == PMAP_NULL) {
		return;
	}

	if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
		/* TODO: Scan for vm page assigned to top level page tables with no reference */
	}

	return;
}
/*
 *	Pmap garbage collection
 *	Called by the pageout daemon when pages are scarce.
 */
void
pmap_gc(
	void)
{
	pmap_t          pmap, pmap_next;
	boolean_t       gc_wait;

	if (pmap_gc_allowed &&
	    (pmap_gc_allowed_by_time_throttle ||
	    pmap_gc_forced)) {
		pmap_gc_forced = FALSE;
		pmap_gc_allowed_by_time_throttle = FALSE;
		pmap_simple_lock(&pmaps_lock);
		pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
		while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
			if (!(pmap->gc_status & PMAP_GC_INFLIGHT)) {
				pmap->gc_status |= PMAP_GC_INFLIGHT;
			}
			pmap_simple_unlock(&pmaps_lock);

			pmap_collect(pmap);

			pmap_simple_lock(&pmaps_lock);
			gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
			pmap->gc_status &= ~(PMAP_GC_INFLIGHT | PMAP_GC_WAIT);
			pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
			if (gc_wait) {
				if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next)) {
					pmap_next->gc_status |= PMAP_GC_INFLIGHT;
				}
				pmap_simple_unlock(&pmaps_lock);
				thread_wakeup((event_t) &pmap->gc_status);
				pmap_simple_lock(&pmaps_lock);
			}
			pmap = pmap_next;
		}
		pmap_simple_unlock(&pmaps_lock);
	}
}

/*
 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
 */
uint64_t
pmap_release_pages_fast(void)
{
	return 0;
}
/*
 *      By default, don't attempt pmap GC more frequently
 *      than once / 1 minutes.
 */
void
compute_pmap_gc_throttle(
	void *arg __unused)
{
	pmap_gc_allowed_by_time_throttle = TRUE;
}
/*
 * pmap_attribute_cache_sync(vm_offset_t pa)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 */
kern_return_t
pmap_attribute_cache_sync(
	ppnum_t pp,
	vm_size_t size,
	__unused vm_machine_attribute_t attribute,
	__unused vm_machine_attribute_val_t * value)
{
	if (size > PAGE_SIZE) {
		panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
	} else {
		cache_sync_page(pp);
	}

	return KERN_SUCCESS;
}
/*
 * pmap_sync_page_data_phys(ppnum_t pp)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 */
void
pmap_sync_page_data_phys(
	ppnum_t pp)
{
	cache_sync_page(pp);
}

/*
 * pmap_sync_page_attributes_phys(ppnum_t pp)
 *
 * Write back and invalidate all cachelines on a physical page.
 */
void
pmap_sync_page_attributes_phys(
	ppnum_t pp)
{
	flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
}
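/*
 * Illustrative sketch (not built): the cache-maintenance helpers above all
 * operate on a physical page number.  A hypothetical caller that wants to
 * write back and invalidate one managed page converts the ppnum_t to a
 * physical address with ptoa(); the helper name below is an assumption added
 * for illustration, not part of the pmap interface.
 */
#if 0 /* example only */
static void
example_flush_one_page(ppnum_t pn)
{
	/* write back + invalidate every cacheline backing this physical page */
	pmap_sync_page_attributes_phys(pn);

	/* equivalently, operate on the physical address directly */
	flush_dcache((vm_offset_t)ptoa(pn), PAGE_SIZE, TRUE);
}
#endif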
7145 /* temporary workaround */
7154 pte_p
= pmap_pte(map
->pmap
, va
);
7159 return (spte
& ARM_PTE_ATTRINDXMASK
) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT
);
7171 addr
= (unsigned int *) phystokv(ptoa(pn
));
7172 count
= PAGE_SIZE
/ sizeof(unsigned int);
7178 extern void mapping_set_mod(ppnum_t pn
);
7184 pmap_set_modify(pn
);
7187 extern void mapping_set_ref(ppnum_t pn
);
7193 pmap_set_reference(pn
);
/*
 *	Clear specified attribute bits.
 *
 *	Try to force an arm_fast_fault() for all mappings of
 *	the page - to force attributes to be set again at fault time.
 *	If the forcing succeeds, clear the cached bits at the head.
 *	Otherwise, something must have been wired, so leave the cached
 *	attributes alone.
 */
MARK_AS_PMAP_TEXT static void
phys_attribute_clear_internal(
	ppnum_t         pn,
	unsigned int    bits,
	int             options,
	void            *arg)
{
	pmap_paddr_t    pa = ptoa(pn);
	vm_prot_t       allow_mode = VM_PROT_ALL;

	if ((bits & PP_ATTR_MODIFIED) &&
	    (options & PMAP_OPTIONS_NOFLUSH) &&
	    (arg == NULL)) {
		panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
		    "should not clear 'modified' without flushing TLBs\n",
		    pn, bits, options, arg);
	}

	assert(pn != vm_page_fictitious_addr);

	if (options & PMAP_OPTIONS_CLEAR_WRITE) {
		assert(bits == PP_ATTR_MODIFIED);

		pmap_page_protect_options_internal(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0);
		/*
		 * We short circuit this case; it should not need to
		 * invoke arm_force_fast_fault, so just clear the modified bit.
		 * pmap_page_protect has taken care of resetting
		 * the state so that we'll see the next write as a fault to
		 * the VM (i.e. we don't want a fast fault).
		 */
		pa_clear_bits(pa, bits);
		return;
	}

	if (bits & PP_ATTR_REFERENCED) {
		allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
	}
	if (bits & PP_ATTR_MODIFIED) {
		allow_mode &= ~VM_PROT_WRITE;
	}

	if (bits == PP_ATTR_NOENCRYPT) {
		/*
		 * We short circuit this case; it should not need to
		 * invoke arm_force_fast_fault, so just clear and
		 * return.  On ARM, this bit is just a debugging aid.
		 */
		pa_clear_bits(pa, bits);
		return;
	}

	if (arm_force_fast_fault_internal(pn, allow_mode, options)) {
		pa_clear_bits(pa, bits);
	}
}

static void
phys_attribute_clear(
	ppnum_t         pn,
	unsigned int    bits,
	int             options,
	void            *arg)
{
	/*
	 * Do we really want this tracepoint?  It will be extremely chatty.
	 * Also, should we have a corresponding trace point for the set path?
	 */
	PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);

	phys_attribute_clear_internal(pn, bits, options, arg);

	PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
}

/*
 *	Set specified attribute bits.
 *
 *	Set cached value in the pv head because we have
 *	no per-mapping hardware support for referenced and
 *	modify bits.
 */
MARK_AS_PMAP_TEXT static void
phys_attribute_set_internal(
	ppnum_t pn,
	unsigned int bits)
{
	pmap_paddr_t    pa = ptoa(pn);
	assert(pn != vm_page_fictitious_addr);

	pa_set_bits(pa, bits);
}

static void
phys_attribute_set(
	ppnum_t pn,
	unsigned int bits)
{
	phys_attribute_set_internal(pn, bits);
}
/*
 *	Check specified attribute bits.
 *
 *	use the software cached bits (since no hw support).
 */
static boolean_t
phys_attribute_test(
	ppnum_t pn,
	unsigned int bits)
{
	pmap_paddr_t    pa = ptoa(pn);
	assert(pn != vm_page_fictitious_addr);
	return pa_test_bits(pa, bits);
}

/*
 *	Set the modify/reference bits on the specified physical page.
 */
void
pmap_set_modify(ppnum_t pn)
{
	phys_attribute_set(pn, PP_ATTR_MODIFIED);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(
	ppnum_t pn)
{
	phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
 */
boolean_t
pmap_is_modified(
	ppnum_t pn)
{
	return phys_attribute_test(pn, PP_ATTR_MODIFIED);
}

/*
 *	Set the reference bit on the specified physical page.
 */
void
pmap_set_reference(
	ppnum_t pn)
{
	phys_attribute_set(pn, PP_ATTR_REFERENCED);
}

/*
 *	Clear the reference bits on the specified physical page.
 */
void
pmap_clear_reference(
	ppnum_t pn)
{
	phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */
boolean_t
pmap_is_referenced(
	ppnum_t pn)
{
	return phys_attribute_test(pn, PP_ATTR_REFERENCED);
}

/*
 * pmap_get_refmod(phys)
 *  returns the referenced and modified bits of the specified
 *  physical page.
 */
unsigned int
pmap_get_refmod(
	ppnum_t pn)
{
	return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
	       | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
}

/*
 * pmap_clear_refmod(phys, mask)
 *  clears the referenced and modified bits as specified by the mask
 *  of the specified physical page.
 */
void
pmap_clear_refmod_options(
	ppnum_t pn,
	unsigned int mask,
	unsigned int options,
	void *arg)
{
	unsigned int    bits;

	bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
	    ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
	phys_attribute_clear(pn, bits, options, arg);
}

void
pmap_clear_refmod(
	ppnum_t pn,
	unsigned int mask)
{
	pmap_clear_refmod_options(pn, mask, 0, NULL);
}
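/*
 * Illustrative sketch (not built): how the generic VM_MEM_* refmod mask maps
 * onto the software-maintained PP_ATTR_* bits used above.  The helper name is
 * hypothetical; the translation simply restates pmap_clear_refmod_options().
 */
#if 0 /* example only */
static unsigned int
example_refmod_mask_to_pp_attr(unsigned int mask)
{
	return ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
	       ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
}

/* usage: clear only the referenced bit, leaving the modified state intact */
/* pmap_clear_refmod(pn, VM_MEM_REFERENCED); */
#endif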
unsigned int
pmap_disconnect_options(
	ppnum_t pn,
	unsigned int options,
	void *arg)
{
	if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
		/*
		 * On ARM, the "modified" bit is managed by software, so
		 * we know up-front if the physical page is "modified",
		 * without having to scan all the PTEs pointing to it.
		 * The caller should have made the VM page "busy" so no one
		 * should be able to establish any new mapping and "modify"
		 * the page behind us.
		 */
		if (pmap_is_modified(pn)) {
			/*
			 * The page has been modified and will be sent to
			 * the VM compressor.
			 */
			options |= PMAP_OPTIONS_COMPRESSOR;
		} else {
			/*
			 * The page hasn't been modified and will be freed
			 * instead of compressed.
			 */
		}
	}

	/* disconnect the page */
	pmap_page_protect_options(pn, 0, options, arg);

	/* return ref/chg status */
	return pmap_get_refmod(pn);
}

/*
 * Disconnect all mappings for this page and return reference and change status
 * in generic format.
 */
unsigned int
pmap_disconnect(
	ppnum_t pn)
{
	pmap_page_protect(pn, 0);       /* disconnect the page */
	return pmap_get_refmod(pn);     /* return ref/chg status */
}
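/*
 * Illustrative sketch (not built): a hypothetical caller deciding whether a
 * page must go through the VM compressor before being disconnected.  Because
 * the "modified" bit is tracked in software, pmap_disconnect_options() can
 * make that decision without walking the PV list when
 * PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED is passed.
 */
#if 0 /* example only */
static unsigned int
example_disconnect_for_compressor(ppnum_t pn)
{
	/* returns the observed VM_MEM_REFERENCED / VM_MEM_MODIFIED status */
	return pmap_disconnect_options(pn, PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED, NULL);
}
#endif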
boolean_t
pmap_has_managed_page(ppnum_t first, ppnum_t last)
{
	if (ptoa(first) >= vm_last_phys) {
		return FALSE;
	}
	if (ptoa(last) < vm_first_phys) {
		return FALSE;
	}

	return TRUE;
}
7503 * The state maintained by the noencrypt functions is used as a
7504 * debugging aid on ARM. This incurs some overhead on the part
7505 * of the caller. A special case check in phys_attribute_clear
7506 * (the most expensive path) currently minimizes this overhead,
7507 * but stubbing these functions out on RELEASE kernels yields
7514 #if DEVELOPMENT || DEBUG
7515 boolean_t result
= FALSE
;
7517 if (!pa_valid(ptoa(pn
))) {
7521 result
= (phys_attribute_test(pn
, PP_ATTR_NOENCRYPT
));
7534 #if DEVELOPMENT || DEBUG
7535 if (!pa_valid(ptoa(pn
))) {
7539 phys_attribute_set(pn
, PP_ATTR_NOENCRYPT
);
7546 pmap_clear_noencrypt(
7549 #if DEVELOPMENT || DEBUG
7550 if (!pa_valid(ptoa(pn
))) {
7554 phys_attribute_clear(pn
, PP_ATTR_NOENCRYPT
, 0, NULL
);
void
pmap_lock_phys_page(ppnum_t pn)
{
	int             pai;
	pmap_paddr_t    phys = ptoa(pn);

	if (pa_valid(phys)) {
		pai = (int)pa_index(phys);
		LOCK_PVH(pai);
	} else {
		simple_lock(&phys_backup_lock, LCK_GRP_NULL);
	}
}

void
pmap_unlock_phys_page(ppnum_t pn)
{
	int             pai;
	pmap_paddr_t    phys = ptoa(pn);

	if (pa_valid(phys)) {
		pai = (int)pa_index(phys);
		UNLOCK_PVH(pai);
	} else {
		simple_unlock(&phys_backup_lock);
	}
}
7588 MARK_AS_PMAP_TEXT
static void
7589 pmap_switch_user_ttb_internal(
7592 VALIDATE_PMAP(pmap
);
7593 pmap_cpu_data_t
*cpu_data_ptr
;
7594 cpu_data_ptr
= pmap_get_cpu_data();
7596 #if (__ARM_VMSA__ == 7)
7598 if ((cpu_data_ptr
->cpu_user_pmap
!= PMAP_NULL
)
7599 && (cpu_data_ptr
->cpu_user_pmap
!= kernel_pmap
)) {
7602 c
= hw_atomic_sub((volatile uint32_t *)&cpu_data_ptr
->cpu_user_pmap
->cpu_ref
, 1);
7603 if ((c
== 0) && (cpu_data_ptr
->cpu_user_pmap
->prev_tte
!= 0)) {
7604 /* We saved off the old 1-page tt1 in pmap_expand() in case other cores were still using it.
7605 * Now that the user pmap's cpu_ref is 0, we should be able to safely free it.*/
7606 tt_entry_t
*tt_entry
;
7608 tt_entry
= cpu_data_ptr
->cpu_user_pmap
->prev_tte
;
7609 cpu_data_ptr
->cpu_user_pmap
->prev_tte
= (tt_entry_t
*) NULL
;
7610 pmap_tt1_deallocate(cpu_data_ptr
->cpu_user_pmap
, tt_entry
, ARM_PGBYTES
, PMAP_TT_DEALLOCATE_NOBLOCK
);
7613 cpu_data_ptr
->cpu_user_pmap
= pmap
;
7614 cpu_data_ptr
->cpu_user_pmap_stamp
= pmap
->stamp
;
7615 (void) hw_atomic_add((volatile uint32_t *)&pmap
->cpu_ref
, 1);
7617 #if MACH_ASSERT && __ARM_USER_PROTECT__
7619 unsigned int ttbr0_val
, ttbr1_val
;
7620 __asm__
volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val
));
7621 __asm__
volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val
));
7622 if (ttbr0_val
!= ttbr1_val
) {
7623 panic("Misaligned ttbr0 %08X\n", ttbr0_val
);
7627 if (pmap
->tte_index_max
== NTTES
) {
7628 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x40000000 */
7629 __asm__
volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(2));
7630 __builtin_arm_isb(ISB_SY
);
7631 #if !__ARM_USER_PROTECT__
7632 set_mmu_ttb(pmap
->ttep
);
7635 #if !__ARM_USER_PROTECT__
7636 set_mmu_ttb(pmap
->ttep
);
7638 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x80000000 */
7639 __asm__
volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(1));
7640 __builtin_arm_isb(ISB_SY
);
7641 #if MACH_ASSERT && __ARM_USER_PROTECT__
7642 if (pmap
->ttep
& 0x1000) {
7643 panic("Misaligned ttbr0 %08X\n", pmap
->ttep
);
7648 #if !__ARM_USER_PROTECT__
7649 set_context_id(pmap
->asid
);
7652 #else /* (__ARM_VMSA__ == 7) */
7654 if (pmap
!= kernel_pmap
) {
7655 cpu_data_ptr
->cpu_nested_pmap
= pmap
->nested_pmap
;
7658 if (pmap
== kernel_pmap
) {
7659 pmap_clear_user_ttb_internal();
7661 set_mmu_ttb((pmap
->ttep
& TTBR_BADDR_MASK
) | (((uint64_t)pmap
->asid
) << TTBR_ASID_SHIFT
));
7667 pmap_switch_user_ttb(
7670 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB
) | DBG_FUNC_START
, VM_KERNEL_ADDRHIDE(pmap
), pmap
->vasid
, pmap
->asid
);
7671 pmap_switch_user_ttb_internal(pmap
);
7672 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB
) | DBG_FUNC_END
);
MARK_AS_PMAP_TEXT static void
pmap_clear_user_ttb_internal(void)
{
#if (__ARM_VMSA__ > 7)
	set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
#else
	set_mmu_ttb(kernel_pmap->ttep);
#endif
}

void
pmap_clear_user_ttb(void)
{
	pmap_clear_user_ttb_internal();
}
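/*
 * Illustrative sketch (not built): on __ARM_VMSA__ > 7 the user TTBR value
 * written by pmap_switch_user_ttb_internal() packs the translation table base
 * and the ASID into a single register value.  The helper name is hypothetical.
 */
#if 0 /* example only */
static uint64_t
example_user_ttbr_value(pmap_t pmap)
{
	/* low bits: table base address; high bits: ASID tag used by TLB lookups */
	return (pmap->ttep & TTBR_BADDR_MASK) |
	       (((uint64_t)pmap->asid) << TTBR_ASID_SHIFT);
}
#endif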
7692 * Routine: arm_force_fast_fault
7695 * Force all mappings for this page to fault according
7696 * to the access modes allowed, so we can gather ref/modify
7699 MARK_AS_PMAP_TEXT
static boolean_t
7700 arm_force_fast_fault_internal(
7702 vm_prot_t allow_mode
,
7705 pmap_paddr_t phys
= ptoa(ppnum
);
7711 boolean_t is_reusable
, is_internal
;
7712 boolean_t tlb_flush_needed
= FALSE
;
7713 boolean_t ref_fault
;
7714 boolean_t mod_fault
;
7716 assert(ppnum
!= vm_page_fictitious_addr
);
7718 if (!pa_valid(phys
)) {
7719 return FALSE
; /* Not a managed page. */
7725 pai
= (int)pa_index(phys
);
7727 pv_h
= pai_to_pvh(pai
);
7729 pte_p
= PT_ENTRY_NULL
;
7730 pve_p
= PV_ENTRY_NULL
;
7731 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
7732 pte_p
= pvh_ptep(pv_h
);
7733 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
7734 pve_p
= pvh_list(pv_h
);
7737 is_reusable
= IS_REUSABLE_PAGE(pai
);
7738 is_internal
= IS_INTERNAL_PAGE(pai
);
7740 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
7741 vm_map_address_t va
;
7745 boolean_t update_pte
;
7747 if (pve_p
!= PV_ENTRY_NULL
) {
7748 pte_p
= pve_get_ptep(pve_p
);
7751 if (pte_p
== PT_ENTRY_NULL
) {
7752 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p
, ppnum
);
7754 #ifdef PVH_FLAG_IOMMU
7755 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
7759 if (*pte_p
== ARM_PTE_EMPTY
) {
7760 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
7762 if (ARM_PTE_IS_COMPRESSED(*pte_p
)) {
7763 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
7766 pmap
= ptep_get_pmap(pte_p
);
7767 va
= ptep_get_va(pte_p
);
7769 assert(va
>= pmap
->min
&& va
< pmap
->max
);
7771 if (pte_is_wired(*pte_p
) || pmap
== kernel_pmap
) {
7780 if ((allow_mode
& VM_PROT_READ
) != VM_PROT_READ
) {
7781 /* read protection sets the pte to fault */
7782 tmplate
= tmplate
& ~ARM_PTE_AF
;
7786 if ((allow_mode
& VM_PROT_WRITE
) != VM_PROT_WRITE
) {
7787 /* take away write permission if set */
7788 if (pmap
== kernel_pmap
) {
7789 if ((tmplate
& ARM_PTE_APMASK
) == ARM_PTE_AP(AP_RWNA
)) {
7790 tmplate
= ((tmplate
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RONA
));
7791 pte_set_was_writeable(tmplate
, true);
7796 if ((tmplate
& ARM_PTE_APMASK
) == ARM_PTE_AP(AP_RWRW
)) {
7797 tmplate
= ((tmplate
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RORO
));
7798 pte_set_was_writeable(tmplate
, true);
7807 if (*pte_p
!= ARM_PTE_TYPE_FAULT
&&
7808 !ARM_PTE_IS_COMPRESSED(*pte_p
)) {
7809 WRITE_PTE_STRONG(pte_p
, tmplate
);
7810 flush_mmu_tlb_region_asid_async(va
, PAGE_SIZE
, pmap
);
7811 tlb_flush_needed
= TRUE
;
7813 WRITE_PTE(pte_p
, tmplate
);
7814 __builtin_arm_isb(ISB_SY
);
7818 /* update pmap stats and ledgers */
7819 if (IS_ALTACCT_PAGE(pai
, pve_p
)) {
7821 * We do not track "reusable" status for
7822 * "alternate accounting" mappings.
7824 } else if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
7827 pmap
!= kernel_pmap
) {
7828 /* one less "reusable" */
7829 PMAP_STATS_ASSERTF(pmap
->stats
.reusable
> 0, pmap
, "stats.reusable %d", pmap
->stats
.reusable
);
7830 OSAddAtomic(-1, &pmap
->stats
.reusable
);
7831 /* one more "internal" */
7832 OSAddAtomic(+1, &pmap
->stats
.internal
);
7833 PMAP_STATS_PEAK(pmap
->stats
.internal
);
7834 PMAP_STATS_ASSERTF(pmap
->stats
.internal
> 0, pmap
, "stats.internal %d", pmap
->stats
.internal
);
7835 pmap_ledger_credit(pmap
, task_ledgers
.internal
, machine_ptob(1));
7836 assert(!IS_ALTACCT_PAGE(pai
, pve_p
));
7837 assert(IS_INTERNAL_PAGE(pai
));
7838 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, machine_ptob(1));
7841 * Avoid the cost of another trap to handle the fast
7842 * fault when we next write to this page: let's just
7843 * handle that now since we already have all the
7844 * necessary information.
7847 arm_clear_fast_fault(ppnum
, VM_PROT_WRITE
);
7849 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
7852 pmap
!= kernel_pmap
) {
7853 /* one more "reusable" */
7854 OSAddAtomic(+1, &pmap
->stats
.reusable
);
7855 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
7856 PMAP_STATS_ASSERTF(pmap
->stats
.reusable
> 0, pmap
, "stats.reusable %d", pmap
->stats
.reusable
);
7857 /* one less "internal" */
7858 PMAP_STATS_ASSERTF(pmap
->stats
.internal
> 0, pmap
, "stats.internal %d", pmap
->stats
.internal
);
7859 OSAddAtomic(-1, &pmap
->stats
.internal
);
7860 pmap_ledger_debit(pmap
, task_ledgers
.internal
, machine_ptob(1));
7861 assert(!IS_ALTACCT_PAGE(pai
, pve_p
));
7862 assert(IS_INTERNAL_PAGE(pai
));
7863 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, machine_ptob(1));
7866 #ifdef PVH_FLAG_IOMMU
7869 pte_p
= PT_ENTRY_NULL
;
7870 if (pve_p
!= PV_ENTRY_NULL
) {
7871 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
7875 if (tlb_flush_needed
) {
7879 /* update global "reusable" status for this page */
7881 if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
7883 CLR_REUSABLE_PAGE(pai
);
7884 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
7886 SET_REUSABLE_PAGE(pai
);
7891 SET_MODFAULT_PAGE(pai
);
7894 SET_REFFAULT_PAGE(pai
);
boolean_t
arm_force_fast_fault(
	ppnum_t         ppnum,
	vm_prot_t       allow_mode,
	int             options,
	__unused void   *arg)
{
	pmap_paddr_t    phys = ptoa(ppnum);

	assert(ppnum != vm_page_fictitious_addr);

	if (!pa_valid(phys)) {
		return FALSE;   /* Not a managed page. */
	}

	return arm_force_fast_fault_internal(ppnum, allow_mode, options);
}
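/*
 * Illustrative sketch (not built): how a caller builds the allow_mode argument
 * for arm_force_fast_fault().  Clearing the referenced bit means reads and
 * instruction fetches must fault again; clearing the modified bit means writes
 * must fault again.  This mirrors the logic in phys_attribute_clear_internal();
 * the helper name is hypothetical.
 */
#if 0 /* example only */
static boolean_t
example_force_refmod_faults(ppnum_t pn, unsigned int bits)
{
	vm_prot_t allow_mode = VM_PROT_ALL;

	if (bits & PP_ATTR_REFERENCED) {
		allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
	}
	if (bits & PP_ATTR_MODIFIED) {
		allow_mode &= ~VM_PROT_WRITE;
	}
	return arm_force_fast_fault(pn, allow_mode, 0, NULL);
}
#endif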
7920 * Routine: arm_clear_fast_fault
7923 * Clear pending force fault for all mappings for this page based on
7924 * the observed fault type, update ref/modify bits.
7927 arm_clear_fast_fault(
7929 vm_prot_t fault_type
)
7931 pmap_paddr_t pa
= ptoa(ppnum
);
7936 boolean_t tlb_flush_needed
= FALSE
;
7939 assert(ppnum
!= vm_page_fictitious_addr
);
7941 if (!pa_valid(pa
)) {
7942 return FALSE
; /* Not a managed page. */
7946 pai
= (int)pa_index(pa
);
7947 ASSERT_PVH_LOCKED(pai
);
7948 pv_h
= pai_to_pvh(pai
);
7950 pte_p
= PT_ENTRY_NULL
;
7951 pve_p
= PV_ENTRY_NULL
;
7952 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
7953 pte_p
= pvh_ptep(pv_h
);
7954 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
7955 pve_p
= pvh_list(pv_h
);
7958 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
7959 vm_map_address_t va
;
7964 if (pve_p
!= PV_ENTRY_NULL
) {
7965 pte_p
= pve_get_ptep(pve_p
);
7968 if (pte_p
== PT_ENTRY_NULL
) {
7969 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p
, ppnum
);
7971 #ifdef PVH_FLAG_IOMMU
7972 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
7976 if (*pte_p
== ARM_PTE_EMPTY
) {
7977 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
7980 pmap
= ptep_get_pmap(pte_p
);
7981 va
= ptep_get_va(pte_p
);
7983 assert(va
>= pmap
->min
&& va
< pmap
->max
);
7988 if ((fault_type
& VM_PROT_WRITE
) && (pte_was_writeable(spte
))) {
7990 if (pmap
== kernel_pmap
) {
7991 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RWNA
));
7993 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RWRW
));
7997 tmplate
|= ARM_PTE_AF
;
7999 pte_set_was_writeable(tmplate
, false);
8000 pa_set_bits(pa
, PP_ATTR_REFERENCED
| PP_ATTR_MODIFIED
);
8001 } else if ((fault_type
& VM_PROT_READ
) && ((spte
& ARM_PTE_AF
) != ARM_PTE_AF
)) {
8002 tmplate
= spte
| ARM_PTE_AF
;
8005 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
8010 if (spte
!= tmplate
) {
8011 if (spte
!= ARM_PTE_TYPE_FAULT
) {
8012 WRITE_PTE_STRONG(pte_p
, tmplate
);
8013 flush_mmu_tlb_region_asid_async(va
, PAGE_SIZE
, pmap
);
8014 tlb_flush_needed
= TRUE
;
8016 WRITE_PTE(pte_p
, tmplate
);
8017 __builtin_arm_isb(ISB_SY
);
8022 #ifdef PVH_FLAG_IOMMU
8025 pte_p
= PT_ENTRY_NULL
;
8026 if (pve_p
!= PV_ENTRY_NULL
) {
8027 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
8030 if (tlb_flush_needed
) {
8037 * Determine if the fault was induced by software tracking of
8038 * modify/reference bits. If so, re-enable the mapping (and set
8039 * the appropriate bits).
8041 * Returns KERN_SUCCESS if the fault was induced and was
8042 * successfully handled.
8044 * Returns KERN_FAILURE if the fault was not induced and
8045 * the function was unable to deal with it.
8047 * Returns KERN_PROTECTION_FAILURE if the pmap layer explictly
8048 * disallows this type of access.
8050 MARK_AS_PMAP_TEXT
static kern_return_t
8051 arm_fast_fault_internal(
8053 vm_map_address_t va
,
8054 vm_prot_t fault_type
,
8055 __unused boolean_t from_user
)
8057 kern_return_t result
= KERN_FAILURE
;
8059 pt_entry_t spte
= ARM_PTE_TYPE_FAULT
;
8063 VALIDATE_PMAP(pmap
);
8068 * If the entry doesn't exist, is completely invalid, or is already
8069 * valid, we can't fix it here.
8072 ptep
= pmap_pte(pmap
, va
);
8073 if (ptep
!= PT_ENTRY_NULL
) {
8076 pa
= pte_to_pa(spte
);
8078 if ((spte
== ARM_PTE_TYPE_FAULT
) ||
8079 ARM_PTE_IS_COMPRESSED(spte
)) {
8084 if (!pa_valid(pa
)) {
8088 pai
= (int)pa_index(pa
);
8096 if ((IS_REFFAULT_PAGE(pai
)) ||
8097 ((fault_type
& VM_PROT_WRITE
) && IS_MODFAULT_PAGE(pai
))) {
8099 * An attempted access will always clear ref/mod fault state, as
8100 * appropriate for the fault type. arm_clear_fast_fault will
8101 * update the associated PTEs for the page as appropriate; if
8102 * any PTEs are updated, we redrive the access. If the mapping
8103 * does not actually allow for the attempted access, the
8104 * following fault will (hopefully) fail to update any PTEs, and
8105 * thus cause arm_fast_fault to decide that it failed to handle
8108 if (IS_REFFAULT_PAGE(pai
)) {
8109 CLR_REFFAULT_PAGE(pai
);
8111 if ((fault_type
& VM_PROT_WRITE
) && IS_MODFAULT_PAGE(pai
)) {
8112 CLR_MODFAULT_PAGE(pai
);
8115 if (arm_clear_fast_fault((ppnum_t
)atop(pa
), fault_type
)) {
8117 * Should this preserve KERN_PROTECTION_FAILURE? The
8118 * cost of not doing so is a another fault in a case
8119 * that should already result in an exception.
8121 result
= KERN_SUCCESS
;
8133 vm_map_address_t va
,
8134 vm_prot_t fault_type
,
8135 __unused boolean_t from_user
)
8137 kern_return_t result
= KERN_FAILURE
;
8139 if (va
< pmap
->min
|| va
>= pmap
->max
) {
8143 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT
) | DBG_FUNC_START
,
8144 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(va
), fault_type
,
8147 #if (__ARM_VMSA__ == 7)
8148 if (pmap
!= kernel_pmap
) {
8149 pmap_cpu_data_t
*cpu_data_ptr
= pmap_get_cpu_data();
8151 pmap_t cur_user_pmap
;
8153 cur_pmap
= current_pmap();
8154 cur_user_pmap
= cpu_data_ptr
->cpu_user_pmap
;
8156 if ((cur_user_pmap
== cur_pmap
) && (cur_pmap
== pmap
)) {
8157 if (cpu_data_ptr
->cpu_user_pmap_stamp
!= pmap
->stamp
) {
8158 pmap_set_pmap(pmap
, current_thread());
8159 result
= KERN_SUCCESS
;
8166 result
= arm_fast_fault_internal(pmap
, va
, fault_type
, from_user
);
8168 #if (__ARM_VMSA__ == 7)
8172 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT
) | DBG_FUNC_END
, result
);
8182 bcopy_phys((addr64_t
) (ptoa(psrc
)),
8183 (addr64_t
) (ptoa(pdst
)),
8189 * pmap_copy_page copies the specified (machine independent) pages.
8192 pmap_copy_part_page(
8194 vm_offset_t src_offset
,
8196 vm_offset_t dst_offset
,
8199 bcopy_phys((addr64_t
) (ptoa(psrc
) + src_offset
),
8200 (addr64_t
) (ptoa(pdst
) + dst_offset
),
8206 * pmap_zero_page zeros the specified (machine independent) page.
8212 assert(pn
!= vm_page_fictitious_addr
);
8213 bzero_phys((addr64_t
) ptoa(pn
), PAGE_SIZE
);
8217 * pmap_zero_part_page
8218 * zeros the specified (machine independent) part of a page.
8221 pmap_zero_part_page(
8226 assert(pn
!= vm_page_fictitious_addr
);
8227 assert(offset
+ len
<= PAGE_SIZE
);
8228 bzero_phys((addr64_t
) (ptoa(pn
) + offset
), len
);
8233 * nop in current arm implementation
8237 __unused thread_t t
)
8245 pt_entry_t
*ptep
, pte
;
8247 ptep
= pmap_pte(kernel_pmap
, LOWGLOBAL_ALIAS
);
8248 assert(ptep
!= PT_ENTRY_NULL
);
8249 assert(*ptep
== ARM_PTE_EMPTY
);
8251 pte
= pa_to_pte(ml_static_vtop((vm_offset_t
)&lowGlo
)) | AP_RONA
| ARM_PTE_NX
| ARM_PTE_PNX
| ARM_PTE_AF
| ARM_PTE_TYPE
;
8252 #if __ARM_KERNEL_PROTECT__
8254 #endif /* __ARM_KERNEL_PROTECT__ */
8255 pte
|= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK
);
8256 #if (__ARM_VMSA__ > 7)
8257 pte
|= ARM_PTE_SH(SH_OUTER_MEMORY
);
8262 FLUSH_PTE_RANGE(ptep
, (ptep
+ 1));
8263 PMAP_UPDATE_TLBS(kernel_pmap
, LOWGLOBAL_ALIAS
, LOWGLOBAL_ALIAS
+ PAGE_SIZE
);
vm_offset_t
pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
{
	if (__improbable(index >= CPUWINDOWS_MAX)) {
		panic("%s: invalid index %u", __func__, index);
	}
	return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
}
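/*
 * Illustrative sketch (not built): the per-CPU copy windows are laid out as
 * CPUWINDOWS_MAX consecutive pages per CPU starting at CPUWINDOWS_BASE, so
 * window 2 of CPU 1 lives at CPUWINDOWS_BASE + PAGE_SIZE * (CPUWINDOWS_MAX * 1 + 2).
 * The assertions below just restate that arithmetic for CPU 0.
 */
#if 0 /* example only */
static void
example_copy_window_layout(void)
{
	assert(pmap_cpu_windows_copy_addr(0, 0) == (vm_offset_t)CPUWINDOWS_BASE);
	assert(pmap_cpu_windows_copy_addr(0, 1) ==
	    (vm_offset_t)(CPUWINDOWS_BASE + PAGE_SIZE));
}
#endif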
8275 MARK_AS_PMAP_TEXT
static unsigned int
8276 pmap_map_cpu_windows_copy_internal(
8279 unsigned int wimg_bits
)
8281 pt_entry_t
*ptep
= NULL
, pte
;
8282 unsigned int cpu_num
;
8284 vm_offset_t cpu_copywindow_vaddr
= 0;
8286 cpu_num
= pmap_get_cpu_data()->cpu_number
;
8288 for (i
= 0; i
< CPUWINDOWS_MAX
; i
++) {
8289 cpu_copywindow_vaddr
= pmap_cpu_windows_copy_addr(cpu_num
, i
);
8290 ptep
= pmap_pte(kernel_pmap
, cpu_copywindow_vaddr
);
8291 assert(!ARM_PTE_IS_COMPRESSED(*ptep
));
8292 if (*ptep
== ARM_PTE_TYPE_FAULT
) {
8296 if (i
== CPUWINDOWS_MAX
) {
8297 panic("pmap_map_cpu_windows_copy: out of window\n");
8300 pte
= pa_to_pte(ptoa(pn
)) | ARM_PTE_TYPE
| ARM_PTE_AF
| ARM_PTE_NX
| ARM_PTE_PNX
;
8301 #if __ARM_KERNEL_PROTECT__
8303 #endif /* __ARM_KERNEL_PROTECT__ */
8305 pte
|= wimg_to_pte(wimg_bits
);
8307 if (prot
& VM_PROT_WRITE
) {
8308 pte
|= ARM_PTE_AP(AP_RWNA
);
8310 pte
|= ARM_PTE_AP(AP_RONA
);
8313 WRITE_PTE_FAST(ptep
, pte
);
8315 * Invalidate tlb. Cover nested cpu_copywindow_vaddr usage with the interrupted context
8316 * in pmap_unmap_cpu_windows_copy() after clearing the pte and before tlb invalidate.
8318 FLUSH_PTE_STRONG(ptep
);
8319 PMAP_UPDATE_TLBS(kernel_pmap
, cpu_copywindow_vaddr
, cpu_copywindow_vaddr
+ PAGE_SIZE
);
8325 pmap_map_cpu_windows_copy(
8328 unsigned int wimg_bits
)
8330 return pmap_map_cpu_windows_copy_internal(pn
, prot
, wimg_bits
);
8333 MARK_AS_PMAP_TEXT
static void
8334 pmap_unmap_cpu_windows_copy_internal(
8338 unsigned int cpu_num
;
8339 vm_offset_t cpu_copywindow_vaddr
= 0;
8341 cpu_num
= pmap_get_cpu_data()->cpu_number
;
8343 cpu_copywindow_vaddr
= pmap_cpu_windows_copy_addr(cpu_num
, index
);
8344 /* Issue full-system DSB to ensure prior operations on the per-CPU window
8345 * (which are likely to have been on I/O memory) are complete before
8346 * tearing down the mapping. */
8347 __builtin_arm_dsb(DSB_SY
);
8348 ptep
= pmap_pte(kernel_pmap
, cpu_copywindow_vaddr
);
8349 WRITE_PTE_STRONG(ptep
, ARM_PTE_TYPE_FAULT
);
8350 PMAP_UPDATE_TLBS(kernel_pmap
, cpu_copywindow_vaddr
, cpu_copywindow_vaddr
+ PAGE_SIZE
);
8354 pmap_unmap_cpu_windows_copy(
8357 return pmap_unmap_cpu_windows_copy_internal(index
);
/*
 *	Indicate that a pmap is intended to be used as a nested pmap
 *	within one or more larger address spaces.  This must be set
 *	before pmap_nest() is called with this pmap as the 'subordinate'.
 */
MARK_AS_PMAP_TEXT static void
pmap_set_nested_internal(
	pmap_t pmap)
{
	VALIDATE_PMAP(pmap);
	pmap->nested = TRUE;
}

void
pmap_set_nested(
	pmap_t pmap)
{
	pmap_set_nested_internal(pmap);
}
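/*
 * Illustrative sketch (not built): the expected call order for sharing a
 * region.  A pmap must be marked nestable before it is installed into another
 * address space; the base address and size here are hypothetical placeholders.
 */
#if 0 /* example only */
static kern_return_t
example_share_region(pmap_t grand, pmap_t shared, addr64_t base, uint64_t size)
{
	pmap_set_nested(shared);                /* must precede pmap_nest() */
	return pmap_nest(grand, shared, base, base, size);
}
#endif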
8381 * pmap_trim_range(pmap, start, end)
8383 * pmap = pmap to operate on
8384 * start = start of the range
8385 * end = end of the range
8387 * Attempts to deallocate TTEs for the given range in the nested range.
8389 MARK_AS_PMAP_TEXT
static void
8396 addr64_t nested_region_start
;
8397 addr64_t nested_region_end
;
8398 addr64_t adjusted_start
;
8399 addr64_t adjusted_end
;
8400 addr64_t adjust_offmask
;
8404 if (__improbable(end
< start
)) {
8405 panic("%s: invalid address range, "
8406 "pmap=%p, start=%p, end=%p",
8408 pmap
, (void*)start
, (void*)end
);
8411 nested_region_start
= pmap
->nested
? pmap
->nested_region_subord_addr
: pmap
->nested_region_subord_addr
;
8412 nested_region_end
= nested_region_start
+ pmap
->nested_region_size
;
8414 if (__improbable((start
< nested_region_start
) || (end
> nested_region_end
))) {
8415 panic("%s: range outside nested region %p-%p, "
8416 "pmap=%p, start=%p, end=%p",
8417 __func__
, (void *)nested_region_start
, (void *)nested_region_end
,
8418 pmap
, (void*)start
, (void*)end
);
8421 /* Contract the range to TT page boundaries. */
8422 #if (__ARM_VMSA__ > 7)
8423 adjust_offmask
= ARM_TT_TWIG_OFFMASK
;
8424 #else /* (__ARM_VMSA__ > 7) */
8425 adjust_offmask
= ((ARM_TT_TWIG_SIZE
* 4) - 1);
8426 #endif /* (__ARM_VMSA__ > 7) */
8428 adjusted_start
= ((start
+ adjust_offmask
) & ~adjust_offmask
);
8429 adjusted_end
= end
& ~adjust_offmask
;
8431 /* Iterate over the range, trying to remove TTEs. */
8432 for (cur
= adjusted_start
; (cur
< adjusted_end
) && (cur
>= adjusted_start
); cur
+= ARM_TT_TWIG_SIZE
) {
8433 bool modified
= false;
8437 tte_p
= pmap_tte(pmap
, cur
);
8439 if (tte_p
== (tt_entry_t
*) NULL
) {
8443 if ((*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
8444 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
8446 #if (__ARM_VMSA__ == 7)
8447 if ((ptep_get_ptd(pte_p
)->pt_cnt
[ARM_PT_DESC_INDEX(pte_p
)].refcnt
== 0) &&
8448 (pmap
!= kernel_pmap
)) {
8449 if (pmap
->nested
== TRUE
) {
8450 /* Deallocate for the nested map. */
8451 pmap_tte_deallocate(pmap
, tte_p
, PMAP_TT_L1_LEVEL
);
8453 /* Just remove for the parent map. */
8454 pmap_tte_remove(pmap
, tte_p
, PMAP_TT_L1_LEVEL
);
8457 flush_mmu_tlb_entry((cur
& ~ARM_TT_L1_OFFMASK
) | (pmap
->asid
& 0xff));
8461 if ((ptep_get_ptd(pte_p
)->pt_cnt
[ARM_PT_DESC_INDEX(pte_p
)].refcnt
== 0) &&
8462 (pmap
!= kernel_pmap
)) {
8463 if (pmap
->nested
== TRUE
) {
8464 /* Deallocate for the nested map. */
8465 pmap_tte_deallocate(pmap
, tte_p
, PMAP_TT_L2_LEVEL
);
8467 /* Just remove for the parent map. */
8468 pmap_tte_remove(pmap
, tte_p
, PMAP_TT_L2_LEVEL
);
8471 flush_mmu_tlb_entry(tlbi_addr(cur
& ~ARM_TT_L2_OFFMASK
) | tlbi_asid(pmap
->asid
));
8481 PMAP_UPDATE_TLBS(pmap
, cur
, cur
+ PAGE_SIZE
);
8485 #if (__ARM_VMSA__ > 7)
8486 /* Remove empty L2 TTs. */
8487 adjusted_start
= ((start
+ ARM_TT_L1_OFFMASK
) & ~ARM_TT_L1_OFFMASK
);
8488 adjusted_end
= end
& ~ARM_TT_L1_OFFMASK
;
8490 for (cur
= adjusted_start
; (cur
< adjusted_end
) && (cur
>= adjusted_start
); cur
+= ARM_TT_L1_SIZE
) {
8491 /* For each L1 entry in our range... */
8494 bool remove_tt1e
= true;
8495 tt_entry_t
* tt1e_p
= pmap_tt1e(pmap
, cur
);
8496 tt_entry_t
* tt2e_start
;
8497 tt_entry_t
* tt2e_end
;
8498 tt_entry_t
* tt2e_p
;
8501 if (tt1e_p
== NULL
) {
8508 if (tt1e
== ARM_TTE_TYPE_FAULT
) {
8513 tt2e_start
= &((tt_entry_t
*) phystokv(tt1e
& ARM_TTE_TABLE_MASK
))[0];
8514 tt2e_end
= &tt2e_start
[TTE_PGENTRIES
];
8516 for (tt2e_p
= tt2e_start
; tt2e_p
< tt2e_end
; tt2e_p
++) {
8517 if (*tt2e_p
!= ARM_TTE_TYPE_FAULT
) {
8519 * If any TTEs are populated, don't remove the
8522 remove_tt1e
= false;
8527 pmap_tte_deallocate(pmap
, tt1e_p
, PMAP_TT_L1_LEVEL
);
8528 PMAP_UPDATE_TLBS(pmap
, cur
, cur
+ PAGE_SIZE
);
8533 #endif /* (__ARM_VMSA__ > 7) */
8537 * pmap_trim_internal(grand, subord, vstart, nstart, size)
8539 * grand = pmap subord is nested in
8540 * subord = nested pmap
8541 * vstart = start of the used range in grand
8542 * nstart = start of the used range in nstart
8543 * size = size of the used range
8545 * Attempts to trim the shared region page tables down to only cover the given
8546 * range in subord and grand.
8548 MARK_AS_PMAP_TEXT
static void
8556 addr64_t vend
, nend
;
8557 addr64_t adjust_offmask
;
8559 if (__improbable(os_add_overflow(vstart
, size
, &vend
))) {
8560 panic("%s: grand addr wraps around, "
8561 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8562 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8565 if (__improbable(os_add_overflow(nstart
, size
, &nend
))) {
8566 panic("%s: nested addr wraps around, "
8567 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8568 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8571 VALIDATE_PMAP(grand
);
8572 VALIDATE_PMAP(subord
);
8576 if (!subord
->nested
) {
8577 panic("%s: subord is not nestable, "
8578 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8579 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8582 if (grand
->nested
) {
8583 panic("%s: grand is nestable, "
8584 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8585 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8588 if (grand
->nested_pmap
!= subord
) {
8589 panic("%s: grand->nested != subord, "
8590 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8591 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8595 if ((vstart
< grand
->nested_region_grand_addr
) || (vend
> (grand
->nested_region_grand_addr
+ grand
->nested_region_size
))) {
8596 panic("%s: grand range not in nested region, "
8597 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8598 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8601 if ((nstart
< grand
->nested_region_grand_addr
) || (nend
> (grand
->nested_region_grand_addr
+ grand
->nested_region_size
))) {
8602 panic("%s: subord range not in nested region, "
8603 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8604 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8609 if (!grand
->nested_has_no_bounds_ref
) {
8610 assert(subord
->nested_bounds_set
);
8612 if (!grand
->nested_bounds_set
) {
8613 /* Inherit the bounds from subord. */
8614 grand
->nested_region_true_start
= (subord
->nested_region_true_start
- grand
->nested_region_subord_addr
) + grand
->nested_region_grand_addr
;
8615 grand
->nested_region_true_end
= (subord
->nested_region_true_end
- grand
->nested_region_subord_addr
) + grand
->nested_region_grand_addr
;
8616 grand
->nested_bounds_set
= true;
8619 PMAP_UNLOCK(subord
);
8623 if ((!subord
->nested_bounds_set
) && size
) {
8624 #if (__ARM_VMSA__ > 7)
8625 adjust_offmask
= ARM_TT_TWIG_OFFMASK
;
8626 #else /* (__ARM_VMSA__ > 7) */
8627 adjust_offmask
= ((ARM_TT_TWIG_SIZE
* 4) - 1);
8628 #endif /* (__ARM_VMSA__ > 7) */
8630 subord
->nested_region_true_start
= nstart
;
8631 subord
->nested_region_true_end
= nend
;
8632 subord
->nested_region_true_start
&= ~adjust_offmask
;
8634 if (__improbable(os_add_overflow(subord
->nested_region_true_end
, adjust_offmask
, &subord
->nested_region_true_end
))) {
8635 panic("%s: padded true end wraps around, "
8636 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8637 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8640 subord
->nested_region_true_end
&= ~adjust_offmask
;
8641 subord
->nested_bounds_set
= true;
8644 if (subord
->nested_bounds_set
) {
8645 /* Inherit the bounds from subord. */
8646 grand
->nested_region_true_start
= (subord
->nested_region_true_start
- grand
->nested_region_subord_addr
) + grand
->nested_region_grand_addr
;
8647 grand
->nested_region_true_end
= (subord
->nested_region_true_end
- grand
->nested_region_subord_addr
) + grand
->nested_region_grand_addr
;
8648 grand
->nested_bounds_set
= true;
8650 /* If we know the bounds, we can trim the pmap. */
8651 grand
->nested_has_no_bounds_ref
= false;
8652 PMAP_UNLOCK(subord
);
8654 /* Don't trim if we don't know the bounds. */
8655 PMAP_UNLOCK(subord
);
8659 /* Trim grand to only cover the given range. */
8660 pmap_trim_range(grand
, grand
->nested_region_grand_addr
, grand
->nested_region_true_start
);
8661 pmap_trim_range(grand
, grand
->nested_region_true_end
, (grand
->nested_region_grand_addr
+ grand
->nested_region_size
));
8663 /* Try to trim subord. */
8664 pmap_trim_subord(subord
);
8667 MARK_AS_PMAP_TEXT
static void
8668 pmap_trim_self(pmap_t pmap
)
8670 if (pmap
->nested_has_no_bounds_ref
&& pmap
->nested_pmap
) {
8671 /* If we have a no bounds ref, we need to drop it. */
8672 PMAP_LOCK(pmap
->nested_pmap
);
8673 pmap
->nested_has_no_bounds_ref
= false;
8674 boolean_t nested_bounds_set
= pmap
->nested_pmap
->nested_bounds_set
;
8675 vm_map_offset_t nested_region_true_start
= (pmap
->nested_pmap
->nested_region_true_start
- pmap
->nested_region_subord_addr
) + pmap
->nested_region_grand_addr
;
8676 vm_map_offset_t nested_region_true_end
= (pmap
->nested_pmap
->nested_region_true_end
- pmap
->nested_region_subord_addr
) + pmap
->nested_region_grand_addr
;
8677 PMAP_UNLOCK(pmap
->nested_pmap
);
8679 if (nested_bounds_set
) {
8680 pmap_trim_range(pmap
, pmap
->nested_region_grand_addr
, nested_region_true_start
);
8681 pmap_trim_range(pmap
, nested_region_true_end
, (pmap
->nested_region_grand_addr
+ pmap
->nested_region_size
));
8684 * Try trimming the nested pmap, in case we had the
8687 pmap_trim_subord(pmap
->nested_pmap
);
/*
 * pmap_trim_subord(grand, subord)
 *
 * grand  = pmap that we have nested subord in
 * subord = nested pmap we are attempting to trim
 *
 * Trims subord if possible
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_subord(pmap_t subord)
{
	bool contract_subord = false;

	PMAP_LOCK(subord);

	subord->nested_no_bounds_refcnt--;

	if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
		/* If this was the last no bounds reference, trim subord. */
		contract_subord = true;
	}

	PMAP_UNLOCK(subord);

	if (contract_subord) {
		pmap_trim_range(subord, subord->nested_region_subord_addr, subord->nested_region_true_start);
		pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_subord_addr + subord->nested_region_size);
	}
}

void
pmap_trim(
	pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	addr64_t nstart,
	uint64_t size)
{
	pmap_trim_internal(grand, subord, vstart, nstart, size);
}
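/*
 * Illustrative sketch (not built): once the true bounds of the shared region
 * are known, pmap_trim() releases the page tables that cover the unused head
 * and tail of the nested range in both pmaps.  The range values here are
 * hypothetical placeholders.
 */
#if 0 /* example only */
static void
example_trim_shared_region(pmap_t grand, pmap_t shared,
    addr64_t used_start, uint64_t used_size)
{
	/* trims [nested start, used_start) and [used_start + used_size, nested end) */
	pmap_trim(grand, shared, used_start, used_start, used_size);
}
#endif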
8733 * kern_return_t pmap_nest(grand, subord, vstart, size)
8735 * grand = the pmap that we will nest subord into
8736 * subord = the pmap that goes into the grand
8737 * vstart = start of range in pmap to be inserted
8738 * nstart = start of range in pmap nested pmap
8739 * size = Size of nest area (up to 16TB)
8741 * Inserts a pmap into another. This is used to implement shared segments.
8745 MARK_AS_PMAP_TEXT
static kern_return_t
8753 kern_return_t kr
= KERN_FAILURE
;
8754 vm_map_offset_t vaddr
, nvaddr
;
8758 unsigned int num_tte
;
8759 unsigned int nested_region_asid_bitmap_size
;
8760 unsigned int* nested_region_asid_bitmap
;
8761 int expand_options
= 0;
8763 addr64_t vend
, nend
;
8764 if (__improbable(os_add_overflow(vstart
, size
, &vend
))) {
8765 panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__
, grand
, vstart
, size
);
8767 if (__improbable(os_add_overflow(nstart
, size
, &nend
))) {
8768 panic("%s: %p nested addr wraps around: 0x%llx + 0x%llx", __func__
, subord
, nstart
, size
);
8770 VALIDATE_PMAP(grand
);
8771 VALIDATE_PMAP(subord
);
8774 #if (__ARM_VMSA__ == 7)
8775 if (((size
| vstart
| nstart
) & ARM_TT_L1_PT_OFFMASK
) != 0x0ULL
) {
8776 return KERN_INVALID_VALUE
; /* Nest 4MB region */
8779 if (((size
| vstart
| nstart
) & (ARM_TT_L2_OFFMASK
)) != 0x0ULL
) {
8780 panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx\n", grand
, vstart
, nstart
, size
);
8784 if (!subord
->nested
) {
8785 panic("%s: subordinate pmap %p is not nestable", __func__
, subord
);
8788 if ((grand
->nested_pmap
!= PMAP_NULL
) && (grand
->nested_pmap
!= subord
)) {
8789 panic("pmap_nest() pmap %p has a nested pmap\n", grand
);
8792 if (subord
->nested_region_asid_bitmap
== NULL
) {
8793 nested_region_asid_bitmap_size
= (unsigned int)(size
>> ARM_TT_TWIG_SHIFT
) / (sizeof(unsigned int) * NBBY
);
8795 nested_region_asid_bitmap
= kalloc(nested_region_asid_bitmap_size
* sizeof(unsigned int));
8796 bzero(nested_region_asid_bitmap
, nested_region_asid_bitmap_size
* sizeof(unsigned int));
8799 if (subord
->nested_region_asid_bitmap
== NULL
) {
8800 subord
->nested_region_asid_bitmap
= nested_region_asid_bitmap
;
8801 subord
->nested_region_asid_bitmap_size
= nested_region_asid_bitmap_size
;
8802 subord
->nested_region_subord_addr
= nstart
;
8803 subord
->nested_region_size
= (mach_vm_offset_t
) size
;
8804 nested_region_asid_bitmap
= NULL
;
8806 PMAP_UNLOCK(subord
);
8807 if (nested_region_asid_bitmap
!= NULL
) {
8808 kfree(nested_region_asid_bitmap
, nested_region_asid_bitmap_size
* sizeof(unsigned int));
8811 if ((subord
->nested_region_subord_addr
+ subord
->nested_region_size
) < nend
) {
8813 unsigned int new_nested_region_asid_bitmap_size
;
8814 unsigned int* new_nested_region_asid_bitmap
;
8816 nested_region_asid_bitmap
= NULL
;
8817 nested_region_asid_bitmap_size
= 0;
8818 new_size
= nend
- subord
->nested_region_subord_addr
;
8820 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
8821 new_nested_region_asid_bitmap_size
= (unsigned int)((new_size
>> ARM_TT_TWIG_SHIFT
) / (sizeof(unsigned int) * NBBY
)) + 1;
8823 new_nested_region_asid_bitmap
= kalloc(new_nested_region_asid_bitmap_size
* sizeof(unsigned int));
8825 if (subord
->nested_region_size
< new_size
) {
8826 bzero(new_nested_region_asid_bitmap
, new_nested_region_asid_bitmap_size
* sizeof(unsigned int));
8827 bcopy(subord
->nested_region_asid_bitmap
, new_nested_region_asid_bitmap
, subord
->nested_region_asid_bitmap_size
);
8828 nested_region_asid_bitmap_size
= subord
->nested_region_asid_bitmap_size
;
8829 nested_region_asid_bitmap
= subord
->nested_region_asid_bitmap
;
8830 subord
->nested_region_asid_bitmap
= new_nested_region_asid_bitmap
;
8831 subord
->nested_region_asid_bitmap_size
= new_nested_region_asid_bitmap_size
;
8832 subord
->nested_region_size
= new_size
;
8833 new_nested_region_asid_bitmap
= NULL
;
8835 PMAP_UNLOCK(subord
);
8836 if (nested_region_asid_bitmap
!= NULL
)
8837 { kfree(nested_region_asid_bitmap
, nested_region_asid_bitmap_size
* sizeof(unsigned int));}
8838 if (new_nested_region_asid_bitmap
!= NULL
)
8839 { kfree(new_nested_region_asid_bitmap
, new_nested_region_asid_bitmap_size
* sizeof(unsigned int));}
8843 if (grand
->nested_pmap
== PMAP_NULL
) {
8844 grand
->nested_pmap
= subord
;
8846 if (!subord
->nested_bounds_set
) {
8848 * We are nesting without the shared regions bounds
8849 * being known. We'll have to trim the pmap later.
8851 grand
->nested_has_no_bounds_ref
= true;
8852 subord
->nested_no_bounds_refcnt
++;
8855 grand
->nested_region_grand_addr
= vstart
;
8856 grand
->nested_region_subord_addr
= nstart
;
8857 grand
->nested_region_size
= (mach_vm_offset_t
) size
;
8859 if ((grand
->nested_region_grand_addr
> vstart
)) {
8860 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand
);
8861 } else if ((grand
->nested_region_grand_addr
+ grand
->nested_region_size
) < vend
) {
8862 grand
->nested_region_size
= (mach_vm_offset_t
)(vstart
- grand
->nested_region_grand_addr
+ size
);
8866 #if (__ARM_VMSA__ == 7)
8867 nvaddr
= (vm_map_offset_t
) nstart
;
8868 vaddr
= (vm_map_offset_t
) vstart
;
8869 num_tte
= size
>> ARM_TT_L1_SHIFT
;
8871 for (i
= 0; i
< num_tte
; i
++) {
8872 if (((subord
->nested_region_true_start
) > nvaddr
) || ((subord
->nested_region_true_end
) <= nvaddr
)) {
8876 stte_p
= pmap_tte(subord
, nvaddr
);
8877 if ((stte_p
== (tt_entry_t
*)NULL
) || (((*stte_p
) & ARM_TTE_TYPE_MASK
) != ARM_TTE_TYPE_TABLE
)) {
8878 PMAP_UNLOCK(subord
);
8879 kr
= pmap_expand(subord
, nvaddr
, expand_options
, PMAP_TT_L2_LEVEL
);
8881 if (kr
!= KERN_SUCCESS
) {
8888 PMAP_UNLOCK(subord
);
8890 stte_p
= pmap_tte(grand
, vaddr
);
8891 if (stte_p
== (tt_entry_t
*)NULL
) {
8893 kr
= pmap_expand(grand
, vaddr
, expand_options
, PMAP_TT_L1_LEVEL
);
8895 if (kr
!= KERN_SUCCESS
) {
8906 nvaddr
+= ARM_TT_L1_SIZE
;
8907 vaddr
+= ARM_TT_L1_SIZE
;
8911 nvaddr
= (vm_map_offset_t
) nstart
;
8912 num_tte
= (unsigned int)(size
>> ARM_TT_L2_SHIFT
);
8914 for (i
= 0; i
< num_tte
; i
++) {
8915 if (((subord
->nested_region_true_start
) > nvaddr
) || ((subord
->nested_region_true_end
) <= nvaddr
)) {
8919 stte_p
= pmap_tt2e(subord
, nvaddr
);
8920 if (stte_p
== PT_ENTRY_NULL
|| *stte_p
== ARM_TTE_EMPTY
) {
8921 PMAP_UNLOCK(subord
);
8922 kr
= pmap_expand(subord
, nvaddr
, expand_options
, PMAP_TT_L3_LEVEL
);
8924 if (kr
!= KERN_SUCCESS
) {
8932 nvaddr
+= ARM_TT_L2_SIZE
;
8935 PMAP_UNLOCK(subord
);
8938 * copy tte's from subord pmap into grand pmap
8942 nvaddr
= (vm_map_offset_t
) nstart
;
8943 vaddr
= (vm_map_offset_t
) vstart
;
8946 #if (__ARM_VMSA__ == 7)
8947 for (i
= 0; i
< num_tte
; i
++) {
8948 if (((subord
->nested_region_true_start
) > nvaddr
) || ((subord
->nested_region_true_end
) <= nvaddr
)) {
8952 stte_p
= pmap_tte(subord
, nvaddr
);
8953 gtte_p
= pmap_tte(grand
, vaddr
);
8957 nvaddr
+= ARM_TT_L1_SIZE
;
8958 vaddr
+= ARM_TT_L1_SIZE
;
8961 for (i
= 0; i
< num_tte
; i
++) {
8962 if (((subord
->nested_region_true_start
) > nvaddr
) || ((subord
->nested_region_true_end
) <= nvaddr
)) {
8966 stte_p
= pmap_tt2e(subord
, nvaddr
);
8967 gtte_p
= pmap_tt2e(grand
, vaddr
);
8968 if (gtte_p
== PT_ENTRY_NULL
) {
8970 kr
= pmap_expand(grand
, vaddr
, expand_options
, PMAP_TT_L2_LEVEL
);
8973 if (kr
!= KERN_SUCCESS
) {
8977 gtte_p
= pmap_tt2e(grand
, vaddr
);
8982 vaddr
+= ARM_TT_L2_SIZE
;
8983 nvaddr
+= ARM_TT_L2_SIZE
;
8990 stte_p
= pmap_tte(grand
, vstart
);
8991 FLUSH_PTE_RANGE_STRONG(stte_p
, stte_p
+ num_tte
);
8993 #if (__ARM_VMSA__ > 7)
8995 * check for overflow on LP64 arch
8997 assert((size
& 0xFFFFFFFF00000000ULL
) == 0);
8999 PMAP_UPDATE_TLBS(grand
, vstart
, vend
);
9013 kern_return_t kr
= KERN_FAILURE
;
9015 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST
) | DBG_FUNC_START
,
9016 VM_KERNEL_ADDRHIDE(grand
), VM_KERNEL_ADDRHIDE(subord
),
9017 VM_KERNEL_ADDRHIDE(vstart
));
9019 kr
= pmap_nest_internal(grand
, subord
, vstart
, nstart
, size
);
9021 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST
) | DBG_FUNC_END
, kr
);
9027 * kern_return_t pmap_unnest(grand, vaddr)
9029 * grand = the pmap that will have the virtual range unnested
9030 * vaddr = start of range in pmap to be unnested
9031 * size = size of range in pmap to be unnested
9041 return pmap_unnest_options(grand
, vaddr
, size
, 0);
9044 MARK_AS_PMAP_TEXT
static kern_return_t
9045 pmap_unnest_options_internal(
9049 unsigned int option
)
9051 vm_map_offset_t start
;
9052 vm_map_offset_t addr
;
9054 unsigned int current_index
;
9055 unsigned int start_index
;
9056 unsigned int max_index
;
9057 unsigned int num_tte
;
9061 if (__improbable(os_add_overflow(vaddr
, size
, &vend
))) {
9062 panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__
, grand
, vaddr
, size
);
9065 VALIDATE_PMAP(grand
);
9067 #if (__ARM_VMSA__ == 7)
9068 if (((size
| vaddr
) & ARM_TT_L1_PT_OFFMASK
) != 0x0ULL
) {
9069 panic("pmap_unnest(): unaligned request\n");
9072 if (((size
| vaddr
) & ARM_TT_L2_OFFMASK
) != 0x0ULL
) {
9073 panic("pmap_unnest(): unaligned request\n");
9077 if ((option
& PMAP_UNNEST_CLEAN
) == 0) {
9078 if (grand
->nested_pmap
== NULL
) {
9079 panic("%s: %p has no nested pmap", __func__
, grand
);
9082 if ((vaddr
< grand
->nested_region_grand_addr
) || (vend
> (grand
->nested_region_grand_addr
+ grand
->nested_region_size
))) {
9083 panic("%s: %p: unnest request to region not-fully-nested region [%p, %p)", __func__
, grand
, (void*)vaddr
, (void*)vend
);
9086 PMAP_LOCK(grand
->nested_pmap
);
9088 start
= vaddr
- grand
->nested_region_grand_addr
+ grand
->nested_region_subord_addr
;
9089 start_index
= (unsigned int)((vaddr
- grand
->nested_region_grand_addr
) >> ARM_TT_TWIG_SHIFT
);
9090 max_index
= (unsigned int)(start_index
+ (size
>> ARM_TT_TWIG_SHIFT
));
9091 num_tte
= (unsigned int)(size
>> ARM_TT_TWIG_SHIFT
);
9093 for (current_index
= start_index
, addr
= start
; current_index
< max_index
; current_index
++, addr
+= ARM_TT_TWIG_SIZE
) {
9094 pt_entry_t
*bpte
, *epte
, *cpte
;
9096 if (addr
< grand
->nested_pmap
->nested_region_true_start
) {
9097 /* We haven't reached the interesting range. */
9101 if (addr
>= grand
->nested_pmap
->nested_region_true_end
) {
9102 /* We're done with the interesting range. */
9106 bpte
= pmap_pte(grand
->nested_pmap
, addr
);
9107 epte
= bpte
+ (ARM_TT_LEAF_INDEX_MASK
>> ARM_TT_LEAF_SHIFT
);
9109 if (!testbit(current_index
, (int *)grand
->nested_pmap
->nested_region_asid_bitmap
)) {
9110 setbit(current_index
, (int *)grand
->nested_pmap
->nested_region_asid_bitmap
);
9112 for (cpte
= bpte
; cpte
<= epte
; cpte
++) {
9115 boolean_t managed
= FALSE
;
9118 if ((*cpte
!= ARM_PTE_TYPE_FAULT
)
9119 && (!ARM_PTE_IS_COMPRESSED(*cpte
))) {
9122 pa
= pte_to_pa(spte
);
9123 if (!pa_valid(pa
)) {
9126 pai
= (int)pa_index(pa
);
9129 pa
= pte_to_pa(spte
);
9130 if (pai
== (int)pa_index(pa
)) {
9132 break; // Leave the PVH locked as we'll unlock it after we update the PTE
9137 if (((spte
& ARM_PTE_NG
) != ARM_PTE_NG
)) {
9138 WRITE_PTE_FAST(cpte
, (spte
| ARM_PTE_NG
));
9142 ASSERT_PVH_LOCKED(pai
);
9149 FLUSH_PTE_RANGE_STRONG(bpte
, epte
);
9150 flush_mmu_tlb_region_asid_async(start
, (unsigned)size
, grand
->nested_pmap
);
9155 PMAP_UNLOCK(grand
->nested_pmap
);
9161 * invalidate all pdes for segment at vaddr in pmap grand
9166 num_tte
= (unsigned int)(size
>> ARM_TT_TWIG_SHIFT
);
9168 for (i
= 0; i
< num_tte
; i
++, addr
+= ARM_TT_TWIG_SIZE
) {
9169 if (addr
< grand
->nested_pmap
->nested_region_true_start
) {
9170 /* We haven't reached the interesting range. */
9174 if (addr
>= grand
->nested_pmap
->nested_region_true_end
) {
9175 /* We're done with the interesting range. */
9179 tte_p
= pmap_tte(grand
, addr
);
9180 *tte_p
= ARM_TTE_TYPE_FAULT
;
9183 tte_p
= pmap_tte(grand
, start
);
9184 FLUSH_PTE_RANGE_STRONG(tte_p
, tte_p
+ num_tte
);
9185 PMAP_UPDATE_TLBS(grand
, start
, vend
);
9189 return KERN_SUCCESS
;
9193 pmap_unnest_options(
9197 unsigned int option
)
9199 kern_return_t kr
= KERN_FAILURE
;
9201 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST
) | DBG_FUNC_START
,
9202 VM_KERNEL_ADDRHIDE(grand
), VM_KERNEL_ADDRHIDE(vaddr
));
9204 kr
= pmap_unnest_options_internal(grand
, vaddr
, size
, option
);
9206 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST
) | DBG_FUNC_END
, kr
);
9212 pmap_adjust_unnest_parameters(
9214 __unused vm_map_offset_t
*s
,
9215 __unused vm_map_offset_t
*e
)
9217 return TRUE
; /* to get to log_unnest_badness()... */
9221 * disable no-execute capability on
9222 * the specified pmap
9224 #if DEVELOPMENT || DEBUG
9229 pmap
->nx_enabled
= FALSE
;
9234 __unused pmap_t pmap
)
9243 pt_fake_zone_index
= zone_index
;
9249 vm_size_t
*cur_size
, vm_size_t
*max_size
, vm_size_t
*elem_size
, vm_size_t
*alloc_size
,
9250 uint64_t *sum_size
, int *collectable
, int *exhaustable
, int *caller_acct
)
9252 *count
= inuse_pmap_pages_count
;
9253 *cur_size
= PAGE_SIZE
* (inuse_pmap_pages_count
);
9254 *max_size
= PAGE_SIZE
* (inuse_pmap_pages_count
+ vm_page_inactive_count
+ vm_page_active_count
+ vm_page_free_count
);
9255 *elem_size
= PAGE_SIZE
;
9256 *alloc_size
= PAGE_SIZE
;
9257 *sum_size
= (alloc_pmap_pages_count
) * PAGE_SIZE
;
9265 * flush a range of hardware TLB entries.
9266 * NOTE: assumes the smallest TLB entry in use will be for
9267 * an ARM small page (4K).
9270 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
9271 #define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
9274 flush_mmu_tlb_region_asid_async(
9279 #if (__ARM_VMSA__ == 7)
9280 vm_offset_t end
= va
+ length
;
9285 if (length
/ ARM_SMALL_PAGE_SIZE
> ARM_FULL_TLB_FLUSH_THRESHOLD
) {
9286 boolean_t flush_all
= FALSE
;
9288 if ((asid
== 0) || (pmap
->nested
== TRUE
)) {
9292 flush_mmu_tlb_async();
9294 flush_mmu_tlb_asid_async(asid
);
9299 if (pmap
->nested
== TRUE
) {
9303 va
= arm_trunc_page(va
);
9305 flush_mmu_tlb_mva_entries_async(va
);
9306 va
+= ARM_SMALL_PAGE_SIZE
;
9311 va
= arm_trunc_page(va
) | (asid
& 0xff);
9312 flush_mmu_tlb_entries_async(va
, end
);
9315 vm_offset_t end
= va
+ length
;
9320 if ((length
>> ARM_TT_L3_SHIFT
) > ARM64_FULL_TLB_FLUSH_THRESHOLD
) {
9321 boolean_t flush_all
= FALSE
;
9323 if ((asid
== 0) || (pmap
->nested
== TRUE
)) {
9327 flush_mmu_tlb_async();
9329 flush_mmu_tlb_asid_async((uint64_t)asid
<< TLBI_ASID_SHIFT
);
9333 va
= tlbi_asid(asid
) | tlbi_addr(va
);
9334 end
= tlbi_asid(asid
) | tlbi_addr(end
);
9335 if (pmap
->nested
== TRUE
) {
9336 flush_mmu_tlb_allentries_async(va
, end
);
9338 flush_mmu_tlb_entries_async(va
, end
);
void
flush_mmu_tlb_region(
	vm_offset_t va,
	unsigned length)
{
	flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
	sync_tlb_flush();
}
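/*
 * Illustrative sketch (not built): the flush routine above trades per-entry
 * TLB invalidation against a full (or per-ASID) flush once a range covers more
 * than ARM64_FULL_TLB_FLUSH_THRESHOLD leaf pages (on __ARM_VMSA__ > 7).  The
 * helper restates that decision; its name is hypothetical.
 */
#if 0 /* example only */
static boolean_t
example_should_flush_whole_asid(vm_offset_t length)
{
	return (length >> ARM_TT_L3_SHIFT) > ARM64_FULL_TLB_FLUSH_THRESHOLD;
}
#endif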
static unsigned int
pmap_find_io_attr(pmap_paddr_t paddr)
{
	pmap_io_range_t find_range = {.addr = paddr, .len = PAGE_SIZE};
	unsigned int begin = 0, end = num_io_rgns - 1;
	assert(num_io_rgns > 0);

	for (;;) {
		unsigned int middle = (begin + end) / 2;
		int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
		if (cmp == 0) {
			return io_attr_table[middle].wimg;
		} else if (begin == end) {
			break;
		} else if (cmp > 0) {
			begin = middle + 1;
		} else {
			end = middle;
		}
	}

	return VM_WIMG_IO;
}
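/*
 * Illustrative sketch (not built): io_attr_table is kept sorted by address,
 * which is what allows the binary search above.  A hypothetical caller looking
 * up the WIMG attributes for a device page combines it with the I/O region
 * range check also used by pmap_cache_attributes().
 */
#if 0 /* example only */
static unsigned int
example_wimg_for_paddr(pmap_paddr_t paddr)
{
	if ((paddr >= io_rgn_start) && (paddr < io_rgn_end)) {
		return pmap_find_io_attr(paddr);        /* per-region WIMG override */
	}
	return VM_WIMG_DEFAULT;
}
#endif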
9379 pmap_cache_attributes(
9384 unsigned int result
;
9385 pp_attr_t pp_attr_current
;
9389 if ((paddr
>= io_rgn_start
) && (paddr
< io_rgn_end
)) {
9390 return pmap_find_io_attr(paddr
);
9393 if (!pmap_initialized
) {
9394 if ((paddr
>= gPhysBase
) && (paddr
< gPhysBase
+ gPhysSize
)) {
9395 return VM_WIMG_DEFAULT
;
9402 if (!pa_valid(paddr
)) {
9406 result
= VM_WIMG_DEFAULT
;
9408 pai
= (int)pa_index(paddr
);
9410 pp_attr_current
= pp_attr_table
[pai
];
9411 if (pp_attr_current
& PP_ATTR_WIMG_MASK
) {
9412 result
= pp_attr_current
& PP_ATTR_WIMG_MASK
;
9417 MARK_AS_PMAP_TEXT
static void
9418 pmap_sync_wimg(ppnum_t pn
, unsigned int wimg_bits_prev
, unsigned int wimg_bits_new
)
9420 if ((wimg_bits_prev
!= wimg_bits_new
)
9421 && ((wimg_bits_prev
== VM_WIMG_COPYBACK
)
9422 || ((wimg_bits_prev
== VM_WIMG_INNERWBACK
)
9423 && (wimg_bits_new
!= VM_WIMG_COPYBACK
))
9424 || ((wimg_bits_prev
== VM_WIMG_WTHRU
)
9425 && ((wimg_bits_new
!= VM_WIMG_COPYBACK
) || (wimg_bits_new
!= VM_WIMG_INNERWBACK
))))) {
9426 pmap_sync_page_attributes_phys(pn
);
9429 if ((wimg_bits_new
== VM_WIMG_RT
) && (wimg_bits_prev
!= VM_WIMG_RT
)) {
9430 pmap_force_dcache_clean(phystokv(ptoa(pn
)), PAGE_SIZE
);
9434 MARK_AS_PMAP_TEXT
static __unused
void
9435 pmap_update_compressor_page_internal(ppnum_t pn
, unsigned int prev_cacheattr
, unsigned int new_cacheattr
)
9437 pmap_paddr_t paddr
= ptoa(pn
);
9438 int pai
= (int)pa_index(paddr
);
9440 if (__improbable(!pa_valid(paddr
))) {
9441 panic("%s called on non-managed page 0x%08x", __func__
, pn
);
9447 pmap_update_cache_attributes_locked(pn
, new_cacheattr
);
9451 pmap_sync_wimg(pn
, prev_cacheattr
& VM_WIMG_MASK
, new_cacheattr
& VM_WIMG_MASK
);
9455 pmap_map_compressor_page(ppnum_t pn
)
9457 #if __ARM_PTE_PHYSMAP__
9458 unsigned int cacheattr
= pmap_cache_attributes(pn
) & VM_WIMG_MASK
;
9459 if (cacheattr
!= VM_WIMG_DEFAULT
) {
9460 pmap_update_compressor_page_internal(pn
, cacheattr
, VM_WIMG_DEFAULT
);
9463 return (void*)phystokv(ptoa(pn
));
9467 pmap_unmap_compressor_page(ppnum_t pn __unused
, void *kva __unused
)
9469 #if __ARM_PTE_PHYSMAP__
9470 unsigned int cacheattr
= pmap_cache_attributes(pn
) & VM_WIMG_MASK
;
9471 if (cacheattr
!= VM_WIMG_DEFAULT
) {
9472 pmap_update_compressor_page_internal(pn
, VM_WIMG_DEFAULT
, cacheattr
);
9477 MARK_AS_PMAP_TEXT
static boolean_t
9478 pmap_batch_set_cache_attributes_internal(
9480 unsigned int cacheattr
,
9481 unsigned int page_cnt
,
9482 unsigned int page_index
,
9488 pp_attr_t pp_attr_current
;
9489 pp_attr_t pp_attr_template
;
9490 unsigned int wimg_bits_prev
, wimg_bits_new
;
9492 if (cacheattr
& VM_WIMG_USE_DEFAULT
) {
9493 cacheattr
= VM_WIMG_DEFAULT
;
9496 if ((doit
== FALSE
) && (*res
== 0)) {
9497 pmap_pin_kernel_pages((vm_offset_t
)res
, sizeof(*res
));
9499 pmap_unpin_kernel_pages((vm_offset_t
)res
, sizeof(*res
));
9500 if (platform_cache_batch_wimg(cacheattr
& (VM_WIMG_MASK
), page_cnt
<< PAGE_SHIFT
) == FALSE
) {
9507 if (!pa_valid(paddr
)) {
9508 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn
);
9511 pai
= (int)pa_index(paddr
);
9518 pp_attr_current
= pp_attr_table
[pai
];
9519 wimg_bits_prev
= VM_WIMG_DEFAULT
;
9520 if (pp_attr_current
& PP_ATTR_WIMG_MASK
) {
9521 wimg_bits_prev
= pp_attr_current
& PP_ATTR_WIMG_MASK
;
9524 pp_attr_template
= (pp_attr_current
& ~PP_ATTR_WIMG_MASK
) | PP_ATTR_WIMG(cacheattr
& (VM_WIMG_MASK
));
9530 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
9531 * to avoid losing simultaneous updates to other bits like refmod. */
9532 } while (!OSCompareAndSwap16(pp_attr_current
, pp_attr_template
, &pp_attr_table
[pai
]));
9534 wimg_bits_new
= VM_WIMG_DEFAULT
;
9535 if (pp_attr_template
& PP_ATTR_WIMG_MASK
) {
9536 wimg_bits_new
= pp_attr_template
& PP_ATTR_WIMG_MASK
;
9540 if (wimg_bits_new
!= wimg_bits_prev
) {
9541 pmap_update_cache_attributes_locked(pn
, cacheattr
);
9544 if ((wimg_bits_new
== VM_WIMG_RT
) && (wimg_bits_prev
!= VM_WIMG_RT
)) {
9545 pmap_force_dcache_clean(phystokv(paddr
), PAGE_SIZE
);
9548 if (wimg_bits_new
== VM_WIMG_COPYBACK
) {
9551 if (wimg_bits_prev
== wimg_bits_new
) {
9552 pmap_pin_kernel_pages((vm_offset_t
)res
, sizeof(*res
));
9554 pmap_unpin_kernel_pages((vm_offset_t
)res
, sizeof(*res
));
9555 if (!platform_cache_batch_wimg(wimg_bits_new
, (*res
) << PAGE_SHIFT
)) {
9562 if (page_cnt
== (page_index
+ 1)) {
9563 wimg_bits_prev
= VM_WIMG_COPYBACK
;
9564 if (((wimg_bits_prev
!= wimg_bits_new
))
9565 && ((wimg_bits_prev
== VM_WIMG_COPYBACK
)
9566 || ((wimg_bits_prev
== VM_WIMG_INNERWBACK
)
9567 && (wimg_bits_new
!= VM_WIMG_COPYBACK
))
9568 || ((wimg_bits_prev
== VM_WIMG_WTHRU
)
9569 && ((wimg_bits_new
!= VM_WIMG_COPYBACK
) || (wimg_bits_new
!= VM_WIMG_INNERWBACK
))))) {
9570 platform_cache_flush_wimg(wimg_bits_new
);
9578 pmap_batch_set_cache_attributes(
9580 unsigned int cacheattr
,
9581 unsigned int page_cnt
,
9582 unsigned int page_index
,
9586 return pmap_batch_set_cache_attributes_internal(pn
, cacheattr
, page_cnt
, page_index
, doit
, res
);
9589 MARK_AS_PMAP_TEXT
static void
9590 pmap_set_cache_attributes_priv(
9592 unsigned int cacheattr
,
9593 boolean_t external __unused
)
9597 pp_attr_t pp_attr_current
;
9598 pp_attr_t pp_attr_template
;
9599 unsigned int wimg_bits_prev
, wimg_bits_new
;
9603 if (!pa_valid(paddr
)) {
9604 return; /* Not a managed page. */
9607 if (cacheattr
& VM_WIMG_USE_DEFAULT
) {
9608 cacheattr
= VM_WIMG_DEFAULT
;
9611 pai
= (int)pa_index(paddr
);
9617 pp_attr_current
= pp_attr_table
[pai
];
9618 wimg_bits_prev
= VM_WIMG_DEFAULT
;
9619 if (pp_attr_current
& PP_ATTR_WIMG_MASK
) {
9620 wimg_bits_prev
= pp_attr_current
& PP_ATTR_WIMG_MASK
;
9623 pp_attr_template
= (pp_attr_current
& ~PP_ATTR_WIMG_MASK
) | PP_ATTR_WIMG(cacheattr
& (VM_WIMG_MASK
));
9625 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
9626 * to avoid losing simultaneous updates to other bits like refmod. */
9627 } while (!OSCompareAndSwap16(pp_attr_current
, pp_attr_template
, &pp_attr_table
[pai
]));
9629 wimg_bits_new
= VM_WIMG_DEFAULT
;
9630 if (pp_attr_template
& PP_ATTR_WIMG_MASK
) {
9631 wimg_bits_new
= pp_attr_template
& PP_ATTR_WIMG_MASK
;
9634 if (wimg_bits_new
!= wimg_bits_prev
) {
9635 pmap_update_cache_attributes_locked(pn
, cacheattr
);
9640 pmap_sync_wimg(pn
, wimg_bits_prev
, wimg_bits_new
);
MARK_AS_PMAP_TEXT static void
pmap_set_cache_attributes_internal(
    unsigned int cacheattr)

    pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);

pmap_set_cache_attributes(
    unsigned int cacheattr)

    pmap_set_cache_attributes_internal(pn, cacheattr);
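
/*
 * pmap_update_cache_attributes_locked (below) rewrites every mapping of the
 * given physical page with the new cache attributes: first the kernel
 * physical-aperture PTE (when __ARM_PTE_PHYSMAP__ is defined), then each
 * mapping reached through the page's PV list, flushing the TLB for every
 * address that was rewritten.
 */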
pmap_update_cache_attributes_locked(
    unsigned attributes)

    pmap_paddr_t phys = ptoa(ppnum);

    boolean_t tlb_flush_needed = FALSE;

#if __ARM_PTE_PHYSMAP__
    vm_offset_t kva = phystokv(phys);
    pte_p = pmap_pte(kernel_pmap, kva);

    tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
    tmplate |= wimg_to_pte(attributes);
#if (__ARM_VMSA__ > 7)
    if (tmplate & ARM_PTE_HINT_MASK) {
        panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
            __FUNCTION__, pte_p, (void *)kva, tmplate);
    }

    WRITE_PTE_STRONG(pte_p, tmplate);
    flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
    tlb_flush_needed = TRUE;

    pai = (unsigned int)pa_index(phys);

    pv_h = pai_to_pvh(pai);

    pte_p = PT_ENTRY_NULL;
    pve_p = PV_ENTRY_NULL;
    if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
        pte_p = pvh_ptep(pv_h);
    } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
        pve_p = pvh_list(pv_h);
        pte_p = PT_ENTRY_NULL;
    }

    while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
        vm_map_address_t va;

        if (pve_p != PV_ENTRY_NULL) {
            pte_p = pve_get_ptep(pve_p);
        }
#ifdef PVH_FLAG_IOMMU
        if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
            goto cache_skip_pve;
        }

        pmap = ptep_get_pmap(pte_p);
        va = ptep_get_va(pte_p);

        tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
        tmplate |= wimg_to_pte(attributes);

        WRITE_PTE_STRONG(pte_p, tmplate);
        flush_mmu_tlb_region_asid_async(va, PAGE_SIZE, pmap);
        tlb_flush_needed = TRUE;

#ifdef PVH_FLAG_IOMMU

        pte_p = PT_ENTRY_NULL;
        if (pve_p != PV_ENTRY_NULL) {
            pve_p = PVE_NEXT_PTR(pve_next(pve_p));
        }

    if (tlb_flush_needed) {
#if (__ARM_VMSA__ == 7)
pmap_create_sharedpage(

    (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
    memset((char *) phystokv(pa), 0, PAGE_SIZE);

    kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
    assert(kr == KERN_SUCCESS);

    return (vm_map_address_t)phystokv(pa);
    vm_address_t address,
    tt_entry_t template)

    tt_entry_t *ptep, pte;

    ptep = pmap_tt3e(pmap, address);

        panic("%s: no ptep?\n", __FUNCTION__);

    pte = tte_to_pa(pte) | template;
    WRITE_PTE_STRONG(ptep, pte);

/* Note absence of non-global bit */
#define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
        | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
        | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
        | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
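
/*
 * The template above yields a read-only (AP_RORO), never-executable (NX/PNX),
 * inner-shareable write-back mapping with the access flag set.  The non-global
 * bit is deliberately left out so the nested commpage entry is shared across
 * address spaces; the kernel's own CONFIG_XNUPOST mapping below adds
 * ARM_PTE_NG explicitly.
 */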
pmap_create_sharedpage(

    pmap_paddr_t pa = 0;

    (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);

    memset((char *) phystokv(pa), 0, PAGE_SIZE);

#ifdef CONFIG_XNUPOST
    /*
     * The kernel pmap maintains a user accessible mapping of the commpage
     */
    kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
    assert(kr == KERN_SUCCESS);

    /*
     * This mapping should not be global (as we only expect to reference it
     */
    pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);

    kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
#endif /* CONFIG_XNUPOST */

    /*
     * In order to avoid burning extra pages on mapping the shared page, we
     * create a dedicated pmap for the shared page.  We forcibly nest the
     * translation tables from this pmap into other pmaps.  The level we
     * will nest at depends on the MMU configuration (page size, TTBR range,
     *
     * Note that this is NOT "the nested pmap" (which is used to nest the
     *
     * Note that we update parameters of the entry for our unique needs (NG
     */
    sharedpage_pmap = pmap_create(NULL, 0x0, FALSE);
    assert(sharedpage_pmap != NULL);

    /* The user 64-bit mapping... */
    kr = pmap_enter(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
    assert(kr == KERN_SUCCESS);
    pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

    /* ...and the user 32-bit mapping. */
    kr = pmap_enter(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
    assert(kr == KERN_SUCCESS);
    pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

    /* For manipulation in kernel, go straight to physical page */
    return (vm_map_address_t)phystokv(pa);
/*
 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
 * with user controlled TTEs.
 */
#if (ARM_PGSHIFT == 14) || __ARM64_TWO_LEVEL_PMAP__
static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= MACH_VM_MAX_ADDRESS);
static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
#elif (ARM_PGSHIFT == 12)
static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);

#error Nested shared page mapping is unsupported on this config
MARK_AS_PMAP_TEXT static kern_return_t
pmap_insert_sharedpage_internal(

    kern_return_t kr = KERN_SUCCESS;
    vm_offset_t sharedpage_vaddr;
    pt_entry_t *ttep, *src_ttep;

    VALIDATE_PMAP(pmap);

#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.

    if (pmap_is_64bit(pmap)) {
        sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
    } else {
        sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
    }

    /*
     * For 4KB pages, we can force the commpage to nest at the level one
     * page table, as each entry is 1GB (i.e., there will be no overlap
     * with regular userspace mappings).  For 16KB pages, each level one
     * entry is 64GB, so we must go to the second level entry (32MB) in
     */
#if (ARM_PGSHIFT == 12)
#if __ARM64_TWO_LEVEL_PMAP__
#error A two level page table with a page shift of 12 is not currently supported

    /* Just slam in the L1 entry. */
    ttep = pmap_tt1e(pmap, sharedpage_vaddr);

    if (*ttep != ARM_PTE_EMPTY) {
        panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
    }

    src_ttep = pmap_tt1e(sharedpage_pmap, sharedpage_vaddr);
#elif (ARM_PGSHIFT == 14)
#if !__ARM64_TWO_LEVEL_PMAP__
    /* Allocate for the L2 entry if necessary, and slam it into place. */
    /*
     * As long as we are using a three level page table, the first level
     * should always exist, so we don't need to check for it.
     */
    while (*pmap_tt1e(pmap, sharedpage_vaddr) == ARM_PTE_EMPTY) {

        kr = pmap_expand(pmap, sharedpage_vaddr, options, PMAP_TT_L2_LEVEL);

        if (kr != KERN_SUCCESS) {

            panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
        }
    }

    ttep = pmap_tt2e(pmap, sharedpage_vaddr);

    if (*ttep != ARM_PTE_EMPTY) {
        panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
    }

    src_ttep = pmap_tt2e(sharedpage_pmap, sharedpage_vaddr);

    FLUSH_PTE_STRONG(ttep);

    /* TODO: Should we flush in the 64-bit case? */
    flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);

#if (ARM_PGSHIFT == 12) && !__ARM64_TWO_LEVEL_PMAP__
    flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->asid));
#elif (ARM_PGSHIFT == 14)
    flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
pmap_unmap_sharedpage(

    vm_offset_t sharedpage_vaddr;

#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.

    if (pmap_is_64bit(pmap)) {
        sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
    } else {
        sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
    }

#if (ARM_PGSHIFT == 12)
#if __ARM64_TWO_LEVEL_PMAP__
#error A two level page table with a page shift of 12 is not currently supported

    ttep = pmap_tt1e(pmap, sharedpage_vaddr);

    /* It had better be mapped to the shared page */
    if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt1e(sharedpage_pmap, sharedpage_vaddr)) {
        panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
    }
#elif (ARM_PGSHIFT == 14)
    ttep = pmap_tt2e(pmap, sharedpage_vaddr);

    /* It had better be mapped to the shared page */
    if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt2e(sharedpage_pmap, sharedpage_vaddr)) {
        panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
    }

    *ttep = ARM_TTE_EMPTY;
    flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);

#if (ARM_PGSHIFT == 12)
#if __ARM64_TWO_LEVEL_PMAP__
#error A two level page table with a page shift of 12 is not currently supported

    flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->asid));
#elif (ARM_PGSHIFT == 14)
    flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
pmap_insert_sharedpage(

    pmap_insert_sharedpage_internal(pmap);

    return pmap->is_64bit;

/* ARMTODO -- an implementation that accounts for
 * holes in the physical map, if any.
 */
    return pa_valid(ptoa(pn));
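
/*
 * pmap_is_empty_internal (below) scans [va_start, va_end) one translation-table
 * block at a time and reports whether any valid mapping exists in the range.
 * The repeated checks against kernel_pmap and not_in_kdp presumably guard the
 * pmap lock/unlock pairs that are elided in this extract: the lock is only
 * taken for non-kernel pmaps when not running in the kernel debugger.
 */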
MARK_AS_PMAP_TEXT static boolean_t
pmap_is_empty_internal(
    vm_map_offset_t va_start,
    vm_map_offset_t va_end)

    vm_map_offset_t block_start, block_end;

    if (pmap == NULL) {

    VALIDATE_PMAP(pmap);

    if ((pmap != kernel_pmap) && (not_in_kdp)) {

#if (__ARM_VMSA__ == 7)
    if (tte_index(pmap, va_end) >= pmap->tte_index_max) {
        if ((pmap != kernel_pmap) && (not_in_kdp)) {

    block_start = va_start;
    tte_p = pmap_tte(pmap, block_start);
    while (block_start < va_end) {
        block_end = (block_start + ARM_TT_L1_SIZE) & ~(ARM_TT_L1_OFFMASK);
        if (block_end > va_end) {
            block_end = va_end;
        }

        if ((*tte_p & ARM_TTE_TYPE_MASK) != 0) {
            vm_map_offset_t offset;
            ppnum_t phys_page = 0;

            for (offset = block_start;
                offset < block_end;
                offset += ARM_PGBYTES) {
                // This does a pmap_find_phys() lookup but assumes lock is held
                phys_page = pmap_vtophys(pmap, offset);

                    if ((pmap != kernel_pmap) && (not_in_kdp)) {

        block_start = block_end;

    block_start = va_start;

    while (block_start < va_end) {
        pt_entry_t *bpte_p, *epte_p;

        block_end = (block_start + ARM_TT_L2_SIZE) & ~ARM_TT_L2_OFFMASK;
        if (block_end > va_end) {
            block_end = va_end;
        }

        tte_p = pmap_tt2e(pmap, block_start);
        if ((tte_p != PT_ENTRY_NULL)
            && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
            pte_p = (pt_entry_t *) ttetokv(*tte_p);
            bpte_p = &pte_p[tt3_index(pmap, block_start)];
            epte_p = bpte_p + (((block_end - block_start) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT);

            for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
                if (*pte_p != ARM_PTE_EMPTY) {
                    if ((pmap != kernel_pmap) && (not_in_kdp)) {

        block_start = block_end;

    if ((pmap != kernel_pmap) && (not_in_kdp)) {

    vm_map_offset_t va_start,
    vm_map_offset_t va_end)

    return pmap_is_empty_internal(pmap, va_start, va_end);
    unsigned int option)

    return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
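
/*
 * pmap_max_64bit_offset (below) maps an ARM_PMAP_MAX_OFFSET_* option to the
 * highest user VA the pmap may use: the boot-time override
 * (arm64_pmap_max_offset_default) wins when set, the DEVICE option scales with
 * max_mem, and JUMBO extends the limit to MACH_VM_MAX_ADDRESS.
 */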
pmap_max_64bit_offset(
    __unused unsigned int option)

    vm_map_offset_t max_offset_ret = 0;

#if defined(__arm64__)
    const vm_map_offset_t min_max_offset = SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000; // end of shared region + 512MB for various purposes
    if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
        max_offset_ret = arm64_pmap_max_offset_default;
    } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
        max_offset_ret = min_max_offset;
    } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
        max_offset_ret = MACH_VM_MAX_ADDRESS;
    } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
        if (arm64_pmap_max_offset_default) {
            max_offset_ret = arm64_pmap_max_offset_default;
        } else if (max_mem > 0xC0000000) {
            max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
        } else if (max_mem > 0x40000000) {
            max_offset_ret = min_max_offset + 0x38000000;  // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
        } else {
            max_offset_ret = min_max_offset;
        }
    } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
        if (arm64_pmap_max_offset_default) {
            // Allow the boot-arg to override jumbo size
            max_offset_ret = arm64_pmap_max_offset_default;
        } else {
            max_offset_ret = MACH_VM_MAX_ADDRESS;          // Max offset is 64GB for pmaps with special "jumbo" blessing
        }
    } else {
        panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
    }

    assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
    assert(max_offset_ret >= min_max_offset);
#else
    panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
#endif

    return max_offset_ret;
pmap_max_32bit_offset(
    unsigned int option)

    vm_map_offset_t max_offset_ret = 0;

    if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
        max_offset_ret = arm_pmap_max_offset_default;
    } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
        max_offset_ret = 0x66000000;
    } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
        max_offset_ret = VM_MAX_ADDRESS;
    } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
        if (arm_pmap_max_offset_default) {
            max_offset_ret = arm_pmap_max_offset_default;
        } else if (max_mem > 0x20000000) {
            max_offset_ret = 0x80000000;
        } else {
            max_offset_ret = 0x66000000;
        }
    } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
        max_offset_ret = 0x80000000;
    } else {
        panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
    }

    assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
    return max_offset_ret;
/*
 * Constrain DTrace copyin/copyout actions
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

dtrace_copyio_preflight(
    __unused addr64_t va)

    if (current_map() == kernel_map) {
        return KERN_FAILURE;
    }
    return KERN_SUCCESS;

dtrace_copyio_postflight(
    __unused addr64_t va)

    return KERN_SUCCESS;

#endif /* CONFIG_DTRACE */
pmap_flush_context_init(__unused pmap_flush_context *pfc)

    __unused pmap_flush_context *cpus_to_flush)

    /* not implemented yet */

static void __unused
pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)

static void __unused
pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)

#define PMAP_RESIDENT_INVALID   ((mach_vm_size_t)-1)
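
/*
 * pmap_query_resident_internal (below) walks at most one twig's worth of PTEs
 * for [start, end) and returns the number of resident bytes, adding compressed
 * bytes to *compressed_bytes_p when a pointer is supplied.
 * PMAP_RESIDENT_INVALID signals that the range is not backed by a leaf table
 * (or that the pmap is NULL).
 */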
MARK_AS_PMAP_TEXT static mach_vm_size_t
pmap_query_resident_internal(
    vm_map_address_t start,
    vm_map_address_t end,
    mach_vm_size_t *compressed_bytes_p)

    mach_vm_size_t resident_bytes = 0;
    mach_vm_size_t compressed_bytes = 0;

    pt_entry_t *bpte, *epte;

    if (pmap == NULL) {
        return PMAP_RESIDENT_INVALID;
    }

    VALIDATE_PMAP(pmap);

    /* Ensure that this request is valid, and addresses exactly one TTE. */
    if (__improbable((start % ARM_PGBYTES) || (end % ARM_PGBYTES))) {
        panic("%s: address range %p, %p not page-aligned", __func__, (void*)start, (void*)end);
    }

    if (__improbable((end < start) || ((end - start) > (PTE_PGENTRIES * ARM_PGBYTES)))) {
        panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
    }

    tte_p = pmap_tte(pmap, start);
    if (tte_p == (tt_entry_t *) NULL) {

        return PMAP_RESIDENT_INVALID;
    }
    if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
#if (__ARM_VMSA__ == 7)
        pte_p = (pt_entry_t *) ttetokv(*tte_p);
        bpte = &pte_p[ptenum(start)];
        epte = bpte + atop(end - start);
#else
        pte_p = (pt_entry_t *) ttetokv(*tte_p);
        bpte = &pte_p[tt3_index(pmap, start)];
        epte = bpte + ((end - start) >> ARM_TT_L3_SHIFT);
#endif

        for (; bpte < epte; bpte++) {
            if (ARM_PTE_IS_COMPRESSED(*bpte)) {
                compressed_bytes += ARM_PGBYTES;
            } else if (pa_valid(pte_to_pa(*bpte))) {
                resident_bytes += ARM_PGBYTES;
            }
        }
    }

    if (compressed_bytes_p) {
        pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
        *compressed_bytes_p += compressed_bytes;
        pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
    }

    return resident_bytes;
pmap_query_resident(
    vm_map_address_t start,
    vm_map_address_t end,
    mach_vm_size_t *compressed_bytes_p)

    mach_vm_size_t total_resident_bytes;
    mach_vm_size_t compressed_bytes;
    vm_map_address_t va;

    if (pmap == PMAP_NULL) {
        if (compressed_bytes_p) {
            *compressed_bytes_p = 0;
        }

    total_resident_bytes = 0;
    compressed_bytes = 0;

    PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
        VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
        VM_KERNEL_ADDRHIDE(end));

        vm_map_address_t l;
        mach_vm_size_t resident_bytes;

        l = ((va + ARM_TT_TWIG_SIZE) & ~ARM_TT_TWIG_OFFMASK);

        resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
        if (resident_bytes == PMAP_RESIDENT_INVALID) {

        total_resident_bytes += resident_bytes;

    if (compressed_bytes_p) {
        *compressed_bytes_p = compressed_bytes;
    }

    PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
        total_resident_bytes);

    return total_resident_bytes;
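
/*
 * pmap_check_ledgers (below) runs when a pmap is torn down (under MACH_ASSERT)
 * and verifies that the per-task pmap ledgers have drained back to zero.  Any
 * drift is recorded in pmap_ledgers_drift, and an imbalance either panics or
 * is logged, depending on pmap_ledgers_panic.
 */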
pmap_check_ledgers(

    ledger_amount_t bal;

    boolean_t do_panic;

    if (pmap->pmap_pid == 0) {
        /*
         * This pmap was not or is no longer fully associated
         * with a task (e.g. the old pmap after a fork()/exec() or
         * spawn()).  Its "ledger" still points at a task that is
         * now using a different (and active) address space, so
         * we can't check that all the pmap ledgers are balanced here.
         *
         * If the "pid" is set, that means that we went through
         * pmap_set_process() in task_terminate_internal(), so
         * this task's ledger should not have been re-used and
         * all the pmap ledgers should be back to 0.
         */

    pid = pmap->pmap_pid;
    procname = pmap->pmap_procname;

    pmap_ledgers_drift.num_pmaps_checked++;

#define LEDGER_CHECK_BALANCE(__LEDGER)                                      \
        int panic_on_negative = TRUE;                                       \
        ledger_get_balance(pmap->ledger,                                    \
            task_ledgers.__LEDGER,                                          \
        ledger_get_panic_on_negative(pmap->ledger,                          \
            task_ledgers.__LEDGER,                                          \
            &panic_on_negative);                                            \
        if (panic_on_negative ||                                            \
            (pmap_ledgers_panic &&                                          \
            pmap_ledgers_panic_leeway > 0 &&                                \
            (bal > (pmap_ledgers_panic_leeway * PAGE_SIZE) ||               \
            bal < (-pmap_ledgers_panic_leeway * PAGE_SIZE)))) {             \
        printf("LEDGER BALANCE proc %d (%s) "                               \
            "\"%s\" = %lld\n",                                              \
            pid, procname, #__LEDGER, bal);                                 \
        pmap_ledgers_drift.__LEDGER##_over++;                               \
        pmap_ledgers_drift.__LEDGER##_over_total += bal;                    \
        if (bal > pmap_ledgers_drift.__LEDGER##_over_max) {                 \
            pmap_ledgers_drift.__LEDGER##_over_max = bal;                   \
        } else if (bal < 0) {                                               \
            pmap_ledgers_drift.__LEDGER##_under++;                          \
            pmap_ledgers_drift.__LEDGER##_under_total += bal;               \
            if (bal < pmap_ledgers_drift.__LEDGER##_under_max) {            \
                pmap_ledgers_drift.__LEDGER##_under_max = bal;

    LEDGER_CHECK_BALANCE(phys_footprint);
    LEDGER_CHECK_BALANCE(internal);
    LEDGER_CHECK_BALANCE(internal_compressed);
    LEDGER_CHECK_BALANCE(iokit_mapped);
    LEDGER_CHECK_BALANCE(alternate_accounting);
    LEDGER_CHECK_BALANCE(alternate_accounting_compressed);
    LEDGER_CHECK_BALANCE(page_table);
    LEDGER_CHECK_BALANCE(purgeable_volatile);
    LEDGER_CHECK_BALANCE(purgeable_nonvolatile);
    LEDGER_CHECK_BALANCE(purgeable_volatile_compressed);
    LEDGER_CHECK_BALANCE(purgeable_nonvolatile_compressed);
    LEDGER_CHECK_BALANCE(network_volatile);
    LEDGER_CHECK_BALANCE(network_nonvolatile);
    LEDGER_CHECK_BALANCE(network_volatile_compressed);
    LEDGER_CHECK_BALANCE(network_nonvolatile_compressed);

    if (pmap_ledgers_panic) {
        panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
            pmap, pid, procname);
    } else {
        printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
            pmap, pid, procname);
    }

    PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
    PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
    PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
    PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
    PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
    PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
    PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);

#endif /* MACH_ASSERT */
pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
#define PROF_START  uint64_t t, nanot;\
                    t = mach_absolute_time();

#define PROF_END    absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
                    kprintf("%s: took %llu ns\n", __func__, nanot);

#define PMAP_PGTRACE_LOCK(p)                                            \
        *(p) = ml_set_interrupts_enabled(false);                        \
        if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
        ml_set_interrupts_enabled(*(p));

#define PMAP_PGTRACE_UNLOCK(p)                  \
        simple_unlock(&(pmap_pgtrace.lock));    \
        ml_set_interrupts_enabled(*(p));

#define PGTRACE_WRITE_PTE(pte_p, pte_entry)     \
        *(pte_p) = (pte_entry);                 \
        FLUSH_PTE(pte_p);

#define PGTRACE_MAX_MAP 16      // maximum supported va to same pa

} pmap_pgtrace_page_state_t;

    queue_chain_t chain;

    /*
     * maps - list of va maps to upper pa
     * map_pool - map pool
     * map_waste - waste can
     */
    queue_head_t map_pool;
    queue_head_t map_waste;
    pmap_pgtrace_page_state_t state;
} pmap_pgtrace_page_t;

/*
 * pages - list of tracing page info
 */
    queue_head_t pages;
    decl_simple_lock_data(, lock);
} pmap_pgtrace = {};
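
/*
 * pmap_pgtrace state: a list of traced pages (pmap_pgtrace.pages) protected by
 * a simple lock taken with interrupts disabled (PMAP_PGTRACE_LOCK).  Each
 * traced page keeps the active clone maps, a pool of pre-built maps
 * (map_pool), and a waste list used while clones are being torn down.
 */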
pmap_pgtrace_init(void)

    queue_init(&(pmap_pgtrace.pages));
    simple_lock_init(&(pmap_pgtrace.lock), 0);

    if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
        pgtrace_enabled = enabled;
    }
// find a page with given pa - pmap_pgtrace should be locked
inline static pmap_pgtrace_page_t *
pmap_pgtrace_find_page(pmap_paddr_t pa)

    queue_head_t *q = &(pmap_pgtrace.pages);
    pmap_pgtrace_page_t *p;

    queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
        if (p->state == UNDEFINED) {

        if (p->state == PA_UNDEFINED) {
// enter clone of given pmap, va page and range - pmap should be locked
pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)

    queue_head_t *q = &(pmap_pgtrace.pages);
    pmap_paddr_t pa_page;
    pt_entry_t *ptep, *cptep;
    pmap_pgtrace_page_t *p;
    bool found = false;

    PMAP_ASSERT_LOCKED(pmap);
    assert(va_page == arm_trunc_page(va_page));

    PMAP_PGTRACE_LOCK(&ints);

    ptep = pmap_pte(pmap, va_page);

    // target pte should exist
    if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
        PMAP_PGTRACE_UNLOCK(&ints);

    queue_head_t *mapq;
    queue_head_t *mappool;
    pmap_pgtrace_map_t *map = NULL;

    pa_page = pte_to_pa(*ptep);

    // find if we have a page info defined for this
    queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {

        mappool = &(p->map_pool);

        switch (p->state) {

            queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
                if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {

                    map->range.start = start;
                    map->range.end = end;

            if (p->pa != pa_page) {

            queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
                if (map->cloned == false) {

                    map->ova = va_page;
                    map->range.start = start;
                    map->range.end = end;

            if (p->pa != pa_page) {

            queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
                if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
                    kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);

                } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
                    kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);

                } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
                    // range should be already defined as well

            panic("invalid state p->state=%x\n", p->state);
        }

        if (found == true) {

    // do not clone if no page info found
    if (found == false) {
        PMAP_PGTRACE_UNLOCK(&ints);

    // copy pre, target and post ptes to clone ptes
    for (int i = 0; i < 3; i++) {
        ptep = pmap_pte(pmap, va_page + (i - 1) * ARM_PGBYTES);
        cptep = pmap_pte(kernel_pmap, map->cva[i]);
        assert(cptep != NULL);
        if (ptep == NULL) {
            PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
        } else {
            PGTRACE_WRITE_PTE(cptep, *ptep);
        }
        PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES);
    }

    // get ptes for original and clone
    ptep = pmap_pte(pmap, va_page);
    cptep = pmap_pte(kernel_pmap, map->cva[1]);

    // invalidate original pte and mark it as a pgtrace page
    PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
    PMAP_UPDATE_TLBS(pmap, map->ova, map->ova + ARM_PGBYTES);

    map->cloned = true;
    p->state = DEFINED;

    kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);

    PMAP_PGTRACE_UNLOCK(&ints);
// This function removes the trace bit and revalidates the pte if applicable. Pmap must be locked.
pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)

    bool ints, found = false;
    pmap_pgtrace_page_t *p;

    PMAP_PGTRACE_LOCK(&ints);

    // we must have this page info
    p = pmap_pgtrace_find_page(pa);

    // find matching map
    queue_head_t *mapq = &(p->maps);
    queue_head_t *mappool = &(p->map_pool);
    pmap_pgtrace_map_t *map;

    queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
        if (map->pmap == pmap && map->ova == va) {

    if (map->cloned == true) {
        // Restore the pte to its original state
        ptep = pmap_pte(pmap, map->ova);

        PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
        PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES);

        // revert clone pages
        for (int i = 0; i < 3; i++) {
            ptep = pmap_pte(kernel_pmap, map->cva[i]);
            assert(ptep != NULL);
            PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
            PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES);
        }
    }

    queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);

    map->ova = (vm_map_offset_t)NULL;
    map->cloned = false;
    queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);

    kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);

    PMAP_PGTRACE_UNLOCK(&ints);
// remove all clones of given pa - pmap must be locked
pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)

    pmap_pgtrace_page_t *p;

    PMAP_PGTRACE_LOCK(&ints);

    // we must have this page info
    p = pmap_pgtrace_find_page(pa);

        PMAP_PGTRACE_UNLOCK(&ints);

    queue_head_t *mapq = &(p->maps);
    queue_head_t *mappool = &(p->map_pool);
    queue_head_t *mapwaste = &(p->map_waste);
    pmap_pgtrace_map_t *map;

    // move maps to waste
    while (!queue_empty(mapq)) {
        queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
        queue_enter_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
    }

    PMAP_PGTRACE_UNLOCK(&ints);

    // sanitize maps in waste
    queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
        if (map->cloned == true) {
            PMAP_LOCK(map->pmap);

            // restore back original pte
            ptep = pmap_pte(map->pmap, map->ova);

            PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
            PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova + ARM_PGBYTES);

            // revert clone ptes
            for (int i = 0; i < 3; i++) {
                ptep = pmap_pte(kernel_pmap, map->cva[i]);
                assert(ptep != NULL);
                PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
                PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES);
            }

            PMAP_UNLOCK(map->pmap);
        }

        map->ova = (vm_map_offset_t)NULL;
        map->cloned = false;
    }

    PMAP_PGTRACE_LOCK(&ints);

    // recycle maps back to map_pool
    while (!queue_empty(mapwaste)) {
        queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
        queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
    }

    PMAP_PGTRACE_UNLOCK(&ints);
pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)

    vm_map_offset_t end;

    if (pmap == kernel_pmap) {
        tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
        *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64 - tsz)) << (64 - tsz));
        *endp = VM_MAX_KERNEL_ADDRESS;
    } else {
        tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;

        end = ((uint64_t)1 << (64 - tsz)) - 1;
    }

    assert(*endp > *startp);
// is pa mapped in the given pmap? then clone it
pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)

    vm_map_offset_t min, max;
    vm_map_offset_t cur_page, end_page;

    pmap_pgtrace_get_search_space(pmap, &min, &max);

    cur_page = arm_trunc_page(min);
    end_page = arm_trunc_page(max);
    while (cur_page <= end_page) {
        vm_map_offset_t add = 0;

        // skip uninteresting space
        if (pmap == kernel_pmap &&
            ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
            (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {

            goto unlock_continue;
        }

#if __ARM64_TWO_LEVEL_PMAP__
        // check whether we can skip l2
        ttep = pmap_tt2e(pmap, cur_page);
#else
        // check whether we can skip l1
        ttep = pmap_tt1e(pmap, cur_page);

        if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
            add = ARM_TT_L1_SIZE;
            goto unlock_continue;
        }

        tte = ((tt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, cur_page)];

        if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
            add = ARM_TT_L2_SIZE;
            goto unlock_continue;
        }

        ptep = &(((pt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, cur_page)]);
        if (ptep == PT_ENTRY_NULL) {
            add = ARM_TT_L3_SIZE;
            goto unlock_continue;
        }

        if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
            if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {

        if (cur_page + add < cur_page) {
// search pv table and clone vas of given pa
pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)

    queue_chain_t chain;

    vm_map_offset_t va;

    queue_head_t pmapvaq;

    queue_init(&pmapvaq);

    pai = pa_index(pa);

    pvh = pai_to_pvh(pai);

    // collect pmap/va pair from pvh
    if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
        ptep = pvh_ptep(pvh);
        pmap = ptep_get_pmap(ptep);

        pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
        pmapva->pmap = pmap;
        pmapva->va = ptep_get_va(ptep);

        queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
    } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {

        pvep = pvh_list(pvh);

            ptep = pve_get_ptep(pvep);
            pmap = ptep_get_pmap(ptep);

            pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
            pmapva->pmap = pmap;
            pmapva->va = ptep_get_va(ptep);

            queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);

            pvep = PVE_NEXT_PTR(pve_next(pvep));

    // clone them while making sure mapping still exists
    queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
        PMAP_LOCK(pmapva->pmap);
        ptep = pmap_pte(pmapva->pmap, pmapva->va);
        if (pte_to_pa(*ptep) == pa) {
            if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {

        PMAP_UNLOCK(pmapva->pmap);

        kfree(pmapva, sizeof(pmap_va_t));
// allocate a page info
static pmap_pgtrace_page_t *
pmap_pgtrace_alloc_page(void)

    pmap_pgtrace_page_t *p;
    queue_head_t *mapq;
    queue_head_t *mappool;
    queue_head_t *mapwaste;
    pmap_pgtrace_map_t *map;

    p = kalloc(sizeof(pmap_pgtrace_page_t));

    p->state = UNDEFINED;

    mappool = &(p->map_pool);
    mapwaste = &(p->map_waste);

    queue_init(mappool);
    queue_init(mapwaste);

    for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
        vm_map_offset_t newcva;

        vm_map_entry_t entry;

        vm_object_reference(kernel_object);
        kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3 * ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
        if (kr != KERN_SUCCESS) {
            panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
        }
        VME_OBJECT_SET(entry, kernel_object);
        VME_OFFSET_SET(entry, newcva);
        vm_map_unlock(kernel_map);

        // fill default clone page info and add to pool
        map = kalloc(sizeof(pmap_pgtrace_map_t));
        for (int j = 0; j < 3; j++) {
            vm_map_offset_t addr = newcva + j * ARM_PGBYTES;

            // pre-expand pmap while preemption enabled
            kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
            if (kr != KERN_SUCCESS) {
                panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
            }

            cptep = pmap_pte(kernel_pmap, addr);
            assert(cptep != NULL);

            map->cva[j] = addr;
            map->cva_spte[j] = *cptep;
        }

        map->range.start = map->range.end = 0;
        map->cloned = false;
        queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
    }
// free a page info
pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)

    queue_head_t *mapq;
    queue_head_t *mappool;
    queue_head_t *mapwaste;
    pmap_pgtrace_map_t *map;

    mappool = &(p->map_pool);
    mapwaste = &(p->map_waste);

    while (!queue_empty(mapq)) {
        queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
        kfree(map, sizeof(pmap_pgtrace_map_t));
    }

    while (!queue_empty(mappool)) {
        queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
        kfree(map, sizeof(pmap_pgtrace_map_t));
    }

    while (!queue_empty(mapwaste)) {
        queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
        kfree(map, sizeof(pmap_pgtrace_map_t));
    }

    kfree(p, sizeof(pmap_pgtrace_page_t));
// construct page infos with the given address range
pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)

    queue_head_t *q = &(pmap_pgtrace.pages);

    vm_map_offset_t cur_page, end_page;

        kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);

    // add each page in given range
    cur_page = arm_trunc_page(start);
    end_page = arm_trunc_page(end);
    while (cur_page <= end_page) {
        pmap_paddr_t pa_page = 0;
        uint64_t num_cloned = 0;
        pmap_pgtrace_page_t *p = NULL, *newp;
        bool free_newp = true;
        pmap_pgtrace_page_state_t state;

        // do all allocations outside of spinlocks
        newp = pmap_pgtrace_alloc_page();

        // keep lock orders in pmap, kernel_pmap and pgtrace lock
        if (pmap != NULL) {

        if (pmap != kernel_pmap) {
            PMAP_LOCK(kernel_pmap);
        }

        // addresses are physical if pmap is null
        if (pmap == NULL) {

            pa_page = cur_page;
            state = VA_UNDEFINED;
        } else {
            ptep = pmap_pte(pmap, cur_page);
            if (ptep != NULL) {
                pa_page = pte_to_pa(*ptep);

            state = PA_UNDEFINED;
        }

        // search if we have a page info already
        PMAP_PGTRACE_LOCK(&ints);
        if (state != PA_UNDEFINED) {
            p = pmap_pgtrace_find_page(pa_page);

        // add pre-allocated page info if nothing found

            queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);

        // now p points what we want

        queue_head_t *mapq = &(p->maps);
        queue_head_t *mappool = &(p->map_pool);
        pmap_pgtrace_map_t *map;
        vm_map_offset_t start_offset, end_offset;

        // calculate trace offsets in the page
        if (cur_page > start) {

            start_offset = start - cur_page;

        if (cur_page == end_page) {
            end_offset = end - end_page;
        } else {
            end_offset = ARM_PGBYTES - 1;
        }

        kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);

        assert(!queue_empty(mappool));
        queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
        if (p->state == PA_UNDEFINED) {

            map->ova = cur_page;
            map->range.start = start_offset;
            map->range.end = end_offset;
        } else if (p->state == VA_UNDEFINED) {

            map->range.start = start_offset;
            map->range.end = end_offset;
        } else if (p->state == DEFINED) {

            map->ova = cur_page;
            map->range.start = start_offset;
            map->range.end = end_offset;
        } else {
            panic("invalid p->state=%d\n", p->state);
        }

        map->cloned = false;
        queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);

        PMAP_PGTRACE_UNLOCK(&ints);
        if (pmap != kernel_pmap) {
            PMAP_UNLOCK(kernel_pmap);
        }
        if (pmap != NULL) {

        if (pa_valid(pa_page)) {
            num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
        }
        if (pmap == NULL) {
            num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
        } else {
            num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
        }

        // free pre-allocations if we didn't add it to the q

            pmap_pgtrace_free_page(newp);

        if (num_cloned == 0) {
            kprintf("%s: no mapping found for pa_page=%llx but will be added when a page entered\n", __func__, pa_page);
        }

        if (cur_page + ARM_PGBYTES < cur_page) {

        cur_page += ARM_PGBYTES;
    }
// delete page infos for given address range
pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)

    queue_head_t *q = &(pmap_pgtrace.pages);
    pmap_pgtrace_page_t *p;
    vm_map_offset_t cur_page, end_page;

    kprintf("%s start=%llx end=%llx\n", __func__, start, end);

    pmap_paddr_t pa_page;

    // remove page info from start to end
    cur_page = arm_trunc_page(start);
    end_page = arm_trunc_page(end);
    while (cur_page <= end_page) {

        if (pmap == NULL) {
            pa_page = cur_page;
        } else {
            ptep = pmap_pte(pmap, cur_page);
            if (ptep == NULL) {

            pa_page = pte_to_pa(*ptep);

        // remove all clones and validate
        pmap_pgtrace_remove_all_clone(pa_page);

        // find page info and delete
        PMAP_PGTRACE_LOCK(&ints);
        p = pmap_pgtrace_find_page(pa_page);

            queue_remove(q, p, pmap_pgtrace_page_t *, chain);

        PMAP_PGTRACE_UNLOCK(&ints);

        // free outside of locks

            pmap_pgtrace_free_page(p);

        if (cur_page + ARM_PGBYTES < cur_page) {

        cur_page += ARM_PGBYTES;
    }
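
/*
 * pmap_pgtrace_fault (below) is the access-fault path for traced pages: if the
 * faulting PTE carries ARM_PTE_PGTRACE and the page is being traced, the
 * faulting instruction is decoded and replayed against the clone mappings
 * (pgtrace_decode_and_run), a log record is written when the access falls
 * inside the traced range, and the saved PC is advanced past the instruction.
 */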
pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)

    pgtrace_run_result_t res;
    pmap_pgtrace_page_t *p;
    bool ints, found = false;

    // Quick check if we are interested
    ptep = pmap_pte(pmap, va);
    if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
        return KERN_FAILURE;
    }

    PMAP_PGTRACE_LOCK(&ints);

    // Check again since access is serialized
    ptep = pmap_pte(pmap, va);
    if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return KERN_FAILURE;
    } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
        // Somehow this cpu's tlb has not updated
        kprintf("%s Somehow this cpu's tlb has not updated?\n", __func__);
        PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES);

        PMAP_PGTRACE_UNLOCK(&ints);
        return KERN_SUCCESS;
    }

    // Find if this pa is what we are tracing
    pa = pte_to_pa(*ptep);

    p = pmap_pgtrace_find_page(arm_trunc_page(pa));

        panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);

    // find if pmap and va are also matching
    queue_head_t *mapq = &(p->maps);
    queue_head_t *mapwaste = &(p->map_waste);
    pmap_pgtrace_map_t *map;

    queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
        if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {

    // if not found, search map waste as they are still valid

        queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
            if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {

        panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);

    // Decode and run it on the clone map
    bzero(&res, sizeof(res));
    pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss),     // instruction
        va, map->cva,                                               // fault va and clone page vas

    // write a log if in range
    vm_map_offset_t offset = va - map->ova;
    if (map->range.start <= offset && offset <= map->range.end) {
        pgtrace_write_log(res);
    }

    PMAP_PGTRACE_UNLOCK(&ints);

    // Return to next instruction
    set_saved_state_pc(ss, get_saved_state_pc(ss) + sizeof(uint32_t));

    return KERN_SUCCESS;
pmap_enforces_execute_only(
#if (__ARM_VMSA__ == 7)

#if (__ARM_VMSA__ > 7)
    return pmap != kernel_pmap;

MARK_AS_PMAP_TEXT void
pmap_set_jit_entitled_internal(
    __unused pmap_t pmap)

pmap_set_jit_entitled(

    pmap_set_jit_entitled_internal(pmap);
MARK_AS_PMAP_TEXT static kern_return_t
pmap_query_page_info_internal(
    vm_map_offset_t va,

    pv_entry_t **pv_h, *pve_p;

    if (pmap == PMAP_NULL || pmap == kernel_pmap) {
        pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));

        pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
        return KERN_INVALID_ARGUMENT;
    }

    VALIDATE_PMAP(pmap);

    pte = pmap_pte(pmap, va);
    if (pte == PT_ENTRY_NULL) {

    pa = pte_to_pa(*pte);

    if (ARM_PTE_IS_COMPRESSED(*pte)) {
        disp |= PMAP_QUERY_PAGE_COMPRESSED;
        if (*pte & ARM_PTE_COMPRESSED_ALT) {
            disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
        }

    disp |= PMAP_QUERY_PAGE_PRESENT;
    pai = (int) pa_index(pa);
    if (!pa_valid(pa)) {

    pv_h = pai_to_pvh(pai);
    pve_p = PV_ENTRY_NULL;
    if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
        pve_p = pvh_list(pv_h);
        while (pve_p != PV_ENTRY_NULL &&
            pve_get_ptep(pve_p) != pte) {
            pve_p = PVE_NEXT_PTR(pve_next(pve_p));
        }
    }

    if (IS_ALTACCT_PAGE(pai, pve_p)) {
        disp |= PMAP_QUERY_PAGE_ALTACCT;
    } else if (IS_REUSABLE_PAGE(pai)) {
        disp |= PMAP_QUERY_PAGE_REUSABLE;
    } else if (IS_INTERNAL_PAGE(pai)) {
        disp |= PMAP_QUERY_PAGE_INTERNAL;
    }

    pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));

    pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
    return KERN_SUCCESS;

pmap_query_page_info(
    vm_map_offset_t va,

    return pmap_query_page_info_internal(pmap, va, disp_p);
MARK_AS_PMAP_TEXT kern_return_t
pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)

    return KERN_SUCCESS;

pmap_return(boolean_t do_panic, boolean_t do_recurse)

    return pmap_return_internal(do_panic, do_recurse);
MARK_AS_PMAP_TEXT static void
pmap_footprint_suspend_internal(

#if DEVELOPMENT || DEBUG
    if (suspend) {
        current_thread()->pmap_footprint_suspended = TRUE;
        map->pmap->footprint_was_suspended = TRUE;
    } else {
        current_thread()->pmap_footprint_suspended = FALSE;
    }
#else /* DEVELOPMENT || DEBUG */
#endif /* DEVELOPMENT || DEBUG */

pmap_footprint_suspend(

    pmap_footprint_suspend_internal(map, suspend);
#if defined(__arm64__) && (DEVELOPMENT || DEBUG)

struct page_table_level_info {
    uint64_t index_mask;
    uint64_t valid_mask;
    uint64_t type_mask;
    uint64_t type_block;
};

struct page_table_dump_header {
    uint64_t num_entries;
};

struct page_table_level_info page_table_levels[] =
    { { ARM_TT_L0_SIZE, ARM_TT_L0_OFFMASK, ARM_TT_L0_SHIFT, ARM_TT_L0_INDEX_MASK, ARM_TTE_VALID, ARM_TTE_TYPE_MASK, ARM_TTE_TYPE_BLOCK },
      { ARM_TT_L1_SIZE, ARM_TT_L1_OFFMASK, ARM_TT_L1_SHIFT, ARM_TT_L1_INDEX_MASK, ARM_TTE_VALID, ARM_TTE_TYPE_MASK, ARM_TTE_TYPE_BLOCK },
      { ARM_TT_L2_SIZE, ARM_TT_L2_OFFMASK, ARM_TT_L2_SHIFT, ARM_TT_L2_INDEX_MASK, ARM_TTE_VALID, ARM_TTE_TYPE_MASK, ARM_TTE_TYPE_BLOCK },
      { ARM_TT_L3_SIZE, ARM_TT_L3_OFFMASK, ARM_TT_L3_SHIFT, ARM_TT_L3_INDEX_MASK, ARM_PTE_TYPE_VALID, ARM_PTE_TYPE_MASK, ARM_TTE_TYPE_L3BLOCK } };
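
/*
 * pmap_dump_page_tables_recurse (below) serializes a page-table subtree into
 * the caller's buffer: each table is emitted as a page_table_dump_header
 * followed by a raw copy of its entries, and every valid non-block entry is
 * followed recursively.  A recursion result of 0 (checked after each call)
 * presumably indicates the buffer was exhausted.
 */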
pmap_dump_page_tables_recurse(const tt_entry_t *ttp,
    unsigned int cur_level,

    size_t bytes_used = 0;
    uint64_t num_entries = ARM_PGBYTES / sizeof(*ttp);
    uint64_t size = page_table_levels[cur_level].size;
    uint64_t valid_mask = page_table_levels[cur_level].valid_mask;
    uint64_t type_mask = page_table_levels[cur_level].type_mask;
    uint64_t type_block = page_table_levels[cur_level].type_block;

    if (cur_level == arm64_root_pgtable_level) {
        num_entries = arm64_root_pgtable_num_ttes;
    }

    uint64_t tt_size = num_entries * sizeof(tt_entry_t);
    const tt_entry_t *tt_end = &ttp[num_entries];

    if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {

    struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
    header->pa = ml_static_vtop((vm_offset_t)ttp);
    header->num_entries = num_entries;
    header->start_va = start_va;
    header->end_va = start_va + (num_entries * size);

    bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
    bytes_used += (sizeof(*header) + tt_size);
    uint64_t current_va = start_va;

    for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
        tt_entry_t tte = *ttep;

        if (!(tte & valid_mask)) {

        if ((tte & type_mask) == type_block) {

            if (cur_level >= PMAP_TT_MAX_LEVEL) {
                panic("%s: corrupt entry %#llx at %p, "
                    "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
                    __FUNCTION__, tte, ttep,
                    ttp, cur_level, bufp, buf_end);
            }

            const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);

            size_t recurse_result = pmap_dump_page_tables_recurse(next_tt, cur_level + 1, current_va, (uint8_t*)bufp + bytes_used, buf_end);

            if (recurse_result == 0) {

            bytes_used += recurse_result;

pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end)

    panic("pmap_dump_page_tables must only be called from kernel debugger context");

    return pmap_dump_page_tables_recurse(pmap->tte, arm64_root_pgtable_level, pmap->min, bufp, buf_end);

#else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */

pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)

#endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */