/*
 * Copyright (c) 2011-2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_assert.h>
#include <mach_ldebug.h>

#include <mach/shared_region.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/vm_map.h>
#include <mach/machine/vm_param.h>
#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/thread.h>
#include <kern/sched.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>
#include <kern/misc_protos.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#include <libkern/section_keywords.h>

#include <machine/atomic.h>
#include <machine/thread.h>
#include <machine/lowglobals.h>

#include <arm/caches_internal.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/cpu_capabilities.h>
#include <arm/cpu_number.h>
#include <arm/machine_cpu.h>
#include <arm/misc_protos.h>

#include <libkern/section_keywords.h>

#if (__ARM_VMSA__ > 7)
#include <arm64/proc_reg.h>
#include <pexpert/arm64/boot.h>

#include <arm64/pgtrace.h>
#if CONFIG_PGTRACE_NONKEXT
#include <arm64/pgtrace_decoder.h>
#endif // CONFIG_PGTRACE_NONKEXT
#endif

#include <pexpert/device_tree.h>

#include <san/kasan.h>
#if MACH_ASSERT
int pmap_stats_assert = 1;
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)			\
	MACRO_BEGIN							\
	if (pmap_stats_assert && (pmap)->pmap_stats_assert)		\
		assertf(cond, fmt, ##__VA_ARGS__);			\
	MACRO_END
#else /* MACH_ASSERT */
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
#endif /* MACH_ASSERT */

#if DEVELOPMENT || DEBUG
#define PMAP_FOOTPRINT_SUSPENDED(pmap) ((pmap)->footprint_suspended)
#else /* DEVELOPMENT || DEBUG */
#define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
#endif /* DEVELOPMENT || DEBUG */


#if DEVELOPMENT || DEBUG
int panic_on_unsigned_execute = 0;
#endif /* DEVELOPMENT || DEBUG */
/* Virtual memory region for early allocation */
#if (__ARM_VMSA__ == 7)
#define VREGION1_START		(VM_HIGH_KERNEL_WINDOW & ~ARM_TT_L1_PT_OFFMASK)
#else
#define VREGION1_HIGH_WINDOW	(PE_EARLY_BOOT_VA)
#define VREGION1_START		((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
#endif
#define VREGION1_SIZE		(trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))

extern unsigned int not_in_kdp;

extern vm_offset_t first_avail;

extern pmap_paddr_t avail_start;
extern pmap_paddr_t avail_end;

extern vm_offset_t	virtual_space_start;	/* Next available kernel VA */
extern vm_offset_t	virtual_space_end;	/* End of kernel address space */

extern int hard_maxproc;
#if (__ARM_VMSA__ > 7)
/* The number of address bits one TTBR can cover. */
#define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)

/*
 * The bounds on our TTBRs.  These are for sanity checking that
 * an address is accessible by a TTBR before we attempt to map it.
 */
#define ARM64_TTBR0_MIN_ADDR (0ULL)
#define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
#define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
#define ARM64_TTBR1_MAX_ADDR (~0ULL)

/* The level of the root of a page table. */
const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));

/* The number of entries in the root TT of a page table. */
const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
#else
const uint64_t arm64_root_pgtable_level = 0;
const uint64_t arm64_root_pgtable_num_ttes = 0;
#endif
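
/*
 * Worked example of the two expressions above (an editorial sketch; the
 * concrete values of T0SZ_BOOT, ARM_PGSHIFT and TTE_SHIFT are configuration
 * dependent and are assumed here, not taken from this file).  With 16KB
 * translation granules (ARM_PGSHIFT = 14), 8-byte TTEs (TTE_SHIFT = 3) and
 * T0SZ_BOOT = 25:
 *
 *   PGTABLE_ADDR_BITS           = 64 - 25 = 39
 *   bits resolved per level     = ARM_PGSHIFT - TTE_SHIFT = 11
 *   arm64_root_pgtable_level    = 3 - ((39 - 1 - 14) / 11) = 3 - 2 = 1
 *   arm64_root_pgtable_num_ttes = 2 << ((39 - 1 - 14) % 11) = 2 << 2 = 8
 *
 * i.e. a 39-bit address space rooted at an L1 table of 8 entries, each entry
 * covering 2^(14 + 2*11) = 64GB of VA.
 */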
struct pmap			kernel_pmap_store MARK_AS_PMAP_DATA;
SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;

struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));	/* store pt pages */
vm_object_t	pmap_object = &pmap_object_store;

static struct zone *pmap_zone;	/* zone of pmap structures */

decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA)
unsigned int	pmap_stamp MARK_AS_PMAP_DATA;
queue_head_t	map_pmap_list MARK_AS_PMAP_DATA;

queue_head_t	tt_pmap_list MARK_AS_PMAP_DATA;
unsigned int	tt_pmap_count MARK_AS_PMAP_DATA;
unsigned int	tt_pmap_max MARK_AS_PMAP_DATA;

decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA)
queue_head_t	pt_page_list MARK_AS_PMAP_DATA;		/* pt page ptd entries list */

decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA)
typedef struct page_free_entry {
	struct page_free_entry	*next;
} page_free_entry_t;

#define PAGE_FREE_ENTRY_NULL	((page_free_entry_t *) 0)

page_free_entry_t	*pmap_pages_reclaim_list MARK_AS_PMAP_DATA;	/* Reclaimed pt page list */
unsigned int		pmap_pages_request_count MARK_AS_PMAP_DATA;	/* Pending requests to reclaim pt page */
unsigned long long	pmap_pages_request_acum MARK_AS_PMAP_DATA;


typedef struct tt_free_entry {
	struct tt_free_entry	*next;
} tt_free_entry_t;

#define TT_FREE_ENTRY_NULL	((tt_free_entry_t *) 0)

tt_free_entry_t	*free_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int	free_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int	free_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_PAGE_SIZE_TT_MAX	4
tt_free_entry_t	*free_two_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int	free_two_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int	free_two_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_TWO_PAGE_SIZE_TT_MAX	4
tt_free_entry_t	*free_tt_list MARK_AS_PMAP_DATA;
unsigned int	free_tt_count MARK_AS_PMAP_DATA;
unsigned int	free_tt_max MARK_AS_PMAP_DATA;

#define TT_FREE_ENTRY_NULL	((tt_free_entry_t *) 0)
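
/*
 * Illustrative sketch (not compiled): a reclaimed page is threaded onto
 * pmap_pages_reclaim_list by overlaying a page_free_entry_t on the free page
 * itself, so the list costs no extra storage.  The helper name below is
 * hypothetical; the same pattern appears in pmap_pages_free() later in this
 * file.
 */
#if 0 /* example only */
static void
pmap_pages_reclaim_list_push_example(vm_offset_t free_page_va)
{
	page_free_entry_t *page_entry = (page_free_entry_t *)free_page_va;

	simple_lock(&pmap_pages_lock);
	page_entry->next = pmap_pages_reclaim_list;	/* link in at the head */
	pmap_pages_reclaim_list = page_entry;
	simple_unlock(&pmap_pages_lock);
}
#endif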
boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
boolean_t pmap_gc_allowed_by_time_throttle = TRUE;

unsigned int	inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0;	/* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int	inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0;	/* leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int	inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0;		/* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int	inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0;	/* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int	inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0;	/* leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int	inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0;	/* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int	inuse_pmap_pages_count = 0;				/* debugging */

SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte  = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;

SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte  = 0;	/* set by arm_vm_init() - keep out of bss */
SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0;	/* set by arm_vm_init() - phys tte addr */

#if DEVELOPMENT || DEBUG
int nx_enabled = 1;			/* enable no-execute protection */
int allow_data_exec  = 0;		/* No apps may execute data */
int allow_stack_exec = 0;		/* No apps may execute from the stack */
#else /* DEVELOPMENT || DEBUG */
const int nx_enabled = 1;		/* enable no-execute protection */
const int allow_data_exec  = 0;		/* No apps may execute data */
const int allow_stack_exec = 0;		/* No apps may execute from the stack */
#endif /* DEVELOPMENT || DEBUG */
/*
 * pv_entry_t - structure to track the active mappings for a given page
 */
typedef struct pv_entry {
	struct pv_entry	*pve_next;	/* next alias */
	pt_entry_t	*pve_ptep;	/* page table entry */
#if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
/* For the newer ARMv7k ABI where 64-bit types are 64-bit aligned, but pointers
 * are 32-bit:
 * Since pt_desc is 64-bit aligned and we cast often from pv_entry to
 * pt_desc, keep the alignments compatible.
 */
} __attribute__ ((aligned(8))) pv_entry_t;
#else
} pv_entry_t;
#endif

#define PV_ENTRY_NULL	((pv_entry_t *) 0)

/*
 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
 * as a marker for pages mapped through an "alternate accounting" mapping.
 * These macros set, clear and test for this marker and extract the actual
 * value of the "pve_next" pointer.
 */
#define PVE_NEXT_ALTACCT	((uintptr_t) 0x1)
#define PVE_NEXT_SET_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
					     PVE_NEXT_ALTACCT)
#define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
					     ~PVE_NEXT_ALTACCT)
#define PVE_NEXT_IS_ALTACCT(pve_next)	\
	((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
#define PVE_NEXT_PTR(pve_next) \
	((struct pv_entry *)(((uintptr_t) (pve_next)) & \
			     ~PVE_NEXT_ALTACCT))
#if MACH_ASSERT
static void pmap_check_ledgers(pmap_t pmap);
#else
static inline void pmap_check_ledgers(__unused pmap_t pmap) {}
#endif /* MACH_ASSERT */

SECURITY_READ_ONLY_LATE(pv_entry_t **) pv_head_table;	/* array of pv entry pointers */

pv_entry_t		*pv_free_list MARK_AS_PMAP_DATA;
pv_entry_t		*pv_kern_free_list MARK_AS_PMAP_DATA;
decl_simple_lock_data(,pv_free_list_lock MARK_AS_PMAP_DATA)
decl_simple_lock_data(,pv_kern_free_list_lock MARK_AS_PMAP_DATA)

decl_simple_lock_data(,phys_backup_lock)
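
/*
 * Illustrative sketch (not compiled): because pv_entry_t pointers are at
 * least 4-byte aligned, bit 0 of "pve_next" is free to carry the ALTACCT
 * marker.  The function name below is hypothetical.
 */
#if 0 /* example only */
static void
pve_altacct_tagging_example(pv_entry_t *pve, pv_entry_t *next)
{
	pve->pve_next = next;				/* aligned pointer, bit 0 clear */
	PVE_NEXT_SET_ALTACCT(&pve->pve_next);		/* pointer | 0x1 */
	assert(PVE_NEXT_IS_ALTACCT(pve->pve_next));
	/* always strip the tag before following the link */
	assert(PVE_NEXT_PTR(pve->pve_next) == next);
	PVE_NEXT_CLR_ALTACCT(&pve->pve_next);		/* pointer & ~0x1 */
}
#endif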
/*
 * pt_desc - structure to keep info on page assigned to page tables
 */
#if (__ARM_VMSA__ == 7)
#define PT_INDEX_MAX	1
#else
#if (ARM_PGSHIFT == 14)
#define PT_INDEX_MAX	1
#else
#define PT_INDEX_MAX	4
#endif
#endif

#define PT_DESC_REFCOUNT	0x4000U

typedef struct pt_desc {
	queue_chain_t	pt_page;
	struct {
		unsigned short	refcnt;
		unsigned short	wiredcnt;
	} pt_cnt[PT_INDEX_MAX];
	struct pmap	*pmap;
	struct {
		vm_offset_t	va;
	} pt_map[PT_INDEX_MAX];
} pt_desc_t;


#define PTD_ENTRY_NULL	((pt_desc_t *) 0)

SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;

pt_desc_t	*ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
unsigned int	ptd_free_count MARK_AS_PMAP_DATA = 0;
decl_simple_lock_data(,ptd_free_list_lock MARK_AS_PMAP_DATA)
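
/*
 * Note (editorial sketch): pt_cnt[].refcnt normally counts valid/compressed
 * PTEs in a leaf page table, while the sentinel value PT_DESC_REFCOUNT
 * (0x4000) marks a table that is itself a non-leaf translation table (see
 * ptd_init() below, which sets it for levels above the leaf level).  Testing
 * "refcnt & PT_DESC_REFCOUNT" therefore distinguishes the two uses, which is
 * how pmap_pages_reclaim() skips pages that currently hold L2 tables.
 */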
/*
 * physical page attribute
 */
typedef u_int16_t pp_attr_t;

#define PP_ATTR_WIMG_MASK		0x003F
#define PP_ATTR_WIMG(x)			((x) & PP_ATTR_WIMG_MASK)

#define PP_ATTR_REFERENCED		0x0040
#define PP_ATTR_MODIFIED		0x0080

#define PP_ATTR_INTERNAL		0x0100
#define PP_ATTR_REUSABLE		0x0200
#define PP_ATTR_ALTACCT			0x0400
#define PP_ATTR_NOENCRYPT		0x0800

#define PP_ATTR_REFFAULT		0x1000
#define PP_ATTR_MODFAULT		0x2000


SECURITY_READ_ONLY_LATE(pp_attr_t*)	pp_attr_table;


typedef uint8_t io_attr_t;

#define IO_ATTR_WIMG_MASK		0x3F
#define IO_ATTR_WIMG(x)			((x) & IO_ATTR_WIMG_MASK)

SECURITY_READ_ONLY_LATE(io_attr_t*)	io_attr_table;

SECURITY_READ_ONLY_LATE(pmap_paddr_t)	vm_first_phys = (pmap_paddr_t) 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t)	vm_last_phys = (pmap_paddr_t) 0;

SECURITY_READ_ONLY_LATE(pmap_paddr_t)	io_rgn_start = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t)	io_rgn_end = 0;
SECURITY_READ_ONLY_LATE(uint32_t)	io_rgn_granule = 0;

SECURITY_READ_ONLY_LATE(boolean_t)	pmap_initialized = FALSE;	/* Has pmap_init completed? */

SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;

SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default  = 0x0;
#if defined(__arm64__)
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
#endif

/* free address spaces (1 means free) */
static uint32_t asid_bitmap[MAX_ASID / (sizeof(uint32_t) * NBBY)] MARK_AS_PMAP_DATA;
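
/*
 * Worked example of the bitmap indexing used by alloc_asid()/free_asid()
 * below (an editorial sketch; MAX_ASID's value is configuration dependent).
 * With 32-bit words (sizeof(uint32_t) * NBBY == 32), ASID 75 lives at:
 *
 *   word index = 75 / 32 = 2
 *   bit index  = 75 % 32 = 11
 *
 * so it is free iff (asid_bitmap[2] & (1 << 11)) is set, which is exactly
 * the bit that ffs() finds on allocation and that setbit() restores on free.
 */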
#if (__ARM_VMSA__ > 7)
SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
#endif


#define pa_index(pa)									\
	(atop((pa) - vm_first_phys))

#define pai_to_pvh(pai)									\
	(&pv_head_table[pai])

#define pa_valid(x)									\
	((x) >= vm_first_phys && (x) < vm_last_phys)

/* PTE Define Macros */

#define pte_is_wired(pte)								\
	(((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)

#define pte_set_wired(ptep, wired)							\
	do {										\
		SInt16	*ptd_wiredcnt_ptr;						\
		ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->pt_cnt[ARM_PT_DESC_INDEX(ptep)].wiredcnt);	\
		if (wired) {								\
			*ptep |= ARM_PTE_WIRED;						\
			OSAddAtomic16(1, ptd_wiredcnt_ptr);				\
		} else {								\
			*ptep &= ~ARM_PTE_WIRED;					\
			OSAddAtomic16(-1, ptd_wiredcnt_ptr);				\
		}									\
	} while (0)

#define pte_is_ffr(pte)									\
	(((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)

#define pte_set_ffr(pte, ffr)								\
	do {										\
		if (ffr) {								\
			pte |= ARM_PTE_WRITEABLE;					\
		} else {								\
			pte &= ~ARM_PTE_WRITEABLE;					\
		}									\
	} while (0)

/* PVE Define Macros */

#define pve_next(pve)									\
	((pve)->pve_next)

#define pve_link_field(pve)								\
	(&pve_next(pve))

#define pve_link(pp, e)									\
	((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))

#define pve_unlink(pp, e)								\
	(pve_next(pp) = pve_next(e))

/* bits held in the ptep pointer field */

#define pve_get_ptep(pve)								\
	((pve)->pve_ptep)

#define pve_set_ptep(pve, ptep_new)							\
	do {										\
		(pve)->pve_ptep = (ptep_new);						\
	} while (0)

/* PTEP Define Macros */

#if (__ARM_VMSA__ == 7)

#define ARM_PT_DESC_INDEX_MASK		0x00000
#define ARM_PT_DESC_INDEX_SHIFT		0

/*
 * mask for page descriptor index:  4MB per page table
 */
#define ARM_TT_PT_INDEX_MASK		0xfffU		/* mask for page descriptor index: 4MB per page table  */

/*
 * Shift value used for reconstructing the virtual address for a PTE.
 */
#define ARM_TT_PT_ADDR_SHIFT		(10U)

#define ARM_PT_DESC_INDEX(ptep)								\
	(((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)

#define ptep_get_ptd(ptep)								\
	((struct pt_desc *)((*((vm_offset_t *)(pai_to_pvh(pa_index((vm_offset_t)(ptep) - gVirtBase + gPhysBase))))) & PVH_LIST_MASK))

#define ptep_get_va(ptep)								\
	((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index((((vm_offset_t)(ptep) & ~0xFFF) - gVirtBase + gPhysBase))))))->pt_map[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))

#define ptep_get_pmap(ptep)								\
	((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index((((vm_offset_t)(ptep) & ~0xFFF) - gVirtBase + gPhysBase))))))->pmap))

#else

#if (ARM_PGSHIFT == 12)
#define ARM_PT_DESC_INDEX_MASK		((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
#define ARM_PT_DESC_INDEX_SHIFT		((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
/*
 * mask for page descriptor index:  2MB per page table
 */
#define ARM_TT_PT_INDEX_MASK		(0x0fffULL)
/*
 * Shift value used for reconstructing the virtual address for a PTE.
 */
#define ARM_TT_PT_ADDR_SHIFT		(9ULL)

/* TODO: Give this a better name/documentation than "other" */
#define ARM_TT_PT_OTHER_MASK		(0x0fffULL)

#else

#define ARM_PT_DESC_INDEX_MASK		(0x00000)
#define ARM_PT_DESC_INDEX_SHIFT		(0)
/*
 * mask for page descriptor index:  32MB per page table
 */
#define ARM_TT_PT_INDEX_MASK		(0x3fffULL)
/*
 * Shift value used for reconstructing the virtual address for a PTE.
 */
#define ARM_TT_PT_ADDR_SHIFT		(11ULL)

/* TODO: Give this a better name/documentation than "other" */
#define ARM_TT_PT_OTHER_MASK		(0x3fffULL)
#endif

#define ARM_PT_DESC_INDEX(ptep)								\
	(((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)

#define ptep_get_ptd(ptep)								\
	((struct pt_desc *)((*((vm_offset_t *)(pai_to_pvh(pa_index((vm_offset_t)(ptep) - gVirtBase + gPhysBase))))) & PVH_LIST_MASK))

#define ptep_get_va(ptep)								\
	((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index((((vm_offset_t)(ptep) & ~ARM_TT_PT_OTHER_MASK) - gVirtBase + gPhysBase))))))->pt_map[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))

#define ptep_get_pmap(ptep)								\
	((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index((((vm_offset_t)(ptep) & ~ARM_TT_PT_OTHER_MASK) - gVirtBase + gPhysBase))))))->pmap))

#endif
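
/*
 * Worked example of the VA reconstruction done by ptep_get_va() (an
 * editorial sketch; the 16KB-page constants above are used).  A pt_entry_t
 * is 8 bytes, so a PTE at byte offset X within its page table page maps the
 * VA
 *
 *   base_va + (X / 8) * PAGE_SIZE
 *          = base_va + (X / 8) * 2^14
 *          = base_va + (X << 11)
 *
 * which is why the macro takes (ptep & ARM_TT_PT_INDEX_MASK), shifts it left
 * by ARM_TT_PT_ADDR_SHIFT (11) and adds it to the pt_map[].va recorded by
 * ptd_init().  The 4KB-page variant works the same way with a shift of 9.
 */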
527 /* PVH Define Macros */
530 #define PVH_TYPE_NULL 0x0UL
531 #define PVH_TYPE_PVEP 0x1UL
532 #define PVH_TYPE_PTEP 0x2UL
533 #define PVH_TYPE_PTDP 0x3UL
535 #define PVH_TYPE_MASK (0x3UL)
536 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
538 #if (__ARM_VMSA__ == 7)
539 #define pvh_set_bits(h, b) \
541 while (!OSCompareAndSwap(*(vm_offset_t *)(h), *(vm_offset_t *)(h) | (b), (vm_offset_t *)(h))); \
544 #define pvh_clear_bits(h, b) \
546 while (!OSCompareAndSwap(*(vm_offset_t *)(h), *(vm_offset_t *)(h) & ~(b), (vm_offset_t *)(h))); \
549 #define pvh_set_bits(h, b) \
551 while (!OSCompareAndSwap64(*(vm_offset_t *)(h), *(vm_offset_t *)(h) | ((int64_t)b), (vm_offset_t *)(h))); \
554 #define pvh_clear_bits(h, b) \
556 while (!OSCompareAndSwap64(*(vm_offset_t *)(h), *(vm_offset_t *)(h) & ~((int64_t)b), (vm_offset_t *)(h))); \
560 #define pvh_test_type(h, b) \
561 ((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))
563 #define pvh_ptep(h) \
564 ((pt_entry_t *)(*(vm_offset_t *)(h) & PVH_LIST_MASK))
566 #define pvh_list(h) \
567 ((pv_entry_t *)(*(vm_offset_t *)(h) & PVH_LIST_MASK))
569 #define pvh_bits(h) \
570 (*(vm_offset_t *)(h) & PVH_TYPE_MASK)
572 #if (__ARM_VMSA__ == 7)
573 #define pvh_update_head(h, e, t) \
575 while (!OSCompareAndSwap(*(vm_offset_t *)(h), (vm_offset_t)(e) | (t), (vm_offset_t *)(h))); \
578 #define pvh_update_head(h, e, t) \
580 while (!OSCompareAndSwap64(*(vm_offset_t *)(h), (vm_offset_t)(e) | (t), (vm_offset_t *)(h))); \
584 #define pvh_add(h, e) \
586 assert(!pvh_test_type((h), PVH_TYPE_PTEP)); \
587 pve_next(e) = pvh_list(h); \
588 pvh_update_head((h), (e), PVH_TYPE_PVEP); \
591 #define pvh_remove(h, p, e) \
593 assert(!PVE_NEXT_IS_ALTACCT(pve_next((e)))); \
595 if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
596 pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
598 pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
603 * preserve the "alternate accounting" bit \
604 * when updating "p" (the previous entry's \
607 boolean_t __is_altacct; \
608 __is_altacct = PVE_NEXT_IS_ALTACCT(*(p)); \
609 *(p) = PVE_NEXT_PTR(pve_next((e))); \
610 if (__is_altacct) { \
611 PVE_NEXT_SET_ALTACCT((p)); \
613 PVE_NEXT_CLR_ALTACCT((p)); \
619 /* PPATTR Define Macros */
621 #define ppattr_set_bits(h, b) \
623 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
626 #define ppattr_clear_bits(h, b) \
628 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
631 #define ppattr_test_bits(h, b) \
632 ((*(pp_attr_t *)(h) & (b)) == (b))
634 #define pa_set_bits(x, b) \
637 ppattr_set_bits(&pp_attr_table[pa_index(x)], \
641 #define pa_test_bits(x, b) \
642 (pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
645 #define pa_clear_bits(x, b) \
648 ppattr_clear_bits(&pp_attr_table[pa_index(x)], \
652 #define pa_set_modify(x) \
653 pa_set_bits(x, PP_ATTR_MODIFIED)
655 #define pa_clear_modify(x) \
656 pa_clear_bits(x, PP_ATTR_MODIFIED)
658 #define pa_set_reference(x) \
659 pa_set_bits(x, PP_ATTR_REFERENCED)
661 #define pa_clear_reference(x) \
662 pa_clear_bits(x, PP_ATTR_REFERENCED)
665 #define IS_INTERNAL_PAGE(pai) \
666 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
667 #define SET_INTERNAL_PAGE(pai) \
668 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
669 #define CLR_INTERNAL_PAGE(pai) \
670 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
672 #define IS_REUSABLE_PAGE(pai) \
673 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
674 #define SET_REUSABLE_PAGE(pai) \
675 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
676 #define CLR_REUSABLE_PAGE(pai) \
677 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
679 #define IS_ALTACCT_PAGE(pai, pve_p) \
681 ? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
682 : PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
683 #define SET_ALTACCT_PAGE(pai, pve_p) \
684 if ((pve_p) == NULL) { \
685 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
687 PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
689 #define CLR_ALTACCT_PAGE(pai, pve_p) \
690 if ((pve_p) == NULL) { \
691 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT);\
693 PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
696 #define IS_REFFAULT_PAGE(pai) \
697 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
698 #define SET_REFFAULT_PAGE(pai) \
699 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
700 #define CLR_REFFAULT_PAGE(pai) \
701 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
703 #define IS_MODFAULT_PAGE(pai) \
704 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
705 #define SET_MODFAULT_PAGE(pai) \
706 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
707 #define CLR_MODFAULT_PAGE(pai) \
708 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
711 #if (__ARM_VMSA__ == 7)
713 #define tte_index(pmap, addr) \
716 #define tte_get_ptd(tte) \
717 ((struct pt_desc *)((*((vm_offset_t *)(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK)))))) & PVH_LIST_MASK))
721 #define tt0_index(pmap, addr) \
722 (((addr) & ARM_TT_L0_INDEX_MASK) >> ARM_TT_L0_SHIFT)
724 #define tt1_index(pmap, addr) \
725 (((addr) & ARM_TT_L1_INDEX_MASK) >> ARM_TT_L1_SHIFT)
727 #define tt2_index(pmap, addr) \
728 (((addr) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)
730 #define tt3_index(pmap, addr) \
731 (((addr) & ARM_TT_L3_INDEX_MASK) >> ARM_TT_L3_SHIFT)
733 #define tte_index(pmap, addr) \
734 (((addr) & ARM_TT_L2_INDEX_MASK) >> ARM_TT_L2_SHIFT)
736 #define tte_get_ptd(tte) \
737 ((struct pt_desc *)((*((vm_offset_t *)(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK)))))) & PVH_LIST_MASK))
/*
 *	Lock on pmap system
 */

#define PMAP_LOCK_INIT(pmap) {						\
	simple_lock_init(&(pmap)->lock, 0);				\
}

#define PMAP_LOCK(pmap) {						\
	simple_lock(&(pmap)->lock);					\
}

#define PMAP_UNLOCK(pmap) {						\
	simple_unlock(&(pmap)->lock);					\
}

#if MACH_ASSERT
#define PMAP_ASSERT_LOCKED(pmap) {					\
	simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED);		\
}
#else
#define PMAP_ASSERT_LOCKED(pmap)
#endif
/*
 *	Each entry in the pv_head_table is locked by a bit in the
 *	pv lock array, which is stored in the region preceding pv_head_table.
 *	The lock bits are accessed by the physical address of the page they lock.
 */
770 #define LOCK_PVH(index) { \
771 hw_lock_bit((hw_lock_bit_t *) \
772 ((unsigned int*)pv_head_table)-1-(index>>5), \
776 #define UNLOCK_PVH(index) { \
777 hw_unlock_bit((hw_lock_bit_t *) \
778 ((unsigned int*)pv_head_table)-1-(index>>5), \
782 #define ASSERT_PVH_LOCKED(index) { \
783 assert(*(((unsigned int*)pv_head_table)-1-(index>>5)) & (1 << (index & 0x1F))); \
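
/*
 * Worked example of the pv_head_table lock-bit addressing above (an
 * editorial sketch).  Each 32-bit word stored immediately before
 * pv_head_table holds the lock bits for 32 physical page indices.  For
 * pai = 100:
 *
 *   word = ((unsigned int *)pv_head_table) - 1 - (100 >> 5)
 *        = ((unsigned int *)pv_head_table) - 4
 *   bit  = 100 & 0x1F = 4
 *
 * LOCK_PVH/UNLOCK_PVH operate on that word, and ASSERT_PVH_LOCKED checks
 * (1 << 4) in it.
 */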
786 #define PMAP_UPDATE_TLBS(pmap, s, e) { \
787 flush_mmu_tlb_region_asid(s, (unsigned)(e - s), pmap); \
#ifdef __ARM_L1_PTW__

#define FLUSH_PTE_RANGE(spte, epte)					\
	__asm__ volatile("dsb ish");

#define FLUSH_PTE(pte_p)						\
	__asm__ volatile("dsb ish");

#else

#define FLUSH_PTE_RANGE(spte, epte)					\
	CleanPoU_DcacheRegion((vm_offset_t)spte,			\
		(vm_offset_t)epte - (vm_offset_t)spte);

#define FLUSH_PTE(pte_p)						\
	CleanPoU_DcacheRegion((vm_offset_t)pte_p, sizeof(pt_entry_t));
#endif
808 #define WRITE_PTE(pte_p, pte_entry) \
809 __unreachable_ok_push \
810 if (TEST_PAGE_RATIO_4) { \
812 if (((unsigned)(pte_p)) & 0x1f) panic("WRITE_PTE\n"); \
813 if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
814 *(pte_p) = (pte_entry); \
815 *((pte_p)+1) = (pte_entry); \
816 *((pte_p)+2) = (pte_entry); \
817 *((pte_p)+3) = (pte_entry); \
819 *(pte_p) = (pte_entry); \
820 *((pte_p)+1) = (pte_entry) | 0x1000; \
821 *((pte_p)+2) = (pte_entry) | 0x2000; \
822 *((pte_p)+3) = (pte_entry) | 0x3000; \
824 FLUSH_PTE_RANGE((pte_p),((pte_p)+4)); \
828 *(pte_p) = (pte_entry); \
834 #define WRITE_PTE_FAST(pte_p, pte_entry) \
835 __unreachable_ok_push \
836 if (TEST_PAGE_RATIO_4) { \
837 if (((unsigned)(pte_p)) & 0x1f) panic("WRITE_PTE\n"); \
838 if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
839 *(pte_p) = (pte_entry); \
840 *((pte_p)+1) = (pte_entry); \
841 *((pte_p)+2) = (pte_entry); \
842 *((pte_p)+3) = (pte_entry); \
844 *(pte_p) = (pte_entry); \
845 *((pte_p)+1) = (pte_entry) | 0x1000; \
846 *((pte_p)+2) = (pte_entry) | 0x2000; \
847 *((pte_p)+3) = (pte_entry) | 0x3000; \
850 *(pte_p) = (pte_entry); \
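
/*
 * Worked example of the TEST_PAGE_RATIO_4 path above (an editorial sketch).
 * When the VM page size is 16KB but the hardware granule is 4KB, one 16KB VM
 * page is backed by four consecutive 4KB PTEs, so a valid entry is
 * replicated with its output address advanced by one hardware page each
 * time:
 *
 *   pte_p[0] = pa | attrs
 *   pte_p[1] = (pa + 0x1000) | attrs
 *   pte_p[2] = (pa + 0x2000) | attrs
 *   pte_p[3] = (pa + 0x3000) | attrs
 *
 * Empty/compressed markers carry no physical address, so they are copied
 * unchanged into all four slots.
 */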
856 * Other useful macros.
858 #define current_pmap() \
859 (vm_map_pmap(current_thread()->map))
861 #define PMAP_IS_VALID(x) (TRUE)
864 unsigned int pmap_trace
= 0;
866 #define PMAP_TRACE(...) \
868 KDBG_RELEASE(__VA_ARGS__); \
871 #define PMAP_TRACE(...) KDBG_DEBUG(__VA_ARGS__)
874 #define PMAP_TRACE_CONSTANT(...) KDBG_RELEASE(__VA_ARGS__)
877 * Internal function prototypes (forward declarations).
883 static boolean_t
pv_alloc(
891 static void pv_list_free(
896 static void ptd_bootstrap(
897 pt_desc_t
*ptdp
, unsigned int ptd_cnt
);
899 static pt_desc_t
*ptd_alloc(
902 static void ptd_deallocate(
905 static void ptd_init(
906 pt_desc_t
*ptdp
, pmap_t pmap
, vm_map_address_t va
, unsigned int ttlevel
, pt_entry_t
* pte_p
);
908 static void pmap_zone_init(
911 static void pmap_set_reference(
914 ppnum_t
pmap_vtophys(
915 pmap_t pmap
, addr64_t va
);
917 void pmap_switch_user_ttb(
920 static void flush_mmu_tlb_region_asid(
921 vm_offset_t va
, unsigned length
, pmap_t pmap
);
923 static kern_return_t
pmap_expand(
924 pmap_t
, vm_map_address_t
, unsigned int options
, unsigned int level
);
926 static int pmap_remove_range(
927 pmap_t
, vm_map_address_t
, pt_entry_t
*, pt_entry_t
*, uint32_t *);
929 static int pmap_remove_range_options(
930 pmap_t
, vm_map_address_t
, pt_entry_t
*, pt_entry_t
*, uint32_t *, int);
932 static tt_entry_t
*pmap_tt1_allocate(
933 pmap_t
, vm_size_t
, unsigned int);
935 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
937 static void pmap_tt1_deallocate(
938 pmap_t
, tt_entry_t
*, vm_size_t
, unsigned int);
940 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
942 static kern_return_t
pmap_tt_allocate(
943 pmap_t
, tt_entry_t
**, unsigned int, unsigned int);
945 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
947 static void pmap_tte_deallocate(
948 pmap_t
, tt_entry_t
*, unsigned int);
950 #define PMAP_TT_L1_LEVEL 0x1
951 #define PMAP_TT_L2_LEVEL 0x2
952 #define PMAP_TT_L3_LEVEL 0x3
953 #if (__ARM_VMSA__ == 7)
954 #define PMAP_TT_MAX_LEVEL PMAP_TT_L2_LEVEL
956 #define PMAP_TT_MAX_LEVEL PMAP_TT_L3_LEVEL
959 #ifdef __ARM64_PMAP_SUBPAGE_L1__
960 #if (__ARM_VMSA__ <= 7)
961 #error This is not supported for old-style page tables
963 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
965 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
968 const unsigned int arm_hardware_page_size
= ARM_PGBYTES
;
969 const unsigned int arm_pt_desc_size
= sizeof(pt_desc_t
);
970 const unsigned int arm_pt_root_size
= PMAP_ROOT_ALLOC_SIZE
;
972 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
974 void pmap_init_pte_page_internal(
975 pmap_t
, pt_entry_t
*, vm_offset_t
, unsigned int , pt_desc_t
**);
978 #if (__ARM_VMSA__ > 7)
980 static inline tt_entry_t
*pmap_tt1e(
981 pmap_t
, vm_map_address_t
);
983 static inline tt_entry_t
*pmap_tt2e(
984 pmap_t
, vm_map_address_t
);
986 static inline pt_entry_t
*pmap_tt3e(
987 pmap_t
, vm_map_address_t
);
989 static void pmap_unmap_sharedpage(
992 static void pmap_sharedpage_flush_32_to_64(
996 pmap_is_64bit(pmap_t
);
1000 static inline tt_entry_t
*pmap_tte(
1001 pmap_t
, vm_map_address_t
);
1003 static inline pt_entry_t
*pmap_pte(
1004 pmap_t
, vm_map_address_t
);
1006 static void pmap_update_cache_attributes_locked(
1009 boolean_t
arm_clear_fast_fault(
1011 vm_prot_t fault_type
);
1013 static pmap_paddr_t
pmap_pages_reclaim(
1016 static kern_return_t
pmap_pages_alloc(
1021 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1022 #define PMAP_PAGES_RECLAIM_NOWAIT 0x2
1024 static void pmap_pages_free(
1029 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
1030 static __return_type __function_name##_internal __function_args;
1032 PMAP_SUPPORT_PROTOTYPES(
1034 arm_fast_fault
, (pmap_t pmap
,
1035 vm_map_address_t va
,
1036 vm_prot_t fault_type
,
1037 boolean_t from_user
), ARM_FAST_FAULT_INDEX
);
1040 PMAP_SUPPORT_PROTOTYPES(
1042 arm_force_fast_fault
, (ppnum_t ppnum
,
1043 vm_prot_t allow_mode
,
1044 int options
), ARM_FORCE_FAST_FAULT_INDEX
);
1046 PMAP_SUPPORT_PROTOTYPES(
1048 mapping_free_prime
, (void), MAPPING_FREE_PRIME_INDEX
);
1050 PMAP_SUPPORT_PROTOTYPES(
1052 mapping_replenish
, (void), MAPPING_REPLENISH_INDEX
);
1054 PMAP_SUPPORT_PROTOTYPES(
1056 pmap_batch_set_cache_attributes
, (ppnum_t pn
,
1057 unsigned int cacheattr
,
1058 unsigned int page_cnt
,
1059 unsigned int page_index
,
1061 unsigned int *res
), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX
);
1063 PMAP_SUPPORT_PROTOTYPES(
1065 pmap_change_wiring
, (pmap_t pmap
,
1067 boolean_t wired
), PMAP_CHANGE_WIRING_INDEX
);
1069 PMAP_SUPPORT_PROTOTYPES(
1071 pmap_create
, (ledger_t ledger
,
1073 boolean_t is_64bit
), PMAP_CREATE_INDEX
);
1075 PMAP_SUPPORT_PROTOTYPES(
1077 pmap_destroy
, (pmap_t pmap
), PMAP_DESTROY_INDEX
);
1081 PMAP_SUPPORT_PROTOTYPES(
1083 pmap_enter_options
, (pmap_t pmap
,
1087 vm_prot_t fault_type
,
1090 unsigned int options
), PMAP_ENTER_OPTIONS_INDEX
);
1092 PMAP_SUPPORT_PROTOTYPES(
1094 pmap_extract
, (pmap_t pmap
,
1095 vm_map_address_t va
), PMAP_EXTRACT_INDEX
);
1097 PMAP_SUPPORT_PROTOTYPES(
1099 pmap_find_phys
, (pmap_t pmap
,
1100 addr64_t va
), PMAP_FIND_PHYS_INDEX
);
1102 #if (__ARM_VMSA__ > 7)
1103 PMAP_SUPPORT_PROTOTYPES(
1105 pmap_insert_sharedpage
, (pmap_t pmap
), PMAP_INSERT_SHAREDPAGE_INDEX
);
1109 PMAP_SUPPORT_PROTOTYPES(
1111 pmap_is_empty
, (pmap_t pmap
,
1112 vm_map_offset_t va_start
,
1113 vm_map_offset_t va_end
), PMAP_IS_EMPTY_INDEX
);
1116 PMAP_SUPPORT_PROTOTYPES(
1118 pmap_map_cpu_windows_copy
, (ppnum_t pn
,
1120 unsigned int wimg_bits
), PMAP_MAP_CPU_WINDOWS_COPY_INDEX
);
1122 PMAP_SUPPORT_PROTOTYPES(
1124 pmap_nest
, (pmap_t grand
,
1128 uint64_t size
), PMAP_NEST_INDEX
);
1130 PMAP_SUPPORT_PROTOTYPES(
1132 pmap_page_protect_options
, (ppnum_t ppnum
,
1134 unsigned int options
), PMAP_PAGE_PROTECT_OPTIONS_INDEX
);
1136 PMAP_SUPPORT_PROTOTYPES(
1138 pmap_protect_options
, (pmap_t pmap
,
1139 vm_map_address_t start
,
1140 vm_map_address_t end
,
1142 unsigned int options
,
1143 void *args
), PMAP_PROTECT_OPTIONS_INDEX
);
1145 PMAP_SUPPORT_PROTOTYPES(
1147 pmap_query_page_info
, (pmap_t pmap
,
1149 int *disp_p
), PMAP_QUERY_PAGE_INFO_INDEX
);
1151 PMAP_SUPPORT_PROTOTYPES(
1153 pmap_query_resident
, (pmap_t pmap
,
1154 vm_map_address_t start
,
1155 vm_map_address_t end
,
1156 mach_vm_size_t
*resident_bytes_p
,
1157 mach_vm_size_t
*compressed_bytes_p
), PMAP_QUERY_RESIDENT_INDEX
);
1159 PMAP_SUPPORT_PROTOTYPES(
1161 pmap_reference
, (pmap_t pmap
), PMAP_REFERENCE_INDEX
);
1163 PMAP_SUPPORT_PROTOTYPES(
1165 pmap_remove_options
, (pmap_t pmap
,
1166 vm_map_address_t start
,
1167 vm_map_address_t end
,
1168 int options
), PMAP_REMOVE_OPTIONS_INDEX
);
1170 PMAP_SUPPORT_PROTOTYPES(
1172 pmap_return
, (boolean_t do_panic
,
1173 boolean_t do_recurse
), PMAP_RETURN_INDEX
);
1175 PMAP_SUPPORT_PROTOTYPES(
1177 pmap_set_cache_attributes
, (ppnum_t pn
,
1178 unsigned int cacheattr
), PMAP_SET_CACHE_ATTRIBUTES_INDEX
);
1180 PMAP_SUPPORT_PROTOTYPES(
1182 pmap_set_nested
, (pmap_t pmap
), PMAP_SET_NESTED_INDEX
);
1185 PMAP_SUPPORT_PROTOTYPES(
1187 pmap_set_process
, (pmap_t pmap
,
1189 char *procname
), PMAP_SET_PROCESS_INDEX
);
1193 PMAP_SUPPORT_PROTOTYPES(
1195 pmap_unmap_cpu_windows_copy
, (unsigned int index
), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX
);
1197 PMAP_SUPPORT_PROTOTYPES(
1199 pmap_unnest_options
, (pmap_t grand
,
1202 unsigned int option
), PMAP_UNNEST_OPTIONS_INDEX
);
1205 PMAP_SUPPORT_PROTOTYPES(
1207 phys_attribute_set
, (ppnum_t pn
,
1208 unsigned int bits
), PHYS_ATTRIBUTE_SET_INDEX
);
1211 PMAP_SUPPORT_PROTOTYPES(
1213 phys_attribute_clear
, (ppnum_t pn
,
1216 void *arg
), PHYS_ATTRIBUTE_CLEAR_INDEX
);
1218 PMAP_SUPPORT_PROTOTYPES(
1220 pmap_switch
, (pmap_t pmap
), PMAP_SWITCH_INDEX
);
1222 PMAP_SUPPORT_PROTOTYPES(
1224 pmap_switch_user_ttb
, (pmap_t pmap
), PMAP_SWITCH_USER_TTB_INDEX
);
1228 void pmap_footprint_suspend(vm_map_t map
,
1230 PMAP_SUPPORT_PROTOTYPES(
1232 pmap_footprint_suspend
, (vm_map_t map
,
1234 PMAP_FOOTPRINT_SUSPEND_INDEX
);
1237 boolean_t pgtrace_enabled
= 0;
1240 queue_chain_t chain
;
	/*
	 *  pmap     - pmap for below addresses
	 *  ova      - original va page address
	 *  cva      - clone va addresses for pre, target and post pages
	 *  cva_spte - clone saved ptes
	 *  range    - trace range in this map
	 *  cloned   - has been cloned or not
	 */
1251 vm_map_offset_t ova
;
1252 vm_map_offset_t cva
[3];
1253 pt_entry_t cva_spte
[3];
1259 } pmap_pgtrace_map_t
;
1261 static void pmap_pgtrace_init(void);
1262 static bool pmap_pgtrace_enter_clone(pmap_t pmap
, vm_map_offset_t va_page
, vm_map_offset_t start
, vm_map_offset_t end
);
1263 static void pmap_pgtrace_remove_clone(pmap_t pmap
, pmap_paddr_t pa_page
, vm_map_offset_t va_page
);
1264 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa
);
#if (__ARM_VMSA__ > 7)
/*
 * The low global vector page is mapped at a fixed alias.
 * Since the page size is 16k for H8 and newer we map the globals to a 16k
 * aligned address. Readers of the globals (e.g. lldb, panic server) need
 * to check both addresses anyway for backward compatibility. So for now
 * we leave H6 and H7 where they were.
 */
#if (ARM_PGSHIFT == 14)
#define LOWGLOBAL_ALIAS		(LOW_GLOBAL_BASE_ADDRESS + 0x4000)
#else
#define LOWGLOBAL_ALIAS		(LOW_GLOBAL_BASE_ADDRESS + 0x2000)
#endif
#else
#define LOWGLOBAL_ALIAS		(0xFFFF1000)
#endif
long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;

int pt_fake_zone_index = -1;		/* index of pmap fake zone */
1295 * Allocates and initializes a per-CPU data structure for the pmap.
1298 pmap_cpu_data_init_internal(unsigned int cpu_number
)
1300 pmap_cpu_data_t
* pmap_cpu_data
= NULL
;
1302 pmap_cpu_data
= pmap_get_cpu_data();
1303 pmap_cpu_data
->cpu_number
= cpu_number
;
1307 pmap_cpu_data_init(void)
1309 pmap_cpu_data_init_internal(cpu_number());
1313 pmap_cpu_data_array_init(void)
1316 pmap_cpu_data_init();
1320 pmap_get_cpu_data(void)
1322 pmap_cpu_data_t
* pmap_cpu_data
= NULL
;
1324 pmap_cpu_data
= &getCpuDatap()->cpu_pmap_cpu_data
;
1326 return pmap_cpu_data
;
1335 boolean_t found_page
;
	/*
	 * pmap_pages_reclaim() returns a page by freeing an active pt page.
	 * To be eligible, a pt page must be assigned to a user pmap, have no
	 * wired pte entries, and contain at least one valid pte entry.
	 *
	 * In a loop, check for a page in the reclaimed pt page list.
	 * If one is present, unlink that page and return its physical page address.
	 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
	 * If found, invoke pmap_remove_range() on its pmap and address range, then
	 * deallocate that pt page.  This will end up adding the pt page to the
	 * reclaimed pt page list.
	 * If no eligible page was found in the pt page list, panic.
	 */
1354 simple_lock(&pmap_pages_lock
);
1355 pmap_pages_request_count
++;
1356 pmap_pages_request_acum
++;
1360 if (pmap_pages_reclaim_list
!= (page_free_entry_t
*)NULL
) {
1361 page_free_entry_t
*page_entry
;
1363 page_entry
= pmap_pages_reclaim_list
;
1364 pmap_pages_reclaim_list
= pmap_pages_reclaim_list
->next
;
1365 simple_unlock(&pmap_pages_lock
);
1367 return((pmap_paddr_t
)ml_static_vtop((vm_offset_t
)page_entry
));
1370 simple_unlock(&pmap_pages_lock
);
1372 simple_lock(&pt_pages_lock
);
1373 ptdp
= (pt_desc_t
*)queue_first(&pt_page_list
);
1376 while (!queue_end(&pt_page_list
, (queue_entry_t
)ptdp
)) {
1377 if ((ptdp
->pmap
!= kernel_pmap
)
1378 && (ptdp
->pmap
->nested
== FALSE
)
1379 && (simple_lock_try(&ptdp
->pmap
->lock
))) {
1381 unsigned refcnt_acc
= 0;
1382 unsigned wiredcnt_acc
= 0;
1384 for (i
= 0 ; i
< PT_INDEX_MAX
; i
++) {
1385 if (ptdp
->pt_cnt
[i
].refcnt
& PT_DESC_REFCOUNT
) {
1386 /* Do not attempt to free a page that contains an L2 table
1387 * or is currently being operated on by pmap_enter(),
1388 * which can drop the pmap lock. */
1392 refcnt_acc
+= ptdp
->pt_cnt
[i
].refcnt
;
1393 wiredcnt_acc
+= ptdp
->pt_cnt
[i
].wiredcnt
;
1395 if ((wiredcnt_acc
== 0) && (refcnt_acc
!= 0)) {
1397 /* Leave ptdp->pmap locked here. We're about to reclaim
1398 * a tt page from it, so we don't want anyone else messing
1399 * with it while we do that. */
1402 simple_unlock(&ptdp
->pmap
->lock
);
1404 ptdp
= (pt_desc_t
*)queue_next((queue_t
)ptdp
);
1407 panic("pmap_pages_reclaim(): No eligible page in pt_page_list\n");
1409 int remove_count
= 0;
1410 vm_map_address_t va
;
1412 pt_entry_t
*bpte
, *epte
;
1415 uint32_t rmv_spte
=0;
1417 simple_unlock(&pt_pages_lock
);
1419 PMAP_ASSERT_LOCKED(pmap
); // pmap lock should be held from loop above
1420 for (i
= 0 ; i
< PT_INDEX_MAX
; i
++) {
1421 va
= ptdp
->pt_map
[i
].va
;
1423 tte_p
= pmap_tte(pmap
, va
);
1424 if ((tte_p
!= (tt_entry_t
*) NULL
)
1425 && ((*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
)) {
1427 #if (__ARM_VMSA__ == 7)
1428 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
1429 bpte
= &pte_p
[ptenum(va
)];
1430 epte
= bpte
+ PAGE_SIZE
/sizeof(pt_entry_t
);
1432 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
1433 bpte
= &pte_p
[tt3_index(pmap
, va
)];
1434 epte
= bpte
+ PAGE_SIZE
/sizeof(pt_entry_t
);
1437 * Use PMAP_OPTIONS_REMOVE to clear any
1438 * "compressed" markers and update the
1439 * "compressed" counter in pmap->stats.
1440 * This means that we lose accounting for
1441 * any compressed pages in this range
1442 * but the alternative is to not be able
1443 * to account for their future decompression,
1444 * which could cause the counter to drift
1447 remove_count
+= pmap_remove_range_options(
1448 pmap
, va
, bpte
, epte
,
1449 &rmv_spte
, PMAP_OPTIONS_REMOVE
);
1450 if (ptdp
->pt_cnt
[ARM_PT_DESC_INDEX(pte_p
)].refcnt
!= 0)
1451 panic("pmap_pages_reclaim(): ptdp %p, count %d\n", ptdp
, ptdp
->pt_cnt
[ARM_PT_DESC_INDEX(pte_p
)].refcnt
);
1452 #if (__ARM_VMSA__ == 7)
1453 pmap_tte_deallocate(pmap
, tte_p
, PMAP_TT_L1_LEVEL
);
1454 flush_mmu_tlb_entry((va
& ~ARM_TT_L1_PT_OFFMASK
) | (pmap
->asid
& 0xff));
1455 flush_mmu_tlb_entry(((va
& ~ARM_TT_L1_PT_OFFMASK
) + ARM_TT_L1_SIZE
) | (pmap
->asid
& 0xff));
1456 flush_mmu_tlb_entry(((va
& ~ARM_TT_L1_PT_OFFMASK
) + 2*ARM_TT_L1_SIZE
)| (pmap
->asid
& 0xff));
1457 flush_mmu_tlb_entry(((va
& ~ARM_TT_L1_PT_OFFMASK
) + 3*ARM_TT_L1_SIZE
)| (pmap
->asid
& 0xff));
1459 pmap_tte_deallocate(pmap
, tte_p
, PMAP_TT_L2_LEVEL
);
1460 flush_mmu_tlb_entry(tlbi_addr(va
& ~ARM_TT_L2_OFFMASK
) | tlbi_asid(pmap
->asid
));
1463 if (remove_count
> 0) {
1464 #if (__ARM_VMSA__ == 7)
1465 PMAP_UPDATE_TLBS(pmap
, va
, va
+4*ARM_TT_L1_SIZE
);
1467 PMAP_UPDATE_TLBS(pmap
, va
, va
+ARM_TT_L2_SIZE
);
1472 // Undo the lock we grabbed when we found ptdp above
1475 simple_lock(&pmap_pages_lock
);
1480 static kern_return_t
1486 vm_page_t m
= VM_PAGE_NULL
, m_prev
;
1488 if(option
& PMAP_PAGES_RECLAIM_NOWAIT
) {
1489 assert(size
== PAGE_SIZE
);
1490 *pa
= pmap_pages_reclaim();
1491 return KERN_SUCCESS
;
1493 if (size
== PAGE_SIZE
) {
1494 while ((m
= vm_page_grab()) == VM_PAGE_NULL
) {
1495 if(option
& PMAP_PAGES_ALLOCATE_NOWAIT
) {
1496 return KERN_RESOURCE_SHORTAGE
;
1501 vm_page_lock_queues();
1502 vm_page_wire(m
, VM_KERN_MEMORY_PTE
, TRUE
);
1503 vm_page_unlock_queues();
1505 if (size
== 2*PAGE_SIZE
) {
1506 while (cpm_allocate(size
, &m
, 0, 1, TRUE
, 0) != KERN_SUCCESS
) {
1507 if(option
& PMAP_PAGES_ALLOCATE_NOWAIT
)
1508 return KERN_RESOURCE_SHORTAGE
;
1514 *pa
= (pmap_paddr_t
)ptoa(VM_PAGE_GET_PHYS_PAGE(m
));
1516 vm_object_lock(pmap_object
);
1517 while (m
!= VM_PAGE_NULL
) {
1518 vm_page_insert_wired(m
, pmap_object
, (vm_object_offset_t
) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m
))) - gPhysBase
), VM_KERN_MEMORY_PTE
);
1520 m
= NEXT_PAGE(m_prev
);
1521 *(NEXT_PAGE_PTR(m_prev
)) = VM_PAGE_NULL
;
1523 vm_object_unlock(pmap_object
);
1525 OSAddAtomic(size
>>PAGE_SHIFT
, &inuse_pmap_pages_count
);
1526 OSAddAtomic64(size
>>PAGE_SHIFT
, &alloc_pmap_pages_count
);
1528 return KERN_SUCCESS
;
1537 simple_lock(&pmap_pages_lock
);
1539 if (pmap_pages_request_count
!= 0) {
1540 page_free_entry_t
*page_entry
;
1542 pmap_pages_request_count
--;
1543 page_entry
= (page_free_entry_t
*)phystokv(pa
);
1544 page_entry
->next
= pmap_pages_reclaim_list
;
1545 pmap_pages_reclaim_list
= page_entry
;
1546 simple_unlock(&pmap_pages_lock
);
1551 simple_unlock(&pmap_pages_lock
);
1554 pmap_paddr_t pa_max
;
1556 OSAddAtomic(-(size
>>PAGE_SHIFT
), &inuse_pmap_pages_count
);
1558 for (pa_max
= pa
+ size
; pa
< pa_max
; pa
= pa
+ PAGE_SIZE
) {
1559 vm_object_lock(pmap_object
);
1560 m
= vm_page_lookup(pmap_object
, (pa
- gPhysBase
));
1561 assert(m
!= VM_PAGE_NULL
);
1562 assert(VM_PAGE_WIRED(m
));
1563 vm_page_lock_queues();
1565 vm_page_unlock_queues();
1566 vm_object_unlock(pmap_object
);
1572 pmap_t pmap
, int bytes
)
1574 pmap_ledger_credit(pmap
, task_ledgers
.tkm_private
, bytes
);
1582 pmap_ledger_debit(pmap
, task_ledgers
.tkm_private
, bytes
);
1586 pmap_tt_ledger_credit(
1590 if (pmap
!= kernel_pmap
) {
1591 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, size
);
1592 pmap_ledger_credit(pmap
, task_ledgers
.page_table
, size
);
1597 pmap_tt_ledger_debit(
1601 if (pmap
!= kernel_pmap
) {
1602 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, size
);
1603 pmap_ledger_debit(pmap
, task_ledgers
.page_table
, size
);
1611 unsigned int asid_bitmap_index
;
1613 simple_lock(&pmaps_lock
);
1614 for (asid_bitmap_index
= 0; asid_bitmap_index
< (MAX_ASID
/ (sizeof(uint32_t) * NBBY
)); asid_bitmap_index
++) {
1615 unsigned int temp
= ffs(asid_bitmap
[asid_bitmap_index
]);
1618 asid_bitmap
[asid_bitmap_index
] &= ~(1 << temp
);
#if __ARM_KERNEL_PROTECT__
			/*
			 * We need two ASIDs: n and (n | 1).  n is used for EL0,
			 * (n | 1) is used for EL1.
			 */
			unsigned int temp2 = temp | 1;
1625 assert(temp2
< MAX_ASID
);
1627 assert(temp2
!= temp
);
1628 assert(asid_bitmap
[asid_bitmap_index
] & (1 << temp2
));
1630 /* Grab the second ASID. */
1631 asid_bitmap
[asid_bitmap_index
] &= ~(1 << temp2
);
1632 #endif /* __ARM_KERNEL_PROTECT__ */
1633 simple_unlock(&pmaps_lock
);
			/*
			 * We should never vend out physical ASID 0 through this
			 * method, as it belongs to the kernel.
			 */
1639 assert(((asid_bitmap_index
* sizeof(uint32_t) * NBBY
+ temp
) % ARM_MAX_ASID
) != 0);
1641 #if __ARM_KERNEL_PROTECT__
1642 /* Or the kernel EL1 ASID. */
1643 assert(((asid_bitmap_index
* sizeof(uint32_t) * NBBY
+ temp
) % ARM_MAX_ASID
) != 1);
1644 #endif /* __ARM_KERNEL_PROTECT__ */
1646 return (asid_bitmap_index
* sizeof(uint32_t) * NBBY
+ temp
);
1649 simple_unlock(&pmaps_lock
);
	/*
	 * ToDo: add code to deal with a pmap that has no ASID; panic for now.
	 * Not an issue with the small-config process hard limit.
	 */
	panic("alloc_asid(): out of ASID number");
1662 /* Don't free up any alias of physical ASID 0. */
1663 assert((asid
% ARM_MAX_ASID
) != 0);
1665 simple_lock(&pmaps_lock
);
1666 setbit(asid
, (int *) asid_bitmap
);
1668 #if __ARM_KERNEL_PROTECT__
1669 assert((asid
| 1) < MAX_ASID
);
1670 assert((asid
| 1) != asid
);
1671 setbit(asid
| 1, (int *) asid_bitmap
);
1672 #endif /* __ARM_KERNEL_PROTECT__ */
1674 simple_unlock(&pmaps_lock
);
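
/*
 * Illustrative note (editorial): under __ARM_KERNEL_PROTECT__ the allocator
 * above hands out ASIDs in even/odd pairs.  alloc_asid() returns an even n
 * and also claims (n | 1); n tags the EL0 translations and (n | 1) the
 * matching EL1 ones, and free_asid() releases both bits together.
 */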
1677 #define PV_LOW_WATER_MARK_DEFAULT 0x200
1678 #define PV_KERN_LOW_WATER_MARK_DEFAULT 0x200
1679 #define PV_ALLOC_CHUNK_INITIAL 0x200
1680 #define PV_KERN_ALLOC_CHUNK_INITIAL 0x200
1681 #define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
1682 #define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
1685 uint32_t pv_free_count MARK_AS_PMAP_DATA
= 0;
1686 uint32_t pv_page_count MARK_AS_PMAP_DATA
= 0;
1687 uint32_t pv_kern_free_count MARK_AS_PMAP_DATA
= 0;
1689 uint32_t pv_low_water_mark MARK_AS_PMAP_DATA
;
1690 uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA
;
1691 uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA
;
1692 uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA
;
1694 thread_t mapping_replenish_thread
;
1695 event_t mapping_replenish_event
;
1696 event_t pmap_user_pv_throttle_event
;
1697 volatile uint32_t mappingrecurse
= 0;
1699 uint64_t pmap_pv_throttle_stat
;
1700 uint64_t pmap_pv_throttled_waiters
;
1702 unsigned pmap_mapping_thread_wakeups
;
1703 unsigned pmap_kernel_reserve_replenish_stat MARK_AS_PMAP_DATA
;
1704 unsigned pmap_user_reserve_replenish_stat MARK_AS_PMAP_DATA
;
1705 unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA
;
1712 simple_lock_init(&pv_free_list_lock
, 0);
1713 simple_lock_init(&pv_kern_free_list_lock
, 0);
1714 pv_free_list
= PV_ENTRY_NULL
;
1715 pv_free_count
= 0x0U
;
1716 pv_kern_free_list
= PV_ENTRY_NULL
;
1717 pv_kern_free_count
= 0x0U
;
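
/*
 * Worked example of the sizing above (an editorial sketch; the struct size
 * is an assumption for a 64-bit kernel).  A pv_entry_t is two pointers,
 * i.e. 16 bytes on arm64, so one 16KB page carves into 16384 / 16 = 1024
 * entries.  PV_ALLOC_INITIAL_TARGET (0x200 * 5 = 2560 entries) therefore
 * costs roughly three pages on the first prime, and each replenish chunk of
 * 0x200 entries fits comfortably in a single page.
 */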
1720 static inline void PV_ALLOC(pv_entry_t
**pv_ep
);
1721 static inline void PV_KERN_ALLOC(pv_entry_t
**pv_e
);
1722 static inline void PV_FREE_LIST(pv_entry_t
*pv_eh
, pv_entry_t
*pv_et
, int pv_cnt
);
1723 static inline void PV_KERN_FREE_LIST(pv_entry_t
*pv_eh
, pv_entry_t
*pv_et
, int pv_cnt
);
1725 static inline void pmap_pv_throttle(pmap_t p
);
1733 PMAP_ASSERT_LOCKED(pmap
);
1734 ASSERT_PVH_LOCKED(pai
);
1736 if (PV_ENTRY_NULL
== *pvepp
) {
1738 if (kernel_pmap
== pmap
) {
1740 PV_KERN_ALLOC(pvepp
);
1742 if (PV_ENTRY_NULL
== *pvepp
) {
1754 ret
= pmap_pages_alloc(&pa
, PAGE_SIZE
, PMAP_PAGES_ALLOCATE_NOWAIT
);
1756 if (ret
== KERN_RESOURCE_SHORTAGE
) {
1757 ret
= pmap_pages_alloc(&pa
, PAGE_SIZE
, PMAP_PAGES_RECLAIM_NOWAIT
);
1760 if (ret
!= KERN_SUCCESS
) {
1761 panic("%s: failed to alloc page for kernel, ret=%d, "
1762 "pmap=%p, pai=%u, pvepp=%p",
1769 pv_e
= (pv_entry_t
*)phystokv(pa
);
1771 pv_eh
= pv_et
= PV_ENTRY_NULL
;
1775 for (j
= 1; j
< (PAGE_SIZE
/sizeof(pv_entry_t
)) ; j
++) {
1776 pv_e
->pve_next
= pv_eh
;
1779 if (pv_et
== PV_ENTRY_NULL
)
1784 PV_KERN_FREE_LIST(pv_eh
, pv_et
, pv_cnt
);
1792 pmap_pv_throttle(pmap
);
1802 ret
= pmap_pages_alloc(&pa
, PAGE_SIZE
, 0);
1804 if (ret
!= KERN_SUCCESS
) {
1805 panic("%s: failed to alloc page, ret=%d, "
1806 "pmap=%p, pai=%u, pvepp=%p",
1813 pv_e
= (pv_entry_t
*)phystokv(pa
);
1815 pv_eh
= pv_et
= PV_ENTRY_NULL
;
1819 for (j
= 1; j
< (PAGE_SIZE
/sizeof(pv_entry_t
)) ; j
++) {
1820 pv_e
->pve_next
= pv_eh
;
1823 if (pv_et
== PV_ENTRY_NULL
)
1828 PV_FREE_LIST(pv_eh
, pv_et
, pv_cnt
);
1835 assert(PV_ENTRY_NULL
!= *pvepp
);
1843 PV_FREE_LIST(pvep
, pvep
, 1);
1852 PV_FREE_LIST(pvehp
, pvetp
, cnt
);
1857 static inline void PV_ALLOC(pv_entry_t
**pv_ep
) {
1858 assert(*pv_ep
== PV_ENTRY_NULL
);
1859 simple_lock(&pv_free_list_lock
);
	/*
	 * If the kernel reserved pool is low, let non-kernel mappings allocate
	 * synchronously, possibly subject to a throttle.
	 */
1864 if ((pv_kern_free_count
>= pv_kern_low_water_mark
) && ((*pv_ep
= pv_free_list
) != 0)) {
1865 pv_free_list
= (pv_entry_t
*)(*pv_ep
)->pve_next
;
1866 (*pv_ep
)->pve_next
= PV_ENTRY_NULL
;
1870 simple_unlock(&pv_free_list_lock
);
1872 if ((pv_free_count
< pv_low_water_mark
) || (pv_kern_free_count
< pv_kern_low_water_mark
)) {
1873 if (!mappingrecurse
&& hw_compare_and_store(0,1, &mappingrecurse
))
1874 thread_wakeup(&mapping_replenish_event
);
1878 static inline void PV_FREE_LIST(pv_entry_t
*pv_eh
, pv_entry_t
*pv_et
, int pv_cnt
) {
1879 simple_lock(&pv_free_list_lock
);
1880 pv_et
->pve_next
= (pv_entry_t
*)pv_free_list
;
1881 pv_free_list
= pv_eh
;
1882 pv_free_count
+= pv_cnt
;
1883 simple_unlock(&pv_free_list_lock
);
1886 static inline void PV_KERN_ALLOC(pv_entry_t
**pv_e
) {
1887 assert(*pv_e
== PV_ENTRY_NULL
);
1888 simple_lock(&pv_kern_free_list_lock
);
1890 if ((*pv_e
= pv_kern_free_list
) != 0) {
1891 pv_kern_free_list
= (pv_entry_t
*)(*pv_e
)->pve_next
;
1892 (*pv_e
)->pve_next
= PV_ENTRY_NULL
;
1893 pv_kern_free_count
--;
1894 pmap_kern_reserve_alloc_stat
++;
1897 simple_unlock(&pv_kern_free_list_lock
);
1899 if (pv_kern_free_count
< pv_kern_low_water_mark
) {
1900 if (!mappingrecurse
&& hw_compare_and_store(0,1, &mappingrecurse
)) {
1901 thread_wakeup(&mapping_replenish_event
);
1906 static inline void PV_KERN_FREE_LIST(pv_entry_t
*pv_eh
, pv_entry_t
*pv_et
, int pv_cnt
) {
1907 simple_lock(&pv_kern_free_list_lock
);
1908 pv_et
->pve_next
= pv_kern_free_list
;
1909 pv_kern_free_list
= pv_eh
;
1910 pv_kern_free_count
+= pv_cnt
;
1911 simple_unlock(&pv_kern_free_list_lock
);
1914 static inline void pmap_pv_throttle(__unused pmap_t p
) {
1915 assert(p
!= kernel_pmap
);
1916 /* Apply throttle on non-kernel mappings */
1917 if (pv_kern_free_count
< (pv_kern_low_water_mark
/ 2)) {
1918 pmap_pv_throttle_stat
++;
		/* This doesn't need to be strictly accurate, merely a hint
		 * to eliminate the timeout when the reserve is replenished.
		 */
1922 pmap_pv_throttled_waiters
++;
1923 assert_wait_timeout(&pmap_user_pv_throttle_event
, THREAD_UNINT
, 1, 1000 * NSEC_PER_USEC
);
1924 thread_block(THREAD_CONTINUE_NULL
);
/*
 * Creates a target number of free pv_entry_t objects for the kernel free list
 * and the general free list.
 */
1932 static kern_return_t
1933 mapping_free_prime_internal(void)
1942 int alloc_options
= 0;
1943 int needed_pv_cnt
= 0;
1944 int target_pv_free_cnt
= 0;
1946 SECURITY_READ_ONLY_LATE(static boolean_t
) mapping_free_prime_internal_called
= FALSE
;
1947 SECURITY_READ_ONLY_LATE(static boolean_t
) mapping_free_prime_internal_done
= FALSE
;
1949 if (mapping_free_prime_internal_done
) {
1950 return KERN_FAILURE
;
1953 if (!mapping_free_prime_internal_called
) {
1954 mapping_free_prime_internal_called
= TRUE
;
1956 pv_low_water_mark
= PV_LOW_WATER_MARK_DEFAULT
;
1958 /* Alterable via sysctl */
1959 pv_kern_low_water_mark
= PV_KERN_LOW_WATER_MARK_DEFAULT
;
1961 pv_kern_alloc_chunk
= PV_KERN_ALLOC_CHUNK_INITIAL
;
1962 pv_alloc_chunk
= PV_ALLOC_CHUNK_INITIAL
;
1966 pv_eh
= pv_et
= PV_ENTRY_NULL
;
1967 target_pv_free_cnt
= PV_ALLOC_INITIAL_TARGET
;
	/*
	 * We don't take the lock to read pv_free_count, as we should not be
	 * invoking this from a multithreaded context.
	 */
1973 needed_pv_cnt
= target_pv_free_cnt
- pv_free_count
;
1975 if (needed_pv_cnt
> target_pv_free_cnt
) {
1979 while (pv_cnt
< needed_pv_cnt
) {
1980 ret
= pmap_pages_alloc(&pa
, PAGE_SIZE
, alloc_options
);
1982 assert(ret
== KERN_SUCCESS
);
1986 pv_e
= (pv_entry_t
*)phystokv(pa
);
1988 for (j
= 0; j
< (PAGE_SIZE
/sizeof(pv_entry_t
)) ; j
++) {
1989 pv_e
->pve_next
= pv_eh
;
1992 if (pv_et
== PV_ENTRY_NULL
)
2000 PV_FREE_LIST(pv_eh
, pv_et
, pv_cnt
);
2004 pv_eh
= pv_et
= PV_ENTRY_NULL
;
2005 target_pv_free_cnt
= PV_KERN_ALLOC_INITIAL_TARGET
;
	/*
	 * We don't take the lock to read pv_kern_free_count, as we should not
	 * be invoking this from a multithreaded context.
	 */
2011 needed_pv_cnt
= target_pv_free_cnt
- pv_kern_free_count
;
2013 if (needed_pv_cnt
> target_pv_free_cnt
) {
2017 while (pv_cnt
< needed_pv_cnt
) {
2019 ret
= pmap_pages_alloc(&pa
, PAGE_SIZE
, alloc_options
);
2021 assert(ret
== KERN_SUCCESS
);
2024 pv_e
= (pv_entry_t
*)phystokv(pa
);
2026 for (j
= 0; j
< (PAGE_SIZE
/sizeof(pv_entry_t
)) ; j
++) {
2027 pv_e
->pve_next
= pv_eh
;
2030 if (pv_et
== PV_ENTRY_NULL
)
2038 PV_KERN_FREE_LIST(pv_eh
, pv_et
, pv_cnt
);
2041 mapping_free_prime_internal_done
= TRUE
;
2042 return KERN_SUCCESS
;
2046 mapping_free_prime(void)
2048 kern_return_t kr
= KERN_FAILURE
;
2050 kr
= mapping_free_prime_internal();
2052 if (kr
!= KERN_SUCCESS
) {
2053 panic("%s: failed, kr=%d", __FUNCTION__
, kr
);
2057 void mapping_replenish(void);
2059 void mapping_adjust(void) {
2062 mres
= kernel_thread_start_priority((thread_continue_t
)mapping_replenish
, NULL
, MAXPRI_KERNEL
, &mapping_replenish_thread
);
2063 if (mres
!= KERN_SUCCESS
) {
2064 panic("pmap: mapping_replenish thread creation failed");
2066 thread_deallocate(mapping_replenish_thread
);
/*
 * Fills the kernel and general PV free lists back up to their low watermarks.
 */
2072 static kern_return_t
2073 mapping_replenish_internal(void)
2081 kern_return_t ret
= KERN_SUCCESS
;
2083 while (pv_kern_free_count
< pv_kern_low_water_mark
) {
2085 pv_eh
= pv_et
= PV_ENTRY_NULL
;
2087 ret
= pmap_pages_alloc(&pa
, PAGE_SIZE
, 0);
2088 assert(ret
== KERN_SUCCESS
);
2092 pv_e
= (pv_entry_t
*)phystokv(pa
);
2094 for (j
= 0; j
< (PAGE_SIZE
/sizeof(pv_entry_t
)) ; j
++) {
2095 pv_e
->pve_next
= pv_eh
;
2098 if (pv_et
== PV_ENTRY_NULL
)
2103 pmap_kernel_reserve_replenish_stat
+= pv_cnt
;
2104 PV_KERN_FREE_LIST(pv_eh
, pv_et
, pv_cnt
);
2107 while (pv_free_count
< pv_low_water_mark
) {
2109 pv_eh
= pv_et
= PV_ENTRY_NULL
;
2111 ret
= pmap_pages_alloc(&pa
, PAGE_SIZE
, 0);
2112 assert(ret
== KERN_SUCCESS
);
2116 pv_e
= (pv_entry_t
*)phystokv(pa
);
2118 for (j
= 0; j
< (PAGE_SIZE
/sizeof(pv_entry_t
)) ; j
++) {
2119 pv_e
->pve_next
= pv_eh
;
2122 if (pv_et
== PV_ENTRY_NULL
)
2127 pmap_user_reserve_replenish_stat
+= pv_cnt
;
2128 PV_FREE_LIST(pv_eh
, pv_et
, pv_cnt
);
/*
 * Continuation function that keeps the PV free lists from running out of free
 * entries.
 */
__attribute__((noreturn))
void
mapping_replenish(void)
2144 /* We qualify for VM privileges...*/
2145 current_thread()->options
|= TH_OPT_VMPRIV
;
2148 kr
= mapping_replenish_internal();
2150 if (kr
!= KERN_SUCCESS
) {
2151 panic("%s: failed, kr=%d", __FUNCTION__
, kr
);
		/*
		 * Wake threads throttled while the kernel reserve was being replenished.
		 */
2157 if (pmap_pv_throttled_waiters
) {
2158 pmap_pv_throttled_waiters
= 0;
2159 thread_wakeup(&pmap_user_pv_throttle_event
);
		/* Check if the kernel pool has been depleted since the
		 * first pass, to reduce refill latency.
		 */
2165 if (pv_kern_free_count
< pv_kern_low_water_mark
)
2167 /* Block sans continuation to avoid yielding kernel stack */
2168 assert_wait(&mapping_replenish_event
, THREAD_UNINT
);
2170 thread_block(THREAD_CONTINUE_NULL
);
2171 pmap_mapping_thread_wakeups
++;
2179 unsigned int ptd_cnt
)
2181 simple_lock_init(&ptd_free_list_lock
, 0);
2182 while (ptd_cnt
!= 0) {
2183 (*(void **)ptdp
) = (void *)ptd_free_list
;
2184 ptd_free_list
= ptdp
;
2189 ptd_preboot
= FALSE
;
2200 simple_lock(&ptd_free_list_lock
);
2202 if (ptd_free_count
== 0) {
2203 unsigned int ptd_cnt
;
2204 pt_desc_t
*ptdp_next
;
2207 ptdp
= (pt_desc_t
*)avail_start
;
2208 avail_start
+= ARM_PGBYTES
;
2210 ptd_cnt
= ARM_PGBYTES
/sizeof(pt_desc_t
);
2215 simple_unlock(&ptd_free_list_lock
);
2217 if (pmap_pages_alloc(&pa
, PAGE_SIZE
, PMAP_PAGES_ALLOCATE_NOWAIT
) != KERN_SUCCESS
) {
2218 ret
= pmap_pages_alloc(&pa
, PAGE_SIZE
, PMAP_PAGES_RECLAIM_NOWAIT
);
2219 assert(ret
== KERN_SUCCESS
);
2221 ptdp
= (pt_desc_t
*)phystokv(pa
);
2223 simple_lock(&ptd_free_list_lock
);
2225 ptd_cnt
= PAGE_SIZE
/sizeof(pt_desc_t
);
2228 while (ptd_cnt
!= 0) {
2229 (*(void **)ptdp_next
) = (void *)ptd_free_list
;
2230 ptd_free_list
= ptdp_next
;
2237 if ((ptdp
= ptd_free_list
) != PTD_ENTRY_NULL
) {
2238 ptd_free_list
= (pt_desc_t
*)(*(void **)ptdp
);
2241 panic("out of ptd entry\n");
2245 simple_unlock(&ptd_free_list_lock
);
2247 ptdp
->pt_page
.next
= NULL
;
2248 ptdp
->pt_page
.prev
= NULL
;
2251 for (i
= 0 ; i
< PT_INDEX_MAX
; i
++) {
2252 ptdp
->pt_map
[i
].va
= 0;
2253 ptdp
->pt_cnt
[i
].refcnt
= 0;
2254 ptdp
->pt_cnt
[i
].wiredcnt
= 0;
2256 simple_lock(&pt_pages_lock
);
2257 queue_enter(&pt_page_list
, ptdp
, pt_desc_t
*, pt_page
);
2258 simple_unlock(&pt_pages_lock
);
2260 pmap_tt_ledger_credit(pmap
, sizeof(*ptdp
));
static void
ptd_deallocate(
	pt_desc_t *ptdp)
{
	unsigned	i;
	pmap_t		pmap = ptdp->pmap;

	if (ptd_preboot) {
		panic("ptd_deallocate(): early boot\n");
	}
	for (i = 0 ; i < PT_INDEX_MAX ; i++) {
		if (ptdp->pt_cnt[i].refcnt != 0)
			panic("ptd_deallocate(): ptdp=%p refcnt=0x%x \n", ptdp, ptdp->pt_cnt[i].refcnt);
	}
	if (ptdp->pt_page.next != NULL) {
		simple_lock(&pt_pages_lock);
		queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
		simple_unlock(&pt_pages_lock);
	}
	simple_lock(&ptd_free_list_lock);
	(*(void **)ptdp) = (void *)ptd_free_list;
	ptd_free_list = (pt_desc_t *)ptdp;
	simple_unlock(&ptd_free_list_lock);
	pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
}
static void
ptd_init(
	pt_desc_t *ptdp,
	pmap_t pmap,
	vm_map_address_t va,
	unsigned int level,
	pt_entry_t *pte_p)
{
	if (ptdp->pmap != pmap)
		panic("ptd_init(): pmap mismatch\n");

#if	(__ARM_VMSA__ == 7)
	assert(level == 2);
	ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(ARM_TT_L1_PT_OFFMASK);
#else
	if (level == 3) {
		ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~ARM_TT_L2_OFFMASK;
	} else if (level == 2)
		ptdp->pt_map[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~ARM_TT_L1_OFFMASK;
#endif
	if (level < PMAP_TT_MAX_LEVEL)
		ptdp->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt = PT_DESC_REFCOUNT;
}

boolean_t
pmap_valid_address(
	pmap_paddr_t addr)
{
	return pa_valid(addr);
}
#if	(__ARM_VMSA__ == 7)

/*
 *	Given an offset and a map, compute the address of the
 *	corresponding translation table entry.
 */
static inline tt_entry_t *
pmap_tte(pmap_t pmap,
	 vm_map_address_t addr)
{
	if (!(tte_index(pmap, addr) < pmap->tte_index_max))
		return (tt_entry_t *)NULL;
	return (&pmap->tte[tte_index(pmap, addr)]);
}

/*
 *	Given an offset and a map, compute the address of the
 *	pte.  If the address is invalid with respect to the map
 *	then PT_ENTRY_NULL is returned (and the map may need to grow).
 *
 *	This is only used internally.
 */
static inline pt_entry_t *
pmap_pte(pmap_t pmap,
	 vm_map_address_t addr)
{
	pt_entry_t	*ptp;
	tt_entry_t	*ttp;
	tt_entry_t	tte;

	ttp = pmap_tte(pmap, addr);
	if (ttp == (tt_entry_t *)NULL)
		return (PT_ENTRY_NULL);
	tte = *ttp;
	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK)
		panic("Attempt to demote L1 block: pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);
	if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)
		return (PT_ENTRY_NULL);
	ptp = (pt_entry_t *) ttetokv(tte) + ptenum(addr);
	return (ptp);
}
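/*
 * Usage sketch (illustrative only; this mirrors how callers further down,
 * e.g. pmap_map_bd(), use the helpers above):
 *
 *	pt_entry_t *ptep = pmap_pte(kernel_pmap, vaddr);
 *	if (ptep == PT_ENTRY_NULL)
 *		panic("no page table covers vaddr");
 *	pt_entry_t spte = *ptep;
 */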
/*
 *	Given an offset and a map, compute the address of level 1 translation table entry.
 *	If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline tt_entry_t *
pmap_tt1e(pmap_t pmap,
	  vm_map_address_t addr)
{
#if __ARM64_TWO_LEVEL_PMAP__
#pragma unused(pmap, addr)
	panic("pmap_tt1e called on a two level pmap");
	return (NULL);
#else
	return (&pmap->tte[tt1_index(pmap, addr)]);
#endif
}

/*
 *	Given an offset and a map, compute the address of level 2 translation table entry.
 *	If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline tt_entry_t *
pmap_tt2e(pmap_t pmap,
	  vm_map_address_t addr)
{
#if __ARM64_TWO_LEVEL_PMAP__
	return (&pmap->tte[tt2_index(pmap, addr)]);
#else
	tt_entry_t	*ttp;
	tt_entry_t	tte;

	ttp = pmap_tt1e(pmap, addr);
	tte = *ttp;
	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID))
		panic("Attempt to demote L1 block (?!): pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);

	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID))
		return (PT_ENTRY_NULL);

	ttp = &((tt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, addr)];
	return ((tt_entry_t *)ttp);
#endif
}

/*
 *	Given an offset and a map, compute the address of level 3 translation table entry.
 *	If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline pt_entry_t *
pmap_tt3e(pmap_t pmap,
	  vm_map_address_t addr)
{
	pt_entry_t	*ptp;
	tt_entry_t	*ttp;
	tt_entry_t	tte;

	/* Level 0 currently unused */
#if __ARM64_TWO_LEVEL_PMAP__
	ttp = pmap_tt2e(pmap, addr);
	tte = *ttp;
#else
	/* Get first-level (1GB) entry */
	ttp = pmap_tt1e(pmap, addr);
	tte = *ttp;
	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID))
		panic("Attempt to demote L1 block (?!): pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);

	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID))
		return (PT_ENTRY_NULL);

	/* Get second-level (2MB) entry */
	tte = ((tt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, addr)];
#endif
	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID))
		panic("Attempt to demote L2 block: pmap=%p, va=0x%llx, tte=0x%llx\n", pmap, (uint64_t)addr, (uint64_t)tte);

	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
		return (PT_ENTRY_NULL);
	}

	/* Get third-level (4KB) entry */
	ptp = &(((pt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, addr)]);
	return (ptp);
}

static inline tt_entry_t *
pmap_tte(pmap_t pmap,
	 vm_map_address_t addr)
{
	return (pmap_tt2e(pmap, addr));
}

static inline pt_entry_t *
pmap_pte(pmap_t pmap,
	 vm_map_address_t addr)
{
	return (pmap_tt3e(pmap, addr));
}
/*
 *	Map memory at initialization.  The physical addresses being
 *	mapped are not managed and are never unmapped.
 *
 *	For now, VM is already on, we only need to map the
 *	specified memory.
 */
vm_map_address_t
pmap_map(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot,
	unsigned int flags)
{
	kern_return_t	kr;

	while (start < end) {
		kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
		                prot, VM_PROT_NONE, flags, FALSE);

		if (kr != KERN_SUCCESS) {
			panic("%s: failed pmap_enter, "
			      "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
			      __FUNCTION__,
			      (void *) virt, (void *) start, (void *) end, prot, flags);
		}

		virt += PAGE_SIZE;
		start += PAGE_SIZE;
	}
	return (virt);
}
vm_map_address_t
pmap_map_bd_with_options(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot,
	int32_t options)
{
	pt_entry_t	tmplate;
	pt_entry_t	*ptep;
	vm_map_address_t vaddr;
	vm_offset_t	paddr;
	pt_entry_t	mem_attr;

	switch (options & PMAP_MAP_BD_MASK) {
	case PMAP_MAP_BD_WCOMB:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
#if	(__ARM_VMSA__ > 7)
		mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		mem_attr |= ARM_PTE_SH;
#endif
		break;
	case PMAP_MAP_BD_POSTED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
		break;
	default:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
		break;
	}

	tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
	          mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
#if __ARM_KERNEL_PROTECT__
	tmplate |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	vaddr = virt;
	paddr = start;
	while (paddr < end) {

		ptep = pmap_pte(kernel_pmap, vaddr);
		if (ptep == PT_ENTRY_NULL) {
			panic("pmap_map_bd");
		}
		assert(!ARM_PTE_IS_COMPRESSED(*ptep));
		WRITE_PTE(ptep, tmplate);

		pte_increment_pa(tmplate);
		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	flush_mmu_tlb_region(virt, (unsigned)(end - start));

	return (vaddr);
}
/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory outside the range
 *	[vm_first_phys, vm_last_phys] (i.e., devices).
 *	Otherwise like pmap_map.
 */
vm_map_address_t
pmap_map_bd(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot)
{
	pt_entry_t	tmplate;
	pt_entry_t	*ptep;
	vm_map_address_t vaddr;
	vm_offset_t	paddr;

	/* not cacheable and not buffered */
	tmplate = pa_to_pte(start)
	          | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
	          | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
	          | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
#if __ARM_KERNEL_PROTECT__
	tmplate |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	vaddr = virt;
	paddr = start;
	while (paddr < end) {

		ptep = pmap_pte(kernel_pmap, vaddr);
		if (ptep == PT_ENTRY_NULL) {
			panic("pmap_map_bd");
		}
		assert(!ARM_PTE_IS_COMPRESSED(*ptep));
		WRITE_PTE(ptep, tmplate);

		pte_increment_pa(tmplate);
		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	flush_mmu_tlb_region(virt, (unsigned)(end - start));

	return (vaddr);
}
/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory specific physical addresses in early
 *	boot (i.e., before kernel_map is initialized).
 *
 *	Maps are in the VM_HIGH_KERNEL_WINDOW area.
 */
vm_map_address_t
pmap_map_high_window_bd(
	vm_offset_t pa_start,
	vm_size_t len,
	vm_prot_t prot)
{
	pt_entry_t		*ptep, pte;
#if (__ARM_VMSA__ == 7)
	vm_map_address_t	va_start = VM_HIGH_KERNEL_WINDOW;
	vm_map_address_t	va_max = VM_MAX_KERNEL_ADDRESS;
#else
	vm_map_address_t	va_start = VREGION1_START;
	vm_map_address_t	va_max = VREGION1_START + VREGION1_SIZE;
#endif
	vm_map_address_t	va_end;
	vm_map_address_t	va;
	vm_size_t		offset;

	offset = pa_start & PAGE_MASK;
	pa_start -= offset;
	len += offset;

	if (len > (va_max - va_start)) {
		panic("pmap_map_high_window_bd: area too large\n");
	}

scan:
	for ( ; va_start < va_max; va_start += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va_start);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep));
		if (*ptep == ARM_PTE_TYPE_FAULT)
			break;
	}
	if (va_start > va_max) {
		panic("pmap_map_high_window_bd: insufficient pages\n");
	}

	for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va_end);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep));
		if (*ptep != ARM_PTE_TYPE_FAULT) {
			va_start = va_end + PAGE_SIZE;
			goto scan;
		}
	}

	for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va);
		pte = pa_to_pte(pa_start)
		      | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
		      | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
		      | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
#if	(__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
#if __ARM_KERNEL_PROTECT__
		pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
		WRITE_PTE(ptep, pte);
	}
	PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len);
#if KASAN
	kasan_notify_address(va_start, len);
#endif
	return va_start;
}
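/*
 * Usage sketch for the back-door mappers above (illustrative; the virtual and
 * physical addresses are hypothetical placeholders, not values taken from
 * this file): an early-boot device region can be mapped write-combined with
 *
 *	vm_map_address_t next_va;
 *
 *	next_va = pmap_map_bd_with_options(virt, dev_pa_start, dev_pa_end,
 *	                                   VM_PROT_READ | VM_PROT_WRITE,
 *	                                   PMAP_MAP_BD_WCOMB);
 *
 * or posted with PMAP_MAP_BD_POSTED, while plain pmap_map_bd() always
 * produces an uncached mapping.
 */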
#define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))

typedef struct pmap_io_range
{
	uint64_t addr;
	uint32_t len;
	uint32_t wimg;
} __attribute__((packed)) pmap_io_range_t;
static unsigned int
pmap_compute_io_rgns(void)
{
	DTEntry		entry;
	pmap_io_range_t	*ranges;
	void		*prop = NULL;
	int		err;
	unsigned int	prop_size;

	err = DTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	if (kSuccess != DTGetProperty(entry, "pmap-io-granule", &prop, &prop_size))
		return 0;

	io_rgn_granule = *((uint32_t*)prop);

	if (kSuccess != DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size))
		return 0;

	if ((io_rgn_granule == 0) || (io_rgn_granule & PAGE_MASK))
		panic("pmap I/O region granularity is not page-aligned!\n");

	ranges = prop;
	for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
		if ((i == 0) || (ranges[i].addr < io_rgn_start))
			io_rgn_start = ranges[i].addr;
		if ((i == 0) || ((ranges[i].addr + ranges[i].len) > io_rgn_end))
			io_rgn_end = ranges[i].addr + ranges[i].len;
	}

	if (io_rgn_start & PAGE_MASK)
		panic("pmap I/O region start is not page-aligned!\n");

	if (io_rgn_end & PAGE_MASK)
		panic("pmap I/O region end is not page-aligned!\n");

	if (((io_rgn_start < gPhysBase) && (io_rgn_end >= gPhysBase)) ||
	    ((io_rgn_start < avail_end) && (io_rgn_end >= avail_end)))
		panic("pmap I/O region overlaps physical memory!\n");

	return (unsigned int)((io_rgn_end - io_rgn_start) / io_rgn_granule);
}
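/*
 * The two device-tree properties consumed above are assumed (not shown in
 * this excerpt) to be laid out as follows; the numbers are illustrative:
 *
 *	pmap-io-granule:  a single 32-bit granule size, e.g. 0x4000
 *	pmap-io-ranges:   a packed array of pmap_io_range_t records,
 *	                  i.e. { uint64_t addr; uint32_t len; uint32_t wimg; }
 *
 * pmap_load_io_rgns() below then requires every record to be aligned to, and
 * sized in multiples of, the granule.
 */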
static void
pmap_load_io_rgns(void)
{
	DTEntry		entry;
	pmap_io_range_t	*ranges;
	void		*prop = NULL;
	int		err;
	unsigned int	prop_size;

	if (io_rgn_granule == 0)
		return;

	err = DTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	err = DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
	assert(err == kSuccess);

	ranges = prop;
	for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
		if ((ranges[i].addr - io_rgn_start) % io_rgn_granule)
			panic("pmap I/O region %d is not aligned to I/O granularity!\n", i);
		if (ranges[i].len % io_rgn_granule)
			panic("pmap I/O region %d size is not a multiple of I/O granularity!\n", i);
		for (uint32_t offs = 0; offs < ranges[i].len; offs += io_rgn_granule) {
			io_attr_table[(ranges[i].addr + offs - io_rgn_start) / io_rgn_granule] =
			    IO_ATTR_WIMG(ranges[i].wimg);
		}
	}
}
/*
 *	Bootstrap the system enough to run with virtual memory.
 *
 *	The early VM initialization code has already allocated
 *	the first CPU's translation table and made entries for
 *	all the one-to-one mappings to be found there.
 *
 *	We must set up the kernel pmap structures, the
 *	physical-to-virtual translation lookup tables for the
 *	physical memory to be managed (between avail_start and
 *	avail_end).
 *
 *	Map the kernel's code and data, and allocate the system page table.
 *	Page_size must already be set.
 *
 *	Parameters:
 *	first_avail	first available physical page -
 *			   after kernel page tables
 *	avail_start	PA of first managed physical page
 *	avail_end	PA of last managed physical page
 */
void
pmap_bootstrap(
	vm_offset_t vstart)
{
	pmap_paddr_t	pmap_struct_start;
	vm_size_t	pv_head_size;
	vm_size_t	pv_lock_table_size;
	vm_size_t	ptd_root_table_size;
	vm_size_t	pp_attr_table_size;
	vm_size_t	io_attr_table_size;
	unsigned int	niorgns;
	unsigned int	npages;
	unsigned int	i;
	vm_map_offset_t	maxoffset;

	if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) {
		kprintf("Kernel traces for pmap operations enabled\n");
	}

	/*
	 *	Initialize the kernel pmap.
	 */
	kernel_pmap->tte = cpu_tte;
	kernel_pmap->ttep = cpu_ttep;
#if (__ARM_VMSA__ > 7)
	kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
#else
	kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
#endif
	kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
	kernel_pmap->wired = 0;
	kernel_pmap->ref_count = 1;
	kernel_pmap->gc_status = 0;
	kernel_pmap->nx_enabled = TRUE;
#ifdef	__arm64__
	kernel_pmap->is_64bit = TRUE;
#else
	kernel_pmap->is_64bit = FALSE;
#endif
	kernel_pmap->stamp = hw_atomic_add(&pmap_stamp, 1);

	kernel_pmap->nested_region_grand_addr = 0x0ULL;
	kernel_pmap->nested_region_subord_addr = 0x0ULL;
	kernel_pmap->nested_region_size = 0x0ULL;
	kernel_pmap->nested_region_asid_bitmap = NULL;
	kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;

#if (__ARM_VMSA__ == 7)
	kernel_pmap->tte_index_max = 4*NTTES;
#else
	kernel_pmap->tte_index_max = (ARM_PGBYTES / sizeof(tt_entry_t));
#endif
	kernel_pmap->prev_tte = (tt_entry_t *) NULL;
	kernel_pmap->cpu_ref = 0;

	PMAP_LOCK_INIT(kernel_pmap);
#if	(__ARM_VMSA__ == 7)
	simple_lock_init(&kernel_pmap->tt1_lock, 0);
#endif
	memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));

	/* allocate space for and initialize the bookkeeping structures */
	niorgns = pmap_compute_io_rgns();
	npages = (unsigned int)atop(mem_size);
	pp_attr_table_size = npages * sizeof(pp_attr_t);
	io_attr_table_size = niorgns * sizeof(io_attr_t);
	pv_lock_table_size = npages;
	pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
#if	(__ARM_VMSA__ == 7)
	ptd_root_table_size = sizeof(pt_desc_t) * (1<<((mem_size>>30)+12));
#else
	ptd_root_table_size = sizeof(pt_desc_t) * (1<<((mem_size>>30)+13));
#endif

	pmap_struct_start = avail_start;

	pp_attr_table = (pp_attr_t *) phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
	io_attr_table = (io_attr_t *) phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + io_attr_table_size + pv_lock_table_size, __alignof(pv_entry_t*));
	pv_head_table = (pv_entry_t **) phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
	ptd_root_table = (pt_desc_t *)phystokv(avail_start);
	avail_start = round_page(avail_start + ptd_root_table_size);

	memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);

	pmap_load_io_rgns();
	ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size/sizeof(pt_desc_t)));

	pmap_cpu_data_array_init();

	vm_first_phys = gPhysBase;
	vm_last_phys = trunc_page(avail_end);

	simple_lock_init(&pmaps_lock, 0);
	queue_init(&map_pmap_list);
	queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
	queue_init(&tt_pmap_list);

	free_page_size_tt_list = TT_FREE_ENTRY_NULL;
	free_page_size_tt_count = 0;
	free_page_size_tt_max = 0;
	free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
	free_two_page_size_tt_count = 0;
	free_two_page_size_tt_max = 0;
	free_tt_list = TT_FREE_ENTRY_NULL;

	simple_lock_init(&pt_pages_lock, 0);
	queue_init(&pt_page_list);

	simple_lock_init(&pmap_pages_lock, 0);
	pmap_pages_request_count = 0;
	pmap_pages_request_acum = 0;
	pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;

	virtual_space_start = vstart;
	virtual_space_end = VM_MAX_KERNEL_ADDRESS;

	/* mark all the address spaces in use */
	for (i = 0; i < MAX_ASID / (sizeof(uint32_t) * NBBY); i++)
		asid_bitmap[i] = 0xffffffff;

	/*
	 * The kernel gets ASID 0, and all aliases of it.  This is
	 * important because ASID 0 is global; if we vend ASID 0
	 * out to a user pmap, those translations will show up in
	 * other processes through the TLB.
	 */
	for (i = 0; i < MAX_ASID; i += ARM_MAX_ASID) {
		asid_bitmap[i / (sizeof(uint32_t) * NBBY)] &= ~(1 << (i % (sizeof(uint32_t) * NBBY)));

#if __ARM_KERNEL_PROTECT__
		assert((i + 1) < MAX_ASID);
		asid_bitmap[(i + 1) / (sizeof(uint32_t) * NBBY)] &= ~(1 << ((i + 1) % (sizeof(uint32_t) * NBBY)));
#endif /* __ARM_KERNEL_PROTECT__ */
	}

	kernel_pmap->asid = 0;
	kernel_pmap->vasid = 0;

	if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof (maxoffset))) {
		maxoffset = trunc_page(maxoffset);
		if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
		    && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
			arm_pmap_max_offset_default = maxoffset;
		}
	}
#if defined(__arm64__)
	if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof (maxoffset))) {
		maxoffset = trunc_page(maxoffset);
		if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
		    && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
			arm64_pmap_max_offset_default = maxoffset;
		}
	}
#endif

#if DEVELOPMENT || DEBUG
	PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute, sizeof (panic_on_unsigned_execute));
#endif /* DEVELOPMENT || DEBUG */

	pmap_nesting_size_min = ARM_NESTING_SIZE_MIN;
	pmap_nesting_size_max = ARM_NESTING_SIZE_MAX;

	simple_lock_init(&phys_backup_lock, 0);

#if MACH_ASSERT
	PE_parse_boot_argn("pmap_stats_assert",
			   &pmap_stats_assert,
			   sizeof (pmap_stats_assert));
#endif /* MACH_ASSERT */

#if KASAN
	/* Shadow the CPU copy windows, as they fall outside of the physical aperture */
	kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
#endif /* KASAN */
}
void
pmap_virtual_space(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
boolean_t
pmap_virtual_region(
	unsigned int region_select,
	vm_map_offset_t *startp,
	vm_map_size_t *size)
{
	boolean_t	ret = FALSE;
#if	__ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
	if (region_select == 0) {
		/*
		 * In this config, the bootstrap mappings should occupy their own L2
		 * TTs, as they should be immutable after boot.  Having the associated
		 * TTEs and PTEs in their own pages allows us to lock down those pages,
		 * while allowing the rest of the kernel address range to be remapped.
		 */
#if	(__ARM_VMSA__ > 7)
		*startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
#else
#error Unsupported configuration
#endif
		*size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
		ret = TRUE;
	}
#else
#if	(__ARM_VMSA__ > 7)
	unsigned long low_global_vr_mask = 0;
	vm_map_size_t low_global_vr_size = 0;
#endif

	if (region_select == 0) {
#if	(__ARM_VMSA__ == 7)
		*startp = gVirtBase & 0xFFC00000;
		*size = ((virtual_space_start-(gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
#else
		/* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
		if (!TEST_PAGE_SIZE_4K) {
			*startp = gVirtBase & 0xFFFFFFFFFE000000;
			*size = ((virtual_space_start-(gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
		} else {
			*startp = gVirtBase & 0xFFFFFFFFFF800000;
			*size = ((virtual_space_start-(gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
		}
#endif
		ret = TRUE;
	}

	if (region_select == 1) {
		*startp = VREGION1_START;
		*size = VREGION1_SIZE;
		ret = TRUE;
	}

#if	(__ARM_VMSA__ > 7)
	/* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
	if (!TEST_PAGE_SIZE_4K) {
		low_global_vr_mask = 0xFFFFFFFFFE000000;
		low_global_vr_size = 0x2000000;
	} else {
		low_global_vr_mask = 0xFFFFFFFFFF800000;
		low_global_vr_size = 0x800000;
	}

	if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
		*startp = LOW_GLOBAL_BASE_ADDRESS;
		*size = low_global_vr_size;
		ret = TRUE;
	}

	if (region_select == 3) {
		/* In this config, we allow the bootstrap mappings to occupy the same
		 * page table pages as the heap.
		 */
		*startp = VM_MIN_KERNEL_ADDRESS;
		*size = LOW_GLOBAL_BASE_ADDRESS - *startp;
		ret = TRUE;
	}
#endif
#endif
	return ret;
}
unsigned int
pmap_free_pages(
	void)
{
	return (unsigned int)atop(avail_end - first_avail);
}


boolean_t
pmap_next_page_hi(
	ppnum_t *pnum)
{
	return pmap_next_page(pnum);
}


boolean_t
pmap_next_page(
	ppnum_t *pnum)
{
	if (first_avail != avail_end) {
		*pnum = (ppnum_t)atop(first_avail);
		first_avail += PAGE_SIZE;
		return TRUE;
	}
	return FALSE;
}
/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(
	void)
{
	/*
	 *	Protect page zero in the kernel map.
	 *	(can be overruled by permanent translation
	 *	table entries at page zero - see arm_vm_init).
	 */
	vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);

	pmap_initialized = TRUE;

	/*
	 *	Initialize the pmap object (for tracking the vm_page_t
	 *	structures for pages we allocate to be page tables in
	 *	pmap_expand()).
	 */
	_vm_object_allocate(mem_size, pmap_object);
	pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 * The value of hard_maxproc may have been scaled, make sure
	 * it is still less than the value of MAX_ASID.
	 */
	assert(hard_maxproc < MAX_ASID);

	pmap_pgtrace_init();
}
boolean_t
pmap_verify_free(
	ppnum_t ppnum)
{
	pv_entry_t	**pv_h;
	int		pai;
	boolean_t	result = TRUE;
	pmap_paddr_t	phys = ptoa(ppnum);

	assert(phys != vm_page_fictitious_addr);

	if (!pa_valid(phys))
		return (FALSE);

	pai = (int)pa_index(phys);
	pv_h = pai_to_pvh(pai);

	result = (pvh_list(pv_h) == PV_ENTRY_NULL);

	return (result);
}
/*
 *	Initialize zones used by pmap.
 */
static void
pmap_zone_init(
	void)
{
	/*
	 *	Create the zone of physical maps
	 *	and the physical-to-virtual entries.
	 */
	pmap_zone = zinit((vm_size_t) sizeof(struct pmap), (vm_size_t) sizeof(struct pmap)*256,
			  PAGE_SIZE, "pmap");
}
/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
static pmap_t
pmap_create_internal(
	ledger_t ledger,
	vm_map_size_t size,
	boolean_t is_64bit)
{
	unsigned	i;
	pmap_t		p;

	/*
	 *	A software use-only map doesn't even need a pmap.
	 */
	if (size != 0) {
		return (PMAP_NULL);
	}

	/*
	 *	Allocate a pmap struct from the pmap_zone.  Then allocate
	 *	the translation table of the right size for the pmap.
	 */
	if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL)
		return (PMAP_NULL);

	if (is_64bit) {
		p->min = MACH_VM_MIN_ADDRESS;
		p->max = MACH_VM_MAX_ADDRESS;
	} else {
		p->min = VM_MIN_ADDRESS;
		p->max = VM_MAX_ADDRESS;
	}

	p->ref_count = 1;
	p->stamp = hw_atomic_add(&pmap_stamp, 1);
	p->nx_enabled = TRUE;
	p->is_64bit = is_64bit;
	p->nested_pmap = PMAP_NULL;

	p->ledger = ledger;
	ledger_reference(ledger);

	PMAP_LOCK_INIT(p);
#if	(__ARM_VMSA__ == 7)
	simple_lock_init(&p->tt1_lock, 0);
#endif
	memset((void *) &p->stats, 0, sizeof(p->stats));

	p->tt_entry_free = (tt_entry_t *)0;

	p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
	p->ttep = ml_static_vtop((vm_offset_t)p->tte);

#if (__ARM_VMSA__ == 7)
	p->tte_index_max = NTTES;
#else
	p->tte_index_max = (PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t));
#endif
	p->prev_tte = (tt_entry_t *) NULL;

	/* nullify the translation table */
	for (i = 0; i < p->tte_index_max; i++)
		p->tte[i] = ARM_TTE_TYPE_FAULT;

#ifndef  __ARM_L1_PTW__
	CleanPoU_DcacheRegion((vm_offset_t) (p->tte), PMAP_ROOT_ALLOC_SIZE);
#else
	__asm__ volatile("dsb ish");
#endif

	p->vasid = alloc_asid();
	p->asid = p->vasid % ARM_MAX_ASID;

	/*
	 *  initialize the rest of the structure
	 */
	p->nested_region_grand_addr = 0x0ULL;
	p->nested_region_subord_addr = 0x0ULL;
	p->nested_region_size = 0x0ULL;
	p->nested_region_asid_bitmap = NULL;
	p->nested_region_asid_bitmap_size = 0x0UL;

#if MACH_ASSERT
	p->pmap_stats_assert = TRUE;
	strlcpy(p->pmap_procname, "<nil>", sizeof (p->pmap_procname));
#endif /* MACH_ASSERT */
#if DEVELOPMENT || DEBUG
	p->footprint_suspended = FALSE;
	p->footprint_was_suspended = FALSE;
#endif /* DEVELOPMENT || DEBUG */

	simple_lock(&pmaps_lock);
	queue_enter(&map_pmap_list, p, pmap_t, pmaps);
	simple_unlock(&pmaps_lock);

	return (p);
}
pmap_t
pmap_create(
	ledger_t ledger,
	vm_map_size_t size,
	boolean_t is_64bit)
{
	pmap_t pmap;

	PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, is_64bit);

	pmap = pmap_create_internal(ledger, size, is_64bit);

	PMAP_TRACE(PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END,
	           VM_KERNEL_ADDRHIDE(pmap));

	return pmap;
}
static void
pmap_set_process_internal(
	__unused pmap_t pmap,
	__unused int pid,
	__unused char *procname)
{
#if MACH_ASSERT
	pmap->pmap_pid = pid;
	strlcpy(pmap->pmap_procname, procname, sizeof (pmap->pmap_procname));
	if (!strncmp(procname, "corecaptured", sizeof (pmap->pmap_procname))) {
		/*
		 * "corecaptured" somehow triggers some issues that make
		 * the pmap stats and ledgers go off track, causing
		 * some assertion failures and ledger panics.
		 * Turn that off if the terminating process is "corecaptured".
		 */
		pmap->pmap_stats_assert = FALSE;
		ledger_disable_panic_on_negative(pmap->ledger,
						 task_ledgers.phys_footprint);
		ledger_disable_panic_on_negative(pmap->ledger,
						 task_ledgers.internal);
		ledger_disable_panic_on_negative(pmap->ledger,
						 task_ledgers.internal_compressed);
		ledger_disable_panic_on_negative(pmap->ledger,
						 task_ledgers.iokit_mapped);
		ledger_disable_panic_on_negative(pmap->ledger,
						 task_ledgers.alternate_accounting);
		ledger_disable_panic_on_negative(pmap->ledger,
						 task_ledgers.alternate_accounting_compressed);
	}
#endif /* MACH_ASSERT */
}

#if MACH_ASSERT
void
pmap_set_process(
	pmap_t pmap,
	int pid,
	char *procname)
{
	pmap_set_process_internal(pmap, pid, procname);
}
#endif /* MACH_ASSERT*/
/*
 * We maintain stats and ledgers so that a task's physical footprint is:
 * phys_footprint = ((internal - alternate_accounting)
 *                   + (internal_compressed - alternate_accounting_compressed)
 *                   + iokit_mapped
 *                   + purgeable_nonvolatile
 *                   + purgeable_nonvolatile_compressed
 *                   + page_table)
 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
 */
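/*
 * Worked example of the formula above (hypothetical page counts, for
 * illustration only): with internal=100, alternate_accounting=10,
 * internal_compressed=40, alternate_accounting_compressed=5, iokit_mapped=10,
 * purgeable_nonvolatile=8, purgeable_nonvolatile_compressed=2 and
 * page_table=4, the task's phys_footprint is
 * (100 - 10) + (40 - 5) + 10 + 8 + 2 + 4 = 149 pages.
 */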
#if MACH_ASSERT
static struct {
	uint64_t	num_pmaps_checked;

	int		phys_footprint_over;
	ledger_amount_t	phys_footprint_over_total;
	ledger_amount_t	phys_footprint_over_max;
	int		phys_footprint_under;
	ledger_amount_t	phys_footprint_under_total;
	ledger_amount_t	phys_footprint_under_max;

	int		internal_over;
	ledger_amount_t	internal_over_total;
	ledger_amount_t	internal_over_max;
	int		internal_under;
	ledger_amount_t	internal_under_total;
	ledger_amount_t	internal_under_max;

	int		internal_compressed_over;
	ledger_amount_t	internal_compressed_over_total;
	ledger_amount_t	internal_compressed_over_max;
	int		internal_compressed_under;
	ledger_amount_t	internal_compressed_under_total;
	ledger_amount_t	internal_compressed_under_max;

	int		iokit_mapped_over;
	ledger_amount_t	iokit_mapped_over_total;
	ledger_amount_t	iokit_mapped_over_max;
	int		iokit_mapped_under;
	ledger_amount_t	iokit_mapped_under_total;
	ledger_amount_t	iokit_mapped_under_max;

	int		alternate_accounting_over;
	ledger_amount_t	alternate_accounting_over_total;
	ledger_amount_t	alternate_accounting_over_max;
	int		alternate_accounting_under;
	ledger_amount_t	alternate_accounting_under_total;
	ledger_amount_t	alternate_accounting_under_max;

	int		alternate_accounting_compressed_over;
	ledger_amount_t	alternate_accounting_compressed_over_total;
	ledger_amount_t	alternate_accounting_compressed_over_max;
	int		alternate_accounting_compressed_under;
	ledger_amount_t	alternate_accounting_compressed_under_total;
	ledger_amount_t	alternate_accounting_compressed_under_max;

	int		page_table_over;
	ledger_amount_t	page_table_over_total;
	ledger_amount_t	page_table_over_max;
	int		page_table_under;
	ledger_amount_t	page_table_under_total;
	ledger_amount_t	page_table_under_max;

	int		purgeable_volatile_over;
	ledger_amount_t	purgeable_volatile_over_total;
	ledger_amount_t	purgeable_volatile_over_max;
	int		purgeable_volatile_under;
	ledger_amount_t	purgeable_volatile_under_total;
	ledger_amount_t	purgeable_volatile_under_max;

	int		purgeable_nonvolatile_over;
	ledger_amount_t	purgeable_nonvolatile_over_total;
	ledger_amount_t	purgeable_nonvolatile_over_max;
	int		purgeable_nonvolatile_under;
	ledger_amount_t	purgeable_nonvolatile_under_total;
	ledger_amount_t	purgeable_nonvolatile_under_max;

	int		purgeable_volatile_compressed_over;
	ledger_amount_t	purgeable_volatile_compressed_over_total;
	ledger_amount_t	purgeable_volatile_compressed_over_max;
	int		purgeable_volatile_compressed_under;
	ledger_amount_t	purgeable_volatile_compressed_under_total;
	ledger_amount_t	purgeable_volatile_compressed_under_max;

	int		purgeable_nonvolatile_compressed_over;
	ledger_amount_t	purgeable_nonvolatile_compressed_over_total;
	ledger_amount_t	purgeable_nonvolatile_compressed_over_max;
	int		purgeable_nonvolatile_compressed_under;
	ledger_amount_t	purgeable_nonvolatile_compressed_under_total;
	ledger_amount_t	purgeable_nonvolatile_compressed_under_max;
} pmap_ledgers_drift;
#endif /* MACH_ASSERT */
/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */
static void
pmap_destroy_internal(
	pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	pt_entry_t	*ttep;
	unsigned int	i;
	pmap_t		tmp_pmap, tt_pmap;
	queue_head_t	tmp_pmap_list;

	queue_init(&tmp_pmap_list);
	simple_lock(&pmaps_lock);
	tt_pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&tt_pmap_list));
	while (!queue_end(&tt_pmap_list, (queue_entry_t)tt_pmap)) {
		if (tt_pmap->cpu_ref == 0 ) {
			tmp_pmap = tt_pmap;
			tt_pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&tmp_pmap->pmaps));
			queue_remove(&tt_pmap_list, tmp_pmap, pmap_t, pmaps);
			queue_enter(&tmp_pmap_list, tmp_pmap, pmap_t, pmaps);
		} else {
			tmp_pmap = tt_pmap;
			tt_pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&tmp_pmap->pmaps));
		}
	}
	simple_unlock(&pmaps_lock);

	tmp_pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&tmp_pmap_list));
	while (!queue_end(&tmp_pmap_list, (queue_entry_t)tmp_pmap)) {
		tt_pmap = tmp_pmap;
		tmp_pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&tt_pmap->pmaps));
		queue_remove(&tmp_pmap_list, tt_pmap, pmap_t, pmaps);
		if (tt_pmap->tte) {
			pmap_tt1_deallocate(pmap, tt_pmap->tte, tt_pmap->tte_index_max*sizeof(tt_entry_t), 0);
			tt_pmap->tte = (tt_entry_t *) NULL;
			tt_pmap->tte_index_max = 0;
		}
		if (tt_pmap->prev_tte) {
			pmap_tt1_deallocate(pmap, tt_pmap->prev_tte, PMAP_ROOT_ALLOC_SIZE, 0);
			tt_pmap->prev_tte = (tt_entry_t *) NULL;
		}
		assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
		free_asid(tt_pmap->vasid);

		pmap_check_ledgers(tt_pmap);
		ledger_dereference(tt_pmap->ledger);

		zfree(pmap_zone, tt_pmap);
	}

	if (pmap == PMAP_NULL)
		return;

	if (hw_atomic_sub(&pmap->ref_count, 1) != 0)
		return;

	simple_lock(&pmaps_lock);

	while (pmap->gc_status & PMAP_GC_INFLIGHT) {
		pmap->gc_status |= PMAP_GC_WAIT;
		assert_wait((event_t) & pmap->gc_status, THREAD_UNINT);
		simple_unlock(&pmaps_lock);
		(void) thread_block(THREAD_CONTINUE_NULL);
		simple_lock(&pmaps_lock);
	}

	queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
	simple_unlock(&pmaps_lock);

	/*
	 *	Free the memory maps, then the
	 *	pmap structure.
	 */
	for (i = 0; i < pmap->tte_index_max; i++) {
		ttep = &pmap->tte[i];
		if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
			pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
			flush_mmu_tlb_entry((i<<ARM_TT_L1_SHIFT) | (pmap->asid & 0xff));
		}
	}

	if (pmap->cpu_ref == 0) {
		if (pmap->tte) {
			pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max*sizeof(tt_entry_t), 0);
			pmap->tte = (tt_entry_t *) NULL;
			pmap->tte_index_max = 0;
		}
		if (pmap->prev_tte) {
			pmap_tt1_deallocate(pmap, pmap->prev_tte, PMAP_ROOT_ALLOC_SIZE, 0);
			pmap->prev_tte = (tt_entry_t *) NULL;
		}
		assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);

		/* return its asid to the pool */
		free_asid(pmap->vasid);
		pmap_check_ledgers(pmap);

		ledger_dereference(pmap->ledger);
		if (pmap->nested_region_asid_bitmap)
			kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size*sizeof(unsigned int));
		zfree(pmap_zone, pmap);
	} else {
		simple_lock(&pmaps_lock);
		queue_enter(&tt_pmap_list, pmap, pmap_t, pmaps);
		tt_pmap_count++;
		if (tt_pmap_count > tt_pmap_max)
			tt_pmap_max = tt_pmap_count;
		simple_unlock(&pmaps_lock);
	}
#else /* (__ARM_VMSA__ == 7) */
	pt_entry_t	*ttep;
	pmap_paddr_t	pa;
	vm_map_address_t c;

	if (pmap == PMAP_NULL) {
		return;
	}

	pmap_unmap_sharedpage(pmap);

	if (hw_atomic_sub(&pmap->ref_count, 1) == 0) {

		simple_lock(&pmaps_lock);
		while (pmap->gc_status & PMAP_GC_INFLIGHT) {
			pmap->gc_status |= PMAP_GC_WAIT;
			assert_wait((event_t) & pmap->gc_status, THREAD_UNINT);
			simple_unlock(&pmaps_lock);
			(void) thread_block(THREAD_CONTINUE_NULL);
			simple_lock(&pmaps_lock);
		}
		queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
		simple_unlock(&pmaps_lock);

		/*
		 *	Free the memory maps, then the
		 *	pmap structure.
		 */
		for (c = pmap->min; c < pmap->max; c += ARM_TT_L2_SIZE) {
			ttep = pmap_tt2e(pmap, c);
			if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
				pmap_tte_deallocate(pmap, ttep, PMAP_TT_L2_LEVEL);
				flush_mmu_tlb_entry(tlbi_addr(c) | tlbi_asid(pmap->asid));
			}
		}
#if !__ARM64_TWO_LEVEL_PMAP__
		for (c = pmap->min; c < pmap->max; c += ARM_TT_L1_SIZE) {
			ttep = pmap_tt1e(pmap, c);
			if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
				pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
			}
		}
#endif

		if (pmap->tte) {
			pa = pmap->ttep;
			pmap_tt1_deallocate(pmap, (tt_entry_t *)phystokv(pa), PMAP_ROOT_ALLOC_SIZE, 0);
		}

		assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);

		flush_mmu_tlb_asid((uint64_t)(pmap->asid) << TLBI_ASID_SHIFT);
		free_asid(pmap->vasid);

		if (pmap->nested_region_asid_bitmap) {
			kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size*sizeof(unsigned int));
		}

		pmap_check_ledgers(pmap);
		ledger_dereference(pmap->ledger);

		zfree(pmap_zone, pmap);
	}
#endif /* (__ARM_VMSA__ == 7) */
}

void
pmap_destroy(
	pmap_t pmap)
{
	PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START,
	           VM_KERNEL_ADDRHIDE(pmap));

	pmap_destroy_internal(pmap);

	PMAP_TRACE(PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
}
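/*
 * Lifecycle sketch (illustrative; the ledger argument stands in for whatever
 * the task-creation path actually supplies):
 *
 *	pmap_t p = pmap_create(ledger, 0, TRUE);	// size 0 => a real, hardware-referenced map
 *	...use p via pmap_enter()/pmap_remove()...
 *	pmap_destroy(p);				// drops the reference pmap_create() took
 */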
/*
 *	Add a reference to the specified pmap.
 */
static void
pmap_reference_internal(
	pmap_t pmap)
{
	if (pmap != PMAP_NULL) {
		(void) hw_atomic_add(&pmap->ref_count, 1);
	}
}

void
pmap_reference(
	pmap_t pmap)
{
	pmap_reference_internal(pmap);
}
static tt_entry_t *
pmap_tt1_allocate(
	pmap_t		pmap,
	vm_size_t	size,
	unsigned	option)
{
	tt_entry_t	*tt1;
	tt_free_entry_t	*tt1_free;
	pmap_paddr_t	pa;
	vm_address_t	va;
	vm_address_t	va_end;
	kern_return_t	ret;

	simple_lock(&pmaps_lock);
	if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
		free_page_size_tt_count--;
		tt1 = (tt_entry_t *)free_page_size_tt_list;
		free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
		simple_unlock(&pmaps_lock);
		pmap_tt_ledger_credit(pmap, size);
		return (tt_entry_t *)tt1;
	}
	if ((size == 2*PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
		free_two_page_size_tt_count--;
		tt1 = (tt_entry_t *)free_two_page_size_tt_list;
		free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
		simple_unlock(&pmaps_lock);
		pmap_tt_ledger_credit(pmap, size);
		return (tt_entry_t *)tt1;
	}
	if (free_tt_count != 0) {
		free_tt_count--;
		tt1 = (tt_entry_t *)free_tt_list;
		free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
		simple_unlock(&pmaps_lock);
		pmap_tt_ledger_credit(pmap, size);
		return (tt_entry_t *)tt1;
	}

	simple_unlock(&pmaps_lock);

	ret = pmap_pages_alloc(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));

	if(ret == KERN_RESOURCE_SHORTAGE)
		return (tt_entry_t *)0;


	if (size < PAGE_SIZE) {
		simple_lock(&pmaps_lock);

		for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + size; va < va_end; va = va+size) {
			tt1_free = (tt_free_entry_t *)va;
			tt1_free->next = free_tt_list;
			free_tt_list = tt1_free;
			free_tt_count++;
		}
		if (free_tt_count > free_tt_max)
			free_tt_max = free_tt_count;

		simple_unlock(&pmaps_lock);
	}

	/* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
	 * Depending on the device, this can vary between 512b and 16K. */
	OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
	OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
	pmap_tt_ledger_credit(pmap, size);

	return (tt_entry_t *) phystokv(pa);
}
static void
pmap_tt1_deallocate(
	pmap_t		pmap,
	tt_entry_t	*tt,
	vm_size_t	size,
	unsigned	option)
{
	tt_free_entry_t	*tt_entry;

	tt_entry = (tt_free_entry_t *)tt;

	simple_lock(&pmaps_lock);

	if (size < PAGE_SIZE) {
		free_tt_count++;
		if (free_tt_count > free_tt_max)
			free_tt_max = free_tt_count;
		tt_entry->next = free_tt_list;
		free_tt_list = tt_entry;
	}

	if (size == PAGE_SIZE) {
		free_page_size_tt_count++;
		if (free_page_size_tt_count > free_page_size_tt_max)
			free_page_size_tt_max = free_page_size_tt_count;
		tt_entry->next = free_page_size_tt_list;
		free_page_size_tt_list = tt_entry;
	}

	if (size == 2*PAGE_SIZE) {
		free_two_page_size_tt_count++;
		if (free_two_page_size_tt_count > free_two_page_size_tt_max)
			free_two_page_size_tt_max = free_two_page_size_tt_count;
		tt_entry->next = free_two_page_size_tt_list;
		free_two_page_size_tt_list = tt_entry;
	}

	if ((option & PMAP_TT_DEALLOCATE_NOBLOCK) || (!not_in_kdp)) {
		simple_unlock(&pmaps_lock);
		pmap_tt_ledger_debit(pmap, size);
		return;
	}

	while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {

		free_page_size_tt_count--;
		tt = (tt_entry_t *)free_page_size_tt_list;
		free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;

		simple_unlock(&pmaps_lock);

		pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);

		OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));

		simple_lock(&pmaps_lock);
	}

	while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
		free_two_page_size_tt_count--;
		tt = (tt_entry_t *)free_two_page_size_tt_list;
		free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;

		simple_unlock(&pmaps_lock);

		pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2*PAGE_SIZE);

		OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));

		simple_lock(&pmaps_lock);
	}

	simple_unlock(&pmaps_lock);
	pmap_tt_ledger_debit(pmap, size);
}
static kern_return_t
pmap_tt_allocate(
	pmap_t pmap,
	tt_entry_t **ttp,
	unsigned int level,
	unsigned int options)
{
	pmap_paddr_t pa;

	*ttp = NULL;

	PMAP_LOCK(pmap);
	if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
		tt_free_entry_t *tt_free_next;

		tt_free_next = ((tt_free_entry_t *)pmap->tt_entry_free)->next;
		*ttp = (tt_entry_t *)pmap->tt_entry_free;
		pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
	}
	PMAP_UNLOCK(pmap);

	if (*ttp == NULL) {
		pt_desc_t	*ptdp;

		/*
		 *  Allocate a VM page for the level x page table entries.
		 */
		while (pmap_pages_alloc(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
			if(options & PMAP_OPTIONS_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}
			VM_PAGE_WAIT();
		}

		if (level < PMAP_TT_MAX_LEVEL) {
			OSAddAtomic64(1, &alloc_ttepages_count);
			OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
		} else {
			OSAddAtomic64(1, &alloc_ptepages_count);
			OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
		}

		pmap_tt_ledger_credit(pmap, PAGE_SIZE);

		PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);

		ptdp = ptd_alloc(pmap);
		*(pt_desc_t **)pai_to_pvh(pa_index(pa)) = ptdp;

		__unreachable_ok_push
		if (TEST_PAGE_RATIO_4) {
			vm_address_t	va;
			vm_address_t	va_end;

			PMAP_LOCK(pmap);

			for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + ARM_PGBYTES; va < va_end; va = va+ARM_PGBYTES) {
				((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
				pmap->tt_entry_free = (tt_entry_t *)va;
			}
			PMAP_UNLOCK(pmap);
		}
		__unreachable_ok_pop

		*ttp = (tt_entry_t *)phystokv(pa);
	}


	return KERN_SUCCESS;
}
static void
pmap_tt_deallocate(
	pmap_t pmap,
	tt_entry_t *ttp,
	unsigned int level)
{
	pt_desc_t	*ptdp;
	unsigned	pt_acc_cnt;
	unsigned	i, max_pt_index = PAGE_RATIO;
	vm_offset_t	free_page=0;

	PMAP_LOCK(pmap);

	ptdp = ptep_get_ptd((vm_offset_t)ttp);

	if (level < PMAP_TT_MAX_LEVEL) {

		if (ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt == PT_DESC_REFCOUNT)
			ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
	}

	ptdp->pt_map[ARM_PT_DESC_INDEX(ttp)].va = 0;

	if (ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt != 0)
		panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptdp->pt_cnt[ARM_PT_DESC_INDEX(ttp)].refcnt);

	for (i = 0, pt_acc_cnt = 0 ; i < max_pt_index ; i++)
		pt_acc_cnt += ptdp->pt_cnt[i].refcnt;

	if (pt_acc_cnt == 0) {
		tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
		unsigned pt_free_entry_cnt = 1;

		while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
			tt_free_entry_t *tt_free_list_next;

			tt_free_list_next = tt_free_list->next;
			if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
				pt_free_entry_cnt++;
			}
			tt_free_list = tt_free_list_next;
		}
		if (pt_free_entry_cnt == max_pt_index) {
			tt_free_entry_t *tt_free_list_cur;

			free_page = (vm_offset_t)ttp & ~PAGE_MASK;
			tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
			tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;

			while (tt_free_list_cur) {
				tt_free_entry_t *tt_free_list_next;

				tt_free_list_next = tt_free_list_cur->next;
				if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
					tt_free_list->next = tt_free_list_next->next;
				} else {
					tt_free_list = tt_free_list_next;
				}
				tt_free_list_cur = tt_free_list_next;
			}
		} else {
			((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
			pmap->tt_entry_free = ttp;
		}
	} else {
		((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
		pmap->tt_entry_free = ttp;
	}

	PMAP_UNLOCK(pmap);

	if (free_page != 0) {

		ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
		*(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
		pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
		if (level < PMAP_TT_MAX_LEVEL)
			OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
		else
			OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
		pmap_tt_ledger_debit(pmap, PAGE_SIZE);
	}
}
static void
pmap_tte_deallocate(
	pmap_t pmap,
	tt_entry_t *ttep,
	unsigned int level)
{
	pmap_paddr_t	pa;
	tt_entry_t	tte;

	PMAP_ASSERT_LOCKED(pmap);

	tte = *ttep;

	if (tte == 0) {
		panic("pmap_tte_deallocate(): null tt_entry ttep==%p\n", ttep);
	}

	if (tte_get_ptd(tte)->pmap != pmap) {
		panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
		      tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
	}

	if (((level+1) == PMAP_TT_MAX_LEVEL) && (tte_get_ptd(tte)->pt_cnt[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt != 0)) {
		panic("pmap_tte_deallocate(): pmap=%p ttep=%p ptd=%p refcnt=0x%x \n", pmap, ttep,
		      tte_get_ptd(tte), (tte_get_ptd(tte)->pt_cnt[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt));
	}

#if	(__ARM_VMSA__ == 7)
	{
		tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
		unsigned i;

		for (i = 0; i<4; i++, ttep_4M++)
			*ttep_4M = (tt_entry_t) 0;
	}
#else
	*ttep = (tt_entry_t) 0;
#endif

#ifndef  __ARM_L1_PTW__
	CleanPoU_DcacheRegion((vm_offset_t) ttep, sizeof(tt_entry_t));
#else
	__asm__ volatile("dsb ish");
#endif
	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		{
			pt_entry_t	*pte_p = ((pt_entry_t *) (ttetokv(tte) & ~ARM_PGMASK));
			unsigned	i;

			for (i = 0; i < (ARM_PGBYTES / sizeof(*pte_p)); i++,pte_p++) {
				if (ARM_PTE_IS_COMPRESSED(*pte_p)) {
					panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
					      (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
				} else if (((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT) {
					panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
					      (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
				}
			}
		}

		/* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
		 * aligned on 1K boundaries.  We clear the surrounding "chunk" of 4 TTEs above. */
		pa = tte_to_pa(tte) & ~ARM_PGMASK;
		pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level+1);
	}
}
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 *
 *	Returns the number of PTE changed, and sets *rmv_cnt
 *	to the number of SPTE changed.
 */
static int
pmap_remove_range(
	pmap_t pmap,
	vm_map_address_t va,
	pt_entry_t *bpte,
	pt_entry_t *epte,
	uint32_t *rmv_cnt)
{
	return pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
					 PMAP_OPTIONS_REMOVE);
}
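/*
 * Caller sketch (illustrative; this mirrors pmap_remove_options_internal()
 * below, which derives bpte/epte from the leaf table covering "start"):
 *
 *	pt_entry_t	*bpte = &pte_p[ptenum(start)];
 *	pt_entry_t	*epte = bpte + ((end - start) >> ARM_TT_LEAF_SHIFT);
 *	uint32_t	 removed_spte = 0;
 *
 *	PMAP_LOCK(pmap);
 *	pmap_remove_range(pmap, start, bpte, epte, &removed_spte);
 *	PMAP_UNLOCK(pmap);
 */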
#if MACH_ASSERT
int num_reusable_mismatch = 0;
#endif /* MACH_ASSERT */

static int
pmap_remove_range_options(
	pmap_t pmap,
	vm_map_address_t va,
	pt_entry_t *bpte,
	pt_entry_t *epte,
	uint32_t *rmv_cnt,
	int options)
{
	pt_entry_t	*cpte;
	int		num_removed, num_unwired;
	int		num_pte_changed;
	int		pai = 0;
	pmap_paddr_t	pa;
	int		num_external, num_internal, num_reusable;
	int		num_alt_internal;
	uint64_t	num_compressed, num_alt_compressed;

	PMAP_ASSERT_LOCKED(pmap);

	num_removed = 0;
	num_unwired = 0;
	num_pte_changed = 0;
	num_external = 0;
	num_internal = 0;
	num_reusable = 0;
	num_compressed = 0;
	num_alt_internal = 0;
	num_alt_compressed = 0;

	for (cpte = bpte; cpte < epte;
	     cpte += PAGE_SIZE/ARM_PGBYTES, va += PAGE_SIZE) {
		pv_entry_t	**pv_h, **pve_pp;
		pv_entry_t	*pve_p;
		pt_entry_t	spte;
		boolean_t	managed=FALSE;

		spte = *cpte;

		if (pgtrace_enabled) {
			pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
		}

		while (!managed) {
			if (pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_REMOVE) &&
			    (ARM_PTE_IS_COMPRESSED(spte))) {
				/*
				 * "pmap" must be locked at this point,
				 * so this should not race with another
				 * pmap_remove_range() or pmap_enter().
				 */

				/* one less "compressed"... */
				num_compressed++;
				if (spte & ARM_PTE_COMPRESSED_ALT) {
					/* ... but it used to be "ALTACCT" */
					num_alt_compressed++;
				}

				WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
				/*
				 * "refcnt" also accounts for
				 * our "compressed" markers,
				 * so let's update it here.
				 */
				if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->pt_cnt[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0)
					panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
				spte = *cpte;
			}
			/*
			 * It may be possible for the pte to transition from managed
			 * to unmanaged in this timeframe; for now, elide the assert.
			 * We should break out as a consequence of checking pa_valid.
			 */
			//assert(!ARM_PTE_IS_COMPRESSED(spte));
			pa = pte_to_pa(spte);
			if (!pa_valid(pa)) {
				break;
			}
			pai = (int)pa_index(pa);
			LOCK_PVH(pai);
			spte = *cpte;
			pa = pte_to_pa(spte);
			if (pai == (int)pa_index(pa)) {
				managed =TRUE;
				break; // Leave pai locked as we will unlock it after we free the PV entry
			}
			UNLOCK_PVH(pai);
		}

		if (ARM_PTE_IS_COMPRESSED(*cpte)) {
			/*
			 * There used to be a valid mapping here but it
			 * has already been removed when the page was
			 * sent to the VM compressor, so nothing left to
			 * remove now...
			 */
			continue;
		}

		/* remove the translation, do not flush the TLB */
		if (*cpte != ARM_PTE_TYPE_FAULT) {
			assert(!ARM_PTE_IS_COMPRESSED(*cpte));
#if MACH_ASSERT
			if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
				panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
				      cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
			}
#endif
			WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
			num_pte_changed++;
		}

		if ((spte != ARM_PTE_TYPE_FAULT) &&
		    (pmap != kernel_pmap)) {
			assert(!ARM_PTE_IS_COMPRESSED(spte));
			if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->pt_cnt[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0)
				panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
			if(rmv_cnt) (*rmv_cnt)++;
		}

		if (pte_is_wired(spte)) {
			pte_set_wired(cpte, 0);
			num_unwired++;
		}
		/*
		 * if not managed, we're done
		 */
		if (!managed)
			continue;
		/*
		 * find and remove the mapping from the chain for this
		 * physical address.
		 */
		ASSERT_PVH_LOCKED(pai); // Should have been locked when we found the managed PTE above
		pv_h = pai_to_pvh(pai);

		if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
			if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0))
				panic("pmap_remove_range(): cpte=%p (0x%llx) does not match pv_h=%p (%p)\n", cpte, (uint64_t)spte, pv_h, pvh_ptep(pv_h));
			if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
				assert(IS_INTERNAL_PAGE(pai));
				num_internal++;
				num_alt_internal++;
				CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
			} else if (IS_INTERNAL_PAGE(pai)) {
				if (IS_REUSABLE_PAGE(pai)) {
					num_reusable++;
				} else {
					num_internal++;
				}
			} else {
				num_external++;
			}
			pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
		} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {

			pve_pp = pv_h;
			pve_p = pvh_list(pv_h);

			while (pve_p != PV_ENTRY_NULL &&
			       (pve_get_ptep(pve_p) != cpte)) {
				pve_pp = pve_link_field(pve_p);
				pve_p = PVE_NEXT_PTR(pve_next(pve_p));
			}

			if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0)) {
				panic("pmap_remove_range(): cpte=%p (0x%llx) not in pv_h=%p\n", cpte, (uint64_t)spte, pv_h);
			}

#if MACH_ASSERT
			if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
				pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
				while (check_pve_p != PV_ENTRY_NULL) {
					if (pve_get_ptep(check_pve_p) == cpte) {
						panic("pmap_remove_range(): duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, va=0x%llx\n",
						      cpte, pmap, pv_h, pve_p, (uint64_t)spte, (uint64_t)va);
					}
					check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
				}
			}
#endif

			if (IS_ALTACCT_PAGE(pai, pve_p)) {
				assert(IS_INTERNAL_PAGE(pai));
				num_internal++;
				num_alt_internal++;
				CLR_ALTACCT_PAGE(pai, pve_p);
			} else if (IS_INTERNAL_PAGE(pai)) {
				if (IS_REUSABLE_PAGE(pai)) {
					num_reusable++;
				} else {
					num_internal++;
				}
			} else {
				num_external++;
			}

			pvh_remove(pv_h, pve_pp, pve_p) ;
		} else {
			panic("pmap_remove_range(): unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pte=0x%llx va=0x%llx\n",
			      *pv_h, cpte, pmap, pv_h, (uint64_t)spte, (uint64_t)va);
		}

		UNLOCK_PVH(pai);
		num_removed++;
	}

	/*
	 *	Update the counts
	 */
	OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
	pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));

	if (pmap != kernel_pmap) {
		/* sanity checks... */
#if MACH_ASSERT
		if (pmap->stats.internal < num_internal) {
			if ((! pmap_stats_assert ||
			     ! pmap->pmap_stats_assert) ||
			    (pmap->stats.internal + pmap->stats.reusable) ==
			    (num_internal + num_reusable)) {
				num_reusable_mismatch++;
				printf("pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
				       pmap, (uint64_t) va, bpte, epte, options,
				       num_internal, num_removed, num_unwired,
				       num_external, num_reusable, num_compressed,
				       num_alt_internal, num_alt_compressed,
				       num_pte_changed,
				       pmap->stats.internal,
				       pmap->stats.reusable);
				/* slight mismatch: fix it... */
				num_internal = pmap->stats.internal;
				num_reusable = pmap->stats.reusable;
			} else {
				panic("pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
				      pmap, (uint64_t) va, bpte, epte, options,
				      num_internal, num_removed, num_unwired,
				      num_external, num_reusable, num_compressed,
				      num_alt_internal, num_alt_compressed,
				      num_pte_changed,
				      pmap->stats.internal,
				      pmap->stats.reusable);
			}
		}
#endif /* MACH_ASSERT */
		PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
				   pmap,
				   "pmap=%p num_external=%d stats.external=%d",
				   pmap, num_external, pmap->stats.external);
		PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
				   pmap,
				   "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
				   pmap,
				   num_internal, pmap->stats.internal,
				   num_reusable, pmap->stats.reusable);
		PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
				   pmap,
				   "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
				   pmap,
				   num_internal, pmap->stats.internal,
				   num_reusable, pmap->stats.reusable);
		PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
				   pmap,
				   "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
				   pmap, num_compressed, num_alt_compressed,
				   pmap->stats.compressed);

		/* update pmap stats... */
		OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
		if (num_external)
			OSAddAtomic(-num_external, &pmap->stats.external);
		if (num_internal)
			OSAddAtomic(-num_internal, &pmap->stats.internal);
		if (num_reusable)
			OSAddAtomic(-num_reusable, &pmap->stats.reusable);
		if (num_compressed)
			OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
		/* ... and ledgers */
		pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
		pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
		pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
		pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
		pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
		/* make needed adjustments to phys_footprint */
		pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
				  machine_ptob((num_internal -
						num_alt_internal) +
					       (num_compressed -
						num_alt_compressed)));
	}

	/* flush the ptable entries we have written */
	if (num_pte_changed > 0)
		FLUSH_PTE_RANGE(bpte, epte);

	return num_pte_changed;
}
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
void
pmap_remove(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end)
{
	pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
}
static int
pmap_remove_options_internal(pmap_t pmap,
			     vm_map_address_t start,
			     vm_map_address_t end,
			     int options)
{
	int		remove_count = 0;
	pt_entry_t	*bpte, *epte;
	pt_entry_t	*pte_p;
	tt_entry_t	*tte_p;
	uint32_t	rmv_spte=0;

	PMAP_LOCK(pmap);

	tte_p = pmap_tte(pmap, start);

	if (tte_p == (tt_entry_t *) NULL) {
		goto done;
	}

	if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		pte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte = &pte_p[ptenum(start)];
		epte = bpte + ((end - start) >> ARM_TT_LEAF_SHIFT);

		remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
							  &rmv_spte, options);

#if	(__ARM_VMSA__ == 7)
		if (rmv_spte && (ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
		    (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
			pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L1_LEVEL);
			flush_mmu_tlb_entry((start & ~ARM_TT_L1_OFFMASK) | (pmap->asid & 0xff));
		}
#else
		if (rmv_spte && (ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
		    (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
			pmap_tte_deallocate(pmap, tte_p, PMAP_TT_L2_LEVEL);
			flush_mmu_tlb_entry(tlbi_addr(start & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->asid));
		}
#endif
	}

done:
	PMAP_UNLOCK(pmap);

	return remove_count;
}
void
pmap_remove_options(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options)
{
	int		remove_count = 0;
	vm_map_address_t va;

	if (pmap == PMAP_NULL)
		return;

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
	           VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
	           VM_KERNEL_ADDRHIDE(end));

#if MACH_ASSERT
	if ((start|end) & PAGE_MASK) {
		panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
		      pmap, (uint64_t)start, (uint64_t)end);
	}
	if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
		panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
		      pmap, (uint64_t)start, (uint64_t)end);
	}
#endif

	/*
	 *      Invalidate the translation buffer first
	 */
	va = start;
	while (va < end) {
		vm_map_address_t l;

#if	(__ARM_VMSA__ == 7)
		l = ((va + ARM_TT_L1_SIZE) & ~ARM_TT_L1_OFFMASK);
#else
		l = ((va + ARM_TT_L2_SIZE) & ~ARM_TT_L2_OFFMASK);
#endif
		if (l > end)
			l = end;

		remove_count += pmap_remove_options_internal(pmap, va, l, options);

		va = l;
	}

	if (remove_count > 0)
		PMAP_UPDATE_TLBS(pmap, start, end);

	PMAP_TRACE(PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
}
/*
 *	Remove phys addr if mapped in specified map
 */
void
pmap_remove_some_phys(
	__unused pmap_t map,
	__unused ppnum_t pn)
{
	/* Implement to support working set code */
}
void
pmap_set_pmap(
	pmap_t pmap,
#if	!__ARM_USER_PROTECT__
	__unused
#endif
	thread_t	thread)
{
	pmap_switch(pmap);
#if __ARM_USER_PROTECT__
	if (pmap->tte_index_max == NTTES) {
		thread->machine.uptw_ttc = 2;
		thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
	} else {
		thread->machine.uptw_ttc = 1;
		thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
	}
	thread->machine.asid = pmap->asid;
#endif
}
static void
pmap_flush_core_tlb_asid(pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	flush_core_tlb_asid(pmap->asid);
#else
	flush_core_tlb_asid(((uint64_t) pmap->asid) << TLBI_ASID_SHIFT);
#if __ARM_KERNEL_PROTECT__
	flush_core_tlb_asid(((uint64_t) pmap->asid + 1) << TLBI_ASID_SHIFT);
#endif /* __ARM_KERNEL_PROTECT__ */
#endif
}
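/*
 * Switch the current CPU to the given pmap.  The cpu_user_pmap_stamp check
 * lets us skip most of the work when this CPU is already running on the same
 * generation of this pmap, while the ASID high-bits comparison decides
 * whether the hardware ASID has to be flushed to avoid aliasing stale
 * translations left by a previous owner of the same physical ASID.
 */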
static void
pmap_switch_internal(
	pmap_t pmap)
{
	pmap_cpu_data_t	*cpu_data_ptr = pmap_get_cpu_data();
	uint32_t	last_asid_high_bits, asid_high_bits;
	pmap_t		cur_pmap;
	pmap_t		cur_user_pmap;
	boolean_t	do_asid_flush = FALSE;

#if (__ARM_VMSA__ == 7)
	simple_lock(&pmap->tt1_lock);
#endif

	cur_pmap = current_pmap();
	cur_user_pmap = cpu_data_ptr->cpu_user_pmap;

	assert(pmap->asid < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));

	/* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
	asid_high_bits = pmap->vasid >> ARM_ASID_SHIFT;
	last_asid_high_bits = (uint32_t) cpu_data_ptr->cpu_asid_high_bits[pmap->asid];

	if (asid_high_bits != last_asid_high_bits) {
		/*
		 * If the virtual ASID of the new pmap does not match the virtual ASID
		 * last seen on this CPU for the physical ASID (that was a mouthful),
		 * then this switch runs the risk of aliasing.  We need to flush the
		 * TLB for this physical ASID in this case.
		 */
		cpu_data_ptr->cpu_asid_high_bits[pmap->asid] = (uint8_t) asid_high_bits;
		do_asid_flush = TRUE;
	}

	if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
		if (cpu_data_ptr->cpu_user_pmap_stamp == pmap->stamp) {
			pmap_switch_user_ttb_internal(pmap);

#if (__ARM_VMSA__ == 7)
			simple_unlock(&pmap->tt1_lock);
#endif

			if (do_asid_flush) {
				pmap_flush_core_tlb_asid(pmap);
			}
			return;
		} else
			cur_user_pmap = NULL;
	} else if ((cur_user_pmap == pmap) && (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp))
		cur_user_pmap = NULL;

	pmap_switch_user_ttb_internal(pmap);

	if (do_asid_flush) {
		pmap_flush_core_tlb_asid(pmap);
	}

#if (__ARM_VMSA__ == 7)
	simple_unlock(&pmap->tt1_lock);
#else
	if (pmap != kernel_pmap) {

		if (cur_user_pmap != PMAP_NULL) {
			/*
			 * We have a low-address global mapping for the commpage
			 * for 32-bit processes; flush it if we switch to a 64-bit
			 * process.
			 */
			if (pmap_is_64bit(pmap) && !pmap_is_64bit(cur_user_pmap)) {
				pmap_sharedpage_flush_32_to_64();
			}
		}
	}
#endif
}

void
pmap_switch(
	pmap_t pmap)
{
	pmap_switch_internal(pmap);
}
void
pmap_page_protect(
	ppnum_t ppnum,
	vm_prot_t prot)
{
	pmap_page_protect_options(ppnum, prot, 0, NULL);
}
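/*
 * The helper below walks every mapping on the page's PV list.  When the new
 * protection is VM_PROT_NONE the mapping is removed outright (with the
 * associated ledger and stats adjustments, including "compressed"
 * accounting); otherwise the PTE is rewritten read-only, leaving execute
 * permission unchanged.
 */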
/*
 *	Routine:	pmap_page_protect_options
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
static void
pmap_page_protect_options_internal(
	ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options)
{
	pmap_paddr_t	phys = ptoa(ppnum);
	pv_entry_t	**pv_h;
	pv_entry_t	*pve_p;
	pv_entry_t	*pveh_p;
	pv_entry_t	*pvet_p;
	pt_entry_t	*pte_p;
	int		pai;
	boolean_t	remove;
	boolean_t	set_NX;
	unsigned int	pvh_cnt = 0;

	assert(ppnum != vm_page_fictitious_addr);

	/* Only work with managed pages. */
	if (!pa_valid(phys)) {
		return;
	}

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_ALL:
		return;		/* nothing to do */
	case VM_PROT_READ:
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	default:
		remove = TRUE;
		break;
	}

	pai = (int)pa_index(phys);
	LOCK_PVH(pai);
	pv_h = pai_to_pvh(pai);

	pte_p = PT_ENTRY_NULL;
	pve_p = PV_ENTRY_NULL;
	pveh_p = PV_ENTRY_NULL;
	pvet_p = PV_ENTRY_NULL;
	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		pte_p = pvh_ptep(pv_h);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_p = pvh_list(pv_h);
		pveh_p = pve_p;
	}

	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		vm_map_address_t va;
		pmap_t		pmap;
		pt_entry_t	tmplate;
		boolean_t	update = FALSE;

		if (pve_p != PV_ENTRY_NULL)
			pte_p = pve_get_ptep(pve_p);

		pmap = ptep_get_pmap(pte_p);
		va = ptep_get_va(pte_p);

		if (pte_p == PT_ENTRY_NULL) {
			panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
			      pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
		} else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
			if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {

				pv_entry_t *check_pve_p = pveh_p;
				while (check_pve_p != PV_ENTRY_NULL) {
					if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
						panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
						      pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
					}
					check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
				}
			}
			panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
			      pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
		}

#if DEVELOPMENT || DEBUG
		if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
		if ((prot & VM_PROT_EXECUTE))
#endif
			set_NX = FALSE;
		else
			set_NX = TRUE;

		/* Remove the mapping if new protection is NONE */
		if (remove) {
			boolean_t is_altacct = FALSE;

			if (IS_ALTACCT_PAGE(pai, pve_p)) {
				is_altacct = TRUE;
			}

			if (pte_is_wired(*pte_p)) {
				pte_set_wired(pte_p, 0);
				if (pmap != kernel_pmap) {
					pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
					OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
				}
			}

			if (*pte_p != ARM_PTE_TYPE_FAULT &&
			    pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_COMPRESSOR) &&
			    IS_INTERNAL_PAGE(pai)) {
				assert(!ARM_PTE_IS_COMPRESSED(*pte_p));
				/* mark this PTE as having been "compressed" */
				tmplate = ARM_PTE_COMPRESSED;
				if (is_altacct) {
					tmplate |= ARM_PTE_COMPRESSED_ALT;
				}
			} else {
				tmplate = ARM_PTE_TYPE_FAULT;
			}

			if ((*pte_p != ARM_PTE_TYPE_FAULT) &&
			    tmplate == ARM_PTE_TYPE_FAULT &&
			    (pmap != kernel_pmap)) {
				if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt)) <= 0)
					panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
			}

			if (*pte_p != tmplate) {
				WRITE_PTE(pte_p, tmplate);
				update = TRUE;
			}
			pvh_cnt++;
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);

			/*
			 * We only ever compress internal pages.
			 */
			if (options & PMAP_OPTIONS_COMPRESSOR) {
				assert(IS_INTERNAL_PAGE(pai));
			}

			if (pmap != kernel_pmap) {
				if (IS_REUSABLE_PAGE(pai) &&
				    IS_INTERNAL_PAGE(pai) &&
				    !is_altacct) {
					PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
					OSAddAtomic(-1, &pmap->stats.reusable);
				} else if (IS_INTERNAL_PAGE(pai)) {
					PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
					OSAddAtomic(-1, &pmap->stats.internal);
				} else {
					PMAP_STATS_ASSERTF(pmap->stats.external > 0, pmap, "stats.external %d", pmap->stats.external);
					OSAddAtomic(-1, &pmap->stats.external);
				}
				if ((options & PMAP_OPTIONS_COMPRESSOR) &&
				    IS_INTERNAL_PAGE(pai)) {
					/* adjust "compressed" stats */
					OSAddAtomic64(+1, &pmap->stats.compressed);
					PMAP_STATS_PEAK(pmap->stats.compressed);
					pmap->stats.compressed_lifetime++;
				}

				if (IS_ALTACCT_PAGE(pai, pve_p)) {
					assert(IS_INTERNAL_PAGE(pai));
					pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
					pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
						pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
					}

					/*
					 * Cleanup our marker before
					 * we free this pv_entry.
					 */
					CLR_ALTACCT_PAGE(pai, pve_p);

				} else if (IS_REUSABLE_PAGE(pai)) {
					assert(IS_INTERNAL_PAGE(pai));
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
						/* was not in footprint, but is now */
						pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
					}

				} else if (IS_INTERNAL_PAGE(pai)) {
					pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);

					/*
					 * Update all stats related to physical footprint, which only
					 * deals with internal pages.
					 */
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						/*
						 * This removal is only being done so we can send this page to
						 * the compressor; therefore it mustn't affect total task footprint.
						 */
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
					} else {
						/*
						 * This internal page isn't going to the compressor, so adjust stats to keep
						 * phys_footprint up to date.
						 */
						pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
					}
				} else {
					/* external page: no impact on ledgers */
				}
			}

			if (pve_p != PV_ENTRY_NULL) {
				assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
			}

		} else {
			pt_entry_t	spte;

			spte = *pte_p;

			if (pmap == kernel_pmap)
				tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
			else
				tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));

			pte_set_ffr(tmplate, 0);

#if (__ARM_VMSA__ == 7)
			if (set_NX)
				tmplate |= ARM_PTE_NX;
			else {
				/*
				 * While the naive implementation of this would serve to add execute
				 * permission, this is not how the VM uses this interface, or how
				 * x86_64 implements it.  So ignore requests to add execute permissions.
				 */
#if 0
				tmplate &= ~ARM_PTE_NX;
#endif
			}
#else
			if (set_NX)
				tmplate |= ARM_PTE_NX | ARM_PTE_PNX;
			else {
				/*
				 * While the naive implementation of this would serve to add execute
				 * permission, this is not how the VM uses this interface, or how
				 * x86_64 implements it.  So ignore requests to add execute permissions.
				 */
#if 0
				if (pmap == kernel_pmap) {
					tmplate &= ~ARM_PTE_PNX;
					tmplate |= ARM_PTE_NX;
				} else {
					tmplate &= ~ARM_PTE_NX;
					tmplate |= ARM_PTE_PNX;
				}
#endif
			}
#endif

			if (*pte_p != ARM_PTE_TYPE_FAULT &&
			    !ARM_PTE_IS_COMPRESSED(*pte_p) &&
			    *pte_p != tmplate) {
				WRITE_PTE(pte_p, tmplate);
				update = TRUE;
			}
		}

		/* Invalidate TLBs for all CPUs using it */
		if (update)
			PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE);

		pte_p = PT_ENTRY_NULL;
		pvet_p = pve_p;
		if (pve_p != PV_ENTRY_NULL) {
			if (remove) {
				assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
			}
			pve_p = PVE_NEXT_PTR(pve_next(pve_p));
		}
	}

	/* if we removed a bunch of entries, take care of them now */
	if (remove) {
		pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
	}

	UNLOCK_PVH(pai);

	if (remove && (pveh_p != PV_ENTRY_NULL)) {
		pv_list_free(pveh_p, pvet_p, pvh_cnt);
	}
}
void
pmap_page_protect_options(
	ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options,
	__unused void *arg)
{
	pmap_paddr_t	phys = ptoa(ppnum);

	assert(ppnum != vm_page_fictitious_addr);

	/* Only work with managed pages. */
	if (!pa_valid(phys))
		return;

	/*
	 * Determine the new protection.
	 */
	if (prot == VM_PROT_ALL) {
		return;		/* nothing to do */
	}

	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);

	pmap_page_protect_options_internal(ppnum, prot, options);

	PMAP_TRACE(PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
}
/*
 * Indicates if the pmap layer enforces some additional restrictions on the
 * given set of protections.
 */
bool pmap_has_prot_policy(__unused vm_prot_t prot)
{
	return FALSE;
}

/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *	VERY IMPORTANT: Will not increase permissions.
 *	VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
 */
void
pmap_protect(
	pmap_t pmap,
	vm_map_address_t b,
	vm_map_address_t e,
	vm_prot_t prot)
{
	pmap_protect_options(pmap, b, e, prot, 0, NULL);
}
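/*
 * pmap_protect_options_internal operates on at most one twig-level region.
 * It only ever reduces permissions: requests that would remove all access
 * are expected to arrive via pmap_remove_options() instead, which is why a
 * write-removal request trips the "should have been a remove operation"
 * panic below.
 */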
static void
pmap_protect_options_internal(pmap_t pmap,
			      vm_map_address_t start,
			      vm_map_address_t end,
			      vm_prot_t prot,
			      unsigned int options,
			      __unused void *args)
{
	tt_entry_t	*tte_p;
	pt_entry_t	*bpte_p, *epte_p;
	pt_entry_t	*pte_p;
	boolean_t	 set_NX = TRUE;
#if (__ARM_VMSA__ > 7)
	boolean_t	 set_XO = FALSE;
#endif
	boolean_t	 should_have_removed = FALSE;

#ifndef __ARM_IC_NOALIAS_ICACHE__
	boolean_t	 InvalidatePoU_Icache_Done = FALSE;
#endif

#if DEVELOPMENT || DEBUG
	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
			should_have_removed = TRUE;
		}
	} else
#endif
	{
		/* Determine the new protection. */
		switch (prot) {
#if (__ARM_VMSA__ > 7)
		case VM_PROT_EXECUTE:
			set_XO = TRUE;
			/* fall through */
#endif
		case VM_PROT_READ:
		case VM_PROT_READ | VM_PROT_EXECUTE:
			break;
		case VM_PROT_READ | VM_PROT_WRITE:
		case VM_PROT_ALL:
			return;		/* nothing to do */
		default:
			should_have_removed = TRUE;
		}
	}

	if (should_have_removed) {
		panic("%s: should have been a remove operation, "
		      "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
		      __FUNCTION__,
		      pmap, (void *)start, (void *)end, prot, options, args);
	}

#if DEVELOPMENT || DEBUG
	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
	if ((prot & VM_PROT_EXECUTE))
#endif
		set_NX = FALSE;
	else
		set_NX = TRUE;

	tte_p = pmap_tte(pmap, start);

	if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		bpte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte_p = &bpte_p[ptenum(start)];
		epte_p = bpte_p + arm_atop(end - start);

		for (pte_p = bpte_p;
		     pte_p < epte_p;
		     pte_p += PAGE_SIZE/ARM_PGBYTES) {
			pt_entry_t spte;
#if DEVELOPMENT || DEBUG
			boolean_t  force_write = FALSE;
#endif

			spte = *pte_p;

			if ((spte == ARM_PTE_TYPE_FAULT) ||
			    ARM_PTE_IS_COMPRESSED(spte)) {
				continue;
			}

			pmap_paddr_t	pa;
			int		pai = 0;
			boolean_t	managed = FALSE;

			while (!managed) {
				/*
				 * It may be possible for the pte to transition from managed
				 * to unmanaged in this timeframe; for now, elide the assert.
				 * We should break out as a consequence of checking pa_valid.
				 */
				// assert(!ARM_PTE_IS_COMPRESSED(spte));
				pa = pte_to_pa(spte);
				if (!pa_valid(pa))
					break;
				pai = (int)pa_index(pa);
				LOCK_PVH(pai);
				spte = *pte_p;
				pa = pte_to_pa(spte);
				if (pai == (int)pa_index(pa)) {
					managed = TRUE;
					break; // Leave the PVH locked as we will unlock it after we free the PTE
				}
				UNLOCK_PVH(pai);
			}

			if ((spte == ARM_PTE_TYPE_FAULT) ||
			    ARM_PTE_IS_COMPRESSED(spte)) {
				continue;
			}

			pt_entry_t	tmplate;

			if (pmap == kernel_pmap) {
#if DEVELOPMENT || DEBUG
				if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
					force_write = TRUE;
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
				} else
#endif
				{
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
				}
			} else {
#if DEVELOPMENT || DEBUG
				if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
					force_write = TRUE;
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWRW));
				} else
#endif
				{
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RORO));
				}
			}

			/*
			 * XXX Removing "NX" would
			 * grant "execute" access
			 * immediately, bypassing any
			 * checks VM might want to do
			 * in its soft fault path.
			 * pmap_protect() and co. are
			 * not allowed to increase
			 * access permissions.
			 */
#if (__ARM_VMSA__ == 7)
			if (set_NX)
				tmplate |= ARM_PTE_NX;
			else {
				/* do NOT clear "NX"! */
			}
#else
			if (set_NX)
				tmplate |= ARM_PTE_NX | ARM_PTE_PNX;
			else {
				if (pmap == kernel_pmap) {
					/*
					 * TODO: Run CS/Monitor checks here;
					 * should we be clearing PNX here?  Is
					 * this just for dtrace?
					 */
					tmplate &= ~ARM_PTE_PNX;
					tmplate |= ARM_PTE_NX;
				} else {
					/* do NOT clear "NX"! */
					tmplate |= ARM_PTE_PNX;
					if (set_XO) {
						tmplate &= ~ARM_PTE_APMASK;
						tmplate |= ARM_PTE_AP(AP_RONA);
					}
				}
			}
#endif

#if DEVELOPMENT || DEBUG
			if (force_write) {
				/*
				 * TODO: Run CS/Monitor checks here.
				 */
				if (managed) {
					/*
					 * We are marking the page as writable,
					 * so we consider it to be modified and
					 * referenced.
					 */
					pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
					tmplate |= ARM_PTE_AF;

					if (IS_REFFAULT_PAGE(pai)) {
						CLR_REFFAULT_PAGE(pai);
					}

					if (IS_MODFAULT_PAGE(pai)) {
						CLR_MODFAULT_PAGE(pai);
					}
				}
			} else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
				/*
				 * An immediate request for anything other than
				 * write should still mark the page as
				 * referenced if managed.
				 */
				if (managed) {
					pa_set_bits(pa, PP_ATTR_REFERENCED);
					tmplate |= ARM_PTE_AF;

					if (IS_REFFAULT_PAGE(pai)) {
						CLR_REFFAULT_PAGE(pai);
					}
				}
			}
#endif

			/* We do not expect to write fast fault the entry. */
			pte_set_ffr(tmplate, 0);

			/* TODO: Doesn't this need to worry about PNX? */
			if (((spte & ARM_PTE_NX) == ARM_PTE_NX) && (prot & VM_PROT_EXECUTE)) {
				CleanPoU_DcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
#ifdef __ARM_IC_NOALIAS_ICACHE__
				InvalidatePoU_IcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
#else
				if (!InvalidatePoU_Icache_Done) {
					InvalidatePoU_Icache();
					InvalidatePoU_Icache_Done = TRUE;
				}
#endif
			}

			WRITE_PTE_FAST(pte_p, tmplate);

			if (managed) {
				ASSERT_PVH_LOCKED(pai);
				UNLOCK_PVH(pai);
			}
		}

		FLUSH_PTE_RANGE(bpte_p, epte_p);
		PMAP_UPDATE_TLBS(pmap, start, end);
	}
}
void
pmap_protect_options(
	pmap_t pmap,
	vm_map_address_t b,
	vm_map_address_t e,
	vm_prot_t prot,
	unsigned int options,
	__unused void *args)
{
	vm_map_address_t l, beg;

	if ((b|e) & PAGE_MASK) {
		panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
		      pmap, (uint64_t)b, (uint64_t)e);
	}

#if DEVELOPMENT || DEBUG
	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
			pmap_remove_options(pmap, b, e, options);
			return;
		}
	} else
#endif
	{
		/* Determine the new protection. */
		switch (prot) {
		case VM_PROT_EXECUTE:
		case VM_PROT_READ:
		case VM_PROT_READ | VM_PROT_EXECUTE:
			break;
		case VM_PROT_READ | VM_PROT_WRITE:
		case VM_PROT_ALL:
			return;		/* nothing to do */
		default:
			pmap_remove_options(pmap, b, e, options);
			return;
		}
	}

	PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
		   VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
		   VM_KERNEL_ADDRHIDE(e));

	beg = b;

	while (beg < e) {
		l = ((beg + ARM_TT_TWIG_SIZE) & ~ARM_TT_TWIG_OFFMASK);

		if (l > e)
			l = e;

		pmap_protect_options_internal(pmap, beg, l, prot, options, args);

		beg = l;
	}

	PMAP_TRACE(PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
}
/* Map a (possibly) autogenned block */
kern_return_t
pmap_map_block(
	pmap_t pmap,
	addr64_t va,
	ppnum_t pa,
	uint32_t size,
	vm_prot_t prot,
	int attr,
	__unused unsigned int flags)
{
	kern_return_t	kr;
	addr64_t	original_va = va;
	uint32_t	page;

	for (page = 0; page < size; page++) {
		kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);

		if (kr != KERN_SUCCESS) {
			/*
			 * This will panic for now, as it is unclear that
			 * removing the mappings is correct.
			 */
			panic("%s: failed pmap_enter, "
			      "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
			      __FUNCTION__,
			      pmap, va, pa, size, prot, flags);

			pmap_remove(pmap, original_va, va - original_va);
			return kr;
		}

		va += PAGE_SIZE;
		pa++;
	}

	return KERN_SUCCESS;
}
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map eventually (must make
 *	forward progress eventually).
 */
kern_return_t
pmap_enter(
	pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired)
{
	return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
}
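/*
 * pmap_enter_pte installs a PTE value and keeps the wired accounting in
 * sync.  If a valid (non-compressed) entry is being replaced, the TLBs are
 * flushed for that VA; otherwise a barrier after the write is sufficient
 * because no stale translation can exist for the slot.
 */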
static inline void pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
{
	if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED)))
	{
		SInt16	*ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
		if (pte & ARM_PTE_WIRED) {
			OSAddAtomic16(1, ptd_wiredcnt_ptr);
			pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
		} else {
			OSAddAtomic16(-1, ptd_wiredcnt_ptr);
			pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
		}
	}

	if (*pte_p != ARM_PTE_TYPE_FAULT &&
	    !ARM_PTE_IS_COMPRESSED(*pte_p)) {
		WRITE_PTE(pte_p, pte);
		PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
	} else {
		WRITE_PTE(pte_p, pte);
		__asm__ volatile("isb");
	}
}
static pt_entry_t
wimg_to_pte(unsigned int wimg)
{
	pt_entry_t pte;

	switch (wimg & (VM_WIMG_MASK)) {
	case VM_WIMG_IO:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_POSTED:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_WCOMB:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_WTHRU:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
		break;
	case VM_WIMG_COPYBACK:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
		break;
	case VM_WIMG_INNERWBACK:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_INNER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
		break;
	default:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
	}

	return pte;
}
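/*
 * pmap_enter_options_internal: expand the pmap if needed, retire any
 * "compressed" marker that occupied the slot (adjusting the compressed and
 * footprint ledgers), construct the new PTE from the protection, wiring and
 * WIMG bits, and finally link the mapping onto the page's PV list.  The
 * Pmap_enter_retry / Pmap_enter_loop labels handle races with other threads
 * that may have modified the PTE or the PV head while locks were dropped.
 */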
static kern_return_t
pmap_enter_options_internal(
	pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options)
{
	pmap_paddr_t	pa = ptoa(pn);
	pt_entry_t	pte;
	pt_entry_t	spte;
	pt_entry_t	*pte_p;
	pv_entry_t	*pve_p;
	boolean_t	set_NX;
	boolean_t	set_XO = FALSE;
	boolean_t	refcnt_updated;
	boolean_t	wiredcnt_updated;
	unsigned int	wimg_bits;
	boolean_t	was_compressed, was_alt_compressed;

	if ((v) & PAGE_MASK) {
		panic("pmap_enter_options() pmap %p v 0x%llx\n",
		      pmap, (uint64_t)v);
	}

	if ((prot & VM_PROT_EXECUTE) && (prot & VM_PROT_WRITE) && (pmap == kernel_pmap)) {
		panic("pmap_enter_options(): WX request on kernel_pmap");
	}

#if DEVELOPMENT || DEBUG
	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
	if ((prot & VM_PROT_EXECUTE))
#endif
		set_NX = FALSE;
	else
		set_NX = TRUE;

#if (__ARM_VMSA__ > 7)
	if (prot == VM_PROT_EXECUTE) {
		set_XO = TRUE;
	}
#endif

	assert(pn != vm_page_fictitious_addr);

	refcnt_updated = FALSE;
	wiredcnt_updated = FALSE;
	pve_p = PV_ENTRY_NULL;
	was_compressed = FALSE;
	was_alt_compressed = FALSE;

	PMAP_LOCK(pmap);

	/*
	 *	Expand pmap to include this pte.  Assume that
	 *	pmap is always expanded to include enough hardware
	 *	pages to map one VM page.
	 */
	while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
		/* Must unlock to expand the pmap. */
		PMAP_UNLOCK(pmap);

		kern_return_t kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);

		if (kr != KERN_SUCCESS) {
			return kr;
		}

		PMAP_LOCK(pmap);
	}

	if (options & PMAP_OPTIONS_NOENTER) {
		PMAP_UNLOCK(pmap);
		return KERN_SUCCESS;
	}

Pmap_enter_retry:

	spte = *pte_p;

	if (ARM_PTE_IS_COMPRESSED(spte)) {
		/*
		 * "pmap" should be locked at this point, so this should
		 * not race with another pmap_enter() or pmap_remove_range().
		 */
		assert(pmap != kernel_pmap);

		/* one less "compressed" */
		OSAddAtomic64(-1, &pmap->stats.compressed);
		pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
				  PAGE_SIZE);

		was_compressed = TRUE;
		if (spte & ARM_PTE_COMPRESSED_ALT) {
			was_alt_compressed = TRUE;
			pmap_ledger_debit(pmap,
					  task_ledgers.alternate_accounting_compressed,
					  PAGE_SIZE);
		} else {
			/* was part of the footprint */
			pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
		}

		/* clear "compressed" marker */
		/* XXX is it necessary since we're about to overwrite it ? */
		WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
		spte = ARM_PTE_TYPE_FAULT;

		/*
		 * We're replacing a "compressed" marker with a valid PTE,
		 * so no change for "refcnt".
		 */
		refcnt_updated = TRUE;
	}

	if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
		pmap_remove_range(pmap, v, pte_p, pte_p + 1, 0);
		PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE);
	}

	pte = pa_to_pte(pa) | ARM_PTE_TYPE;

	/* Don't bother tracking wiring for kernel PTEs.  We use ARM_PTE_WIRED to track
	 * wired memory statistics for user pmaps, but kernel PTEs are assumed
	 * to be wired in nearly all cases.  For VM layer functionality, the wired
	 * count in vm_page_t is sufficient. */
	if (wired && pmap != kernel_pmap)
		pte |= ARM_PTE_WIRED;

#if (__ARM_VMSA__ == 7)
	if (set_NX)
		pte |= ARM_PTE_NX;
#else
	if (set_NX)
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
	else {
		if (pmap == kernel_pmap) {
			pte |= ARM_PTE_NX;
		} else {
			pte |= ARM_PTE_PNX;
		}
	}
#endif

	if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT)))
		wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
	else
		wimg_bits = pmap_cache_attributes(pn);

	pte |= wimg_to_pte(wimg_bits);

	if (pmap == kernel_pmap) {
#if __ARM_KERNEL_PROTECT__
		pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
		if (prot & VM_PROT_WRITE) {
			pte |= ARM_PTE_AP(AP_RWNA);
			pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
		} else {
			pte |= ARM_PTE_AP(AP_RONA);
			pa_set_bits(pa, PP_ATTR_REFERENCED);
		}
#if (__ARM_VMSA__ == 7)
		if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH))
			pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
#endif
	} else {
		if (!(pmap->nested)) {
			pte |= ARM_PTE_NG;
		} else if ((pmap->nested_region_asid_bitmap)
			   && (v >= pmap->nested_region_subord_addr)
			   && (v < (pmap->nested_region_subord_addr+pmap->nested_region_size))) {

			unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> ARM_TT_TWIG_SHIFT);

			if ((pmap->nested_region_asid_bitmap)
			    && testbit(index, (int *)pmap->nested_region_asid_bitmap))
				pte |= ARM_PTE_NG;
		}

		if (pmap->nested_pmap != NULL) {
			vm_map_address_t nest_vaddr;
			pt_entry_t	*nest_pte_p;

			nest_vaddr = v - pmap->nested_region_grand_addr + pmap->nested_region_subord_addr;

			if ((nest_vaddr >= pmap->nested_region_subord_addr)
			    && (nest_vaddr < (pmap->nested_region_subord_addr+pmap->nested_region_size))
			    && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
			    && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
			    && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p))
			    && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
				unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> ARM_TT_TWIG_SHIFT);

				if ((pmap->nested_pmap->nested_region_asid_bitmap)
				    && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {

					panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
					      nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
				}
			}
		}

		if (prot & VM_PROT_WRITE) {

			if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
				if (fault_type & VM_PROT_WRITE) {
					if (set_XO)
						pte |= ARM_PTE_AP(AP_RWNA);
					else
						pte |= ARM_PTE_AP(AP_RWRW);
					pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
				} else {
					if (set_XO)
						pte |= ARM_PTE_AP(AP_RONA);
					else
						pte |= ARM_PTE_AP(AP_RORO);
					pa_set_bits(pa, PP_ATTR_REFERENCED);
					pte_set_ffr(pte, 1);
				}
			} else {
				if (set_XO)
					pte |= ARM_PTE_AP(AP_RWNA);
				else
					pte |= ARM_PTE_AP(AP_RWRW);
				pa_set_bits(pa, PP_ATTR_REFERENCED);
			}
		} else {

			if (set_XO)
				pte |= ARM_PTE_AP(AP_RONA);
			else
				pte |= ARM_PTE_AP(AP_RORO);
			pa_set_bits(pa, PP_ATTR_REFERENCED);
		}
	}

	pte |= ARM_PTE_AF;

	volatile uint16_t *refcnt = NULL;
	volatile uint16_t *wiredcnt = NULL;
	if (pmap != kernel_pmap) {
		refcnt = &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].refcnt);
		wiredcnt = &(ptep_get_ptd(pte_p)->pt_cnt[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
		/* Bump the wired count to keep the PTE page from being reclaimed.  We need this because
		 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
		 * a new PV entry. */
		if (!wiredcnt_updated) {
			OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
			wiredcnt_updated = TRUE;
		}
		if (!refcnt_updated) {
			OSAddAtomic16(1, (volatile int16_t*)refcnt);
			refcnt_updated = TRUE;
		}
	}

	if (pa_valid(pa)) {
		pv_entry_t	**pv_h;
		int		pai;
		boolean_t	is_altacct, is_internal;

		is_internal = FALSE;
		is_altacct = FALSE;

		pai = (int)pa_index(pa);
		pv_h = pai_to_pvh(pai);

Pmap_enter_loop:

		LOCK_PVH(pai);

		if (pte == *pte_p) {
			/*
			 * This pmap_enter operation has been completed by another thread
			 * undo refcnt on pt and return
			 */
			if (refcnt != NULL) {
				assert(refcnt_updated);
				if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0)
					panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
			}
			UNLOCK_PVH(pai);
			goto Pmap_enter_return;
		} else if (pte_to_pa(*pte_p) == pa) {
			if (refcnt != NULL) {
				assert(refcnt_updated);
				if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0)
					panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
			}
			pmap_enter_pte(pmap, pte_p, pte, v);
			UNLOCK_PVH(pai);
			goto Pmap_enter_return;
		} else if (*pte_p != ARM_PTE_TYPE_FAULT) {
			/*
			 * pte has been modified by another thread
			 * hold refcnt on pt and retry pmap_enter operation
			 */
			UNLOCK_PVH(pai);
			goto Pmap_enter_retry;
		}
		if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
			pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
			/* 1st mapping: see what kind of page it is */
			if (options & PMAP_OPTIONS_INTERNAL) {
				SET_INTERNAL_PAGE(pai);
			} else {
				CLR_INTERNAL_PAGE(pai);
			}
			if ((options & PMAP_OPTIONS_INTERNAL) &&
			    (options & PMAP_OPTIONS_REUSABLE)) {
				SET_REUSABLE_PAGE(pai);
			} else {
				CLR_REUSABLE_PAGE(pai);
			}
			if (pmap != kernel_pmap &&
			    ((options & PMAP_OPTIONS_ALT_ACCT) ||
			     PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
			    IS_INTERNAL_PAGE(pai)) {
				/*
				 * Make a note to ourselves that this mapping is using alternative
				 * accounting. We'll need this in order to know which ledger to
				 * debit when the mapping is removed.
				 *
				 * The altacct bit must be set while the pv head is locked. Defer
				 * the ledger accounting until after we've dropped the lock.
				 */
				SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
				is_altacct = TRUE;
			} else {
				CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
			}
		} else {
			if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
				pt_entry_t	*pte1_p;

				/*
				 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
				 */
				pte1_p = pvh_ptep(pv_h);
				if((pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, &pve_p))) {
					goto Pmap_enter_loop;
				}
				pve_set_ptep(pve_p, pte1_p);
				pve_p->pve_next = PV_ENTRY_NULL;

				if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
					/*
					 * transfer "altacct" from
					 * pp_attr to this pve
					 */
					CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
					SET_ALTACCT_PAGE(pai, pve_p);
				}
				pvh_update_head(pv_h, pve_p, PVH_TYPE_PVEP);
				pve_p = PV_ENTRY_NULL;
			}
			/*
			 * Set up pv_entry for this new mapping and then
			 * add it to the list for this physical page.
			 */
			if((pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, &pve_p))) {
				goto Pmap_enter_loop;
			}
			pve_set_ptep(pve_p, pte_p);
			pve_p->pve_next = PV_ENTRY_NULL;

			pvh_add(pv_h, pve_p);

			if (pmap != kernel_pmap &&
			    ((options & PMAP_OPTIONS_ALT_ACCT) ||
			     PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
			    IS_INTERNAL_PAGE(pai)) {
				/*
				 * Make a note to ourselves that this
				 * mapping is using alternative
				 * accounting. We'll need this in order
				 * to know which ledger to debit when
				 * the mapping is removed.
				 *
				 * The altacct bit must be set while
				 * the pv head is locked. Defer the
				 * ledger accounting until after we've
				 * dropped the lock.
				 */
				SET_ALTACCT_PAGE(pai, pve_p);
				is_altacct = TRUE;
			}

			pve_p = PV_ENTRY_NULL;
		}

		pmap_enter_pte(pmap, pte_p, pte, v);

		if (pmap != kernel_pmap) {
			if (IS_REUSABLE_PAGE(pai) &&
			    !is_altacct) {
				assert(IS_INTERNAL_PAGE(pai));
				OSAddAtomic(+1, &pmap->stats.reusable);
				PMAP_STATS_PEAK(pmap->stats.reusable);
			} else if (IS_INTERNAL_PAGE(pai)) {
				OSAddAtomic(+1, &pmap->stats.internal);
				PMAP_STATS_PEAK(pmap->stats.internal);
				is_internal = TRUE;
			} else {
				OSAddAtomic(+1, &pmap->stats.external);
				PMAP_STATS_PEAK(pmap->stats.external);
			}
		}

		UNLOCK_PVH(pai);

		if (pmap != kernel_pmap) {
			pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);

			if (is_internal) {
				/*
				 * Make corresponding adjustments to
				 * phys_footprint statistics.
				 */
				pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
				if (is_altacct) {
					/*
					 * If this page is internal and
					 * in an IOKit region, credit
					 * the task's total count of
					 * dirty, internal IOKit pages.
					 * It should *not* count towards
					 * the task's total physical
					 * memory footprint, because
					 * this entire region was
					 * already billed to the task
					 * at the time the mapping was
					 * created.
					 *
					 * Put another way, this is
					 * internal++ and
					 * alternate_accounting++, so
					 * net effect on phys_footprint
					 * is 0. That means: don't
					 * touch phys_footprint here.
					 */
					pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
				} else {
					pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
				}
			}
		}

		OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
		if (pmap->stats.resident_count > pmap->stats.resident_max)
			pmap->stats.resident_max = pmap->stats.resident_count;
	} else {
		pmap_enter_pte(pmap, pte_p, pte, v);
	}

Pmap_enter_return:

#if CONFIG_PGTRACE
	if (pgtrace_enabled) {
		// Clone and invalidate original mapping if eligible
		for (int i = 0; i < PAGE_RATIO; i++) {
			pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES*i, 0, 0);
		}
	}
#endif

	if (pve_p != PV_ENTRY_NULL)
		pv_free(pve_p);

	if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0))
		panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);

	PMAP_UNLOCK(pmap);

	return KERN_SUCCESS;
}
kern_return_t
pmap_enter_options(
	pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options,
	__unused void *arg)
{
	kern_return_t kr = KERN_FAILURE;

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
		   VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);

	kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);

	PMAP_TRACE(PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);

	return kr;
}
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *		The mapping must already exist in the pmap.
 */
static void
pmap_change_wiring_internal(
	pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired)
{
	pt_entry_t	*pte_p;
	pmap_paddr_t	pa;

	/* Don't bother tracking wiring for kernel PTEs.  We use ARM_PTE_WIRED to track
	 * wired memory statistics for user pmaps, but kernel PTEs are assumed
	 * to be wired in nearly all cases.  For VM layer functionality, the wired
	 * count in vm_page_t is sufficient. */
	if (pmap == kernel_pmap) {
		return;
	}

	pte_p = pmap_pte(pmap, v);
	assert(pte_p != PT_ENTRY_NULL);
	pa = pte_to_pa(*pte_p);
	LOCK_PVH((int)pa_index(pa));

	if (wired && !pte_is_wired(*pte_p)) {
		pte_set_wired(pte_p, wired);
		OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
		pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
	} else if (!wired && pte_is_wired(*pte_p)) {
		PMAP_STATS_ASSERTF(pmap->stats.wired_count >= 1, pmap, "stats.wired_count %d", pmap->stats.wired_count);
		pte_set_wired(pte_p, wired);
		OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
		pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
	}

	UNLOCK_PVH((int)pa_index(pa));
}

void
pmap_change_wiring(
	pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired)
{
	pmap_change_wiring_internal(pmap, v, wired);
}
static ppnum_t
pmap_find_phys_internal(
	pmap_t pmap,
	addr64_t va)
{
	ppnum_t ppn = 0;

	if (pmap != kernel_pmap) {
		PMAP_LOCK(pmap);
	}

	ppn = pmap_vtophys(pmap, va);

	if (pmap != kernel_pmap) {
		PMAP_UNLOCK(pmap);
	}

	return ppn;
}

ppnum_t
pmap_find_phys(
	pmap_t pmap,
	addr64_t va)
{
	pmap_paddr_t pa = 0;

	if (pmap == kernel_pmap)
		pa = mmu_kvtop(va);
	else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map)))
		pa = mmu_uvtop(va);

	if (pa) return (ppnum_t)(pa >> PAGE_SHIFT);

	if (not_in_kdp) {
		return pmap_find_phys_internal(pmap, va);
	} else {
		return pmap_vtophys(pmap, va);
	}
}
pmap_paddr_t
kvtophys(
	vm_offset_t va)
{
	pmap_paddr_t pa;

	pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va)) << PAGE_SHIFT;
	if (pa)
		pa |= (va & PAGE_MASK);

	return ((pmap_paddr_t)pa);
}
ppnum_t
pmap_vtophys(
	pmap_t pmap,
	addr64_t va)
{
	ppnum_t ppn = 0;

	if ((va < pmap->min) || (va >= pmap->max)) {
		return 0;
	}

#if (__ARM_VMSA__ == 7)
	tt_entry_t	*tte_p, tte;
	pt_entry_t	*pte_p;

	tte_p = pmap_tte(pmap, va);
	if (tte_p == (tt_entry_t *) NULL)
		return (ppnum_t) 0;

	tte = *tte_p;
	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		pte_p = (pt_entry_t *) ttetokv(tte) + ptenum(va);
		ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
#if DEVELOPMENT || DEBUG
		if (ppn != 0 &&
		    ARM_PTE_IS_COMPRESSED(*pte_p)) {
			panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
			      pmap, va, pte_p, (uint64_t) (*pte_p), ppn);
		}
#endif /* DEVELOPMENT || DEBUG */
	} else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
		if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER)
			ppn = (ppnum_t) atop(suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK));
		else
			ppn = (ppnum_t) atop(sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK));
	}
#else
	tt_entry_t	*ttp;
	tt_entry_t	tte;

	/* Level 0 currently unused */

#if __ARM64_TWO_LEVEL_PMAP__
	/* We have no L1 entry; go straight to the L2 entry */
	ttp = pmap_tt2e(pmap, va);
	tte = *ttp;
#else
	/* Get first-level (1GB) entry */
	ttp = pmap_tt1e(pmap, va);
	tte = *ttp;
	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID))
		return (ppnum_t) 0;

	tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, va)];
#endif
	if ((tte & ARM_TTE_VALID) != (ARM_TTE_VALID))
		return (ppnum_t) 0;

	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
		ppn = (ppnum_t) atop((tte & ARM_TTE_BLOCK_L2_MASK)| (va & ARM_TT_L2_OFFMASK));
	} else {
		tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, va)];
		ppn = (ppnum_t) atop((tte & ARM_PTE_MASK)| (va & ARM_TT_L3_OFFMASK));
	}
#endif

	return ppn;
}
static vm_offset_t
pmap_extract_internal(
	pmap_t pmap,
	vm_map_address_t va)
{
	pmap_paddr_t	pa = 0;
	ppnum_t		ppn = 0;

	ppn = pmap_vtophys(pmap, va);

	if (ppn != 0)
		pa = ptoa(ppn)| ((va) & PAGE_MASK);

	return pa;
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_offset_t
pmap_extract(
	pmap_t pmap,
	vm_map_address_t va)
{
	pmap_paddr_t	pa = 0;

	if (pmap == kernel_pmap)
		pa = mmu_kvtop(va);
	else if (pmap == vm_map_pmap(current_thread()->map))
		pa = mmu_uvtop(va);

	if (pa) return pa;

	return pmap_extract_internal(pmap, va);
}
/*
 *	pmap_init_pte_page - Initialize a page table page.
 */
void
pmap_init_pte_page(
	pmap_t pmap,
	pt_entry_t *pte_p,
	vm_offset_t va,
	unsigned int ttlevel,
	boolean_t alloc_ptd)
{
	pt_desc_t	*ptdp;

	ptdp = *(pt_desc_t **)pai_to_pvh(pa_index((((vm_offset_t)pte_p) - gVirtBase + gPhysBase)));

	if (ptdp == NULL) {
		if (alloc_ptd) {
			/*
			 * This path should only be invoked from arm_vm_init.  If we are emulating 16KB pages
			 * on 4KB hardware, we may already have allocated a page table descriptor for a
			 * bootstrap request, so we check for an existing PTD here.
			 */
			ptdp = ptd_alloc(pmap);
			*(pt_desc_t **)pai_to_pvh(pa_index((((vm_offset_t)pte_p) - gVirtBase + gPhysBase))) = ptdp;
		} else {
			panic("pmap_init_pte_page(): pte_p %p\n", pte_p);
		}
	}

	pmap_init_pte_page_internal(pmap, pte_p, va, ttlevel, &ptdp);
}
/*
 *	pmap_init_pte_page_internal - Initialize page table page and page table descriptor
 */
void
pmap_init_pte_page_internal(
	pmap_t pmap,
	pt_entry_t *pte_p,
	vm_offset_t va,
	unsigned int ttlevel,
	pt_desc_t **ptdp)
{
	bzero(pte_p, ARM_PGBYTES);
	// below barrier ensures the page zeroing is visible to PTW before
	// it is linked to the PTE of previous level
	__asm__ volatile("DMB ST" : : : "memory");
	ptd_init(*ptdp, pmap, va, ttlevel, pte_p);
}
/*
 * pmap_init_pte_static_page - for static mappings to a known contiguous range of pa's
 * Called from arm_vm_init().
 */
void
pmap_init_pte_static_page(
	__unused pmap_t pmap,
	pt_entry_t *pte_p,
	pmap_paddr_t pa)
{
#if (__ARM_VMSA__ == 7)
	unsigned int	i;
	pt_entry_t	*pte_cur;

	for (i = 0, pte_cur = pte_p;
	     i < (ARM_PGBYTES / sizeof(*pte_p));
	     i++, pa += PAGE_SIZE) {
		if (pa >= avail_end) {
			/* We don't want to map memory xnu does not own through this routine. */
			break;
		}

		*pte_cur = pa_to_pte(pa)
			| ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_SH | ARM_PTE_AP(AP_RONA)
			| ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
		pte_cur++;
	}
#else
	unsigned int	i;
	pt_entry_t	*pte_cur;
	pt_entry_t	template;

	template = ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_AP(AP_RONA) | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) | ARM_PTE_NX;

	for (i = 0, pte_cur = pte_p;
	     i < (ARM_PGBYTES / sizeof(*pte_p));
	     i++, pa += PAGE_SIZE) {
		if (pa >= avail_end) {
			/* We don't want to map memory xnu does not own through this routine. */
			break;
		}

		/* TEST_PAGE_RATIO_4 may be pre-processor defined to 0 */
		__unreachable_ok_push
		if (TEST_PAGE_RATIO_4) {
			*pte_cur = pa_to_pte(pa) | template;
			*(pte_cur+1) = pa_to_pte(pa+0x1000) | template;
			*(pte_cur+2) = pa_to_pte(pa+0x2000) | template;
			*(pte_cur+3) = pa_to_pte(pa+0x3000) | template;
			pte_cur += 4;
		} else {
			*pte_cur = pa_to_pte(pa) | template;
			pte_cur++;
		}
		__unreachable_ok_pop
	}
#endif
	bzero(pte_cur, ARM_PGBYTES - ((vm_offset_t)pte_cur - (vm_offset_t)pte_p));
}
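/*
 * On __ARM_VMSA__ == 7, expansion may first grow the 1-page TT1 into a
 * 2-page table, publishing the new table and retiring the old one via
 * prev_tte, and then fills the four L1 entries backed by one allocated L2
 * page.  On ARMv8 it walks down from the existing levels, allocating an L2
 * and/or L3 table as needed until the requested level exists.
 */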
/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new memory for the default (COARSE) translation table
 *	entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
 *	also allocates space for the corresponding pv entries.
 *
 *	Nothing should be locked.
 */
static kern_return_t
pmap_expand(
	pmap_t pmap,
	vm_map_address_t v,
	unsigned int options,
	unsigned int level)
{
#if (__ARM_VMSA__ == 7)
	vm_offset_t	pa;
	tt_entry_t	*tte_p;
	tt_entry_t	*tt_p;
	unsigned int	i;

	while (tte_index(pmap, v) >= pmap->tte_index_max) {
		tte_p = pmap_tt1_allocate(pmap, 2*ARM_PGBYTES, ((options & PMAP_OPTIONS_NOWAIT)? PMAP_TT_ALLOCATE_NOWAIT : 0));
		if (tte_p == (tt_entry_t *)0)
			return KERN_RESOURCE_SHORTAGE;

		if (pmap->tte_index_max > NTTES) {
			/* Someone else already expanded this pmap. */
			pmap_tt1_deallocate(pmap, tte_p, 2*ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
			break;
		}

		simple_lock(&pmap->tt1_lock);
		for (i = 0; i < pmap->tte_index_max; i++)
			tte_p[i] = pmap->tte[i];
		for (i = NTTES; i < 2*NTTES; i++)
			tte_p[i] = ARM_TTE_TYPE_FAULT;

		pmap->prev_tte = pmap->tte;
		pmap->tte = tte_p;
		pmap->ttep = ml_static_vtop((vm_offset_t)pmap->tte);
#ifndef __ARM_L1_PTW__
		CleanPoU_DcacheRegion((vm_offset_t) pmap->tte, 2*NTTES * sizeof(tt_entry_t));
#endif
		__builtin_arm_dsb(DSB_ISH);

		pmap->tte_index_max = 2*NTTES;
		pmap->stamp = hw_atomic_add(&pmap_stamp, 1);

		for (i = 0; i < NTTES; i++)
			pmap->prev_tte[i] = ARM_TTE_TYPE_FAULT;
#ifndef __ARM_L1_PTW__
		CleanPoU_DcacheRegion((vm_offset_t) pmap->prev_tte, NTTES * sizeof(tt_entry_t));
#endif
		__builtin_arm_dsb(DSB_ISH);

		simple_unlock(&pmap->tt1_lock);

		pmap_set_pmap(pmap, current_thread());
	}

	if (level == 1) {
		return (KERN_SUCCESS);
	}

	{
		tt_entry_t	*tte_next_p;

		pa = 0;
		if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
			return (KERN_SUCCESS);
		}
		tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
		for (i = 0, tte_next_p = tte_p; i < 4; i++) {
			if (tte_to_pa(*tte_next_p)) {
				pa = tte_to_pa(*tte_next_p);
				break;
			}
			tte_next_p++;
		}
		pa = pa & ~PAGE_MASK;
		if (pa) {
			tte_p = &pmap->tte[ttenum(v)];
			*tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
#ifndef __ARM_L1_PTW__
			CleanPoU_DcacheRegion((vm_offset_t) tte_p, sizeof(tt_entry_t));
#endif
			return (KERN_SUCCESS);
		}
	}
	v = v & ~ARM_TT_L1_PT_OFFMASK;

	while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
		/*
		 *	Allocate a VM page for the level 2 page table entries.
		 */
		while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
			if(options & PMAP_OPTIONS_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}
			VM_PAGE_WAIT();
		}

		/*
		 *	See if someone else expanded us first
		 */
		if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
			tt_entry_t	*tte_next_p;

			pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
			pa = kvtophys((vm_offset_t)tt_p);
#ifndef __ARM_L1_PTW__
			CleanPoU_DcacheRegion((vm_offset_t) phystokv(pa), PAGE_SIZE);
#endif
			tte_p = &pmap->tte[ttenum(v)];
			for (i = 0, tte_next_p = tte_p; i < 4; i++) {
				*tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
				tte_next_p++;
				pa = pa + 0x400;
			}
#ifndef __ARM_L1_PTW__
			CleanPoU_DcacheRegion((vm_offset_t) tte_p, 4*sizeof(tt_entry_t));
#endif
			pa = 0x0ULL;
			tt_p = (tt_entry_t *)NULL;
		}
		if (tt_p != (tt_entry_t *)NULL) {
			pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
			tt_p = (tt_entry_t *)NULL;
		}
	}
	return (KERN_SUCCESS);
#else
	pmap_paddr_t	pa;
#if __ARM64_TWO_LEVEL_PMAP__
	/* If we are using a two level page table, we'll start at L2. */
	unsigned int	ttlevel = 2;
#else
	/* Otherwise, we start at L1 (we use 3 levels by default). */
	unsigned int	ttlevel = 1;
#endif
	tt_entry_t	*tte_p;
	tt_entry_t	*tt_p;

	pa = 0x0ULL;
	tt_p = (tt_entry_t *)NULL;

	for (; ttlevel < level; ttlevel++) {

		if (ttlevel == 1) {
			if ((pmap_tt2e(pmap, v) == PT_ENTRY_NULL)) {
				while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
					if(options & PMAP_OPTIONS_NOWAIT) {
						return KERN_RESOURCE_SHORTAGE;
					}
					VM_PAGE_WAIT();
				}
				if ((pmap_tt2e(pmap, v) == PT_ENTRY_NULL)) {
					pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
					pa = kvtophys((vm_offset_t)tt_p);
					tte_p = pmap_tt1e(pmap, v);
					*tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
					pa = 0x0ULL;
					tt_p = (tt_entry_t *)NULL;
					if ((pmap == kernel_pmap) && (VM_MIN_KERNEL_ADDRESS < 0x00000000FFFFFFFFULL))
						current_pmap()->tte[v>>ARM_TT_L1_SHIFT] = kernel_pmap->tte[v>>ARM_TT_L1_SHIFT];
				}
			}
		} else if (ttlevel == 2) {
			if (pmap_tt3e(pmap, v) == PT_ENTRY_NULL) {
				while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L3_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
					if(options & PMAP_OPTIONS_NOWAIT) {
						return KERN_RESOURCE_SHORTAGE;
					}
					VM_PAGE_WAIT();
				}
				if ((pmap_tt3e(pmap, v) == PT_ENTRY_NULL)) {
					pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L3_LEVEL, FALSE);
					pa = kvtophys((vm_offset_t)tt_p);
					tte_p = pmap_tt2e(pmap, v);
					*tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
					pa = 0x0ULL;
					tt_p = (tt_entry_t *)NULL;
				}
			}
		}

		if (tt_p != (tt_entry_t *)NULL) {
			pmap_tt_deallocate(pmap, tt_p, ttlevel+1);
			tt_p = (tt_entry_t *)NULL;
		}
	}

	return (KERN_SUCCESS);
#endif
}
/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 */
void
pmap_collect(pmap_t pmap)
{
	if (pmap == PMAP_NULL)
		return;

	if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
		/* TODO: Scan for vm page assigned to top level page tables with no reference */
	}
}
/*
 *	Pmap garbage collection
 *	Called by the pageout daemon when pages are scarce.
 */
void
pmap_gc(
	void)
{
	pmap_t		pmap, pmap_next;
	boolean_t	gc_wait;

	if (pmap_gc_allowed &&
	    (pmap_gc_allowed_by_time_throttle ||
	     pmap_gc_forced)) {
		pmap_gc_forced = FALSE;
		pmap_gc_allowed_by_time_throttle = FALSE;
		simple_lock(&pmaps_lock);
		pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
		while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
			if (!(pmap->gc_status & PMAP_GC_INFLIGHT))
				pmap->gc_status |= PMAP_GC_INFLIGHT;
			simple_unlock(&pmaps_lock);

			pmap_collect(pmap);

			simple_lock(&pmaps_lock);
			gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
			pmap->gc_status &= ~(PMAP_GC_INFLIGHT|PMAP_GC_WAIT);
			pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
			if (gc_wait) {
				if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next))
					pmap_next->gc_status |= PMAP_GC_INFLIGHT;
				simple_unlock(&pmaps_lock);
				thread_wakeup((event_t) & pmap->gc_status);
				simple_lock(&pmaps_lock);
			}
			pmap = pmap_next;
		}
		simple_unlock(&pmaps_lock);
	}
}
/*
 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
 */
uint64_t
pmap_release_pages_fast(void)
{
	return 0;
}

/*
 *      By default, don't attempt pmap GC more frequently
 *      than once / 1 minutes.
 */
void
compute_pmap_gc_throttle(
	void *arg __unused)
{
	pmap_gc_allowed_by_time_throttle = TRUE;
}
/*
 * pmap_attribute_cache_sync(vm_offset_t pa)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 */
kern_return_t
pmap_attribute_cache_sync(
	ppnum_t pp,
	vm_size_t size,
	__unused vm_machine_attribute_t attribute,
	__unused vm_machine_attribute_val_t * value)
{
	if (size > PAGE_SIZE) {
		panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
	} else
		cache_sync_page(pp);

	return KERN_SUCCESS;
}
/*
 * pmap_sync_page_data_phys(ppnum_t pp)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 */
void
pmap_sync_page_data_phys(
	ppnum_t pp)
{
	cache_sync_page(pp);
}

/*
 * pmap_sync_page_attributes_phys(ppnum_t pp)
 *
 * Write back and invalidate all cachelines on a physical page.
 */
void
pmap_sync_page_attributes_phys(
	ppnum_t pp)
{
	flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
}
/* temporary workaround */
boolean_t
coredumpok(
	vm_map_t map,
	vm_offset_t va)
{
	pt_entry_t	*pte_p;
	pt_entry_t	spte;

	pte_p = pmap_pte(map->pmap, va);
	if (pte_p == PT_ENTRY_NULL)
		return FALSE;
	spte = *pte_p;
	return ((spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT));
}

void
fillPage(
	ppnum_t pn,
	unsigned int fill)
{
	unsigned int	*addr;
	int		count;

	addr = (unsigned int *) phystokv(ptoa(pn));
	count = PAGE_SIZE / sizeof(unsigned int);
	while (count--)
		*addr++ = fill;
}
extern void mapping_set_mod(ppnum_t pn);

void
mapping_set_mod(
	ppnum_t pn)
{
	pmap_set_modify(pn);
}

extern void mapping_set_ref(ppnum_t pn);

void
mapping_set_ref(
	ppnum_t pn)
{
	pmap_set_reference(pn);
}
/*
 *	Clear specified attribute bits.
 *
 *	Try to force an arm_fast_fault() for all mappings of
 *	the page - to force attributes to be set again at fault time.
 *	If the forcing succeeds, clear the cached bits at the head.
 *	Otherwise, something must have been wired, so leave the cached
 *	attributes alone.
 */
static void
phys_attribute_clear_internal(
	ppnum_t pn,
	unsigned int bits,
	int options,
	void *arg)
{
	pmap_paddr_t	pa = ptoa(pn);
	vm_prot_t	allow_mode = VM_PROT_ALL;

	if ((bits & PP_ATTR_MODIFIED) &&
	    (options & PMAP_OPTIONS_NOFLUSH) &&
	    (arg == NULL)) {
		panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
		      "should not clear 'modified' without flushing TLBs\n",
		      pn, bits, options, arg);
	}

	assert(pn != vm_page_fictitious_addr);
	if (bits & PP_ATTR_REFERENCED)
		allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
	if (bits & PP_ATTR_MODIFIED)
		allow_mode &= ~VM_PROT_WRITE;

	if (bits == PP_ATTR_NOENCRYPT) {
		/*
		 * We short circuit this case; it should not need to
		 * invoke arm_force_fast_fault, so just clear and
		 * return.  On ARM, this bit is just a debugging aid.
		 */
		pa_clear_bits(pa, bits);
		return;
	}

	if (arm_force_fast_fault_internal(pn, allow_mode, options))
		pa_clear_bits(pa, bits);
	return;
}
static void
phys_attribute_clear(
	ppnum_t pn,
	unsigned int bits,
	int options,
	void *arg)
{
	/*
	 * Do we really want this tracepoint?  It will be extremely chatty.
	 * Also, should we have a corresponding trace point for the set path?
	 */
	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);

	phys_attribute_clear_internal(pn, bits, options, arg);

	PMAP_TRACE(PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
}
/*
 *	Set specified attribute bits.
 *
 *	Set cached value in the pv head because we have
 *	no per-mapping hardware support for referenced and
 *	modify bits.
 */
static void
phys_attribute_set_internal(
	ppnum_t pn,
	unsigned int bits)
{
	pmap_paddr_t	pa = ptoa(pn);
	assert(pn != vm_page_fictitious_addr);

	pa_set_bits(pa, bits);
	return;
}

static void
phys_attribute_set(
	ppnum_t pn,
	unsigned int bits)
{
	phys_attribute_set_internal(pn, bits);
}
/*
 *	Check specified attribute bits.
 *
 *	use the software cached bits (since no hw support).
 */
static boolean_t
phys_attribute_test(
	ppnum_t pn,
	unsigned int bits)
{
	pmap_paddr_t	pa = ptoa(pn);
	assert(pn != vm_page_fictitious_addr);
	return pa_test_bits(pa, bits);
}
/*
 *	Set the modify/reference bits on the specified physical page.
 */
void
pmap_set_modify(ppnum_t pn)
{
	phys_attribute_set(pn, PP_ATTR_MODIFIED);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(
	ppnum_t pn)
{
	phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
 */
boolean_t
pmap_is_modified(
	ppnum_t pn)
{
	return phys_attribute_test(pn, PP_ATTR_MODIFIED);
}
/*
 *	Set the reference bit on the specified physical page.
 */
static void
pmap_set_reference(
	ppnum_t pn)
{
	phys_attribute_set(pn, PP_ATTR_REFERENCED);
}

/*
 *	Clear the reference bits on the specified physical page.
 */
void
pmap_clear_reference(
	ppnum_t pn)
{
	phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
}

/*
 * pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */
boolean_t
pmap_is_referenced(
	ppnum_t pn)
{
	return phys_attribute_test(pn, PP_ATTR_REFERENCED);
}
/*
 * pmap_get_refmod(phys)
 *  returns the referenced and modified bits of the specified
 *  physical page.
 */
unsigned int
pmap_get_refmod(
	ppnum_t pn)
{
	return (((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
		| ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0));
}

/*
 * pmap_clear_refmod(phys, mask)
 *  clears the referenced and modified bits as specified by the mask
 *  of the specified physical page.
 */
void
pmap_clear_refmod_options(
	ppnum_t pn,
	unsigned int mask,
	unsigned int options,
	void *arg)
{
	unsigned int	bits;

	bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
	       ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
	phys_attribute_clear(pn, bits, options, arg);
}

void
pmap_clear_refmod(
	ppnum_t pn,
	unsigned int mask)
{
	pmap_clear_refmod_options(pn, mask, 0, NULL);
}
unsigned int
pmap_disconnect_options(
	ppnum_t pn,
	unsigned int options,
	void *arg)
{
	if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
		/*
		 * On ARM, the "modified" bit is managed by software, so
		 * we know up-front if the physical page is "modified",
		 * without having to scan all the PTEs pointing to it.
		 * The caller should have made the VM page "busy" so no one
		 * should be able to establish any new mapping and "modify"
		 * the page behind us.
		 */
		if (pmap_is_modified(pn)) {
			/*
			 * The page has been modified and will be sent to
			 * the VM compressor.
			 */
			options |= PMAP_OPTIONS_COMPRESSOR;
		} else {
			/*
			 * The page hasn't been modified and will be freed
			 * instead of compressed.
			 */
		}
	}

	/* disconnect the page */
	pmap_page_protect_options(pn, 0, options, arg);

	/* return ref/chg status */
	return (pmap_get_refmod(pn));
}

/*
 *	Routine:	pmap_disconnect
 *
 *	Function:
 *		Disconnect all mappings for this page and return reference and change status
 *		in generic format.
 */
unsigned int
pmap_disconnect(
	ppnum_t pn)
{
	pmap_page_protect(pn, 0);	/* disconnect the page */
	return (pmap_get_refmod(pn));	/* return ref/chg status */
}
boolean_t
pmap_has_managed_page(ppnum_t first, ppnum_t last)
{
	if (ptoa(first) >= vm_last_phys)	return (FALSE);
	if (ptoa(last) < vm_first_phys)		return (FALSE);

	return (TRUE);
}
/*
 * The state maintained by the noencrypt functions is used as a
 * debugging aid on ARM.  This incurs some overhead on the part
 * of the caller.  A special case check in phys_attribute_clear
 * (the most expensive path) currently minimizes this overhead,
 * but stubbing these functions out on RELEASE kernels yields
 * further wins.
 */
boolean_t
pmap_is_noencrypt(
	ppnum_t pn)
{
#if DEVELOPMENT || DEBUG
	boolean_t result = FALSE;

	if (!pa_valid(ptoa(pn))) return FALSE;

	result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));

	return result;
#else
#pragma unused(pn)
	return FALSE;
#endif
}

void
pmap_set_noencrypt(
	ppnum_t pn)
{
#if DEVELOPMENT || DEBUG
	if (!pa_valid(ptoa(pn))) return;

	phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
#else
#pragma unused(pn)
#endif
}

void
pmap_clear_noencrypt(
	ppnum_t pn)
{
#if DEVELOPMENT || DEBUG
	if (!pa_valid(ptoa(pn))) return;

	phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
#else
#pragma unused(pn)
#endif
}
void
pmap_lock_phys_page(ppnum_t pn)
{
	int		pai;
	pmap_paddr_t	phys = ptoa(pn);

	if (pa_valid(phys)) {
		pai = (int)pa_index(phys);
		LOCK_PVH(pai);
	} else
		simple_lock(&phys_backup_lock);
}

void
pmap_unlock_phys_page(ppnum_t pn)
{
	int		pai;
	pmap_paddr_t	phys = ptoa(pn);

	if (pa_valid(phys)) {
		pai = (int)pa_index(phys);
		UNLOCK_PVH(pai);
	} else
		simple_unlock(&phys_backup_lock);
}
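/*
 * Program the user translation table base for this CPU.  On __ARM_VMSA__ == 7
 * this also drops the CPU's reference on the outgoing user pmap (freeing a
 * saved prev_tte once no CPU references it) and adjusts TTBCR.N for the
 * 1-page vs 2-page TT1 layouts; otherwise it simply loads the table base and
 * ASID into TTBR0 (or the invalid table for the kernel pmap).
 */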
static void
pmap_switch_user_ttb_internal(
	pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	pmap_cpu_data_t	*cpu_data_ptr;

	cpu_data_ptr = pmap_get_cpu_data();

	if ((cpu_data_ptr->cpu_user_pmap != PMAP_NULL)
	    && (cpu_data_ptr->cpu_user_pmap != kernel_pmap)) {
		unsigned int	c;

		c = hw_atomic_sub((volatile uint32_t *)&cpu_data_ptr->cpu_user_pmap->cpu_ref, 1);
		if ((c == 0) && (cpu_data_ptr->cpu_user_pmap->prev_tte != 0)) {
			/* We saved off the old 1-page tt1 in pmap_expand() in case other cores were still using it.
			 * Now that the user pmap's cpu_ref is 0, we should be able to safely free it.*/
			tt_entry_t	*tt_entry;

			tt_entry = cpu_data_ptr->cpu_user_pmap->prev_tte;
			cpu_data_ptr->cpu_user_pmap->prev_tte = (tt_entry_t *) NULL;
			pmap_tt1_deallocate(cpu_data_ptr->cpu_user_pmap, tt_entry, ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
		}
	}
	cpu_data_ptr->cpu_user_pmap = pmap;
	cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
	(void) hw_atomic_add((volatile uint32_t *)&pmap->cpu_ref, 1);

#if MACH_ASSERT && __ARM_USER_PROTECT__
	{
		unsigned int ttbr0_val, ttbr1_val;
		__asm__ volatile("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
		__asm__ volatile("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
		if (ttbr0_val != ttbr1_val) {
			panic("Misaligned ttbr0 %08X\n", ttbr0_val);
		}
	}
#endif
	if (pmap->tte_index_max == NTTES) {
		/* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x40000000 */
		__asm__ volatile("mcr p15,0,%0,c2,c0,2" : : "r"(2));
		__asm__ volatile("isb");
#if !__ARM_USER_PROTECT__
		set_mmu_ttb(pmap->ttep);
#endif
	} else {
#if !__ARM_USER_PROTECT__
		set_mmu_ttb(pmap->ttep);
#endif
		/* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x80000000 */
		__asm__ volatile("mcr p15,0,%0,c2,c0,2" : : "r"(1));
		__asm__ volatile("isb");
#if MACH_ASSERT && __ARM_USER_PROTECT__
		if (pmap->ttep & 0x1000) {
			panic("Misaligned ttbr0 %08X\n", pmap->ttep);
		}
#endif
	}

#if !__ARM_USER_PROTECT__
	set_context_id(pmap->asid);
#endif

#else /* (__ARM_VMSA__ == 7) */

	pmap_get_cpu_data()->cpu_user_pmap = pmap;
	pmap_get_cpu_data()->cpu_user_pmap_stamp = pmap->stamp;

	set_context_id(pmap->asid);	/* Not required */

	if (pmap == kernel_pmap) {
		set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
	} else {
		set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK)|(((uint64_t)pmap->asid) << TTBR_ASID_SHIFT));
	}
#endif
}

void
pmap_switch_user_ttb(
	pmap_t pmap)
{
	pmap_switch_user_ttb_internal(pmap);
}
7228 * Try to "intuit" whether we need to raise a VM_PROT_WRITE fault
7229 * for the given address when a "swp" instruction raised the fault.
7230 * We have to look at the existing pte for the address to see
7231 * if it needs to get bumped, or just added. If just added, do it
7232 * as a read-only mapping first (this could result in extra faults -
7233 * but better that than extra copy-on-write evaluations).
7236 #if (__ARM_VMSA__ == 7)
7238 arm_swap_readable_type(
7239 vm_map_address_t addr
,
7246 ptep
= pmap_pte(current_pmap(), addr
);
7247 if (ptep
== PT_ENTRY_NULL
)
7251 if (spte
== ARM_PTE_TYPE_FAULT
||
7252 ARM_PTE_IS_COMPRESSED(spte
))
7255 /* get the access permission bitmaps */
7256 /* (all subpages should be the same) */
7257 ap
= (spte
& ARM_PTE_APMASK
);
7259 if (spsr
& 0xf) { /* Supervisor mode */
7260 panic("arm_swap_readable_type supv");
7262 } else { /* User mode */
7263 if ((ap
== ARM_PTE_AP(AP_RWRW
)) || (ap
== ARM_PTE_AP(AP_RORO
)))
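/*
 * Illustrative sketch (not part of the original source): the user-mode check
 * above reduces to "is this mapping user-accessible at all?".  Assuming the
 * ARM_PTE_AP()/AP_* encodings used elsewhere in this file, a standalone
 * predicate for that test could look like this hypothetical helper.
 */
static inline boolean_t
pte_ap_is_user_accessible(pt_entry_t spte)
{
	pt_entry_t ap = spte & ARM_PTE_APMASK;

	/* AP_RWRW: user read/write; AP_RORO: user read-only. */
	return ((ap == ARM_PTE_AP(AP_RWRW)) || (ap == ARM_PTE_AP(AP_RORO)));
}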
/*
 *	Routine:	arm_force_fast_fault
 *
 *	Force all mappings for this page to fault according
 *	to the access modes allowed, so we can gather ref/modify
 *	bits.
 */
7280 arm_force_fast_fault_internal(
7282 vm_prot_t allow_mode
,
7285 pmap_paddr_t phys
= ptoa(ppnum
);
7291 boolean_t is_reusable
, is_internal
;
7292 boolean_t ref_fault
;
7293 boolean_t mod_fault
;
7295 assert(ppnum
!= vm_page_fictitious_addr
);
7297 if (!pa_valid(phys
)) {
7298 return FALSE
; /* Not a managed page. */
7304 pai
= (int)pa_index(phys
);
7306 pv_h
= pai_to_pvh(pai
);
7308 pte_p
= PT_ENTRY_NULL
;
7309 pve_p
= PV_ENTRY_NULL
;
7310 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
7311 pte_p
= pvh_ptep(pv_h
);
7312 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
7313 pve_p
= pvh_list(pv_h
);
7316 is_reusable
= IS_REUSABLE_PAGE(pai
);
7317 is_internal
= IS_INTERNAL_PAGE(pai
);
7319 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
7320 vm_map_address_t va
;
7324 boolean_t update_pte
;
7326 if (pve_p
!= PV_ENTRY_NULL
)
7327 pte_p
= pve_get_ptep(pve_p
);
7329 if (pte_p
== PT_ENTRY_NULL
) {
7330 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p
, ppnum
);
7332 if (*pte_p
== ARM_PTE_EMPTY
) {
7333 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
7335 if (ARM_PTE_IS_COMPRESSED(*pte_p
)) {
7336 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
7339 pmap
= ptep_get_pmap(pte_p
);
7340 va
= ptep_get_va(pte_p
);
7342 assert(va
>= pmap
->min
&& va
< pmap
->max
);
7344 if (pte_is_wired(*pte_p
) || pmap
== kernel_pmap
) {
7353 if ((allow_mode
& VM_PROT_READ
) != VM_PROT_READ
) {
7354 /* read protection sets the pte to fault */
7355 tmplate
= tmplate
& ~ARM_PTE_AF
;
7359 if ((allow_mode
& VM_PROT_WRITE
) != VM_PROT_WRITE
) {
7360 /* take away write permission if set */
7361 if (pmap
== kernel_pmap
) {
7362 if ((tmplate
& ARM_PTE_APMASK
) == ARM_PTE_AP(AP_RWNA
)) {
7363 tmplate
= ((tmplate
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RONA
));
7366 if ((tmplate
& ARM_PTE_APMASK
) == ARM_PTE_AP(AP_RWRW
)) {
7367 tmplate
= ((tmplate
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RORO
));
7371 pte_set_ffr(tmplate
, 1);
7378 if (*pte_p
!= ARM_PTE_TYPE_FAULT
&&
7379 !ARM_PTE_IS_COMPRESSED(*pte_p
)) {
7380 WRITE_PTE(pte_p
, tmplate
);
7381 PMAP_UPDATE_TLBS(pmap
, va
, va
+ PAGE_SIZE
);
7383 WRITE_PTE(pte_p
, tmplate
);
7384 __asm__
volatile("isb");
7388 /* update pmap stats and ledgers */
7389 if (IS_ALTACCT_PAGE(pai
, pve_p
)) {
			/*
			 * We do not track "reusable" status for
			 * "alternate accounting" mappings.
			 */
7394 } else if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
7397 pmap
!= kernel_pmap
) {
7398 /* one less "reusable" */
7399 PMAP_STATS_ASSERTF(pmap
->stats
.reusable
> 0, pmap
, "stats.reusable %d", pmap
->stats
.reusable
);
7400 OSAddAtomic(-1, &pmap
->stats
.reusable
);
7401 /* one more "internal" */
7402 OSAddAtomic(+1, &pmap
->stats
.internal
);
7403 PMAP_STATS_PEAK(pmap
->stats
.internal
);
7404 PMAP_STATS_ASSERTF(pmap
->stats
.internal
> 0, pmap
, "stats.internal %d", pmap
->stats
.internal
);
7405 pmap_ledger_credit(pmap
,
7406 task_ledgers
.internal
,
7408 assert(!IS_ALTACCT_PAGE(pai
, pve_p
));
7409 assert(IS_INTERNAL_PAGE(pai
));
7410 pmap_ledger_credit(pmap
,
7411 task_ledgers
.phys_footprint
,
				/*
				 * Avoid the cost of another trap to handle the fast
				 * fault when we next write to this page:  let's just
				 * handle that now since we already have all the
				 * necessary information.
				 */
7421 arm_clear_fast_fault(ppnum
, VM_PROT_WRITE
);
7423 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
7426 pmap
!= kernel_pmap
) {
7427 /* one more "reusable" */
7428 OSAddAtomic(+1, &pmap
->stats
.reusable
);
7429 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
7430 PMAP_STATS_ASSERTF(pmap
->stats
.reusable
> 0, pmap
, "stats.reusable %d", pmap
->stats
.reusable
);
7431 /* one less "internal" */
7432 PMAP_STATS_ASSERTF(pmap
->stats
.internal
> 0, pmap
, "stats.internal %d", pmap
->stats
.internal
);
7433 OSAddAtomic(-1, &pmap
->stats
.internal
);
7434 pmap_ledger_debit(pmap
,
7435 task_ledgers
.internal
,
7437 assert(!IS_ALTACCT_PAGE(pai
, pve_p
));
7438 assert(IS_INTERNAL_PAGE(pai
));
7439 pmap_ledger_debit(pmap
,
7440 task_ledgers
.phys_footprint
,
7444 pte_p
= PT_ENTRY_NULL
;
7445 if (pve_p
!= PV_ENTRY_NULL
)
7446 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
7449 /* update global "reusable" status for this page */
7451 if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
7453 CLR_REUSABLE_PAGE(pai
);
7454 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
7456 SET_REUSABLE_PAGE(pai
);
7461 SET_MODFAULT_PAGE(pai
);
7464 SET_REFFAULT_PAGE(pai
);
7472 arm_force_fast_fault(
7474 vm_prot_t allow_mode
,
7478 pmap_paddr_t phys
= ptoa(ppnum
);
7480 assert(ppnum
!= vm_page_fictitious_addr
);
7482 if (!pa_valid(phys
)) {
7483 return FALSE
; /* Not a managed page. */
7486 return arm_force_fast_fault_internal(ppnum
, allow_mode
, options
);
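/*
 * Illustrative sketch (not part of the original source): a caller that wants
 * to start gathering ref/modify information for a managed page would revoke
 * both read and write access so that the next touch takes a fast fault.  The
 * wrapper name is hypothetical; an options value of 0 requests no
 * reusable/internal accounting change.
 */
static void
example_arm_track_page(ppnum_t pn)
{
	/* Allow nothing: the next read or write to any mapping will fault. */
	(void) arm_force_fast_fault(pn, VM_PROT_NONE, 0);
}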
/*
 *	Routine:	arm_clear_fast_fault
 *
 *	Clear pending force fault for all mappings for this page based on
 *	the observed fault type, update ref/modify bits.
 */
7497 arm_clear_fast_fault(
7499 vm_prot_t fault_type
)
7501 pmap_paddr_t pa
= ptoa(ppnum
);
7508 assert(ppnum
!= vm_page_fictitious_addr
);
7510 if (!pa_valid(pa
)) {
7511 return FALSE
; /* Not a managed page. */
7515 pai
= (int)pa_index(pa
);
7516 ASSERT_PVH_LOCKED(pai
);
7517 pv_h
= pai_to_pvh(pai
);
7519 pte_p
= PT_ENTRY_NULL
;
7520 pve_p
= PV_ENTRY_NULL
;
7521 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
7522 pte_p
= pvh_ptep(pv_h
);
7523 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
7524 pve_p
= pvh_list(pv_h
);
7527 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
7528 vm_map_address_t va
;
7533 if (pve_p
!= PV_ENTRY_NULL
)
7534 pte_p
= pve_get_ptep(pve_p
);
7536 if (pte_p
== PT_ENTRY_NULL
) {
7537 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p
, ppnum
);
7539 if (*pte_p
== ARM_PTE_EMPTY
) {
7540 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
7543 pmap
= ptep_get_pmap(pte_p
);
7544 va
= ptep_get_va(pte_p
);
7546 assert(va
>= pmap
->min
&& va
< pmap
->max
);
7551 if ((fault_type
& VM_PROT_WRITE
) && (pte_is_ffr(spte
))) {
7553 if (pmap
== kernel_pmap
)
7554 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RWNA
));
7556 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RWRW
));
7559 tmplate
|= ARM_PTE_AF
;
7561 pte_set_ffr(tmplate
, 0);
7562 pa_set_bits(pa
, PP_ATTR_REFERENCED
| PP_ATTR_MODIFIED
);
7564 } else if ((fault_type
& VM_PROT_READ
) && ((spte
& ARM_PTE_AF
) != ARM_PTE_AF
)) {
7565 tmplate
= spte
| ARM_PTE_AF
;
7568 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
7573 if (spte
!= tmplate
) {
7574 if (spte
!= ARM_PTE_TYPE_FAULT
) {
7575 WRITE_PTE(pte_p
, tmplate
);
7576 PMAP_UPDATE_TLBS(pmap
, va
, va
+ PAGE_SIZE
);
7578 WRITE_PTE(pte_p
, tmplate
);
7579 __asm__
volatile("isb");
7584 pte_p
= PT_ENTRY_NULL
;
7585 if (pve_p
!= PV_ENTRY_NULL
)
7586 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
/*
 * Determine if the fault was induced by software tracking of
 * modify/reference bits.  If so, re-enable the mapping (and set
 * the appropriate bits).
 *
 * Returns KERN_SUCCESS if the fault was induced and was
 * successfully handled.
 *
 * Returns KERN_FAILURE if the fault was not induced and
 * the function was unable to deal with it.
 *
 * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
 * disallows this type of access.
 */
7605 static kern_return_t
7606 arm_fast_fault_internal(
7608 vm_map_address_t va
,
7609 vm_prot_t fault_type
,
7610 __unused boolean_t from_user
)
7612 kern_return_t result
= KERN_FAILURE
;
7614 pt_entry_t spte
= ARM_PTE_TYPE_FAULT
;
	/*
	 * If the entry doesn't exist, is completely invalid, or is already
	 * valid, we can't fix it here.
	 */
7625 ptep
= pmap_pte(pmap
, va
);
7626 if (ptep
!= PT_ENTRY_NULL
) {
7629 pa
= pte_to_pa(spte
);
7631 if ((spte
== ARM_PTE_TYPE_FAULT
) ||
7632 ARM_PTE_IS_COMPRESSED(spte
) ||
7638 pai
= (int)pa_index(pa
);
7646 if ((IS_REFFAULT_PAGE(pai
)) ||
7647 ((fault_type
& VM_PROT_WRITE
) && IS_MODFAULT_PAGE(pai
))) {
			/*
			 * An attempted access will always clear ref/mod fault state, as
			 * appropriate for the fault type.  arm_clear_fast_fault will
			 * update the associated PTEs for the page as appropriate; if
			 * any PTEs are updated, we redrive the access.  If the mapping
			 * does not actually allow for the attempted access, the
			 * following fault will (hopefully) fail to update any PTEs, and
			 * thus cause arm_fast_fault to decide that it failed to handle
			 * the fault.
			 */
7658 if (IS_REFFAULT_PAGE(pai
)) {
7659 CLR_REFFAULT_PAGE(pai
);
7661 if ( (fault_type
& VM_PROT_WRITE
) && IS_MODFAULT_PAGE(pai
)) {
7662 CLR_MODFAULT_PAGE(pai
);
7665 if (arm_clear_fast_fault((ppnum_t
)atop(pa
),fault_type
)) {
				/*
				 * Should this preserve KERN_PROTECTION_FAILURE?  The
				 * cost of not doing so is another fault in a case
				 * that should already result in an exception.
				 */
7671 result
= KERN_SUCCESS
;
7683 vm_map_address_t va
,
7684 vm_prot_t fault_type
,
7685 __unused boolean_t from_user
)
7687 kern_return_t result
= KERN_FAILURE
;
7689 if (va
< pmap
->min
|| va
>= pmap
->max
)
7692 PMAP_TRACE(PMAP_CODE(PMAP__FAST_FAULT
) | DBG_FUNC_START
,
7693 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(va
), fault_type
,
7696 #if (__ARM_VMSA__ == 7)
7697 if (pmap
!= kernel_pmap
) {
7698 pmap_cpu_data_t
*cpu_data_ptr
= pmap_get_cpu_data();
7700 pmap_t cur_user_pmap
;
7702 cur_pmap
= current_pmap();
7703 cur_user_pmap
= cpu_data_ptr
->cpu_user_pmap
;
7705 if ((cur_user_pmap
== cur_pmap
) && (cur_pmap
== pmap
)) {
7706 if (cpu_data_ptr
->cpu_user_pmap_stamp
!= pmap
->stamp
) {
7707 pmap_set_pmap(pmap
, current_thread());
7708 result
= KERN_SUCCESS
;
7715 result
= arm_fast_fault_internal(pmap
, va
, fault_type
, from_user
);
7717 #if (__ARM_VMSA__ == 7)
7721 PMAP_TRACE(PMAP_CODE(PMAP__FAST_FAULT
) | DBG_FUNC_END
, result
);
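/*
 * Illustrative sketch (not part of the original source): how a trap handler
 * might use arm_fast_fault() before falling back to the VM layer.  The
 * handler name and the surrounding fault-decoding step are hypothetical.
 */
static kern_return_t
example_handle_data_abort(pmap_t map, vm_map_address_t far, vm_prot_t fault_type)
{
	/* Give the pmap layer a chance to service a ref/mod tracking fault. */
	if (arm_fast_fault(map, far, fault_type, TRUE) == KERN_SUCCESS)
		return KERN_SUCCESS;

	/* Otherwise this is a real fault for vm_fault() to resolve. */
	return KERN_FAILURE;
}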
7731 bcopy_phys((addr64_t
) (ptoa(psrc
)),
7732 (addr64_t
) (ptoa(pdst
)),
7738 * pmap_copy_page copies the specified (machine independent) pages.
7741 pmap_copy_part_page(
7743 vm_offset_t src_offset
,
7745 vm_offset_t dst_offset
,
7748 bcopy_phys((addr64_t
) (ptoa(psrc
) + src_offset
),
7749 (addr64_t
) (ptoa(pdst
) + dst_offset
),
7755 * pmap_zero_page zeros the specified (machine independent) page.
7761 assert(pn
!= vm_page_fictitious_addr
);
7762 bzero_phys((addr64_t
) ptoa(pn
), PAGE_SIZE
);
7766 * pmap_zero_part_page
7767 * zeros the specified (machine independent) part of a page.
7770 pmap_zero_part_page(
7775 assert(pn
!= vm_page_fictitious_addr
);
7776 assert(offset
+ len
<= PAGE_SIZE
);
7777 bzero_phys((addr64_t
) (ptoa(pn
) + offset
), len
);
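/*
 * Illustrative sketch (not part of the original source): zeroing only the
 * tail of a physical page with the helper above.  The caller and page number
 * are hypothetical; offset + len must stay within PAGE_SIZE.
 */
static void
example_zero_page_tail(ppnum_t pn)
{
	/* Clear the second half of the page, leaving the first half intact. */
	pmap_zero_part_page(pn, PAGE_SIZE / 2, PAGE_SIZE / 2);
}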
7782 * nop in current arm implementation
7786 __unused thread_t t
)
7794 pt_entry_t
*ptep
, pte
;
7796 ptep
= pmap_pte(kernel_pmap
, LOWGLOBAL_ALIAS
);
7797 assert(ptep
!= PT_ENTRY_NULL
);
7798 assert(*ptep
== ARM_PTE_EMPTY
);
7800 pte
= pa_to_pte(ml_static_vtop((vm_offset_t
)&lowGlo
)) | AP_RONA
| ARM_PTE_NX
| ARM_PTE_PNX
| ARM_PTE_AF
| ARM_PTE_TYPE
;
7801 #if __ARM_KERNEL_PROTECT__
7803 #endif /* __ARM_KERNEL_PROTECT__ */
7804 pte
|= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK
);
7805 #if (__ARM_VMSA__ > 7)
7806 pte
|= ARM_PTE_SH(SH_OUTER_MEMORY
);
7811 FLUSH_PTE_RANGE(ptep
,(ptep
+1));
7812 PMAP_UPDATE_TLBS(kernel_pmap
, LOWGLOBAL_ALIAS
, LOWGLOBAL_ALIAS
+ PAGE_SIZE
);
7816 pmap_cpu_windows_copy_addr(int cpu_num
, unsigned int index
)
7818 return (vm_offset_t
)(CPUWINDOWS_BASE
+ (PAGE_SIZE
* ((CPUWINDOWS_MAX
* cpu_num
) + index
)));
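/*
 * Illustrative arithmetic (not part of the original source): the copy-window
 * address above is a simple linear layout with CPUWINDOWS_MAX windows per
 * CPU.  For example, with 4 windows per CPU and 4KB pages, window 2 of CPU 1
 * lands at CPUWINDOWS_BASE + PAGE_SIZE * (4 * 1 + 2), i.e. 6 pages above the
 * base of the window region.
 */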
7822 pmap_map_cpu_windows_copy_internal(
7825 unsigned int wimg_bits
)
7827 pt_entry_t
*ptep
= NULL
, pte
;
7828 unsigned int cpu_num
;
7830 vm_offset_t cpu_copywindow_vaddr
= 0;
7832 cpu_num
= pmap_get_cpu_data()->cpu_number
;
7834 for (i
= 0; i
<CPUWINDOWS_MAX
; i
++) {
7835 cpu_copywindow_vaddr
= pmap_cpu_windows_copy_addr(cpu_num
, i
);
7836 ptep
= pmap_pte(kernel_pmap
, cpu_copywindow_vaddr
);
7837 assert(!ARM_PTE_IS_COMPRESSED(*ptep
));
7838 if (*ptep
== ARM_PTE_TYPE_FAULT
)
7841 if (i
== CPUWINDOWS_MAX
) {
7842 panic("pmap_map_cpu_windows_copy: out of window\n");
7845 pte
= pa_to_pte(ptoa(pn
)) | ARM_PTE_TYPE
| ARM_PTE_AF
| ARM_PTE_NX
| ARM_PTE_PNX
;
7846 #if __ARM_KERNEL_PROTECT__
7848 #endif /* __ARM_KERNEL_PROTECT__ */
7850 pte
|= wimg_to_pte(wimg_bits
);
7852 if (prot
& VM_PROT_WRITE
) {
7853 pte
|= ARM_PTE_AP(AP_RWNA
);
7855 pte
|= ARM_PTE_AP(AP_RONA
);
7858 WRITE_PTE(ptep
, pte
);
	/*
	 * Invalidate tlb. Cover nested cpu_copywindow_vaddr usage with the interrupted context
	 * in pmap_unmap_cpu_windows_copy() after clearing the pte and before tlb invalidate.
	 */
7863 PMAP_UPDATE_TLBS(kernel_pmap
, cpu_copywindow_vaddr
, cpu_copywindow_vaddr
+ PAGE_SIZE
);
7869 pmap_map_cpu_windows_copy(
7872 unsigned int wimg_bits
)
7874 return pmap_map_cpu_windows_copy_internal(pn
, prot
, wimg_bits
);
7878 pmap_unmap_cpu_windows_copy_internal(
7882 unsigned int cpu_num
;
7883 vm_offset_t cpu_copywindow_vaddr
= 0;
7885 cpu_num
= pmap_get_cpu_data()->cpu_number
;
7887 cpu_copywindow_vaddr
= pmap_cpu_windows_copy_addr(cpu_num
, index
);
7888 __asm__
volatile("dsb sy");
7889 ptep
= pmap_pte(kernel_pmap
, cpu_copywindow_vaddr
);
7890 WRITE_PTE(ptep
, ARM_PTE_TYPE_FAULT
);
7891 PMAP_UPDATE_TLBS(kernel_pmap
, cpu_copywindow_vaddr
, cpu_copywindow_vaddr
+ PAGE_SIZE
);
7895 pmap_unmap_cpu_windows_copy(
7898 return pmap_unmap_cpu_windows_copy_internal(index
);
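/*
 * Illustrative sketch (not part of the original source): the copy windows are
 * used in map/use/unmap pairs on the current CPU.  The caller name and the
 * "use" step are placeholders.
 */
static void
example_copy_window_usage(ppnum_t pn)
{
	unsigned int index;
	vm_offset_t va;

	/* Map the physical page into a per-CPU window, kernel read-only. */
	index = pmap_map_cpu_windows_copy(pn, VM_PROT_READ, VM_WIMG_DEFAULT);
	va = pmap_cpu_windows_copy_addr(pmap_get_cpu_data()->cpu_number, index);

	/* ... read through 'va' here ... */

	/* Tear the mapping down and invalidate the stale TLB entry. */
	pmap_unmap_cpu_windows_copy(index);
}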
/*
 *	Mark that a pmap has been nested.
 */
7905 pmap_set_nested_internal(
7908 pmap
->nested
= TRUE
;
7915 pmap_set_nested_internal(pmap
);
/*
 *	kern_return_t pmap_nest(grand, subord, vstart, size)
 *
 *	grand  = the pmap that we will nest subord into
 *	subord = the pmap that goes into the grand
 *	vstart = start of range in pmap to be inserted
 *	nstart = start of range in pmap nested pmap
 *	size   = Size of nest area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 */
7931 static kern_return_t
7939 kern_return_t kr
= KERN_FAILURE
;
7940 vm_map_offset_t vaddr
, nvaddr
;
7944 unsigned int num_tte
;
7945 unsigned int nested_region_asid_bitmap_size
;
7946 unsigned int* nested_region_asid_bitmap
;
7947 int expand_options
= 0;
7950 #if (__ARM_VMSA__ == 7)
7951 if (((size
|vstart
|nstart
) & ARM_TT_L1_PT_OFFMASK
) != 0x0ULL
) {
7952 return KERN_INVALID_VALUE
; /* Nest 4MB region */
7955 if (((size
|vstart
|nstart
) & (ARM_TT_L2_OFFMASK
)) != 0x0ULL
) {
7956 panic("pmap_nest() pmap %p has a nested pmap 0x%llx, 0x%llx, 0x%llx\n", grand
, vstart
, nstart
, size
);
7960 if ((grand
->nested_pmap
!= PMAP_NULL
) && (grand
->nested_pmap
!= subord
)) {
7961 panic("pmap_nest() pmap %p has a nested pmap\n", grand
);
7964 if (subord
->nested_region_asid_bitmap
== NULL
) {
7965 nested_region_asid_bitmap_size
= (unsigned int)(size
>>ARM_TT_TWIG_SHIFT
)/(sizeof(unsigned int)*NBBY
);
7967 nested_region_asid_bitmap
= kalloc(nested_region_asid_bitmap_size
*sizeof(unsigned int));
7968 bzero(nested_region_asid_bitmap
, nested_region_asid_bitmap_size
*sizeof(unsigned int));
7971 if (subord
->nested_region_asid_bitmap
== NULL
) {
7972 subord
->nested_region_asid_bitmap
= nested_region_asid_bitmap
;
7973 subord
->nested_region_asid_bitmap_size
= nested_region_asid_bitmap_size
;
7974 subord
->nested_region_subord_addr
= nstart
;
7975 subord
->nested_region_size
= (mach_vm_offset_t
) size
;
7976 nested_region_asid_bitmap
= NULL
;
7978 PMAP_UNLOCK(subord
);
7979 if (nested_region_asid_bitmap
!= NULL
) {
7980 kfree(nested_region_asid_bitmap
, nested_region_asid_bitmap_size
*sizeof(unsigned int));
7983 if ((subord
->nested_region_subord_addr
+ subord
->nested_region_size
) < (nstart
+size
)) {
7985 unsigned int new_nested_region_asid_bitmap_size
;
7986 unsigned int* new_nested_region_asid_bitmap
;
7988 nested_region_asid_bitmap
= NULL
;
7989 nested_region_asid_bitmap_size
= 0;
7990 new_size
= nstart
+ size
- subord
->nested_region_subord_addr
;
7992 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
7993 new_nested_region_asid_bitmap_size
= (unsigned int)((new_size
>>ARM_TT_TWIG_SHIFT
)/(sizeof(unsigned int)*NBBY
)) + 1;
7995 new_nested_region_asid_bitmap
= kalloc(new_nested_region_asid_bitmap_size
*sizeof(unsigned int));
7997 if (subord
->nested_region_size
< new_size
) {
7998 bzero(new_nested_region_asid_bitmap
, new_nested_region_asid_bitmap_size
*sizeof(unsigned int));
7999 bcopy(subord
->nested_region_asid_bitmap
, new_nested_region_asid_bitmap
, subord
->nested_region_asid_bitmap_size
);
8000 nested_region_asid_bitmap_size
= subord
->nested_region_asid_bitmap_size
;
8001 nested_region_asid_bitmap
= subord
->nested_region_asid_bitmap
;
8002 subord
->nested_region_asid_bitmap
= new_nested_region_asid_bitmap
;
8003 subord
->nested_region_asid_bitmap_size
= new_nested_region_asid_bitmap_size
;
8004 subord
->nested_region_size
= new_size
;
8005 new_nested_region_asid_bitmap
= NULL
;
8007 PMAP_UNLOCK(subord
);
8008 if (nested_region_asid_bitmap
!= NULL
)
8009 kfree(nested_region_asid_bitmap
, nested_region_asid_bitmap_size
*sizeof(unsigned int));
8010 if (new_nested_region_asid_bitmap
!= NULL
)
8011 kfree(new_nested_region_asid_bitmap
, new_nested_region_asid_bitmap_size
*sizeof(unsigned int));
8015 if (grand
->nested_pmap
== PMAP_NULL
) {
8016 grand
->nested_pmap
= subord
;
8017 grand
->nested_region_grand_addr
= vstart
;
8018 grand
->nested_region_subord_addr
= nstart
;
8019 grand
->nested_region_size
= (mach_vm_offset_t
) size
;
8021 if ((grand
->nested_region_grand_addr
> vstart
)) {
8022 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand
);
8024 else if ((grand
->nested_region_grand_addr
+ grand
->nested_region_size
) < (vstart
+size
)) {
8025 grand
->nested_region_size
= (mach_vm_offset_t
)(vstart
- grand
->nested_region_grand_addr
+ size
);
8029 #if (__ARM_VMSA__ == 7)
8030 nvaddr
= (vm_map_offset_t
) nstart
;
8031 vaddr
= (vm_map_offset_t
) vstart
;
8032 num_tte
= size
>> ARM_TT_L1_SHIFT
;
8034 for (i
= 0; i
< num_tte
; i
++) {
8035 stte_p
= pmap_tte(subord
, nvaddr
);
8036 if ((stte_p
== (tt_entry_t
*)NULL
) || (((*stte_p
) & ARM_TTE_TYPE_MASK
) != ARM_TTE_TYPE_TABLE
)) {
8037 PMAP_UNLOCK(subord
);
8038 kr
= pmap_expand(subord
, nvaddr
, expand_options
, PMAP_TT_L2_LEVEL
);
8040 if (kr
!= KERN_SUCCESS
) {
8047 PMAP_UNLOCK(subord
);
8049 stte_p
= pmap_tte(grand
, vaddr
);
8050 if (stte_p
== (tt_entry_t
*)NULL
) {
8052 kr
= pmap_expand(grand
, vaddr
, expand_options
, PMAP_TT_L1_LEVEL
);
8054 if (kr
!= KERN_SUCCESS
) {
8065 nvaddr
+= ARM_TT_L1_SIZE
;
8066 vaddr
+= ARM_TT_L1_SIZE
;
8070 nvaddr
= (vm_map_offset_t
) nstart
;
8071 num_tte
= (unsigned int)(size
>> ARM_TT_L2_SHIFT
);
8073 for (i
= 0; i
< num_tte
; i
++) {
8074 stte_p
= pmap_tt2e(subord
, nvaddr
);
8075 if (stte_p
== PT_ENTRY_NULL
|| *stte_p
== ARM_TTE_EMPTY
) {
8076 PMAP_UNLOCK(subord
);
8077 kr
= pmap_expand(subord
, nvaddr
, expand_options
, PMAP_TT_L3_LEVEL
);
8079 if (kr
!= KERN_SUCCESS
) {
8086 nvaddr
+= ARM_TT_L2_SIZE
;
8089 PMAP_UNLOCK(subord
);
8092 * copy tte's from subord pmap into grand pmap
8096 nvaddr
= (vm_map_offset_t
) nstart
;
8097 vaddr
= (vm_map_offset_t
) vstart
;
8100 #if (__ARM_VMSA__ == 7)
8101 for (i
= 0; i
< num_tte
; i
++) {
8103 stte_p
= pmap_tte(subord
, nvaddr
);
8104 gtte_p
= pmap_tte(grand
, vaddr
);
8107 nvaddr
+= ARM_TT_L1_SIZE
;
8108 vaddr
+= ARM_TT_L1_SIZE
;
8111 for (i
= 0; i
< num_tte
; i
++) {
8113 stte_p
= pmap_tt2e(subord
, nstart
);
8114 gtte_p
= pmap_tt2e(grand
, vaddr
);
8115 if (gtte_p
== PT_ENTRY_NULL
) {
8117 kr
= pmap_expand(grand
, vaddr
, expand_options
, PMAP_TT_L2_LEVEL
);
8120 if (kr
!= KERN_SUCCESS
) {
8124 gtte_p
= pmap_tt2e(grand
, vaddr
);
8127 vaddr
+= ARM_TT_L2_SIZE
;
8128 nstart
+= ARM_TT_L2_SIZE
;
8135 #ifndef __ARM_L1_PTW__
8136 CleanPoU_DcacheRegion((vm_offset_t
) pmap_tte(grand
, vstart
), num_tte
* sizeof(tt_entry_t
));
8139 #if (__ARM_VMSA__ > 7)
8141 * check for overflow on LP64 arch
8143 assert((size
& 0xFFFFFFFF00000000ULL
) == 0);
8145 PMAP_UPDATE_TLBS(grand
, vstart
, vstart
+ size
);
8151 kern_return_t
pmap_nest(
8158 kern_return_t kr
= KERN_FAILURE
;
8160 PMAP_TRACE(PMAP_CODE(PMAP__NEST
) | DBG_FUNC_START
,
8161 VM_KERNEL_ADDRHIDE(grand
), VM_KERNEL_ADDRHIDE(subord
),
8162 VM_KERNEL_ADDRHIDE(vstart
));
8164 kr
= pmap_nest_internal(grand
, subord
, vstart
, nstart
, size
);
8166 PMAP_TRACE(PMAP_CODE(PMAP__NEST
) | DBG_FUNC_END
, kr
);
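/*
 * Illustrative sketch (not part of the original source): nesting a shared
 * region pmap into a task pmap.  The caller name is hypothetical; the base
 * and size must satisfy the twig-level alignment checks enforced by
 * pmap_nest_internal().
 */
static kern_return_t
example_nest_shared_region(pmap_t task_pmap, pmap_t shared_region_pmap)
{
	const addr64_t base = SHARED_REGION_BASE_ARM64;
	const uint64_t size = SHARED_REGION_SIZE_ARM64;

	/* Grand VA and subord VA coincide for the shared region. */
	return pmap_nest(task_pmap, shared_region_pmap, base, base, size);
}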
/*
 *	kern_return_t pmap_unnest(grand, vaddr)
 *
 *	grand = the pmap from which the nested subord will be removed
 *	vaddr = start of range in pmap to be unnested
 *	size  = size of range in pmap to be unnested
 */
8186 return(pmap_unnest_options(grand
, vaddr
, size
, 0));
8189 static kern_return_t
8190 pmap_unnest_options_internal(
8194 unsigned int option
)
8196 vm_map_offset_t start
;
8197 vm_map_offset_t addr
;
8199 unsigned int current_index
;
8200 unsigned int start_index
;
8201 unsigned int max_index
;
8202 unsigned int num_tte
;
8205 #if (__ARM_VMSA__ == 7)
8206 if (((size
|vaddr
) & ARM_TT_L1_PT_OFFMASK
) != 0x0ULL
) {
8207 panic("pmap_unnest(): unaligned request\n");
8210 if (((size
|vaddr
) & ARM_TT_L2_OFFMASK
) != 0x0ULL
) {
8211 panic("pmap_unnest(): unaligned request\n");
8215 if ((option
& PMAP_UNNEST_CLEAN
) == 0)
8217 PMAP_LOCK(grand
->nested_pmap
);
8219 start
= vaddr
- grand
->nested_region_grand_addr
+ grand
->nested_region_subord_addr
;
8220 start_index
= (unsigned int)((vaddr
- grand
->nested_region_grand_addr
) >> ARM_TT_TWIG_SHIFT
);
8221 max_index
= (unsigned int)(start_index
+ (size
>> ARM_TT_TWIG_SHIFT
));
8222 num_tte
= (unsigned int)(size
>> ARM_TT_TWIG_SHIFT
);
8224 if (size
> grand
->nested_region_size
) {
8225 panic("pmap_unnest() pmap %p %llu, %llu\n", grand
, size
, (uint64_t)grand
->nested_region_size
);
8228 for (current_index
= start_index
, addr
= start
; current_index
< max_index
; current_index
++) {
8229 pt_entry_t
*bpte
, *epte
, *cpte
;
8232 if(!testbit(current_index
, (int *)grand
->nested_pmap
->nested_region_asid_bitmap
)) {
8234 setbit(current_index
, (int *)grand
->nested_pmap
->nested_region_asid_bitmap
);
8235 bpte
= pmap_pte(grand
->nested_pmap
, addr
);
8236 epte
= bpte
+ (ARM_TT_LEAF_INDEX_MASK
>>ARM_TT_LEAF_SHIFT
);
8238 for (cpte
= bpte
; cpte
<= epte
; cpte
++) {
8241 boolean_t managed
=FALSE
;
8244 if ((*cpte
!= ARM_PTE_TYPE_FAULT
)
8245 && (!ARM_PTE_IS_COMPRESSED(*cpte
))) {
8249 pa
= pte_to_pa(spte
);
8252 pai
= (int)pa_index(pa
);
8255 pa
= pte_to_pa(spte
);
8256 if (pai
== (int)pa_index(pa
)) {
8258 break; // Leave the PVH locked as we'll unlock it after we update the PTE
8263 if (((spte
& ARM_PTE_NG
) != ARM_PTE_NG
)) {
8265 WRITE_PTE(cpte
, (spte
| ARM_PTE_NG
));
8270 ASSERT_PVH_LOCKED(pai
);
8277 addr
+= ARM_TT_TWIG_SIZE
;
8279 #ifndef __ARM_L1_PTW__
8280 CleanPoU_DcacheRegion((vm_offset_t
) pmap_pte(grand
->nested_pmap
, start
), num_tte
* sizeof(tt_entry_t
));
8282 PMAP_UPDATE_TLBS(grand
->nested_pmap
, start
, start
+ size
);
8285 PMAP_UNLOCK(grand
->nested_pmap
);
8291 * invalidate all pdes for segment at vaddr in pmap grand
8296 num_tte
= (unsigned int)(size
>> ARM_TT_TWIG_SHIFT
);
8298 for (i
= 0; i
< num_tte
; i
++) {
8299 tte_p
= pmap_tte(grand
, addr
);
8300 *tte_p
= ARM_TTE_TYPE_FAULT
;
8302 addr
+= ARM_TT_TWIG_SIZE
;
8305 #ifndef __ARM_L1_PTW__
8306 CleanPoU_DcacheRegion((vm_offset_t
) pmap_tte(grand
, start
), num_tte
* sizeof(tt_entry_t
));
8308 PMAP_UPDATE_TLBS(grand
, start
, start
+ size
);
8312 return KERN_SUCCESS
;
8316 pmap_unnest_options(
8320 unsigned int option
)
8322 kern_return_t kr
= KERN_FAILURE
;
8324 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST
) | DBG_FUNC_START
,
8325 VM_KERNEL_ADDRHIDE(grand
), VM_KERNEL_ADDRHIDE(vaddr
));
8327 kr
= pmap_unnest_options_internal(grand
, vaddr
, size
, option
);
8329 PMAP_TRACE(PMAP_CODE(PMAP__UNNEST
) | DBG_FUNC_END
, kr
);
8335 pmap_adjust_unnest_parameters(
8337 __unused vm_map_offset_t
*s
,
8338 __unused vm_map_offset_t
*e
)
8340 return TRUE
; /* to get to log_unnest_badness()... */
8344 * disable no-execute capability on
8345 * the specified pmap
8347 #if DEVELOPMENT || DEBUG
8352 pmap
->nx_enabled
= FALSE
;
8357 __unused pmap_t pmap
)
8366 pt_fake_zone_index
= zone_index
;
8372 vm_size_t
*cur_size
, vm_size_t
*max_size
, vm_size_t
*elem_size
, vm_size_t
*alloc_size
,
8373 uint64_t *sum_size
, int *collectable
, int *exhaustable
, int *caller_acct
)
8375 *count
= inuse_pmap_pages_count
;
8376 *cur_size
= PAGE_SIZE
* (inuse_pmap_pages_count
);
8377 *max_size
= PAGE_SIZE
* (inuse_pmap_pages_count
+ vm_page_inactive_count
+ vm_page_active_count
+ vm_page_free_count
);
8378 *elem_size
= PAGE_SIZE
;
8379 *alloc_size
= PAGE_SIZE
;
8380 *sum_size
= (alloc_pmap_pages_count
) * PAGE_SIZE
;
/*
 *	flush a range of hardware TLB entries.
 *	NOTE: assumes the smallest TLB entry in use will be for
 *	an ARM small page (4K).
 */
8393 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
8394 #define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
8397 flush_mmu_tlb_region_asid(
8402 #if (__ARM_VMSA__ == 7)
8403 vm_offset_t end
= va
+ length
;
8408 if (length
/ ARM_SMALL_PAGE_SIZE
> ARM_FULL_TLB_FLUSH_THRESHOLD
) {
8409 boolean_t flush_all
= FALSE
;
8411 if ((asid
== 0) || (pmap
->nested
== TRUE
))
8416 flush_mmu_tlb_asid(asid
);
8420 if (pmap
->nested
== TRUE
) {
8424 va
= arm_trunc_page(va
);
8426 flush_mmu_tlb_mva_entries(va
);
8427 va
+= ARM_SMALL_PAGE_SIZE
;
8432 va
= arm_trunc_page(va
) | (asid
& 0xff);
8433 flush_mmu_tlb_entries(va
, end
);
8436 vm_offset_t end
= va
+ length
;
8441 if ((length
>> ARM_TT_L3_SHIFT
) > ARM64_FULL_TLB_FLUSH_THRESHOLD
) {
8442 boolean_t flush_all
= FALSE
;
8444 if ((asid
== 0) || (pmap
->nested
== TRUE
))
8449 flush_mmu_tlb_asid((uint64_t)asid
<< TLBI_ASID_SHIFT
);
8452 va
= tlbi_asid(asid
) | tlbi_addr(va
);
8453 end
= tlbi_asid(asid
) | tlbi_addr(end
);
8454 if (pmap
->nested
== TRUE
) {
8455 flush_mmu_tlb_allentries(va
, end
);
8457 flush_mmu_tlb_entries(va
, end
);
8464 flush_mmu_tlb_region(
8468 flush_mmu_tlb_region_asid(va
, length
, kernel_pmap
);
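/*
 * Illustrative sketch (not part of the original source): the heuristic used
 * by flush_mmu_tlb_region_asid() above, shown for the arm64 path.  The
 * constants come from this file; the helper name is hypothetical.
 */
static inline boolean_t
example_should_flush_whole_asid(vm_offset_t length)
{
	/* More than 256 4KB-granule pages: a full ASID flush is cheaper. */
	return ((length >> ARM_TT_L3_SHIFT) > ARM64_FULL_TLB_FLUSH_THRESHOLD);
}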
8472 pmap_cache_attributes(
8477 unsigned int result
;
8478 pp_attr_t pp_attr_current
;
8482 if ((paddr
>= io_rgn_start
) && (paddr
< io_rgn_end
)) {
8483 unsigned int attr
= IO_ATTR_WIMG(io_attr_table
[(paddr
- io_rgn_start
) / io_rgn_granule
]);
8487 return (VM_WIMG_IO
);
8491 if (!pmap_initialized
) {
8492 if ((paddr
>= gPhysBase
) && (paddr
< gPhysBase
+gPhysSize
))
8493 return (VM_WIMG_DEFAULT
);
8495 return (VM_WIMG_IO
);
8499 if (!pa_valid(paddr
))
8500 return (VM_WIMG_IO
);
8502 result
= VM_WIMG_DEFAULT
;
8504 pai
= (int)pa_index(paddr
);
8506 pp_attr_current
= pp_attr_table
[pai
];
8507 if (pp_attr_current
& PP_ATTR_WIMG_MASK
)
8508 result
= pp_attr_current
& PP_ATTR_WIMG_MASK
;
8513 pmap_batch_set_cache_attributes_internal(
8515 unsigned int cacheattr
,
8516 unsigned int page_cnt
,
8517 unsigned int page_index
,
8523 pp_attr_t pp_attr_current
;
8524 pp_attr_t pp_attr_template
;
8525 unsigned int wimg_bits_prev
, wimg_bits_new
;
8527 if (cacheattr
& VM_WIMG_USE_DEFAULT
)
8528 cacheattr
= VM_WIMG_DEFAULT
;
8530 if ((doit
== FALSE
) && (*res
== 0)) {
8532 if (platform_cache_batch_wimg(cacheattr
& (VM_WIMG_MASK
), page_cnt
<<PAGE_SHIFT
) == FALSE
) {
8539 if (!pa_valid(paddr
)) {
8540 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed\n", pn
);
8543 pai
= (int)pa_index(paddr
);
8548 pp_attr_current
= pp_attr_table
[pai
];
8549 wimg_bits_prev
= VM_WIMG_DEFAULT
;
8550 if (pp_attr_current
& PP_ATTR_WIMG_MASK
)
8551 wimg_bits_prev
= pp_attr_current
& PP_ATTR_WIMG_MASK
;
8553 pp_attr_template
= (pp_attr_current
& ~PP_ATTR_WIMG_MASK
) | PP_ATTR_WIMG(cacheattr
& (VM_WIMG_MASK
));
8556 pp_attr_table
[pai
] = pp_attr_template
;
8558 wimg_bits_new
= VM_WIMG_DEFAULT
;
8559 if (pp_attr_template
& PP_ATTR_WIMG_MASK
)
8560 wimg_bits_new
= pp_attr_template
& PP_ATTR_WIMG_MASK
;
8563 if (wimg_bits_new
!= wimg_bits_prev
)
8564 pmap_update_cache_attributes_locked(pn
, cacheattr
);
8567 if (wimg_bits_new
== VM_WIMG_COPYBACK
) {
8570 if (wimg_bits_prev
== wimg_bits_new
) {
8572 if (!platform_cache_batch_wimg(wimg_bits_new
, (*res
)<<PAGE_SHIFT
)) {
8579 if (page_cnt
== (page_index
+1)) {
8580 wimg_bits_prev
= VM_WIMG_COPYBACK
;
8581 if (((page_cnt
== (page_index
+1)) && (wimg_bits_prev
!= wimg_bits_new
))
8582 && ((wimg_bits_prev
== VM_WIMG_COPYBACK
)
8583 || ((wimg_bits_prev
== VM_WIMG_INNERWBACK
)
8584 && (wimg_bits_new
!= VM_WIMG_COPYBACK
))
8585 || ((wimg_bits_prev
== VM_WIMG_WTHRU
)
8586 && ((wimg_bits_new
!= VM_WIMG_COPYBACK
) || (wimg_bits_new
!= VM_WIMG_INNERWBACK
))))) {
8587 platform_cache_flush_wimg(wimg_bits_new
);
8595 pmap_batch_set_cache_attributes(
8597 unsigned int cacheattr
,
8598 unsigned int page_cnt
,
8599 unsigned int page_index
,
8603 return pmap_batch_set_cache_attributes_internal(pn
, cacheattr
, page_cnt
, page_index
, doit
, res
);
8607 pmap_set_cache_attributes_internal(
8609 unsigned int cacheattr
)
8613 pp_attr_t pp_attr_current
;
8614 pp_attr_t pp_attr_template
;
8615 unsigned int wimg_bits_prev
, wimg_bits_new
;
8619 if (!pa_valid(paddr
)) {
8620 return; /* Not a managed page. */
8623 if (cacheattr
& VM_WIMG_USE_DEFAULT
)
8624 cacheattr
= VM_WIMG_DEFAULT
;
8626 pai
= (int)pa_index(paddr
);
8630 pp_attr_current
= pp_attr_table
[pai
];
8631 wimg_bits_prev
= VM_WIMG_DEFAULT
;
8632 if (pp_attr_current
& PP_ATTR_WIMG_MASK
)
8633 wimg_bits_prev
= pp_attr_current
& PP_ATTR_WIMG_MASK
;
8635 pp_attr_template
= (pp_attr_current
& ~PP_ATTR_WIMG_MASK
) | PP_ATTR_WIMG(cacheattr
& (VM_WIMG_MASK
)) ;
8637 pp_attr_table
[pai
] = pp_attr_template
;
8638 wimg_bits_new
= VM_WIMG_DEFAULT
;
8639 if (pp_attr_template
& PP_ATTR_WIMG_MASK
)
8640 wimg_bits_new
= pp_attr_template
& PP_ATTR_WIMG_MASK
;
8642 if (wimg_bits_new
!= wimg_bits_prev
)
8643 pmap_update_cache_attributes_locked(pn
, cacheattr
);
8647 if ((wimg_bits_prev
!= wimg_bits_new
)
8648 && ((wimg_bits_prev
== VM_WIMG_COPYBACK
)
8649 || ((wimg_bits_prev
== VM_WIMG_INNERWBACK
)
8650 && (wimg_bits_new
!= VM_WIMG_COPYBACK
))
8651 || ((wimg_bits_prev
== VM_WIMG_WTHRU
)
8652 && ((wimg_bits_new
!= VM_WIMG_COPYBACK
) || (wimg_bits_new
!= VM_WIMG_INNERWBACK
)))))
8653 pmap_sync_page_attributes_phys(pn
);
8658 pmap_set_cache_attributes(
8660 unsigned int cacheattr
)
8662 pmap_set_cache_attributes_internal(pn
, cacheattr
);
8666 pmap_update_cache_attributes_locked(
8668 unsigned attributes
)
8670 pmap_paddr_t phys
= ptoa(ppnum
);
8677 #if (__ARM_VMSA__ == 7)
8678 #define ARM_PTE_SHMASK ARM_PTE_SH
8681 #if __ARM_PTE_PHYSMAP__
8682 vm_offset_t kva
= phystokv(phys
);
8683 pte_p
= pmap_pte(kernel_pmap
, kva
);
8686 tmplate
&= ~(ARM_PTE_ATTRINDXMASK
| ARM_PTE_SHMASK
);
8687 tmplate
|= wimg_to_pte(attributes
);
8689 WRITE_PTE(pte_p
, tmplate
);
8690 PMAP_UPDATE_TLBS(kernel_pmap
, kva
, kva
+ PAGE_SIZE
);
8693 pai
= (unsigned int)pa_index(phys
);
8695 pv_h
= pai_to_pvh(pai
);
8697 pte_p
= PT_ENTRY_NULL
;
8698 pve_p
= PV_ENTRY_NULL
;
8699 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
8700 pte_p
= pvh_ptep(pv_h
);
8701 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
8702 pve_p
= pvh_list(pv_h
);
8703 pte_p
= PT_ENTRY_NULL
;
8706 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
8707 vm_map_address_t va
;
8710 if (pve_p
!= PV_ENTRY_NULL
)
8711 pte_p
= pve_get_ptep(pve_p
);
8713 pmap
= ptep_get_pmap(pte_p
);
8714 va
= ptep_get_va(pte_p
);
8717 tmplate
&= ~(ARM_PTE_ATTRINDXMASK
| ARM_PTE_SHMASK
);
8718 tmplate
|= wimg_to_pte(attributes
);
8720 WRITE_PTE(pte_p
, tmplate
);
8721 PMAP_UPDATE_TLBS(pmap
, va
, va
+ PAGE_SIZE
);
8723 pte_p
= PT_ENTRY_NULL
;
8724 if (pve_p
!= PV_ENTRY_NULL
)
8725 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
8730 #if (__ARM_VMSA__ == 7)
8732 pmap_create_sharedpage(
8738 (void) pmap_pages_alloc(&pa
, PAGE_SIZE
, 0);
8739 memset((char *) phystokv(pa
), 0, PAGE_SIZE
);
8741 kr
= pmap_enter(kernel_pmap
, _COMM_PAGE_BASE_ADDRESS
, atop(pa
), VM_PROT_READ
| VM_PROT_WRITE
, VM_PROT_NONE
, VM_WIMG_USE_DEFAULT
, TRUE
);
8742 assert(kr
== KERN_SUCCESS
);
8744 return((vm_map_address_t
)phystokv(pa
));
8751 vm_address_t address
,
8752 tt_entry_t
template)
8754 tt_entry_t
*ptep
, pte
;
8756 ptep
= pmap_tt3e(pmap
, address
);
8758 panic("%s: no ptep?\n", __FUNCTION__
);
8762 pte
= tte_to_pa(pte
) | template;
8763 WRITE_PTE(ptep
, pte
);
8766 /* Note absence of non-global bit */
8767 #define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
8768 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
8769 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
8770 | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
8773 pmap_create_sharedpage(
8778 pmap_paddr_t pa
= 0;
8781 (void) pmap_pages_alloc(&pa
, PAGE_SIZE
, 0);
8783 memset((char *) phystokv(pa
), 0, PAGE_SIZE
);
8786 * The kernel pmap maintains a user accessible mapping of the commpage
8789 kr
= pmap_expand(kernel_pmap
, _COMM_HIGH_PAGE64_BASE_ADDRESS
, 0, PMAP_TT_L3_LEVEL
);
8790 assert(kr
== KERN_SUCCESS
);
8791 kr
= pmap_enter(kernel_pmap
, _COMM_HIGH_PAGE64_BASE_ADDRESS
, (ppnum_t
)atop(pa
), VM_PROT_READ
, VM_PROT_NONE
, VM_WIMG_USE_DEFAULT
, TRUE
);
8792 assert(kr
== KERN_SUCCESS
);
8795 * This mapping should not be global (as we only expect to reference it
8798 pmap_update_tt3e(kernel_pmap
, _COMM_HIGH_PAGE64_BASE_ADDRESS
, PMAP_COMM_PAGE_PTE_TEMPLATE
| ARM_PTE_NG
);
8801 * With PAN enabled kernel drivers can no longer use the previous mapping which is user readable
8802 * They should use the following mapping instead
8804 kr
= pmap_expand(kernel_pmap
, _COMM_PRIV_PAGE64_BASE_ADDRESS
, 0, PMAP_TT_L3_LEVEL
);
8805 assert(kr
== KERN_SUCCESS
);
8806 kr
= pmap_enter(kernel_pmap
, _COMM_PRIV_PAGE64_BASE_ADDRESS
, (ppnum_t
)atop(pa
), VM_PROT_READ
, VM_PROT_NONE
, VM_WIMG_USE_DEFAULT
, TRUE
);
8807 assert(kr
== KERN_SUCCESS
);
	/*
	 * In order to avoid burning extra pages on mapping the shared page, we
	 * create a dedicated pmap for the shared page.  We forcibly nest the
	 * translation tables from this pmap into other pmaps.  The level we
	 * will nest at depends on the MMU configuration (page size, TTBR range,
	 * etc.).
	 *
	 * Note that this is NOT "the nested pmap" (which is used to nest the
	 * shared cache).
	 *
	 * Note that we update parameters of the entry for our unique needs (NG
	 * entry, etc.).
	 */
8822 sharedpage_pmap
= pmap_create(NULL
, 0x0, FALSE
);
8823 assert(sharedpage_pmap
!= NULL
);
8825 /* The user 64-bit mapping... */
8826 kr
= pmap_enter(sharedpage_pmap
, _COMM_PAGE64_BASE_ADDRESS
, (ppnum_t
)atop(pa
), VM_PROT_READ
, VM_PROT_NONE
, VM_WIMG_USE_DEFAULT
, TRUE
);
8827 assert(kr
== KERN_SUCCESS
);
8828 pmap_update_tt3e(sharedpage_pmap
, _COMM_PAGE64_BASE_ADDRESS
, PMAP_COMM_PAGE_PTE_TEMPLATE
);
8830 /* ...and the user 32-bit mapping. */
8831 kr
= pmap_enter(sharedpage_pmap
, _COMM_PAGE32_BASE_ADDRESS
, (ppnum_t
)atop(pa
), VM_PROT_READ
, VM_PROT_NONE
, VM_WIMG_USE_DEFAULT
, TRUE
);
8832 assert(kr
== KERN_SUCCESS
);
8833 pmap_update_tt3e(sharedpage_pmap
, _COMM_PAGE32_BASE_ADDRESS
, PMAP_COMM_PAGE_PTE_TEMPLATE
);
8835 /* For manipulation in kernel, go straight to physical page */
8836 sharedpage_rw_addr
= phystokv(pa
);
8837 return((vm_map_address_t
)sharedpage_rw_addr
);
/*
 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
 * with user controlled TTEs.
 */
8844 #if (ARM_PGSHIFT == 14) || __ARM64_TWO_LEVEL_PMAP__
8845 static_assert((_COMM_PAGE64_BASE_ADDRESS
& ~ARM_TT_L2_OFFMASK
) >= MACH_VM_MAX_ADDRESS
);
8846 static_assert((_COMM_PAGE32_BASE_ADDRESS
& ~ARM_TT_L2_OFFMASK
) >= VM_MAX_ADDRESS
);
8847 #elif (ARM_PGSHIFT == 12)
8848 static_assert((_COMM_PAGE64_BASE_ADDRESS
& ~ARM_TT_L1_OFFMASK
) >= MACH_VM_MAX_ADDRESS
);
8849 static_assert((_COMM_PAGE32_BASE_ADDRESS
& ~ARM_TT_L1_OFFMASK
) >= VM_MAX_ADDRESS
);
8851 #error Nested shared page mapping is unsupported on this config
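/*
 * Illustrative arithmetic (not part of the original source): with 16KB pages
 * the commpage is nested at L2, so the asserts above round the commpage
 * address down to an L2 boundary (ARM_TT_L2_OFFMASK covers a 32MB block) and
 * require that the whole block sits above the user VA limit.  With 4KB pages
 * the same check is done at L1 granularity (1GB blocks).
 */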
8855 pmap_insert_sharedpage_internal(
8858 #if (ARM_PGSHIFT == 14) && !__ARM64_TWO_LEVEL_PMAP__
8861 vm_offset_t sharedpage_vaddr
;
8862 pt_entry_t
*ttep
, *src_ttep
;
8863 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
8864 #error We assume a single page.
8867 if (pmap_is_64bit(pmap
)) {
8868 sharedpage_vaddr
= _COMM_PAGE64_BASE_ADDRESS
;
8870 sharedpage_vaddr
= _COMM_PAGE32_BASE_ADDRESS
;
	/*
	 * For 4KB pages, we can force the commpage to nest at the level one
	 * page table, as each entry is 1GB (i.e., there will be no overlap
	 * with regular userspace mappings).  For 16KB pages, each level one
	 * entry is 64GB, so we must go to the second level entry (32MB) in
	 * order to nest.
	 */
8882 #if (ARM_PGSHIFT == 12)
8883 #if __ARM64_TWO_LEVEL_PMAP__
8884 #error A two level page table with a page shift of 12 is not currently supported
8886 /* Just slam in the L1 entry. */
8887 ttep
= pmap_tt1e(pmap
, sharedpage_vaddr
);
8889 if (*ttep
!= ARM_PTE_EMPTY
) {
8890 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__
);
8893 src_ttep
= pmap_tt1e(sharedpage_pmap
, sharedpage_vaddr
);
8894 #elif (ARM_PGSHIFT == 14)
8895 #if !__ARM64_TWO_LEVEL_PMAP__
8896 /* Allocate for the L2 entry if necessary, and slam it into place. */
8898 * As long as we are use a three level page table, the first level
8899 * should always exist, so we don't need to check for it.
8901 while (*pmap_tt1e(pmap
, sharedpage_vaddr
) == ARM_PTE_EMPTY
) {
8904 kr
= pmap_expand(pmap
, _COMM_PAGE32_BASE_ADDRESS
, 0, PMAP_TT_L2_LEVEL
);
8906 if (kr
!= KERN_SUCCESS
) {
8907 panic("Failed to pmap_expand for 32-bit commpage, pmap=%p", pmap
);
8914 ttep
= pmap_tt2e(pmap
, sharedpage_vaddr
);
8916 if (*ttep
!= ARM_PTE_EMPTY
) {
8917 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__
);
8920 src_ttep
= pmap_tt2e(sharedpage_pmap
, sharedpage_vaddr
);
8924 #ifndef __ARM_L1_PTW__
8925 CleanPoU_DcacheRegion((vm_offset_t
) ttep
, sizeof(tt_entry_t
));
8927 /* TODO: Should we flush in the 64-bit case? */
8928 flush_mmu_tlb_region(sharedpage_vaddr
, PAGE_SIZE
);
8930 #if (ARM_PGSHIFT == 12) && !__ARM64_TWO_LEVEL_PMAP__
8931 flush_mmu_tlb_entry(tlbi_addr(sharedpage_vaddr
& ~ARM_TT_L1_OFFMASK
) | tlbi_asid(pmap
->asid
));
8932 #elif (ARM_PGSHIFT == 14)
8933 flush_mmu_tlb_entry(tlbi_addr(sharedpage_vaddr
& ~ARM_TT_L2_OFFMASK
) | tlbi_asid(pmap
->asid
));
8940 pmap_sharedpage_flush_32_to_64(
8943 flush_mmu_tlb_region(_COMM_PAGE32_BASE_ADDRESS
, PAGE_SIZE
);
8947 pmap_unmap_sharedpage(
8951 vm_offset_t sharedpage_vaddr
;
8953 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
8954 #error We assume a single page.
8957 if (pmap_is_64bit(pmap
)) {
8958 sharedpage_vaddr
= _COMM_PAGE64_BASE_ADDRESS
;
8960 sharedpage_vaddr
= _COMM_PAGE32_BASE_ADDRESS
;
8963 #if (ARM_PGSHIFT == 12)
8964 #if __ARM64_TWO_LEVEL_PMAP__
8965 #error A two level page table with a page shift of 12 is not currently supported
8967 ttep
= pmap_tt1e(pmap
, sharedpage_vaddr
);
8973 /* It had better be mapped to the shared page */
8974 if (*ttep
!= ARM_TTE_EMPTY
&& *ttep
!= *pmap_tt1e(sharedpage_pmap
, sharedpage_vaddr
)) {
8975 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__
);
8977 #elif (ARM_PGSHIFT == 14)
8978 ttep
= pmap_tt2e(pmap
, sharedpage_vaddr
);
8984 /* It had better be mapped to the shared page */
8985 if (*ttep
!= ARM_TTE_EMPTY
&& *ttep
!= *pmap_tt2e(sharedpage_pmap
, sharedpage_vaddr
)) {
8986 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__
);
8990 *ttep
= ARM_TTE_EMPTY
;
8991 flush_mmu_tlb_region(sharedpage_vaddr
, PAGE_SIZE
);
8993 #if (ARM_PGSHIFT == 12)
8994 #if __ARM64_TWO_LEVEL_PMAP__
8995 #error A two level page table with a page shift of 12 is not currently supported
8997 flush_mmu_tlb_entry(tlbi_addr(sharedpage_vaddr
& ~ARM_TT_L1_OFFMASK
) | tlbi_asid(pmap
->asid
));
8998 #elif (ARM_PGSHIFT == 14)
8999 flush_mmu_tlb_entry(tlbi_addr(sharedpage_vaddr
& ~ARM_TT_L2_OFFMASK
) | tlbi_asid(pmap
->asid
));
9004 pmap_insert_sharedpage(
9007 pmap_insert_sharedpage_internal(pmap
);
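/*
 * Illustrative sketch (not part of the original source): the commpage is
 * nested into a user pmap once, typically when the task's address space is
 * set up, and removed again on teardown.  The surrounding call site is
 * hypothetical.
 */
static void
example_commpage_lifecycle(pmap_t user_pmap)
{
	/* Share the commpage translation tables with this pmap. */
	pmap_insert_sharedpage(user_pmap);

	/* ... task runs ... */

	/* Drop the nested commpage TTE before the pmap is destroyed. */
	pmap_unmap_sharedpage(user_pmap);
}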
9014 return (pmap
->is_64bit
);
9019 /* ARMTODO -- an implementation that accounts for
9020 * holes in the physical map, if any.
9025 return pa_valid(ptoa(pn
));
9029 pmap_is_empty_internal(
9031 vm_map_offset_t va_start
,
9032 vm_map_offset_t va_end
)
9034 vm_map_offset_t block_start
, block_end
;
9041 if ((pmap
!= kernel_pmap
) && (not_in_kdp
)) {
9045 #if (__ARM_VMSA__ == 7)
9046 if (tte_index(pmap
, va_end
) >= pmap
->tte_index_max
) {
9047 if ((pmap
!= kernel_pmap
) && (not_in_kdp
)) {
9053 block_start
= va_start
;
9054 tte_p
= pmap_tte(pmap
, block_start
);
9055 while (block_start
< va_end
) {
9056 block_end
= (block_start
+ ARM_TT_L1_SIZE
) & ~(ARM_TT_L1_OFFMASK
);
9057 if (block_end
> va_end
)
9060 if ((*tte_p
& ARM_TTE_TYPE_MASK
) != 0) {
9061 vm_map_offset_t offset
;
9062 ppnum_t phys_page
= 0;
9064 for (offset
= block_start
;
9066 offset
+= ARM_PGBYTES
) {
9067 // This does a pmap_find_phys() lookup but assumes lock is held
9068 phys_page
= pmap_vtophys(pmap
, offset
);
9070 if ((pmap
!= kernel_pmap
) && (not_in_kdp
)) {
9078 block_start
= block_end
;
9082 block_start
= va_start
;
9084 while (block_start
< va_end
) {
9085 pt_entry_t
*bpte_p
, *epte_p
;
9088 block_end
= (block_start
+ ARM_TT_L2_SIZE
) & ~ARM_TT_L2_OFFMASK
;
9089 if (block_end
> va_end
)
9092 tte_p
= pmap_tt2e(pmap
, block_start
);
9093 if ((tte_p
!= PT_ENTRY_NULL
)
9094 && ((*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
)) {
9096 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
9097 bpte_p
= &pte_p
[tt3_index(pmap
, block_start
)];
9098 epte_p
= bpte_p
+ (((block_end
- block_start
) & ARM_TT_L3_INDEX_MASK
) >> ARM_TT_L3_SHIFT
);
9100 for (pte_p
= bpte_p
; pte_p
< epte_p
; pte_p
++) {
9101 if (*pte_p
!= ARM_PTE_EMPTY
) {
9102 if ((pmap
!= kernel_pmap
) && (not_in_kdp
)) {
9109 block_start
= block_end
;
9113 if ((pmap
!= kernel_pmap
) && (not_in_kdp
)) {
9123 vm_map_offset_t va_start
,
9124 vm_map_offset_t va_end
)
9126 return pmap_is_empty_internal(pmap
, va_start
, va_end
);
9129 vm_map_offset_t
pmap_max_offset(
9130 boolean_t is64 __unused
,
9131 unsigned int option
)
9133 vm_map_offset_t max_offset_ret
= 0;
9135 #if defined(__arm64__)
9137 vm_map_offset_t min_max_offset
= SHARED_REGION_BASE_ARM64
+ SHARED_REGION_SIZE_ARM64
+ 0x20000000; // end of shared region + 512MB for various purposes
9138 if (option
== ARM_PMAP_MAX_OFFSET_DEFAULT
) {
9139 max_offset_ret
= arm64_pmap_max_offset_default
;
9140 } else if (option
== ARM_PMAP_MAX_OFFSET_MIN
) {
9141 max_offset_ret
= min_max_offset
;
9142 } else if (option
== ARM_PMAP_MAX_OFFSET_MAX
) {
9143 max_offset_ret
= MACH_VM_MAX_ADDRESS
;
9144 } else if (option
== ARM_PMAP_MAX_OFFSET_DEVICE
) {
9145 if (arm64_pmap_max_offset_default
) {
9146 max_offset_ret
= arm64_pmap_max_offset_default
;
9147 } else if (max_mem
> 0xC0000000) {
9148 max_offset_ret
= 0x0000000318000000ULL
; // Max offset is 12.375GB for devices with > 3GB of memory
9149 } else if (max_mem
> 0x40000000) {
9150 max_offset_ret
= 0x0000000218000000ULL
; // Max offset is 8.375GB for devices with > 1GB and <= 3GB of memory
9152 max_offset_ret
= min_max_offset
;
9154 } else if (option
== ARM_PMAP_MAX_OFFSET_JUMBO
) {
9155 if (arm64_pmap_max_offset_default
) {
9156 // Allow the boot-arg to override jumbo size
9157 max_offset_ret
= arm64_pmap_max_offset_default
;
9159 max_offset_ret
= MACH_VM_MAX_ADDRESS
; // Max offset is MACH_VM_MAX_ADDRESS for pmaps with special "jumbo" blessing
9162 panic("pmap_max_offset illegal option 0x%x\n", option
);
9165 assert(max_offset_ret
>= min_max_offset
);
9166 assert(max_offset_ret
<= MACH_VM_MAX_ADDRESS
);
9167 return max_offset_ret
;
9169 if (option
== ARM_PMAP_MAX_OFFSET_DEFAULT
) {
9170 max_offset_ret
= arm_pmap_max_offset_default
;
9171 } else if (option
== ARM_PMAP_MAX_OFFSET_MIN
) {
9172 max_offset_ret
= 0x66000000;
9173 } else if (option
== ARM_PMAP_MAX_OFFSET_MAX
) {
9174 max_offset_ret
= VM_MAX_ADDRESS
;
9175 } else if (option
== ARM_PMAP_MAX_OFFSET_DEVICE
) {
9176 if (arm_pmap_max_offset_default
) {
9177 max_offset_ret
= arm_pmap_max_offset_default
;
9178 } else if (max_mem
> 0x20000000) {
9179 max_offset_ret
= 0x80000000;
9181 max_offset_ret
= 0x66000000;
9184 panic("pmap_max_offset illegal option 0x%x\n", option
);
9187 assert(max_offset_ret
<= VM_MAX_ADDRESS
);
9188 return max_offset_ret
;
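/*
 * Illustrative sketch (not part of the original source): callers pick a VA
 * ceiling for a new address space by policy.  The option constants are the
 * ones handled above; the wrapper name is hypothetical, and any boot-arg
 * override is folded in by pmap_max_offset() itself.
 */
static vm_map_offset_t
example_default_user_va_max(boolean_t is64)
{
	return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEFAULT);
}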
9194 * Constrain DTrace copyin/copyout actions
9196 extern kern_return_t
dtrace_copyio_preflight(addr64_t
);
9197 extern kern_return_t
dtrace_copyio_postflight(addr64_t
);
9199 kern_return_t
dtrace_copyio_preflight(
9200 __unused addr64_t va
)
9202 if (current_map() == kernel_map
)
9203 return KERN_FAILURE
;
9205 return KERN_SUCCESS
;
9208 kern_return_t
dtrace_copyio_postflight(
9209 __unused addr64_t va
)
9211 return KERN_SUCCESS
;
9213 #endif /* CONFIG_DTRACE */
9217 pmap_flush_context_init(__unused pmap_flush_context
*pfc
)
9224 __unused pmap_flush_context
*cpus_to_flush
)
9226 /* not implemented yet */
9231 pmap_query_resident_internal(
9233 vm_map_address_t start
,
9234 vm_map_address_t end
,
9235 mach_vm_size_t
*resident_bytes_p
,
9236 mach_vm_size_t
*compressed_bytes_p
)
9238 mach_vm_size_t resident_bytes
= 0;
9239 mach_vm_size_t compressed_bytes
= 0;
9241 pt_entry_t
*bpte
, *epte
;
9249 /* Ensure that this request is valid, and addresses exactly one TTE. */
9250 assert(!(start
% ARM_PGBYTES
));
9251 assert(!(end
% ARM_PGBYTES
));
9252 assert(end
>= start
);
9253 assert((end
- start
) <= (PTE_PGENTRIES
* ARM_PGBYTES
));
9256 tte_p
= pmap_tte(pmap
, start
);
9257 if (tte_p
== (tt_entry_t
*) NULL
) {
9261 if ((*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
9263 #if (__ARM_VMSA__ == 7)
9264 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
9265 bpte
= &pte_p
[ptenum(start
)];
9266 epte
= bpte
+ atop(end
- start
);
9268 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
9269 bpte
= &pte_p
[tt3_index(pmap
, start
)];
9270 epte
= bpte
+ ((end
- start
) >> ARM_TT_L3_SHIFT
);
9273 for (; bpte
< epte
; bpte
++) {
9274 if (ARM_PTE_IS_COMPRESSED(*bpte
)) {
9275 compressed_bytes
+= ARM_PGBYTES
;
9276 } else if (pa_valid(pte_to_pa(*bpte
))) {
9277 resident_bytes
+= ARM_PGBYTES
;
9283 if (compressed_bytes_p
) {
9284 *compressed_bytes_p
+= compressed_bytes
;
9287 if (resident_bytes_p
) {
9288 *resident_bytes_p
+= resident_bytes
;
9295 pmap_query_resident(
9297 vm_map_address_t start
,
9298 vm_map_address_t end
,
9299 mach_vm_size_t
*compressed_bytes_p
)
9301 mach_vm_size_t resident_bytes
;
9302 mach_vm_size_t compressed_bytes
;
9303 vm_map_address_t va
;
9306 if (pmap
== PMAP_NULL
) {
9307 if (compressed_bytes_p
) {
9308 *compressed_bytes_p
= 0;
9314 compressed_bytes
= 0;
9316 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT
) | DBG_FUNC_START
,
9317 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(start
),
9318 VM_KERNEL_ADDRHIDE(end
));
9324 l
= ((va
+ ARM_TT_TWIG_SIZE
) & ~ARM_TT_TWIG_OFFMASK
);
9328 if (!pmap_query_resident_internal(pmap
, va
, l
, &resident_bytes
, compressed_bytes_p
)) {
9335 if (compressed_bytes_p
) {
9336 *compressed_bytes_p
= compressed_bytes
;
9339 PMAP_TRACE(PMAP_CODE(PMAP__QUERY_RESIDENT
) | DBG_FUNC_END
,
9342 return resident_bytes
;
9346 extern int pmap_ledgers_panic
;
9351 ledger_amount_t bal
;
9356 if (pmap
->pmap_pid
== 0) {
		/*
		 * This pmap was not or is no longer fully associated
		 * with a task (e.g. the old pmap after a fork()/exec() or
		 * spawn()).  Its "ledger" still points at a task that is
		 * now using a different (and active) address space, so
		 * we can't check that all the pmap ledgers are balanced here.
		 *
		 * If the "pid" is set, that means that we went through
		 * pmap_set_process() in task_terminate_internal(), so
		 * this task's ledger should not have been re-used and
		 * all the pmap ledgers should be back to 0.
		 */
9373 pid
= pmap
->pmap_pid
;
9374 procname
= pmap
->pmap_procname
;
9376 pmap_ledgers_drift
.num_pmaps_checked
++;
9378 ledger_get_balance(pmap
->ledger
,
9379 task_ledgers
.phys_footprint
,
9382 #if DEVELOPMENT || DEBUG
9383 // if (!pmap->footprint_was_suspended)
9384 #endif /* DEVELOPMENT || DEBUG */
9386 printf("LEDGER BALANCE proc %d (%s) "
9387 "\"phys_footprint\" = %lld\n",
9388 pid
, procname
, bal
);
9390 pmap_ledgers_drift
.phys_footprint_over
++;
9391 pmap_ledgers_drift
.phys_footprint_over_total
+= bal
;
9392 if (bal
> pmap_ledgers_drift
.phys_footprint_over_max
) {
9393 pmap_ledgers_drift
.phys_footprint_over_max
= bal
;
9396 pmap_ledgers_drift
.phys_footprint_under
++;
9397 pmap_ledgers_drift
.phys_footprint_under_total
+= bal
;
9398 if (bal
< pmap_ledgers_drift
.phys_footprint_under_max
) {
9399 pmap_ledgers_drift
.phys_footprint_under_max
= bal
;
9403 ledger_get_balance(pmap
->ledger
,
9404 task_ledgers
.internal
,
9408 printf("LEDGER BALANCE proc %d (%s) "
9409 "\"internal\" = %lld\n",
9410 pid
, procname
, bal
);
9412 pmap_ledgers_drift
.internal_over
++;
9413 pmap_ledgers_drift
.internal_over_total
+= bal
;
9414 if (bal
> pmap_ledgers_drift
.internal_over_max
) {
9415 pmap_ledgers_drift
.internal_over_max
= bal
;
9418 pmap_ledgers_drift
.internal_under
++;
9419 pmap_ledgers_drift
.internal_under_total
+= bal
;
9420 if (bal
< pmap_ledgers_drift
.internal_under_max
) {
9421 pmap_ledgers_drift
.internal_under_max
= bal
;
9425 ledger_get_balance(pmap
->ledger
,
9426 task_ledgers
.internal_compressed
,
9430 printf("LEDGER BALANCE proc %d (%s) "
9431 "\"internal_compressed\" = %lld\n",
9432 pid
, procname
, bal
);
9434 pmap_ledgers_drift
.internal_compressed_over
++;
9435 pmap_ledgers_drift
.internal_compressed_over_total
+= bal
;
9436 if (bal
> pmap_ledgers_drift
.internal_compressed_over_max
) {
9437 pmap_ledgers_drift
.internal_compressed_over_max
= bal
;
9440 pmap_ledgers_drift
.internal_compressed_under
++;
9441 pmap_ledgers_drift
.internal_compressed_under_total
+= bal
;
9442 if (bal
< pmap_ledgers_drift
.internal_compressed_under_max
) {
9443 pmap_ledgers_drift
.internal_compressed_under_max
= bal
;
9447 ledger_get_balance(pmap
->ledger
,
9448 task_ledgers
.iokit_mapped
,
9452 printf("LEDGER BALANCE proc %d (%s) "
9453 "\"iokit_mapped\" = %lld\n",
9454 pid
, procname
, bal
);
9456 pmap_ledgers_drift
.iokit_mapped_over
++;
9457 pmap_ledgers_drift
.iokit_mapped_over_total
+= bal
;
9458 if (bal
> pmap_ledgers_drift
.iokit_mapped_over_max
) {
9459 pmap_ledgers_drift
.iokit_mapped_over_max
= bal
;
9462 pmap_ledgers_drift
.iokit_mapped_under
++;
9463 pmap_ledgers_drift
.iokit_mapped_under_total
+= bal
;
9464 if (bal
< pmap_ledgers_drift
.iokit_mapped_under_max
) {
9465 pmap_ledgers_drift
.iokit_mapped_under_max
= bal
;
9469 ledger_get_balance(pmap
->ledger
,
9470 task_ledgers
.alternate_accounting
,
9474 printf("LEDGER BALANCE proc %d (%s) "
9475 "\"alternate_accounting\" = %lld\n",
9476 pid
, procname
, bal
);
9478 pmap_ledgers_drift
.alternate_accounting_over
++;
9479 pmap_ledgers_drift
.alternate_accounting_over_total
+= bal
;
9480 if (bal
> pmap_ledgers_drift
.alternate_accounting_over_max
) {
9481 pmap_ledgers_drift
.alternate_accounting_over_max
= bal
;
9484 pmap_ledgers_drift
.alternate_accounting_under
++;
9485 pmap_ledgers_drift
.alternate_accounting_under_total
+= bal
;
9486 if (bal
< pmap_ledgers_drift
.alternate_accounting_under_max
) {
9487 pmap_ledgers_drift
.alternate_accounting_under_max
= bal
;
9491 ledger_get_balance(pmap
->ledger
,
9492 task_ledgers
.alternate_accounting_compressed
,
9496 printf("LEDGER BALANCE proc %d (%s) "
9497 "\"alternate_accounting_compressed\" = %lld\n",
9498 pid
, procname
, bal
);
9500 pmap_ledgers_drift
.alternate_accounting_compressed_over
++;
9501 pmap_ledgers_drift
.alternate_accounting_compressed_over_total
+= bal
;
9502 if (bal
> pmap_ledgers_drift
.alternate_accounting_compressed_over_max
) {
9503 pmap_ledgers_drift
.alternate_accounting_compressed_over_max
= bal
;
9506 pmap_ledgers_drift
.alternate_accounting_compressed_under
++;
9507 pmap_ledgers_drift
.alternate_accounting_compressed_under_total
+= bal
;
9508 if (bal
< pmap_ledgers_drift
.alternate_accounting_compressed_under_max
) {
9509 pmap_ledgers_drift
.alternate_accounting_compressed_under_max
= bal
;
9513 ledger_get_balance(pmap
->ledger
,
9514 task_ledgers
.page_table
,
9518 printf("LEDGER BALANCE proc %d (%s) "
9519 "\"page_table\" = %lld\n",
9520 pid
, procname
, bal
);
9522 pmap_ledgers_drift
.page_table_over
++;
9523 pmap_ledgers_drift
.page_table_over_total
+= bal
;
9524 if (bal
> pmap_ledgers_drift
.page_table_over_max
) {
9525 pmap_ledgers_drift
.page_table_over_max
= bal
;
9528 pmap_ledgers_drift
.page_table_under
++;
9529 pmap_ledgers_drift
.page_table_under_total
+= bal
;
9530 if (bal
< pmap_ledgers_drift
.page_table_under_max
) {
9531 pmap_ledgers_drift
.page_table_under_max
= bal
;
9535 ledger_get_balance(pmap
->ledger
,
9536 task_ledgers
.purgeable_volatile
,
9540 printf("LEDGER BALANCE proc %d (%s) "
9541 "\"purgeable_volatile\" = %lld\n",
9542 pid
, procname
, bal
);
9544 pmap_ledgers_drift
.purgeable_volatile_over
++;
9545 pmap_ledgers_drift
.purgeable_volatile_over_total
+= bal
;
9546 if (bal
> pmap_ledgers_drift
.purgeable_volatile_over_max
) {
9547 pmap_ledgers_drift
.purgeable_volatile_over_max
= bal
;
9550 pmap_ledgers_drift
.purgeable_volatile_under
++;
9551 pmap_ledgers_drift
.purgeable_volatile_under_total
+= bal
;
9552 if (bal
< pmap_ledgers_drift
.purgeable_volatile_under_max
) {
9553 pmap_ledgers_drift
.purgeable_volatile_under_max
= bal
;
9557 ledger_get_balance(pmap
->ledger
,
9558 task_ledgers
.purgeable_nonvolatile
,
9562 printf("LEDGER BALANCE proc %d (%s) "
9563 "\"purgeable_nonvolatile\" = %lld\n",
9564 pid
, procname
, bal
);
9566 pmap_ledgers_drift
.purgeable_nonvolatile_over
++;
9567 pmap_ledgers_drift
.purgeable_nonvolatile_over_total
+= bal
;
9568 if (bal
> pmap_ledgers_drift
.purgeable_nonvolatile_over_max
) {
9569 pmap_ledgers_drift
.purgeable_nonvolatile_over_max
= bal
;
9572 pmap_ledgers_drift
.purgeable_nonvolatile_under
++;
9573 pmap_ledgers_drift
.purgeable_nonvolatile_under_total
+= bal
;
9574 if (bal
< pmap_ledgers_drift
.purgeable_nonvolatile_under_max
) {
9575 pmap_ledgers_drift
.purgeable_nonvolatile_under_max
= bal
;
9579 ledger_get_balance(pmap
->ledger
,
9580 task_ledgers
.purgeable_volatile_compressed
,
9584 printf("LEDGER BALANCE proc %d (%s) "
9585 "\"purgeable_volatile_compressed\" = %lld\n",
9586 pid
, procname
, bal
);
9588 pmap_ledgers_drift
.purgeable_volatile_compressed_over
++;
9589 pmap_ledgers_drift
.purgeable_volatile_compressed_over_total
+= bal
;
9590 if (bal
> pmap_ledgers_drift
.purgeable_volatile_compressed_over_max
) {
9591 pmap_ledgers_drift
.purgeable_volatile_compressed_over_max
= bal
;
9594 pmap_ledgers_drift
.purgeable_volatile_compressed_under
++;
9595 pmap_ledgers_drift
.purgeable_volatile_compressed_under_total
+= bal
;
9596 if (bal
< pmap_ledgers_drift
.purgeable_volatile_compressed_under_max
) {
9597 pmap_ledgers_drift
.purgeable_volatile_compressed_under_max
= bal
;
9601 ledger_get_balance(pmap
->ledger
,
9602 task_ledgers
.purgeable_nonvolatile_compressed
,
9606 printf("LEDGER BALANCE proc %d (%s) "
9607 "\"purgeable_nonvolatile_compressed\" = %lld\n",
9608 pid
, procname
, bal
);
9610 pmap_ledgers_drift
.purgeable_nonvolatile_compressed_over
++;
9611 pmap_ledgers_drift
.purgeable_nonvolatile_compressed_over_total
+= bal
;
9612 if (bal
> pmap_ledgers_drift
.purgeable_nonvolatile_compressed_over_max
) {
9613 pmap_ledgers_drift
.purgeable_nonvolatile_compressed_over_max
= bal
;
9616 pmap_ledgers_drift
.purgeable_nonvolatile_compressed_under
++;
9617 pmap_ledgers_drift
.purgeable_nonvolatile_compressed_under_total
+= bal
;
9618 if (bal
< pmap_ledgers_drift
.purgeable_nonvolatile_compressed_under_max
) {
9619 pmap_ledgers_drift
.purgeable_nonvolatile_compressed_under_max
= bal
;
9625 if (pmap_ledgers_panic
&&
9626 pmap
->pmap_stats_assert
) {
9627 panic("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
9628 pmap
, pid
, procname
);
9630 printf("pmap_destroy(%p) %d[%s] has imbalanced ledgers\n",
9631 pmap
, pid
, procname
);
9635 PMAP_STATS_ASSERTF(pmap
->stats
.resident_count
== 0, pmap
, "stats.resident_count %d", pmap
->stats
.resident_count
);
9637 PMAP_STATS_ASSERTF(pmap
->stats
.wired_count
== 0, pmap
, "stats.wired_count %d", pmap
->stats
.wired_count
);
9639 PMAP_STATS_ASSERTF(pmap
->stats
.device
== 0, pmap
, "stats.device %d", pmap
->stats
.device
);
9640 PMAP_STATS_ASSERTF(pmap
->stats
.internal
== 0, pmap
, "stats.internal %d", pmap
->stats
.internal
);
9641 PMAP_STATS_ASSERTF(pmap
->stats
.external
== 0, pmap
, "stats.external %d", pmap
->stats
.external
);
9642 PMAP_STATS_ASSERTF(pmap
->stats
.reusable
== 0, pmap
, "stats.reusable %d", pmap
->stats
.reusable
);
9643 PMAP_STATS_ASSERTF(pmap
->stats
.compressed
== 0, pmap
, "stats.compressed %lld", pmap
->stats
.compressed
);
9645 #endif /* MACH_ASSERT */
void pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
{
}
#define PROF_START  uint64_t t, nanot;\
                    t = mach_absolute_time();

#define PROF_END    absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
                    kprintf("%s: took %llu ns\n", __func__, nanot);

#define PMAP_PGTRACE_LOCK(p)                                \
    do {                                                    \
        *(p) = ml_set_interrupts_enabled(false);            \
        if (simple_lock_try(&(pmap_pgtrace.lock))) break;   \
        ml_set_interrupts_enabled(*(p));                    \
    } while (true)

#define PMAP_PGTRACE_UNLOCK(p)                  \
    do {                                        \
        simple_unlock(&(pmap_pgtrace.lock));    \
        ml_set_interrupts_enabled(*(p));        \
    } while (0)

#define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
    do {                                    \
        *(pte_p) = (pte_entry);             \
    } while (0)

#define PGTRACE_MAX_MAP 16      // maximum supported va to same pa
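/*
 * Typical critical-section shape for the lock macros above (illustrative
 * sketch only; 'ints' receives the caller's previous interrupt state):
 *
 *     bool ints;
 *     PMAP_PGTRACE_LOCK(&ints);    // spins with interrupts disabled until acquired
 *     ... touch pmap_pgtrace state ...
 *     PMAP_PGTRACE_UNLOCK(&ints);  // drop the lock, restore interrupt state
 */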
typedef enum {
    UNDEFINED,
    PA_UNDEFINED,
    VA_UNDEFINED,
    DEFINED
} pmap_pgtrace_page_state_t;

typedef struct {
    queue_chain_t   chain;

    /*
     * pa        - physical address of the traced page
     * maps      - list of va maps to upper pa
     * map_pool  - map pool
     * map_waste - waste can
     * state     - state of this page info
     */
    pmap_paddr_t                pa;
    queue_head_t                maps;
    queue_head_t                map_pool;
    queue_head_t                map_waste;
    pmap_pgtrace_page_state_t   state;
} pmap_pgtrace_page_t;

static struct {
    /*
     * pages - list of tracing page info
     */
    queue_head_t            pages;
    decl_simple_lock_data(, lock);
} pmap_pgtrace = {};
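/*
 * One-time setup: initialize the tracing page list and its spinlock, and let
 * the "pgtrace" boot-arg toggle tracing globally.
 */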
static void pmap_pgtrace_init(void)
{
    queue_init(&(pmap_pgtrace.pages));
    simple_lock_init(&(pmap_pgtrace.lock), 0);

    boolean_t enabled;

    if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
        pgtrace_enabled = enabled;
    }
}
// find a page with given pa - pmap_pgtrace should be locked
inline static pmap_pgtrace_page_t *pmap_pgtrace_find_page(pmap_paddr_t pa)
{
    queue_head_t *q = &(pmap_pgtrace.pages);
    pmap_pgtrace_page_t *p;

    queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
        if (p->state == UNDEFINED) {
            continue;
        }
        if (p->state == PA_UNDEFINED) {
            continue;
        }
        if (p->pa == pa) {
            return p;
        }
    }

    return NULL;
}
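/*
 * Clone layout used by the functions below (derived from the code, for
 * reference): each pmap_pgtrace_map_t owns three consecutive kernel VAs,
 * cva[0..2], shadowing the page before the traced page, the traced page
 * itself, and the page after it:
 *
 *     cva[0] -> ova - ARM_PGBYTES   (pre)
 *     cva[1] -> ova                 (traced page)
 *     cva[2] -> ova + ARM_PGBYTES   (post)
 *
 * cva_spte[0..2] keeps the kernel PTEs that were there originally so the
 * clones can be reverted when tracing is torn down.
 */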
// enter clone of given pmap, va page and range - pmap should be locked
static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
{
    bool ints, found = false;
    queue_head_t *q = &(pmap_pgtrace.pages);
    pmap_paddr_t pa_page;
    pt_entry_t *ptep, *cptep;
    pmap_pgtrace_page_t *p;

    PMAP_ASSERT_LOCKED(pmap);
    assert(va_page == arm_trunc_page(va_page));

    PMAP_PGTRACE_LOCK(&ints);

    ptep = pmap_pte(pmap, va_page);

    // target pte should exist
    if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return false;
    }

    queue_head_t *mapq;
    queue_head_t *mappool;
    pmap_pgtrace_map_t *map = NULL;

    pa_page = pte_to_pa(*ptep);

    // find if we have a page info defined for this
    queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
        mapq = &(p->maps);
        mappool = &(p->map_pool);

        switch (p->state) {
        case PA_UNDEFINED:
            queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
                if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
                    p->pa = pa_page;
                    map->range.start = start;
                    map->range.end = end;
                    found = true;
                    break;
                }
            }
            break;

        case VA_UNDEFINED:
            if (p->pa != pa_page) {
                break;
            }
            queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
                if (map->cloned == false) {
                    map->pmap = pmap;
                    map->ova = va_page;
                    map->range.start = start;
                    map->range.end = end;
                    found = true;
                    break;
                }
            }
            break;

        case DEFINED:
            if (p->pa != pa_page) {
                break;
            }
            queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
                if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
                    kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
                    break;
                } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
                    kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
                    break;
                } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
                    // range should be already defined as well
                    found = true;
                    break;
                }
            }
            break;

        default:
            panic("invalid state p->state=%x\n", p->state);
        }

        if (found == true) {
            break;
        }
    }

    // do not clone if no page info found
    if (found == false) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return false;
    }

    // copy pre, target and post ptes to clone ptes
    for (int i = 0; i < 3; i++) {
        ptep = pmap_pte(pmap, va_page + (i-1)*ARM_PGBYTES);
        cptep = pmap_pte(kernel_pmap, map->cva[i]);
        assert(cptep != NULL);
        if (ptep == NULL) {
            PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
        } else {
            PGTRACE_WRITE_PTE(cptep, *ptep);
        }
        PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i]+ARM_PGBYTES);
    }

    // get ptes for original and clone
    ptep = pmap_pte(pmap, va_page);
    cptep = pmap_pte(kernel_pmap, map->cva[1]);

    // invalidate original pte and mark it as a pgtrace page
    PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
    PMAP_UPDATE_TLBS(pmap, map->ova, map->ova+ARM_PGBYTES);

    map->cloned = true;
    p->state = DEFINED;

    kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);

    PMAP_PGTRACE_UNLOCK(&ints);

    return true;
}
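/*
 * Once cloned, the original mapping has ARM_PTE_PGTRACE set and its valid bit
 * cleared, so every access to the traced page faults and is emulated against
 * the clone mapping (cva[1]) by pmap_pgtrace_fault() below.
 */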
// This function removes trace bit and validates the pte if applicable. Pmap must be locked.
static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
{
    bool ints, found = false;
    pmap_pgtrace_page_t *p;
    pt_entry_t *ptep;

    PMAP_PGTRACE_LOCK(&ints);

    // we must have this page info
    p = pmap_pgtrace_find_page(pa);
    if (p == NULL) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return;
    }

    // find matching map
    queue_head_t *mapq = &(p->maps);
    queue_head_t *mappool = &(p->map_pool);
    pmap_pgtrace_map_t *map;

    queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
        if (map->pmap == pmap && map->ova == va) {
            found = true;
            break;
        }
    }

    if (found == false) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return;
    }

    if (map->cloned == true) {
        // Restore back the pte to original state
        ptep = pmap_pte(pmap, map->ova);
        assert(ptep);
        PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
        PMAP_UPDATE_TLBS(pmap, va, va+ARM_PGBYTES);

        // revert clone pages
        for (int i = 0; i < 3; i++) {
            ptep = pmap_pte(kernel_pmap, map->cva[i]);
            assert(ptep != NULL);
            PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
            PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i]+ARM_PGBYTES);
        }
    }

    queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
    map->ova = (vm_map_offset_t)NULL;
    map->cloned = false;
    queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);

    kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);

    PMAP_PGTRACE_UNLOCK(&ints);
}
// remove all clones of given pa - pmap must be locked
static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
{
    bool ints;
    pmap_pgtrace_page_t *p;
    pt_entry_t *ptep;

    PMAP_PGTRACE_LOCK(&ints);

    // we must have this page info
    p = pmap_pgtrace_find_page(pa);
    if (p == NULL) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return;
    }

    queue_head_t *mapq = &(p->maps);
    queue_head_t *mappool = &(p->map_pool);
    queue_head_t *mapwaste = &(p->map_waste);
    pmap_pgtrace_map_t *map;

    // move maps to waste
    while (!queue_empty(mapq)) {
        queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
        queue_enter_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
    }

    PMAP_PGTRACE_UNLOCK(&ints);

    // sanitize maps in waste
    queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
        if (map->cloned == true) {
            PMAP_LOCK(map->pmap);

            // restore back original pte
            ptep = pmap_pte(map->pmap, map->ova);
            assert(ptep);
            PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
            PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova+ARM_PGBYTES);

            // revert clone ptes
            for (int i = 0; i < 3; i++) {
                ptep = pmap_pte(kernel_pmap, map->cva[i]);
                assert(ptep != NULL);
                PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
                PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i]+ARM_PGBYTES);
            }

            PMAP_UNLOCK(map->pmap);
        }

        map->ova = (vm_map_offset_t)NULL;
        map->cloned = false;
    }

    PMAP_PGTRACE_LOCK(&ints);

    // recycle maps back to map_pool
    while (!queue_empty(mapwaste)) {
        queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
        queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
    }

    PMAP_PGTRACE_UNLOCK(&ints);
}
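/*
 * The two-phase structure above is deliberate: maps are moved to map_waste
 * under the pgtrace lock, the lock is dropped, and only then are the PTEs
 * restored under each owning pmap's lock, which keeps the pgtrace spinlock
 * from being held across the slower pmap operations.
 */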
inline static void pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
{
    uint64_t tsz;
    vm_map_offset_t end;

    if (pmap == kernel_pmap) {
        tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
        *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64-tsz)) << (64-tsz));
        *endp = VM_MAX_KERNEL_ADDRESS;
    } else {
        tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
        if (tsz == 64) {
            end = 0;
        } else {
            end = ((uint64_t)1 << (64-tsz)) - 1;
        }

        *startp = 0;
        *endp = end;
    }

    assert(*endp > *startp);
}
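/*
 * Example of the TxSZ arithmetic above (illustrative numbers only): with
 * T0SZ = 25 the user VA space spans 2^(64-25) = 2^39 bytes, so the search
 * range becomes [0, 0x7fffffffff]; with T1SZ = 25 the kernel range starts at
 * 0xffffff8000000000 (the top 2^39 bytes), further clamped by
 * VM_MIN_KERNEL_ADDRESS and VM_MAX_KERNEL_ADDRESS.
 */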
// has pa mapped in given pmap? then clone it
static uint64_t pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset) {
    uint64_t ret = 0;
    vm_map_offset_t min, max;
    vm_map_offset_t cur_page, end_page;
    pt_entry_t *ptep;
    tt_entry_t *ttep;
    tt_entry_t tte;

    pmap_pgtrace_get_search_space(pmap, &min, &max);

    cur_page = arm_trunc_page(min);
    end_page = arm_trunc_page(max);
    while (cur_page <= end_page) {
        vm_map_offset_t add = 0;

        PMAP_LOCK(pmap);

        // skip uninterested space
        if (pmap == kernel_pmap &&
            ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
             (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
            add = ARM_PGBYTES;
            goto unlock_continue;
        }

#if __ARM64_TWO_LEVEL_PMAP__
        // check whether we can skip l2
        ttep = pmap_tt2e(pmap, cur_page);
        assert(ttep);
        tte = *ttep;
#else
        // check whether we can skip l1
        ttep = pmap_tt1e(pmap, cur_page);
        assert(ttep);
        tte = *ttep;
        if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
            add = ARM_TT_L1_SIZE;
            goto unlock_continue;
        }

        // how about l2
        tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, cur_page)];
#endif
        if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
            add = ARM_TT_L2_SIZE;
            goto unlock_continue;
        }

        // ptep finally
        ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, cur_page)]);
        if (ptep == PT_ENTRY_NULL) {
            add = ARM_TT_L3_SIZE;
            goto unlock_continue;
        }

        if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
            if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
                ret++;
            }
        }

        add = ARM_PGBYTES;

unlock_continue:
        PMAP_UNLOCK(pmap);

        // overflow
        if (cur_page + add < cur_page) {
            break;
        }

        cur_page += add;
    }

    return ret;
}
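/*
 * The walk above steps ARM_TT_L1_SIZE or ARM_TT_L2_SIZE at a time whenever a
 * table entry is invalid, so the linear scan over the whole address space
 * only pays the per-page cost where leaf tables actually exist.
 */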
// search pv table and clone vas of given pa
static uint64_t pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
{
    uint64_t ret = 0;
    int pai;
    pv_entry_t **pvh;
    pt_entry_t *ptep;
    pmap_t pmap;

    typedef struct {
        queue_chain_t chain;
        pmap_t pmap;
        vm_map_offset_t va;
    } pmap_va_t;

    queue_head_t pmapvaq;
    pmap_va_t *pmapva;

    queue_init(&pmapvaq);

    pai = pa_index(pa);
    LOCK_PVH(pai);
    pvh = pai_to_pvh(pai);

    // collect pmap/va pair from pvh
    if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
        ptep = pvh_ptep(pvh);
        pmap = ptep_get_pmap(ptep);

        pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
        pmapva->pmap = pmap;
        pmapva->va = ptep_get_va(ptep);

        queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);

    } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
        pv_entry_t *pvep;

        pvep = pvh_list(pvh);
        while (pvep != PV_ENTRY_NULL) {
            ptep = pve_get_ptep(pvep);
            pmap = ptep_get_pmap(ptep);

            pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
            pmapva->pmap = pmap;
            pmapva->va = ptep_get_va(ptep);

            queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);

            pvep = PVE_NEXT_PTR(pve_next(pvep));
        }
    }

    UNLOCK_PVH(pai);

    // clone them while making sure mapping still exists
    queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
        PMAP_LOCK(pmapva->pmap);
        ptep = pmap_pte(pmapva->pmap, pmapva->va);
        if (pte_to_pa(*ptep) == pa) {
            if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
                ret++;
            }
        }
        PMAP_UNLOCK(pmapva->pmap);

        kfree(pmapva, sizeof(pmap_va_t));
    }

    return ret;
}
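/*
 * Two discovery paths feed pmap_pgtrace_add_page(): for managed (pa_valid)
 * pages, the pv table above yields every pmap/va pair that maps the physical
 * page; for unmanaged physical addresses, the page tables are walked directly
 * via pmap_pgtrace_clone_from_pa().
 */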
// allocate a page info
static pmap_pgtrace_page_t *pmap_pgtrace_alloc_page(void)
{
    pmap_pgtrace_page_t *p;
    queue_head_t *mapq;
    queue_head_t *mappool;
    queue_head_t *mapwaste;
    pmap_pgtrace_map_t *map;

    p = kalloc(sizeof(pmap_pgtrace_page_t));
    assert(p);

    p->state = UNDEFINED;

    mapq = &(p->maps);
    mappool = &(p->map_pool);
    mapwaste = &(p->map_waste);
    queue_init(mapq);
    queue_init(mappool);
    queue_init(mapwaste);

    for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
        vm_map_offset_t newcva;
        pt_entry_t *cptep;
        kern_return_t kr;
        vm_map_entry_t entry;

        // get a clone va
        vm_object_reference(kernel_object);
        kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3*ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
        if (kr != KERN_SUCCESS) {
            panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
        }
        VME_OBJECT_SET(entry, kernel_object);
        VME_OFFSET_SET(entry, newcva);
        vm_map_unlock(kernel_map);

        // fill default clone page info and add to pool
        map = kalloc(sizeof(pmap_pgtrace_map_t));
        for (int j = 0; j < 3; j++) {
            vm_map_offset_t addr = newcva + j * ARM_PGBYTES;

            // pre-expand pmap while preemption enabled
            kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
            if (kr != KERN_SUCCESS) {
                panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
            }

            cptep = pmap_pte(kernel_pmap, addr);
            assert(cptep != NULL);

            map->cva[j] = addr;
            map->cva_spte[j] = *cptep;
        }

        map->range.start = map->range.end = 0;
        map->cloned = false;
        queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
    }

    return p;
}
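/*
 * Each page info pre-allocates PGTRACE_MAX_MAP map slots, and each slot
 * reserves a 3-page kernel VA window (via vm_map_find_space) with its page
 * tables pre-expanded, so later cloning can populate PTEs without allocating.
 */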
// free a page info
static void pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
{
    queue_head_t *mapq;
    queue_head_t *mappool;
    queue_head_t *mapwaste;
    pmap_pgtrace_map_t *map;

    assert(p);

    mapq = &(p->maps);
    mappool = &(p->map_pool);
    mapwaste = &(p->map_waste);

    while (!queue_empty(mapq)) {
        queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
        kfree(map, sizeof(pmap_pgtrace_map_t));
    }

    while (!queue_empty(mappool)) {
        queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
        kfree(map, sizeof(pmap_pgtrace_map_t));
    }

    while (!queue_empty(mapwaste)) {
        queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
        kfree(map, sizeof(pmap_pgtrace_map_t));
    }

    kfree(p, sizeof(pmap_pgtrace_page_t));
}
// construct page infos with the given address range
int pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
    bool ints;
    queue_head_t *q = &(pmap_pgtrace.pages);
    vm_map_offset_t cur_page, end_page;

    if (start > end) {
        kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
        return -1;
    }

    // add each page in given range
    cur_page = arm_trunc_page(start);
    end_page = arm_trunc_page(end);
    while (cur_page <= end_page) {
        pmap_paddr_t pa_page = 0;
        uint64_t num_cloned = 0;
        pt_entry_t *ptep = NULL;
        pmap_pgtrace_page_t *p = NULL, *newp;
        bool free_newp = true;
        pmap_pgtrace_page_state_t state;

        // do all allocations outside of spinlocks
        newp = pmap_pgtrace_alloc_page();

        // keep lock orders in pmap, kernel_pmap and pgtrace lock
        if (pmap != NULL) {
            PMAP_LOCK(pmap);
        }
        if (pmap != kernel_pmap) {
            PMAP_LOCK(kernel_pmap);
        }

        // addresses are physical if pmap is null
        if (pmap == NULL) {
            ptep = NULL;
            pa_page = cur_page;
            state = VA_UNDEFINED;
        } else {
            ptep = pmap_pte(pmap, cur_page);
            if (ptep != NULL) {
                pa_page = pte_to_pa(*ptep);
                state = DEFINED;
            } else {
                state = PA_UNDEFINED;
            }
        }

        // search if we have a page info already
        PMAP_PGTRACE_LOCK(&ints);
        if (state != PA_UNDEFINED) {
            p = pmap_pgtrace_find_page(pa_page);
        }

        // add pre-allocated page info if nothing found
        if (p == NULL) {
            queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
            newp->state = state;
            p = newp;
            free_newp = false;
        }

        // now p points what we want
        queue_head_t *mapq = &(p->maps);
        queue_head_t *mappool = &(p->map_pool);
        pmap_pgtrace_map_t *map;
        vm_map_offset_t start_offset, end_offset;

        // calculate trace offsets in the page
        if (cur_page > start) {
            start_offset = 0;
        } else {
            start_offset = start-cur_page;
        }
        if (cur_page == end_page) {
            end_offset = end-end_page;
        } else {
            end_offset = ARM_PGBYTES-1;
        }

        kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);

        // fill map info
        assert(!queue_empty(mappool));
        queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
        if (p->state == PA_UNDEFINED) {
            map->pmap = pmap;
            map->ova = cur_page;
            map->range.start = start_offset;
            map->range.end = end_offset;
        } else if (p->state == VA_UNDEFINED) {
            p->pa = pa_page;
            map->range.start = start_offset;
            map->range.end = end_offset;
        } else if (p->state == DEFINED) {
            p->pa = pa_page;
            map->pmap = pmap;
            map->ova = cur_page;
            map->range.start = start_offset;
            map->range.end = end_offset;
        } else {
            panic("invalid p->state=%d\n", p->state);
        }

        // not cloned yet
        map->cloned = false;
        queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);

        // unlock locks
        PMAP_PGTRACE_UNLOCK(&ints);
        if (pmap != kernel_pmap) {
            PMAP_UNLOCK(kernel_pmap);
        }
        if (pmap != NULL) {
            PMAP_UNLOCK(pmap);
        }

        // now clone it
        if (pa_valid(pa_page)) {
            num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
        }
        if (pmap == NULL) {
            num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
        } else {
            num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
        }

        // free pre-allocations if we didn't add it to the q
        if (free_newp) {
            pmap_pgtrace_free_page(newp);
        }

        if (num_cloned == 0) {
            kprintf("%s: no mapping found for pa_page=%llx but will be added when a page entered\n", __func__, pa_page);
        }

        // overflow
        if (cur_page + ARM_PGBYTES < cur_page) {
            break;
        } else {
            cur_page += ARM_PGBYTES;
        }
    }

    return 0;
}
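/*
 * Illustrative usage of the add/delete pair (hypothetical caller and
 * addresses; the pgtrace control interfaces live outside this file):
 *
 *     // trace accesses to one page of a device register mapping
 *     pmap_pgtrace_add_page(kernel_pmap, reg_va, reg_va + ARM_PGBYTES);
 *     ...
 *     pmap_pgtrace_delete_page(kernel_pmap, reg_va, reg_va + ARM_PGBYTES);
 */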
// delete page infos for given address range
int pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
    bool ints;
    queue_head_t *q = &(pmap_pgtrace.pages);
    pmap_pgtrace_page_t *p;
    vm_map_offset_t cur_page, end_page;
    pt_entry_t *ptep;
    pmap_paddr_t pa_page;

    kprintf("%s start=%llx end=%llx\n", __func__, start, end);

    // remove page info from start to end
    cur_page = arm_trunc_page(start);
    end_page = arm_trunc_page(end);
    while (cur_page <= end_page) {
        p = NULL;

        if (pmap == NULL) {
            pa_page = cur_page;
        } else {
            PMAP_LOCK(pmap);
            ptep = pmap_pte(pmap, cur_page);
            if (ptep == NULL) {
                PMAP_UNLOCK(pmap);
                goto cont;
            }
            pa_page = pte_to_pa(*ptep);
            PMAP_UNLOCK(pmap);
        }

        // remove all clones and validate
        pmap_pgtrace_remove_all_clone(pa_page);

        // find page info and delete
        PMAP_PGTRACE_LOCK(&ints);
        p = pmap_pgtrace_find_page(pa_page);
        if (p != NULL) {
            queue_remove(q, p, pmap_pgtrace_page_t *, chain);
        }
        PMAP_PGTRACE_UNLOCK(&ints);

        // free outside of locks
        if (p != NULL) {
            pmap_pgtrace_free_page(p);
        }

cont:
        // overflow
        if (cur_page + ARM_PGBYTES < cur_page) {
            break;
        } else {
            cur_page += ARM_PGBYTES;
        }
    }

    return 0;
}
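/*
 * Fault handler for traced pages: invoked for an abort whose PTE carries
 * ARM_PTE_PGTRACE. It re-checks the PTE under the pgtrace lock, finds the
 * matching map, decodes the faulting instruction and runs it against the
 * clone mapping, logs the access if it falls inside the traced range, then
 * advances the PC past the instruction.
 */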
kern_return_t pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
{
    pt_entry_t *ptep;
    pgtrace_run_result_t res;
    pmap_pgtrace_page_t *p;
    bool ints, found = false;
    pmap_paddr_t pa;

    // Quick check if we are interested
    ptep = pmap_pte(pmap, va);
    if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
        return KERN_FAILURE;
    }

    PMAP_PGTRACE_LOCK(&ints);

    // Check again since access is serialized
    ptep = pmap_pte(pmap, va);
    if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return KERN_FAILURE;

    } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
        // Somehow this cpu's tlb has not updated
        kprintf("%s Somehow this cpu's tlb has not updated?\n", __func__);
        PMAP_UPDATE_TLBS(pmap, va, va+ARM_PGBYTES);

        PMAP_PGTRACE_UNLOCK(&ints);
        return KERN_SUCCESS;
    }

    // Find if this pa is what we are tracing
    pa = pte_to_pa(*ptep);

    p = pmap_pgtrace_find_page(arm_trunc_page(pa));
    if (p == NULL) {
        panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
    }

    // find if pmap and va are also matching
    queue_head_t *mapq = &(p->maps);
    queue_head_t *mapwaste = &(p->map_waste);
    pmap_pgtrace_map_t *map;

    queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
        if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
            found = true;
            break;
        }
    }

    // if not found, search map waste as they are still valid
    if (!found) {
        queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
            if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
                found = true;
                break;
            }
        }
    }

    if (!found) {
        panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
    }

    // Decode and run it on the clone map
    bzero(&res, sizeof(res));
    pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss),    // instruction
                           va, map->cva,                           // fault va and clone page vas
                           ss, &res);

    // write a log if in range
    vm_map_offset_t offset = va - map->ova;
    if (map->range.start <= offset && offset <= map->range.end) {
        pgtrace_write_log(res);
    }

    PMAP_PGTRACE_UNLOCK(&ints);

    // Return to next instruction
    set_saved_state_pc(ss, get_saved_state_pc(ss) + sizeof(uint32_t));

    return KERN_SUCCESS;
}
boolean_t
pmap_enforces_execute_only(
#if (__ARM_VMSA__ == 7)
    __unused
#endif
    pmap_t pmap)
{
#if (__ARM_VMSA__ > 7)
    return (pmap != kernel_pmap);
#else
    return FALSE;
#endif
}

void
pmap_set_jit_entitled(
    __unused pmap_t pmap)
{
    return;
}
static kern_return_t
pmap_query_page_info_internal(
    pmap_t pmap,
    vm_map_offset_t va,
    int *disp_p)
{
    int disp;
    pmap_paddr_t pa;
    int pai;
    pt_entry_t *pte;
    pv_entry_t **pv_h, *pve_p;

    if (pmap == PMAP_NULL || pmap == kernel_pmap) {
        *disp_p = 0;
        return KERN_INVALID_ARGUMENT;
    }

    disp = 0;

    PMAP_LOCK(pmap);

    pte = pmap_pte(pmap, va);
    if (pte == PT_ENTRY_NULL) {
        goto done;
    }

    pa = pte_to_pa(*pte);
    if (pa == 0) {
        if (ARM_PTE_IS_COMPRESSED(*pte)) {
            disp |= PMAP_QUERY_PAGE_COMPRESSED;
            if (*pte & ARM_PTE_COMPRESSED_ALT) {
                disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
            }
        }
    } else {
        disp |= PMAP_QUERY_PAGE_PRESENT;
        pai = (int) pa_index(pa);
        if (!pa_valid(pa)) {
            goto done;
        }
        LOCK_PVH(pai);
        pv_h = pai_to_pvh(pai);
        pve_p = PV_ENTRY_NULL;
        if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
            pve_p = pvh_list(pv_h);
            while (pve_p != PV_ENTRY_NULL &&
                   pve_get_ptep(pve_p) != pte) {
                pve_p = PVE_NEXT_PTR(pve_next(pve_p));
            }
        }
        if (IS_ALTACCT_PAGE(pai, pve_p)) {
            disp |= PMAP_QUERY_PAGE_ALTACCT;
        } else if (IS_REUSABLE_PAGE(pai)) {
            disp |= PMAP_QUERY_PAGE_REUSABLE;
        } else if (IS_INTERNAL_PAGE(pai)) {
            disp |= PMAP_QUERY_PAGE_INTERNAL;
        }
        UNLOCK_PVH(pai);
    }

done:
    PMAP_UNLOCK(pmap);
    *disp_p = disp;
    return KERN_SUCCESS;
}
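/*
 * Illustrative use of the query interface (hypothetical caller and names):
 *
 *     int disp = 0;
 *     if (pmap_query_page_info(user_pmap, user_va, &disp) == KERN_SUCCESS &&
 *         (disp & PMAP_QUERY_PAGE_PRESENT)) {
 *         // resident; disp may also carry INTERNAL/REUSABLE/ALTACCT/COMPRESSED bits
 *     }
 */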
kern_return_t
pmap_query_page_info(
    pmap_t pmap,
    vm_map_offset_t va,
    int *disp_p)
{
    return pmap_query_page_info_internal(pmap, va, disp_p);
}

kern_return_t
pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
{
    return KERN_SUCCESS;
}

kern_return_t
pmap_return(boolean_t do_panic, boolean_t do_recurse)
{
    return pmap_return_internal(do_panic, do_recurse);
}
static void
pmap_footprint_suspend_internal(
    vm_map_t map,
    boolean_t suspend)
{
#if DEVELOPMENT || DEBUG
    if (suspend) {
        map->pmap->footprint_suspended = TRUE;
        map->pmap->footprint_was_suspended = TRUE;
    } else {
        map->pmap->footprint_suspended = FALSE;
    }
#else /* DEVELOPMENT || DEBUG */
    (void) map;
    (void) suspend;
#endif /* DEVELOPMENT || DEBUG */
}

void
pmap_footprint_suspend(
    vm_map_t map,
    boolean_t suspend)
{
    pmap_footprint_suspend_internal(map, suspend);