/*
 * Copyright (c) 2011-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_assert.h>
#include <mach_ldebug.h>

#include <mach/shared_region.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/vm_map.h>
#include <mach/machine/vm_param.h>
#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/bits.h>
#include <kern/thread.h>
#include <kern/sched.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>
#include <kern/trustcache.h>

#include <os/overflow.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#include <libkern/img4/interface.h>
#include <libkern/section_keywords.h>

#include <machine/atomic.h>
#include <machine/thread.h>
#include <machine/lowglobals.h>

#include <arm/caches_internal.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/cpu_capabilities.h>
#include <arm/cpu_number.h>
#include <arm/machine_cpu.h>
#include <arm/misc_protos.h>
#if (__ARM_VMSA__ > 7)
#include <arm64/proc_reg.h>
#include <pexpert/arm64/boot.h>

#include <arm64/pgtrace.h>
#if CONFIG_PGTRACE_NONKEXT
#include <arm64/pgtrace_decoder.h>
#endif // CONFIG_PGTRACE_NONKEXT
#endif /* (__ARM_VMSA__ > 7) */

#include <pexpert/device_tree.h>

#include <san/kasan.h>
#include <sys/cdefs.h>

#if defined(HAS_APPLE_PAC)
#include <ptrauth.h>
#endif
#define PMAP_TT_L0_LEVEL        0x0
#define PMAP_TT_L1_LEVEL        0x1
#define PMAP_TT_L2_LEVEL        0x2
#define PMAP_TT_L3_LEVEL        0x3
#if (__ARM_VMSA__ == 7)
#define PMAP_TT_MAX_LEVEL       PMAP_TT_L2_LEVEL
#else
#define PMAP_TT_MAX_LEVEL       PMAP_TT_L3_LEVEL
#endif
#define PMAP_TT_LEAF_LEVEL      PMAP_TT_MAX_LEVEL
#define PMAP_TT_TWIG_LEVEL      (PMAP_TT_MAX_LEVEL - 1)
static bool alloc_asid(pmap_t pmap);
static void free_asid(pmap_t pmap);
static void flush_mmu_tlb_region_asid_async(vm_offset_t va, unsigned length, pmap_t pmap);
static void flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap);
static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
static pt_entry_t wimg_to_pte(unsigned int wimg);
struct page_table_ops {
	bool (*alloc_id)(pmap_t pmap);
	void (*free_id)(pmap_t pmap);
	void (*flush_tlb_region_async)(vm_offset_t va, unsigned length, pmap_t pmap);
	void (*flush_tlb_tte_async)(vm_offset_t va, pmap_t pmap);
	void (*flush_tlb_async)(pmap_t pmap);
	pt_entry_t (*wimg_to_pte)(unsigned int wimg);
};
static const struct page_table_ops native_pt_ops =
{
	.alloc_id = alloc_asid,
	.free_id = free_asid,
	.flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
	.flush_tlb_tte_async = flush_mmu_tlb_tte_asid_async,
	.flush_tlb_async = flush_mmu_tlb_full_asid_async,
	.wimg_to_pte = wimg_to_pte,
};
#if (__ARM_VMSA__ > 7)
const struct page_table_level_info pmap_table_level_info_16k[] =
{
	[0] = {
		.size       = ARM_16K_TT_L0_SIZE,
		.offmask    = ARM_16K_TT_L0_OFFMASK,
		.shift      = ARM_16K_TT_L0_SHIFT,
		.index_mask = ARM_16K_TT_L0_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[1] = {
		.size       = ARM_16K_TT_L1_SIZE,
		.offmask    = ARM_16K_TT_L1_OFFMASK,
		.shift      = ARM_16K_TT_L1_SHIFT,
		.index_mask = ARM_16K_TT_L1_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[2] = {
		.size       = ARM_16K_TT_L2_SIZE,
		.offmask    = ARM_16K_TT_L2_OFFMASK,
		.shift      = ARM_16K_TT_L2_SHIFT,
		.index_mask = ARM_16K_TT_L2_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[3] = {
		.size       = ARM_16K_TT_L3_SIZE,
		.offmask    = ARM_16K_TT_L3_OFFMASK,
		.shift      = ARM_16K_TT_L3_SHIFT,
		.index_mask = ARM_16K_TT_L3_INDEX_MASK,
		.valid_mask = ARM_PTE_TYPE_VALID,
		.type_mask  = ARM_PTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_L3BLOCK
	}
};
const struct page_table_level_info pmap_table_level_info_4k[] =
{
	[0] = {
		.size       = ARM_4K_TT_L0_SIZE,
		.offmask    = ARM_4K_TT_L0_OFFMASK,
		.shift      = ARM_4K_TT_L0_SHIFT,
		.index_mask = ARM_4K_TT_L0_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[1] = {
		.size       = ARM_4K_TT_L1_SIZE,
		.offmask    = ARM_4K_TT_L1_OFFMASK,
		.shift      = ARM_4K_TT_L1_SHIFT,
		.index_mask = ARM_4K_TT_L1_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[2] = {
		.size       = ARM_4K_TT_L2_SIZE,
		.offmask    = ARM_4K_TT_L2_OFFMASK,
		.shift      = ARM_4K_TT_L2_SHIFT,
		.index_mask = ARM_4K_TT_L2_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[3] = {
		.size       = ARM_4K_TT_L3_SIZE,
		.offmask    = ARM_4K_TT_L3_OFFMASK,
		.shift      = ARM_4K_TT_L3_SHIFT,
		.index_mask = ARM_4K_TT_L3_INDEX_MASK,
		.valid_mask = ARM_PTE_TYPE_VALID,
		.type_mask  = ARM_PTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_L3BLOCK
	}
};
struct page_table_attr {
	const struct page_table_level_info * const pta_level_info;
	const struct page_table_ops * const pta_ops;
	const uintptr_t ap_ro;
	const uintptr_t ap_rw;
	const uintptr_t ap_rona;
	const uintptr_t ap_rwna;
	const uintptr_t ap_xn;
	const uintptr_t ap_x;
	const unsigned int pta_root_level;
	const unsigned int pta_max_level;
};
const struct page_table_attr pmap_pt_attr_4k = {
	.pta_level_info = pmap_table_level_info_4k,
	.pta_root_level = PMAP_TT_L1_LEVEL,
	.pta_max_level  = PMAP_TT_L3_LEVEL,
	.pta_ops = &native_pt_ops,
	.ap_ro = ARM_PTE_AP(AP_RORO),
	.ap_rw = ARM_PTE_AP(AP_RWRW),
	.ap_rona = ARM_PTE_AP(AP_RONA),
	.ap_rwna = ARM_PTE_AP(AP_RWNA),
	.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
	.ap_x = ARM_PTE_PNX,
};
const struct page_table_attr pmap_pt_attr_16k = {
	.pta_level_info = pmap_table_level_info_16k,
	.pta_root_level = PMAP_TT_L1_LEVEL,
	.pta_max_level  = PMAP_TT_L3_LEVEL,
	.pta_ops = &native_pt_ops,
	.ap_ro = ARM_PTE_AP(AP_RORO),
	.ap_rw = ARM_PTE_AP(AP_RWRW),
	.ap_rona = ARM_PTE_AP(AP_RONA),
	.ap_rwna = ARM_PTE_AP(AP_RWNA),
	.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
	.ap_x = ARM_PTE_PNX,
};
#if __ARM_16K_PG__
const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
#else /* !__ARM_16K_PG__ */
const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
#endif /* !__ARM_16K_PG__ */
#else /* (__ARM_VMSA__ > 7) */
/*
 * We don't support pmap parameterization for VMSA7, so use an opaque
 * page_table_attr structure.
 */
const struct page_table_attr * const native_pt_attr = NULL;
#endif /* (__ARM_VMSA__ > 7) */

typedef struct page_table_attr pt_attr_t;
/* Macro for getting pmap attributes; not a function for const propagation. */
#if ARM_PARAMETERIZED_PMAP
/* The page table attributes are linked to the pmap */
#define pmap_get_pt_attr(pmap) ((pmap)->pmap_pt_attr)
#define pmap_get_pt_ops(pmap) ((pmap)->pmap_pt_attr->pta_ops)
#else /* !ARM_PARAMETERIZED_PMAP */
/* The page table attributes are fixed (to allow for const propagation) */
#define pmap_get_pt_attr(pmap) (native_pt_attr)
#define pmap_get_pt_ops(pmap) (&native_pt_ops)
#endif /* !ARM_PARAMETERIZED_PMAP */
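
/*
 * Illustrative sketch (not part of the original source): pmap-internal code is
 * expected to fetch page-table geometry and TLB/PTE operations through these
 * accessors rather than touching native_pt_attr or native_pt_ops directly,
 * for example:
 *
 *	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
 *	pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
 *	    (unsigned)pt_attr_twig_size(pt_attr), pmap);
 *
 * When ARM_PARAMETERIZED_PMAP is off, both macros resolve to globals known at
 * compile time, so the indirection can be const-propagated away.
 */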
#if (__ARM_VMSA__ > 7)
static inline uint64_t
pt_attr_ln_size(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr->pta_level_info[level].size;
}

__unused static inline uint64_t
pt_attr_ln_shift(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr->pta_level_info[level].shift;
}

__unused static inline uint64_t
pt_attr_ln_offmask(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr->pta_level_info[level].offmask;
}

static inline unsigned int
pt_attr_twig_level(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_max_level - 1;
}

static inline unsigned int
pt_attr_root_level(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_root_level;
}

static __unused inline uint64_t
pt_attr_leaf_size(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level].size;
}

static __unused inline uint64_t
pt_attr_leaf_offmask(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level].offmask;
}

static inline uint64_t
pt_attr_leaf_shift(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level].shift;
}

static __unused inline uint64_t
pt_attr_leaf_index_mask(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level].index_mask;
}

static inline uint64_t
pt_attr_twig_size(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].size;
}

static inline uint64_t
pt_attr_twig_offmask(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].offmask;
}

static inline uint64_t
pt_attr_twig_shift(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].shift;
}

static __unused inline uint64_t
pt_attr_twig_index_mask(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].index_mask;
}

static inline uint64_t
pt_attr_leaf_table_size(const pt_attr_t * const pt_attr)
{
	return pt_attr_twig_size(pt_attr);
}

static inline uint64_t
pt_attr_leaf_table_offmask(const pt_attr_t * const pt_attr)
{
	return pt_attr_twig_offmask(pt_attr);
}

static inline uintptr_t
pt_attr_leaf_rw(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_rw;
}

static inline uintptr_t
pt_attr_leaf_ro(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_ro;
}

static inline uintptr_t
pt_attr_leaf_rona(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_rona;
}

static inline uintptr_t
pt_attr_leaf_rwna(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_rwna;
}

static inline uintptr_t
pt_attr_leaf_xn(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_xn;
}

static inline uintptr_t
pt_attr_leaf_x(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_x;
}
#else /* (__ARM_VMSA__ > 7) */

static inline unsigned int
pt_attr_twig_level(__unused const pt_attr_t * const pt_attr)
{
	return PMAP_TT_L1_LEVEL;
}

static inline uint64_t
pt_attr_twig_size(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_TWIG_SIZE;
}

static inline uint64_t
pt_attr_twig_offmask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_TWIG_OFFMASK;
}

static inline uint64_t
pt_attr_twig_shift(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_TWIG_SHIFT;
}

static __unused inline uint64_t
pt_attr_twig_index_mask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_TWIG_INDEX_MASK;
}

__unused static inline uint64_t
pt_attr_leaf_size(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_LEAF_SIZE;
}

__unused static inline uint64_t
pt_attr_leaf_offmask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_LEAF_OFFMASK;
}

static inline uint64_t
pt_attr_leaf_shift(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_LEAF_SHIFT;
}

static __unused inline uint64_t
pt_attr_leaf_index_mask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_LEAF_INDEX_MASK;
}

static inline uint64_t
pt_attr_leaf_table_size(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_L1_PT_SIZE;
}

static inline uint64_t
pt_attr_leaf_table_offmask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_L1_PT_OFFMASK;
}

static inline uintptr_t
pt_attr_leaf_rw(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_AP(AP_RWRW);
}

static inline uintptr_t
pt_attr_leaf_ro(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_AP(AP_RORO);
}

static inline uintptr_t
pt_attr_leaf_rona(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_AP(AP_RONA);
}

static inline uintptr_t
pt_attr_leaf_rwna(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_AP(AP_RWNA);
}

static inline uintptr_t
pt_attr_leaf_xn(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_NX;
}

#endif /* (__ARM_VMSA__ > 7) */
static inline void
pmap_sync_tlb(bool strong __unused)
{
	sync_tlb_flush();
}
#if MACH_ASSERT
int vm_footprint_suspend_allowed = 1;

extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

int pmap_stats_assert = 1;
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)                \
	MACRO_BEGIN                                             \
	if (pmap_stats_assert && (pmap)->pmap_stats_assert)     \
	        assertf(cond, fmt, ##__VA_ARGS__);              \
	MACRO_END
#else /* MACH_ASSERT */
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
#endif /* MACH_ASSERT */

#if DEVELOPMENT || DEBUG
#define PMAP_FOOTPRINT_SUSPENDED(pmap) \
	(current_thread()->pmap_footprint_suspended)
#else /* DEVELOPMENT || DEBUG */
#define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
#endif /* DEVELOPMENT || DEBUG */

#define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)

#if DEVELOPMENT || DEBUG
int panic_on_unsigned_execute = 0;
#endif /* DEVELOPMENT || DEBUG */
/* Virtual memory region for early allocation */
#if (__ARM_VMSA__ == 7)
#define VREGION1_HIGH_WINDOW    (0)
#else
#define VREGION1_HIGH_WINDOW    (PE_EARLY_BOOT_VA)
#endif
#define VREGION1_START          ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
#define VREGION1_SIZE           (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
extern unsigned int not_in_kdp;

extern vm_offset_t first_avail;

extern pmap_paddr_t avail_start;
extern pmap_paddr_t avail_end;

extern vm_offset_t virtual_space_start;     /* Next available kernel VA */
extern vm_offset_t virtual_space_end;       /* End of kernel address space */
extern vm_offset_t static_memory_end;

extern int maxproc, hard_maxproc;
#if (__ARM_VMSA__ > 7)
/* The number of address bits one TTBR can cover. */
#define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)

/*
 * The bounds on our TTBRs.  These are for sanity checking that
 * an address is accessible by a TTBR before we attempt to map it.
 */
#define ARM64_TTBR0_MIN_ADDR (0ULL)
#define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
#define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
#define ARM64_TTBR1_MAX_ADDR (~0ULL)

/* The level of the root of a page table. */
const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));

/* The number of entries in the root TT of a page table. */
const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
#else
const uint64_t arm64_root_pgtable_level = 0;
const uint64_t arm64_root_pgtable_num_ttes = 0;
#endif
struct pmap     kernel_pmap_store MARK_AS_PMAP_DATA;
SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;

struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));      /* store pt pages */
vm_object_t     pmap_object = &pmap_object_store;

static struct zone *pmap_zone;  /* zone of pmap structures */

decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA);
decl_simple_lock_data(, tt1_lock MARK_AS_PMAP_DATA);
unsigned int    pmap_stamp MARK_AS_PMAP_DATA;
queue_head_t    map_pmap_list MARK_AS_PMAP_DATA;

decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA);
queue_head_t    pt_page_list MARK_AS_PMAP_DATA;         /* pt page ptd entries list */

decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA);
typedef struct page_free_entry {
	struct page_free_entry  *next;
} page_free_entry_t;

#define PAGE_FREE_ENTRY_NULL    ((page_free_entry_t *) 0)

page_free_entry_t       *pmap_pages_reclaim_list MARK_AS_PMAP_DATA;     /* Reclaimed pt page list */
unsigned int            pmap_pages_request_count MARK_AS_PMAP_DATA;     /* Pending requests to reclaim pt page */
unsigned long long      pmap_pages_request_acum MARK_AS_PMAP_DATA;
typedef struct tt_free_entry {
	struct tt_free_entry    *next;
} tt_free_entry_t;

#define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)

tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_PAGE_SIZE_TT_MAX   4
tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_two_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_two_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_TWO_PAGE_SIZE_TT_MAX       4
tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_tt_max MARK_AS_PMAP_DATA;

#define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)
boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
boolean_t pmap_gc_allowed_by_time_throttle = TRUE;

unsigned int    inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0;        /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0;        /* leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0;         /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int    inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0;      /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0;      /* leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0;       /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int    inuse_pmap_pages_count = 0;     /* debugging */
SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte  = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;

SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte  = 0;     /* set by arm_vm_init() - keep out of bss */
SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0;     /* set by arm_vm_init() - phys tte addr */

#if DEVELOPMENT || DEBUG
int nx_enabled = 1;                                     /* enable no-execute protection */
int allow_data_exec  = 0;                               /* No apps may execute data */
int allow_stack_exec = 0;                               /* No apps may execute from the stack */
unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
#else /* DEVELOPMENT || DEBUG */
const int nx_enabled = 1;                               /* enable no-execute protection */
const int allow_data_exec  = 0;                         /* No apps may execute data */
const int allow_stack_exec = 0;                         /* No apps may execute from the stack */
#endif /* DEVELOPMENT || DEBUG */
/*
 * pv_entry_t - structure to track the active mappings for a given page
 */
typedef struct pv_entry {
	struct pv_entry *pve_next;              /* next alias */
	pt_entry_t      *pve_ptep;              /* page table entry */

#if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
/* For the newer ARMv7k ABI where 64-bit types are 64-bit aligned, but pointers
 * are 32-bit:
 * Since pt_desc is 64-bit aligned and we cast often from pv_entry to
 * pt_desc.
 */
} __attribute__ ((aligned(8))) pv_entry_t;
#else
} pv_entry_t;
#endif

#define PV_ENTRY_NULL   ((pv_entry_t *) 0)
/*
 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
 * as a marker for pages mapped through an "alternate accounting" mapping.
 * These macros set, clear and test for this marker and extract the actual
 * value of the "pve_next" pointer.
 */
#define PVE_NEXT_ALTACCT        ((uintptr_t) 0x1)
#define PVE_NEXT_SET_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
	    PVE_NEXT_ALTACCT)
#define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
	    ~PVE_NEXT_ALTACCT)
#define PVE_NEXT_IS_ALTACCT(pve_next)   \
	((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
#define PVE_NEXT_PTR(pve_next) \
	((struct pv_entry *)(((uintptr_t) (pve_next)) & \
	    ~PVE_NEXT_ALTACCT))
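
/*
 * Illustrative sketch (not part of the original source): pv_entry_t pointers
 * are at least pointer-aligned, so bit 0 of "pve_next" never carries address
 * information and is free to hold the ALTACCT marker, e.g.:
 *
 *	PVE_NEXT_SET_ALTACCT(&pve_p->pve_next);
 *	if (PVE_NEXT_IS_ALTACCT(pve_p->pve_next)) {
 *	        pv_entry_t *next = PVE_NEXT_PTR(pve_p->pve_next); // untagged
 *	}
 *
 * Any code that walks the chain must strip the tag with PVE_NEXT_PTR() before
 * dereferencing the pointer.
 */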
#if MACH_ASSERT
static void pmap_check_ledgers(pmap_t pmap);
#else
static void
pmap_check_ledgers(__unused pmap_t pmap)
{
}
#endif /* MACH_ASSERT */
SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table;          /* array of pv entry pointers */

pv_entry_t              *pv_free_list MARK_AS_PMAP_DATA;
pv_entry_t              *pv_kern_free_list MARK_AS_PMAP_DATA;
decl_simple_lock_data(, pv_free_list_lock MARK_AS_PMAP_DATA);
decl_simple_lock_data(, pv_kern_free_list_lock MARK_AS_PMAP_DATA);

decl_simple_lock_data(, phys_backup_lock);
/*
 *	pt_desc - structure to keep info on page assigned to page tables
 */
#if (__ARM_VMSA__ == 7)
#define PT_INDEX_MAX    1
#else
#if (ARM_PGSHIFT == 14)
#define PT_INDEX_MAX    1
#else
#define PT_INDEX_MAX    4
#endif
#endif

#define PT_DESC_REFCOUNT                0x4000U
#define PT_DESC_IOMMU_REFCOUNT          0x8000U

typedef struct pt_desc {
	queue_chain_t   pt_page;
	struct pmap     *pmap;
	/*
	 * Locate this struct towards the end of the pt_desc; our long term
	 * goal is to make this a VLA to avoid wasting memory if we don't need
	 * multiple entries.
	 */
	struct {
		/*
		 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
		 * For leaf pagetables, should reflect the number of non-empty PTEs
		 * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
		 */
		unsigned short  refcnt;
		/*
		 * For non-leaf pagetables, should be 0
		 * For leaf pagetables, should reflect the number of wired entries
		 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
		 */
		unsigned short  wiredcnt;
		vm_offset_t     va;
	} ptd_info[PT_INDEX_MAX];
} pt_desc_t;

#define PTD_ENTRY_NULL  ((pt_desc_t *) 0)
SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;

pt_desc_t               *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
unsigned int    ptd_free_count MARK_AS_PMAP_DATA = 0;
decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA);
/*
 *	physical page attribute
 */
typedef u_int16_t pp_attr_t;

#define PP_ATTR_WIMG_MASK               0x003F
#define PP_ATTR_WIMG(x)                 ((x) & PP_ATTR_WIMG_MASK)

#define PP_ATTR_REFERENCED              0x0040
#define PP_ATTR_MODIFIED                0x0080

#define PP_ATTR_INTERNAL                0x0100
#define PP_ATTR_REUSABLE                0x0200
#define PP_ATTR_ALTACCT                 0x0400
#define PP_ATTR_NOENCRYPT               0x0800

#define PP_ATTR_REFFAULT                0x1000
#define PP_ATTR_MODFAULT                0x2000

SECURITY_READ_ONLY_LATE(pp_attr_t*)     pp_attr_table;
typedef struct pmap_io_range {
	uint64_t addr;
	uint64_t len;
	#define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31) // Strong DSB required for pages in this range
	#define PMAP_IO_RANGE_CARVEOUT (1UL << 30) // Corresponds to memory carved out by bootloader
	uint32_t wimg; // lower 16 bits treated as pp_attr_t, upper 16 bits contain additional mapping flags
	uint32_t signature; // 4CC
} __attribute__((packed)) pmap_io_range_t;

SECURITY_READ_ONLY_LATE(pmap_io_range_t*)       io_attr_table;

SECURITY_READ_ONLY_LATE(pmap_paddr_t)   vm_first_phys = (pmap_paddr_t) 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t)   vm_last_phys = (pmap_paddr_t) 0;

SECURITY_READ_ONLY_LATE(unsigned int)   num_io_rgns = 0;
SECURITY_READ_ONLY_LATE(boolean_t)      pmap_initialized = FALSE;       /* Has pmap_init completed? */

SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;

SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default  = 0x0;
#if defined(__arm64__)
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
#endif
#define PMAP_MAX_SW_ASID ((MAX_ASID + MAX_HW_ASID - 1) / MAX_HW_ASID)
_Static_assert(PMAP_MAX_SW_ASID <= (UINT8_MAX + 1),
    "VASID bits can't be represented by an 8-bit integer");

decl_simple_lock_data(, asid_lock MARK_AS_PMAP_DATA);
static bitmap_t asid_bitmap[BITMAP_LEN(MAX_ASID)] MARK_AS_PMAP_DATA;
#if (__ARM_VMSA__ > 7)
SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
#endif

#define pa_index(pa)                                                                    \
	(atop((pa) - vm_first_phys))

#define pai_to_pvh(pai)                                                                 \
	(&pv_head_table[pai])

#define pa_valid(x)                                                                     \
	((x) >= vm_first_phys && (x) < vm_last_phys)
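
/*
 * Illustrative sketch (not part of the original source): pa_index() and
 * pai_to_pvh() are the bridge from a managed physical address to its PV head
 * entry, and pa_valid() guards against addresses outside the managed range:
 *
 *	if (pa_valid(pa)) {
 *	        int pai = (int)pa_index(pa);
 *	        LOCK_PVH(pai);
 *	        pv_entry_t **pvh = pai_to_pvh(pai);
 *	        ...
 *	        UNLOCK_PVH(pai);
 *	}
 */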
/* PTE Define Macros */

#define pte_is_wired(pte)                                                               \
	(((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)

#define pte_set_wired(ptep, wired)                                                      \
	do {                                                                            \
	        SInt16  *ptd_wiredcnt_ptr;                                              \
	        ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->ptd_info[ARM_PT_DESC_INDEX(ptep)].wiredcnt); \
	        if (wired) {                                                            \
	                *ptep |= ARM_PTE_WIRED;                                         \
	                OSAddAtomic16(1, ptd_wiredcnt_ptr);                             \
	        } else {                                                                \
	                *ptep &= ~ARM_PTE_WIRED;                                        \
	                OSAddAtomic16(-1, ptd_wiredcnt_ptr);                            \
	        }                                                                       \
	} while (0)

#define pte_was_writeable(pte) \
	(((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)

#define pte_set_was_writeable(pte, was_writeable) \
	do {                                         \
	        if ((was_writeable)) {               \
	                (pte) |= ARM_PTE_WRITEABLE;  \
	        } else {                             \
	                (pte) &= ~ARM_PTE_WRITEABLE; \
	        }                                    \
	} while (0)
/* PVE Define Macros */

#define pve_next(pve) \
	((pve)->pve_next)

#define pve_link_field(pve) \
	(&pve_next(pve))

#define pve_link(pp, e) \
	((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))

#define pve_unlink(pp, e) \
	(pve_next(pp) = pve_next(e))

/* bits held in the ptep pointer field */

#define pve_get_ptep(pve) \
	((pve)->pve_ptep)

#define pve_set_ptep(pve, ptep_new) \
	do {                                  \
	        (pve)->pve_ptep = (ptep_new); \
	} while (0)
909 /* mask for page descriptor index */
910 #define ARM_TT_PT_INDEX_MASK ARM_PGMASK
912 #if (__ARM_VMSA__ == 7)
913 #define ARM_PT_DESC_INDEX_MASK 0x00000
914 #define ARM_PT_DESC_INDEX_SHIFT 0
917 * Shift value used for reconstructing the virtual address for a PTE.
919 #define ARM_TT_PT_ADDR_SHIFT (10U)
921 #define ptep_get_va(ptep) \
922 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
924 #define ptep_get_pmap(ptep) \
925 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
929 #if (ARM_PGSHIFT == 12)
930 #define ARM_PT_DESC_INDEX_MASK ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
931 #define ARM_PT_DESC_INDEX_SHIFT ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
933 * Shift value used for reconstructing the virtual address for a PTE.
935 #define ARM_TT_PT_ADDR_SHIFT (9ULL)
938 #define ARM_PT_DESC_INDEX_MASK (0x00000)
939 #define ARM_PT_DESC_INDEX_SHIFT (0)
941 * Shift value used for reconstructing the virtual address for a PTE.
943 #define ARM_TT_PT_ADDR_SHIFT (11ULL)
947 #define ARM_PT_DESC_INDEX(ptep) \
948 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
950 #define ptep_get_va(ptep) \
951 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
953 #define ptep_get_pmap(ptep) \
954 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
958 #define ARM_PT_DESC_INDEX(ptep) \
959 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
961 #define ptep_get_ptd(ptep) \
962 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
/* PVH Define Macros */

#define PVH_TYPE_NULL   0x0UL
#define PVH_TYPE_PVEP   0x1UL
#define PVH_TYPE_PTEP   0x2UL
#define PVH_TYPE_PTDP   0x3UL

#define PVH_TYPE_MASK   (0x3UL)
#ifdef __arm64__

/* All flags listed below are stored in the PV head pointer unless otherwise noted */
#define PVH_FLAG_IOMMU          0x4UL /* Stored in each PTE, or in PV head for single-PTE PV heads */
#define PVH_FLAG_IOMMU_TABLE    (1ULL << 63) /* Stored in each PTE, or in PV head for single-PTE PV heads */
#define PVH_FLAG_CPU            (1ULL << 62)
#define PVH_LOCK_BIT            61
#define PVH_FLAG_LOCK           (1ULL << PVH_LOCK_BIT)
#define PVH_FLAG_EXEC           (1ULL << 60)
#define PVH_FLAG_LOCKDOWN       (1ULL << 59)
#define PVH_HIGH_FLAGS          (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN)

#else /* !__arm64__ */

#define PVH_LOCK_BIT            31
#define PVH_FLAG_LOCK           (1UL << PVH_LOCK_BIT)
#define PVH_HIGH_FLAGS          PVH_FLAG_LOCK

#endif

#define PVH_LIST_MASK   (~PVH_TYPE_MASK)
#define pvh_test_type(h, b)                                                             \
	((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))

#define pvh_ptep(h)                                                                     \
	((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))

#define pvh_list(h)                                                                     \
	((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
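
/*
 * Illustrative sketch (not part of the original source): a PV head packs three
 * things into one pointer-sized word: the PVH_HIGH_FLAGS bits at the top, the
 * PVH_TYPE_MASK bits at the bottom, and the list pointer in between, so a
 * typical reader looks like:
 *
 *	pv_entry_t **pvh = pai_to_pvh(pai);
 *	if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
 *	        pt_entry_t *ptep = pvh_ptep(pvh);   // single mapping
 *	} else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
 *	        pv_entry_t *pvep = pvh_list(pvh);   // chain of pv_entry_t
 *	}
 *
 * pvh_ptep()/pvh_list() OR PVH_HIGH_FLAGS back in after masking, which relies
 * on kernel pointers having those high bits set on arm64.
 */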
#define pvh_get_flags(h)                                                                \
	(*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)

#define pvh_set_flags(h, f)                                                             \
	do {                                                                            \
	        os_atomic_store((vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
	            relaxed);                                                           \
	} while (0)

#define pvh_update_head(h, e, t)                                                        \
	do {                                                                            \
	        assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK);                            \
	        os_atomic_store((vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
	            relaxed);                                                           \
	} while (0)

#define pvh_update_head_unlocked(h, e, t)                                               \
	do {                                                                            \
	        assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK));                         \
	        *(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK;        \
	} while (0)

#define pvh_add(h, e)                                                                   \
	do {                                                                            \
	        assert(!pvh_test_type((h), PVH_TYPE_PTEP));                             \
	        pve_next(e) = pvh_list(h);                                              \
	        pvh_update_head((h), (e), PVH_TYPE_PVEP);                               \
	} while (0)

#define pvh_remove(h, p, e)                                                             \
	do {                                                                            \
	        assert(!PVE_NEXT_IS_ALTACCT(pve_next((e))));                            \
	        if ((p) == (h)) {                                                       \
	                if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) {             \
	                        pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL);     \
	                } else {                                                        \
	                        pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
	                }                                                               \
	        } else {                                                                \
	                /*                                                              \
	                 * preserve the "alternate accounting" bit                      \
	                 * when updating "p" (the previous entry's                      \
	                 * "pve_next").                                                 \
	                 */                                                             \
	                boolean_t __is_altacct;                                         \
	                __is_altacct = PVE_NEXT_IS_ALTACCT(*(p));                       \
	                *(p) = PVE_NEXT_PTR(pve_next((e)));                             \
	                if (__is_altacct) {                                             \
	                        PVE_NEXT_SET_ALTACCT((p));                              \
	                } else {                                                        \
	                        PVE_NEXT_CLR_ALTACCT((p));                              \
	                }                                                               \
	        }                                                                       \
	} while (0)
/* PPATTR Define Macros */

#define ppattr_set_bits(h, b)                                                           \
	do {                                                                            \
	        while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
	} while (0)

#define ppattr_clear_bits(h, b)                                                         \
	do {                                                                            \
	        while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
	} while (0)

#define ppattr_test_bits(h, b)                                                          \
	((*(pp_attr_t *)(h) & (b)) == (b))

#define pa_set_bits(x, b)                                                               \
	do {                                                                            \
	        if (pa_valid(x)) {                                                      \
	                ppattr_set_bits(&pp_attr_table[pa_index(x)],                    \
	                    (b));                                                       \
	        }                                                                       \
	} while (0)

#define pa_test_bits(x, b)                                                              \
	(pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
	    (b)) : FALSE)

#define pa_clear_bits(x, b)                                                             \
	do {                                                                            \
	        if (pa_valid(x)) {                                                      \
	                ppattr_clear_bits(&pp_attr_table[pa_index(x)],                  \
	                    (b));                                                       \
	        }                                                                       \
	} while (0)

#define pa_set_modify(x)                                                                \
	pa_set_bits(x, PP_ATTR_MODIFIED)

#define pa_clear_modify(x)                                                              \
	pa_clear_bits(x, PP_ATTR_MODIFIED)

#define pa_set_reference(x)                                                             \
	pa_set_bits(x, PP_ATTR_REFERENCED)

#define pa_clear_reference(x)                                                           \
	pa_clear_bits(x, PP_ATTR_REFERENCED)
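
/*
 * Illustrative sketch (not part of the original source): ppattr_set_bits()
 * and ppattr_clear_bits() spin on a 16-bit compare-and-swap rather than
 * taking a lock, so marking a page dirty reduces to
 *
 *	pa_set_modify(pa);      // silently a no-op if !pa_valid(pa)
 *
 * which retries OSCompareAndSwap16() until the PP_ATTR_MODIFIED bit lands
 * without clobbering concurrent updates to the other pp_attr_t bits.
 */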
#define IS_INTERNAL_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
#define SET_INTERNAL_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
#define CLR_INTERNAL_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)

#define IS_REUSABLE_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
#define SET_REUSABLE_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
#define CLR_REUSABLE_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)

#define IS_ALTACCT_PAGE(pai, pve_p)                                     \
	(((pve_p) == NULL)                                              \
	? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT)        \
	: PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
#define SET_ALTACCT_PAGE(pai, pve_p)                                    \
	if ((pve_p) == NULL) {                                          \
	        ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT);  \
	} else {                                                        \
	        PVE_NEXT_SET_ALTACCT(&pve_next((pve_p)));               \
	}
#define CLR_ALTACCT_PAGE(pai, pve_p)                                    \
	if ((pve_p) == NULL) {                                          \
	        ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT);\
	} else {                                                        \
	        PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p)));               \
	}

#define IS_REFFAULT_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
#define SET_REFFAULT_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
#define CLR_REFFAULT_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)

#define IS_MODFAULT_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
#define SET_MODFAULT_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
#define CLR_MODFAULT_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)

#define tte_get_ptd(tte) \
	((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))
#if (__ARM_VMSA__ == 7)

#define tte_index(pmap, pt_attr, addr)                                                  \
	ttenum((addr))

#define pte_index(pmap, pt_attr, addr)                                                  \
	ptenum((addr))

#else

#define ttn_index(pmap, pt_attr, addr, pt_level)                                        \
	(((addr) & (pt_attr)->pta_level_info[(pt_level)].index_mask) >> (pt_attr)->pta_level_info[(pt_level)].shift)

#define tt0_index(pmap, pt_attr, addr)                                                  \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L0_LEVEL)

#define tt1_index(pmap, pt_attr, addr)                                                  \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L1_LEVEL)

#define tt2_index(pmap, pt_attr, addr)                                                  \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L2_LEVEL)

#define tt3_index(pmap, pt_attr, addr)                                                  \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L3_LEVEL)

#define tte_index(pmap, pt_attr, addr)                                                  \
	tt2_index((pmap), (pt_attr), (addr))

#define pte_index(pmap, pt_attr, addr)                                                  \
	tt3_index((pmap), (pt_attr), (addr))

#endif
/*
 *	Lock on pmap system
 */

lck_grp_t pmap_lck_grp;

#define PMAP_LOCK_INIT(pmap) {                                  \
	simple_lock_init(&(pmap)->lock, 0);                     \
}

#define PMAP_LOCK(pmap) {                                       \
	pmap_simple_lock(&(pmap)->lock);                        \
}

#define PMAP_UNLOCK(pmap) {                                     \
	pmap_simple_unlock(&(pmap)->lock);                      \
}

#if MACH_ASSERT
#define PMAP_ASSERT_LOCKED(pmap) {                              \
	simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED);    \
}
#else
#define PMAP_ASSERT_LOCKED(pmap)
#endif
#if defined(__arm64__)
#define PVH_LOCK_WORD 1 /* Assumes little-endian */
#else
#define PVH_LOCK_WORD 0
#endif

#define ASSERT_PVH_LOCKED(index)                                                        \
	do {                                                                            \
	        assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK);            \
	} while (0)

#define LOCK_PVH(index)                                                                 \
	do {                                                                            \
	        pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
	} while (0)

#define UNLOCK_PVH(index)                                                               \
	do {                                                                            \
	        ASSERT_PVH_LOCKED(index);                                               \
	        pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
	} while (0)
#define PMAP_UPDATE_TLBS(pmap, s, e, strong) {                                          \
	pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (unsigned)(e - s), pmap);     \
	pmap_sync_tlb(strong);                                                          \
}

#define FLUSH_PTE_RANGE(spte, epte)                                                     \
	__builtin_arm_dmb(DMB_ISH);

#define FLUSH_PTE(pte_p)                                                                \
	__builtin_arm_dmb(DMB_ISH);

#define FLUSH_PTE_STRONG(pte_p)                                                         \
	__builtin_arm_dsb(DSB_ISH);

#define FLUSH_PTE_RANGE_STRONG(spte, epte)                                              \
	__builtin_arm_dsb(DSB_ISH);
#define WRITE_PTE_FAST(pte_p, pte_entry)                                                \
	__unreachable_ok_push                                                           \
	if (TEST_PAGE_RATIO_4) {                                                        \
	        if (((unsigned)(pte_p)) & 0x1f) {                                       \
	                panic("%s: WRITE_PTE_FAST is unaligned, "                       \
	                      "pte_p=%p, pte_entry=%p",                                 \
	                       __FUNCTION__,                                            \
	                       pte_p, (void*)pte_entry);                                \
	        }                                                                       \
	        if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) {        \
	                *(pte_p) = (pte_entry);                                         \
	                *((pte_p)+1) = (pte_entry);                                     \
	                *((pte_p)+2) = (pte_entry);                                     \
	                *((pte_p)+3) = (pte_entry);                                     \
	        } else {                                                                \
	                *(pte_p) = (pte_entry);                                         \
	                *((pte_p)+1) = (pte_entry) | 0x1000;                            \
	                *((pte_p)+2) = (pte_entry) | 0x2000;                            \
	                *((pte_p)+3) = (pte_entry) | 0x3000;                            \
	        }                                                                       \
	} else {                                                                        \
	        *(pte_p) = (pte_entry);                                                 \
	}                                                                               \
	__unreachable_ok_pop

#define WRITE_PTE(pte_p, pte_entry)                                                     \
	WRITE_PTE_FAST(pte_p, pte_entry);                                               \
	FLUSH_PTE(pte_p);

#define WRITE_PTE_STRONG(pte_p, pte_entry)                                              \
	WRITE_PTE_FAST(pte_p, pte_entry);                                               \
	FLUSH_PTE_STRONG(pte_p);
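
/*
 * Illustrative sketch (not part of the original source): when the VM page
 * size is four times the hardware page size (TEST_PAGE_RATIO_4), one 16KB VM
 * page spans four consecutive 4KB PTEs, so WRITE_PTE_FAST() replicates the
 * entry and bumps the output address of each copy by 0x1000; a mapping at
 * physical offset P yields PTEs for P, P+0x1000, P+0x2000 and P+0x3000.
 * WRITE_PTE() then publishes the stores with a DMB, while WRITE_PTE_STRONG()
 * uses a DSB where stronger ordering is required.
 */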
/*
 * Other useful macros.
 */
#define current_pmap()                                                                  \
	(vm_map_pmap(current_thread()->map))

#define VALIDATE_USER_PMAP(x)
#define VALIDATE_PMAP(x)
#define VALIDATE_LEDGER(x)
#if DEVELOPMENT || DEBUG

/*
 * Trace levels are controlled by a bitmask in which each
 * level can be enabled/disabled by the (1<<level) position
 * in the boot arg
 * Level 1: pmap lifecycle (create/destroy/switch)
 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
 * Level 3: internal state management (tte/attributes/fast-fault)
 */

SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;

#define PMAP_TRACE(level, ...) \
	if (__improbable((1 << (level)) & pmap_trace_mask)) { \
	        KDBG_RELEASE(__VA_ARGS__); \
	}
#else /* DEVELOPMENT || DEBUG */
#define PMAP_TRACE(level, ...)
#endif /* DEVELOPMENT || DEBUG */
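
/*
 * Illustrative sketch (not part of the original source): a trace mask of 0x2
 * enables only level-1 (lifecycle) events, so a call such as
 *
 *	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);
 *
 * emits a kdebug record while level-2/3 call sites stay silent.  On
 * non-DEVELOPMENT builds the macro expands to nothing, so the call sites cost
 * nothing when tracing is not built in.  (PMAP_CODE/PMAP__CREATE are assumed
 * here to come from the pmap tracing headers.)
 */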
/*
 * Internal function prototypes (forward declarations).
 */

static void pv_init(
	void);

static boolean_t pv_alloc(
	pmap_t pmap,
	unsigned int pai,
	pv_entry_t **pvepp);

static void pv_free(
	pv_entry_t *pvep);

static void pv_list_free(
	pv_entry_t *pvehp,
	pv_entry_t *pvetp,
	unsigned int cnt);

static void ptd_bootstrap(
	pt_desc_t *ptdp, unsigned int ptd_cnt);

static inline pt_desc_t *ptd_alloc_unlinked(bool reclaim);

static pt_desc_t *ptd_alloc(pmap_t pmap, bool reclaim);

static void ptd_deallocate(pt_desc_t *ptdp);

static void ptd_init(
	pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);

static void pmap_zone_init(
	void);

static void pmap_set_reference(
	ppnum_t pn);

ppnum_t pmap_vtophys(
	pmap_t pmap, addr64_t va);

void pmap_switch_user_ttb(
	pmap_t pmap);

static kern_return_t pmap_expand(
	pmap_t, vm_map_address_t, unsigned int options, unsigned int level);

static int pmap_remove_range(
	pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);

static int pmap_remove_range_options(
	pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, bool *, int);

static tt_entry_t *pmap_tt1_allocate(
	pmap_t, vm_size_t, unsigned int);

#define PMAP_TT_ALLOCATE_NOWAIT         0x1

static void pmap_tt1_deallocate(
	pmap_t, tt_entry_t *, vm_size_t, unsigned int);

#define PMAP_TT_DEALLOCATE_NOBLOCK      0x1

static kern_return_t pmap_tt_allocate(
	pmap_t, tt_entry_t **, unsigned int, unsigned int);

#define PMAP_TT_ALLOCATE_NOWAIT         0x1

static void pmap_tte_deallocate(
	pmap_t, tt_entry_t *, unsigned int);

#ifdef __ARM64_PMAP_SUBPAGE_L1__
#if (__ARM_VMSA__ <= 7)
#error This is not supported for old-style page tables
#endif /* (__ARM_VMSA__ <= 7) */
#define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
#else /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
#if (__ARM_VMSA__ <= 7)
#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
#else /* (__ARM_VMSA__ > 7) */
#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
#endif /* (__ARM_VMSA__ > 7) */
#endif /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */

const unsigned int arm_hardware_page_size = ARM_PGBYTES;
const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;

#define PMAP_TT_DEALLOCATE_NOBLOCK      0x1

#if (__ARM_VMSA__ > 7)

static inline tt_entry_t *pmap_tt1e(
	pmap_t, vm_map_address_t);

static inline tt_entry_t *pmap_tt2e(
	pmap_t, vm_map_address_t);

static inline pt_entry_t *pmap_tt3e(
	pmap_t, vm_map_address_t);

static inline pt_entry_t *pmap_ttne(
	pmap_t, unsigned int, vm_map_address_t);

static void pmap_unmap_sharedpage(
	pmap_t pmap);

static boolean_t
pmap_is_64bit(pmap_t);

#endif /* (__ARM_VMSA__ > 7) */

static inline tt_entry_t *pmap_tte(
	pmap_t, vm_map_address_t);

static inline pt_entry_t *pmap_pte(
	pmap_t, vm_map_address_t);

static void pmap_update_cache_attributes_locked(
	ppnum_t, unsigned);

boolean_t arm_clear_fast_fault(
	ppnum_t ppnum,
	vm_prot_t fault_type);

static pmap_paddr_t     pmap_pages_reclaim(
	void);

static kern_return_t    pmap_pages_alloc(
	pmap_paddr_t    *pa,
	unsigned        size,
	unsigned        option);

#define PMAP_PAGES_ALLOCATE_NOWAIT      0x1
#define PMAP_PAGES_RECLAIM_NOWAIT       0x2

static void pmap_pages_free(
	pmap_paddr_t    pa,
	unsigned        size);

static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);

static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);

static void pmap_trim_self(pmap_t pmap);
static void pmap_trim_subord(pmap_t subord);
#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
	static __return_type __function_name##_internal __function_args
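
/*
 * Illustrative sketch (not part of the original source): each invocation
 * below simply declares the "_internal" flavor of an exported pmap routine,
 * e.g.
 *
 *	PMAP_SUPPORT_PROTOTYPES(
 *		void,
 *		pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
 *
 * expands to
 *
 *	static void pmap_destroy_internal(pmap_t pmap);
 *
 * The trailing index argument is ignored by this definition of the macro;
 * it keeps the same prototype list usable as a dispatch-table index on
 * configurations that route these calls through a table.
 */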
PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	arm_fast_fault, (pmap_t pmap,
	vm_map_address_t va,
	vm_prot_t fault_type,
	bool was_af_fault,
	bool from_user), ARM_FAST_FAULT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	boolean_t,
	arm_force_fast_fault, (ppnum_t ppnum,
	vm_prot_t allow_mode,
	int options), ARM_FORCE_FAST_FAULT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	mapping_replenish, (uint32_t kern_target_count, uint32_t user_target_count), MAPPING_REPLENISH_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	boolean_t,
	pmap_batch_set_cache_attributes, (ppnum_t pn,
	unsigned int cacheattr,
	unsigned int page_cnt,
	unsigned int page_index,
	boolean_t doit,
	unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_change_wiring, (pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired), PMAP_CHANGE_WIRING_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_t,
	pmap_create_options, (ledger_t ledger,
	vm_map_size_t size,
	unsigned int flags), PMAP_CREATE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	pmap_enter_options, (pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options), PMAP_ENTER_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	vm_offset_t,
	pmap_extract, (pmap_t pmap,
	vm_map_address_t va), PMAP_EXTRACT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	ppnum_t,
	pmap_find_phys, (pmap_t pmap,
	addr64_t va), PMAP_FIND_PHYS_INDEX);

#if (__ARM_VMSA__ > 7)
PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
#endif

PMAP_SUPPORT_PROTOTYPES(
	boolean_t,
	pmap_is_empty, (pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	unsigned int,
	pmap_map_cpu_windows_copy, (ppnum_t pn,
	vm_prot_t prot,
	unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	pmap_nest, (pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	addr64_t nstart,
	uint64_t size), PMAP_NEST_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_page_protect_options, (ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_protect_options, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	vm_prot_t prot,
	unsigned int options,
	void *args), PMAP_PROTECT_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	pmap_query_page_info, (pmap_t pmap,
	vm_map_offset_t va,
	int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	mach_vm_size_t,
	pmap_query_resident, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	mach_vm_size_t *compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	int,
	pmap_remove_options, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options), PMAP_REMOVE_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	pmap_return, (boolean_t do_panic,
	boolean_t do_recurse), PMAP_RETURN_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_set_cache_attributes, (ppnum_t pn,
	unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_update_compressor_page, (ppnum_t pn,
	unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_set_process, (pmap_t pmap,
	int pid,
	char *procname), PMAP_SET_PROCESS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	kern_return_t,
	pmap_unnest_options, (pmap_t grand,
	addr64_t vaddr,
	uint64_t size,
	unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	phys_attribute_set, (ppnum_t pn,
	unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	phys_attribute_clear, (ppnum_t pn,
	unsigned int bits,
	int options,
	void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_trim, (pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	addr64_t nstart,
	uint64_t size), PMAP_TRIM_INDEX);

void pmap_footprint_suspend(vm_map_t map,
    boolean_t suspend);

PMAP_SUPPORT_PROTOTYPES(
	void,
	pmap_footprint_suspend, (vm_map_t map,
	boolean_t suspend),
	PMAP_FOOTPRINT_SUSPEND_INDEX);
#if CONFIG_PGTRACE
boolean_t pgtrace_enabled = 0;

typedef struct {
	queue_chain_t   chain;

	/*
	 *   pmap        - pmap for below addresses
	 *   ova         - original va page address
	 *   cva         - clone va addresses for pre, target and post pages
	 *   cva_spte    - clone saved ptes
	 *   range       - trace range in this map
	 *   cloned      - has been cloned or not
	 */
	pmap_t          pmap;
	vm_map_offset_t ova;
	vm_map_offset_t cva[3];
	pt_entry_t      cva_spte[3];
} pmap_pgtrace_map_t;

static void pmap_pgtrace_init(void);
static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
#endif /* CONFIG_PGTRACE */
#if (__ARM_VMSA__ > 7)
/*
 * The low global vector page is mapped at a fixed alias.
 * Since the page size is 16k for H8 and newer we map the globals to a 16k
 * aligned address. Readers of the globals (e.g. lldb, panic server) need
 * to check both addresses anyway for backward compatibility. So for now
 * we leave H6 and H7 where they were.
 */
#if (ARM_PGSHIFT == 14)
#define LOWGLOBAL_ALIAS         (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
#else
#define LOWGLOBAL_ALIAS         (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
#endif
#else /* (__ARM_VMSA__ > 7) */
#define LOWGLOBAL_ALIAS         (0xFFFF1000)
#endif /* (__ARM_VMSA__ > 7) */
long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;

int pt_fake_zone_index = -1;            /* index of pmap fake zone */
/*
 * Allocates and initializes a per-CPU data structure for the pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_cpu_data_init_internal(unsigned int cpu_number)
{
	pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();

	pmap_cpu_data->cpu_number = cpu_number;
}

void
pmap_cpu_data_init(void)
{
	pmap_cpu_data_init_internal(cpu_number());
}

void
pmap_cpu_data_array_init(void)
{
	pmap_cpu_data_init();
}

pmap_cpu_data_t *
pmap_get_cpu_data(void)
{
	pmap_cpu_data_t * pmap_cpu_data = NULL;

	pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;

	return pmap_cpu_data;
}
static pmap_paddr_t
pmap_pages_reclaim(
	void)
{
	boolean_t               found_page;
	unsigned                i;
	pt_desc_t               *ptdp;

	/*
	 * pmap_pages_reclaim() is returning a page by freeing an active pt page.
	 * To be eligible, a pt page is assigned to a user pmap. It doesn't have any wired pte
	 * entry and it contains at least one valid pte entry.
	 *
	 * In a loop, check for a page in the reclaimed pt page list.
	 * if one is present, unlink that page and return the physical page address.
	 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
	 * If found, invoke pmap_remove_range() on its pmap and address range then
	 * deallocates that pt page. This will end up adding the pt page to the
	 * reclaimed pt page list.
	 * If no eligible page were found in the pt page list, panic.
	 */

	pmap_simple_lock(&pmap_pages_lock);
	pmap_pages_request_count++;
	pmap_pages_request_acum++;

	while (1) {
		if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
			page_free_entry_t       *page_entry;

			page_entry = pmap_pages_reclaim_list;
			pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
			pmap_simple_unlock(&pmap_pages_lock);

			return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
		}

		pmap_simple_unlock(&pmap_pages_lock);

		pmap_simple_lock(&pt_pages_lock);
		ptdp = (pt_desc_t *)queue_first(&pt_page_list);
		found_page = FALSE;

		while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
			if ((ptdp->pmap->nested == FALSE)
			    && (pmap_simple_lock_try(&ptdp->pmap->lock))) {
				assert(ptdp->pmap != kernel_pmap);
				unsigned refcnt_acc = 0;
				unsigned wiredcnt_acc = 0;

				for (i = 0; i < PT_INDEX_MAX; i++) {
					if (ptdp->ptd_info[i].refcnt == PT_DESC_REFCOUNT) {
						/* Do not attempt to free a page that contains an L2 table */
						refcnt_acc = 0;
						break;
					}
					refcnt_acc += ptdp->ptd_info[i].refcnt;
					wiredcnt_acc += ptdp->ptd_info[i].wiredcnt;
				}

				if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
					found_page = TRUE;
					/* Leave ptdp->pmap locked here.  We're about to reclaim
					 * a tt page from it, so we don't want anyone else messing
					 * with it while we do that. */
					break;
				}
				pmap_simple_unlock(&ptdp->pmap->lock);
			}
			ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
		}
		if (!found_page) {
			panic("%s: No eligible page in pt_page_list", __FUNCTION__);
		} else {
			int                     remove_count = 0;
			bool                    need_strong_sync = false;
			vm_map_address_t        va;
			pmap_t                  pmap;
			pt_entry_t              *bpte, *epte;
			pt_entry_t              *pte_p;
			tt_entry_t              *tte_p;
			uint32_t                rmv_spte = 0;

			pmap_simple_unlock(&pt_pages_lock);
			pmap = ptdp->pmap;
			PMAP_ASSERT_LOCKED(pmap); // pmap lock should be held from loop above

			__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

			for (i = 0; i < PT_INDEX_MAX; i++) {
				va = ptdp->ptd_info[i].va;

				/* If the VA is bogus, this may represent an unallocated region
				 * or one which is in transition (already being freed or expanded).
				 * Don't try to remove mappings here. */
				if (va == (vm_offset_t)-1) {
					continue;
				}

				tte_p = pmap_tte(pmap, va);
				if ((tte_p != (tt_entry_t *) NULL)
				    && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
					pte_p = (pt_entry_t *) ttetokv(*tte_p);
					bpte = &pte_p[pte_index(pmap, pt_attr, va)];
					epte = bpte + PAGE_SIZE / sizeof(pt_entry_t);
					/*
					 * Use PMAP_OPTIONS_REMOVE to clear any
					 * "compressed" markers and update the
					 * "compressed" counter in pmap->stats.
					 * This means that we lose accounting for
					 * any compressed pages in this range
					 * but the alternative is to not be able
					 * to account for their future decompression,
					 * which could cause the counter to drift
					 * more and more.
					 */
					remove_count += pmap_remove_range_options(
						pmap, va, bpte, epte,
						&rmv_spte, &need_strong_sync, PMAP_OPTIONS_REMOVE);
					if (ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt != 0) {
						panic("%s: ptdp %p, count %d", __FUNCTION__, ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
					}

					pmap_tte_deallocate(pmap, tte_p, PMAP_TT_TWIG_LEVEL);

					if (remove_count > 0) {
						pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, (unsigned int)pt_attr_leaf_table_size(pt_attr), pmap);
					} else {
						pmap_get_pt_ops(pmap)->flush_tlb_tte_async(va, pmap);
					}
				}
			}
			// Undo the lock we grabbed when we found ptdp above
			PMAP_UNLOCK(pmap);
			pmap_sync_tlb(need_strong_sync);
		}
		pmap_simple_lock(&pmap_pages_lock);
	}
}
static kern_return_t
pmap_pages_alloc(
	pmap_paddr_t            *pa,
	unsigned                size,
	unsigned                option)
{
	vm_page_t       m = VM_PAGE_NULL, m_prev;

	if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
		assert(size == PAGE_SIZE);
		*pa = pmap_pages_reclaim();
		return KERN_SUCCESS;
	}
	if (size == PAGE_SIZE) {
		while ((m = vm_page_grab()) == VM_PAGE_NULL) {
			if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}

			VM_PAGE_WAIT();
		}
		vm_page_lock_queues();
		vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
		vm_page_unlock_queues();
	}
	if (size == 2 * PAGE_SIZE) {
		while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
			if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}

			VM_PAGE_WAIT();
		}
	}

	*pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));

	vm_object_lock(pmap_object);
	while (m != VM_PAGE_NULL) {
		vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
		m_prev = m;
		m = NEXT_PAGE(m_prev);
		*(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
	}
	vm_object_unlock(pmap_object);

	OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
	OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);

	return KERN_SUCCESS;
}
static void
pmap_pages_free(
	pmap_paddr_t    pa,
	unsigned        size)
{
	pmap_simple_lock(&pmap_pages_lock);

	if (pmap_pages_request_count != 0) {
		page_free_entry_t       *page_entry;

		pmap_pages_request_count--;
		page_entry = (page_free_entry_t *)phystokv(pa);
		page_entry->next = pmap_pages_reclaim_list;
		pmap_pages_reclaim_list = page_entry;
		pmap_simple_unlock(&pmap_pages_lock);

		return;
	}

	pmap_simple_unlock(&pmap_pages_lock);

	vm_page_t       m;
	pmap_paddr_t    pa_max;

	OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);

	for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
		vm_object_lock(pmap_object);
		m = vm_page_lookup(pmap_object, (pa - gPhysBase));
		assert(m != VM_PAGE_NULL);
		assert(VM_PAGE_WIRED(m));
		vm_page_lock_queues();
		vm_page_free(m);
		vm_page_unlock_queues();
		vm_object_unlock(pmap_object);
	}
}
static inline void
PMAP_ZINFO_PALLOC(
	pmap_t pmap, int bytes)
{
	pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
}

static inline void
PMAP_ZINFO_PFREE(
	pmap_t pmap,
	int bytes)
{
	pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
}

static void
pmap_tt_ledger_credit(
	pmap_t          pmap,
	vm_size_t       size)
{
	if (pmap != kernel_pmap) {
		pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
		pmap_ledger_credit(pmap, task_ledgers.page_table, size);
	}
}

static void
pmap_tt_ledger_debit(
	pmap_t          pmap,
	vm_size_t       size)
{
	if (pmap != kernel_pmap) {
		pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
		pmap_ledger_debit(pmap, task_ledgers.page_table, size);
	}
}
static bool
alloc_asid(pmap_t pmap)
{
	int vasid;
	uint16_t hw_asid;

	pmap_simple_lock(&asid_lock);
	vasid = bitmap_first(&asid_bitmap[0], MAX_ASID);
	if (vasid < 0) {
		pmap_simple_unlock(&asid_lock);
		return false;
	}
	assert(vasid < MAX_ASID);
	bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
	pmap_simple_unlock(&asid_lock);
	// bitmap_first() returns highest-order bits first, but a 0-based scheme works
	// slightly better with the collision detection scheme used by pmap_switch_internal().
	vasid = MAX_ASID - 1 - vasid;
	hw_asid = vasid % MAX_HW_ASID;
	pmap->sw_asid = vasid / MAX_HW_ASID;
	hw_asid += 1;  // Account for ASID 0, which is reserved for the kernel
#if __ARM_KERNEL_PROTECT__
	hw_asid <<= 1;  // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
#endif
	pmap->hw_asid = hw_asid;
	return true;
}
2095 free_asid(pmap_t pmap
)
2098 uint16_t hw_asid
= pmap
->hw_asid
;
2099 assert(hw_asid
!= 0); // Should not try to free kernel ASID
2101 #if __ARM_KERNEL_PROTECT__
2106 vasid
= ((unsigned int)pmap
->sw_asid
* MAX_HW_ASID
) + hw_asid
;
2107 vasid
= MAX_ASID
- 1 - vasid
;
2109 pmap_simple_lock(&asid_lock
);
2110 assert(!bitmap_test(&asid_bitmap
[0], vasid
));
2111 bitmap_set(&asid_bitmap
[0], vasid
);
2112 pmap_simple_unlock(&asid_lock
);
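/*
 * Illustrative note (not taken from the original source): the virtual ASID space holds
 * MAX_ASID entries and is folded onto MAX_HW_ASID hardware ASIDs.  As a hypothetical
 * example, if MAX_HW_ASID were 256 and alloc_asid() picked vasid 300, then
 *
 *     hw_asid = (300 % 256) + 1 = 45    (the +1 skips ASID 0, reserved for the kernel)
 *     sw_asid =  300 / 256     = 1
 *
 * so two pmaps whose vasids differ by a multiple of MAX_HW_ASID share a hardware ASID
 * but are distinguished by sw_asid, which is what the collision handling in
 * pmap_switch_internal() relies on.
 */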
#ifndef PMAP_PV_LOAD_FACTOR
#define PMAP_PV_LOAD_FACTOR             1
#endif

#define PV_LOW_WATER_MARK_DEFAULT       (0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_KERN_LOW_WATER_MARK_DEFAULT  (0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_ALLOC_CHUNK_INITIAL          (0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_KERN_ALLOC_CHUNK_INITIAL     (0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_ALLOC_INITIAL_TARGET         (PV_ALLOC_CHUNK_INITIAL * 5)
#define PV_KERN_ALLOC_INITIAL_TARGET    (PV_KERN_ALLOC_CHUNK_INITIAL)

uint32_t pv_free_count MARK_AS_PMAP_DATA = 0;
uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
uint32_t pv_kern_free_count MARK_AS_PMAP_DATA = 0;

uint32_t pv_low_water_mark MARK_AS_PMAP_DATA;
uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA;
uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA;
uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA;

thread_t mapping_replenish_thread;
event_t mapping_replenish_event;
volatile uint32_t mappingrecurse = 0;

unsigned pmap_mapping_thread_wakeups;
unsigned pmap_reserve_replenish_stat MARK_AS_PMAP_DATA;
unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;

static void
pv_init(void)
{
	simple_lock_init(&pv_free_list_lock, 0);
	simple_lock_init(&pv_kern_free_list_lock, 0);
	pv_free_list = PV_ENTRY_NULL;
	pv_free_count = 0x0U;
	pv_kern_free_list = PV_ENTRY_NULL;
	pv_kern_free_count = 0x0U;
}

static inline void      PV_ALLOC(pv_entry_t **pv_ep);
static inline void      PV_KERN_ALLOC(pv_entry_t **pv_e);
static inline void      PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target);
static boolean_t
pv_alloc(
	pmap_t pmap,
	unsigned int pai,
	pv_entry_t **pvepp)
{
	if (pmap != NULL) {
		PMAP_ASSERT_LOCKED(pmap);
	}
	ASSERT_PVH_LOCKED(pai);
	PV_ALLOC(pvepp);
	if (PV_ENTRY_NULL == *pvepp) {
		if ((pmap == NULL) || (kernel_pmap == pmap)) {
			PV_KERN_ALLOC(pvepp);

			if (PV_ENTRY_NULL == *pvepp) {
				pv_entry_t      *pv_e;
				pv_entry_t      *pv_eh;
				pv_entry_t      *pv_et;
				int             pv_cnt;
				unsigned        j;
				pmap_paddr_t    pa;
				kern_return_t   ret;

				ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);

				if (ret == KERN_RESOURCE_SHORTAGE) {
					ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
				}

				if (ret != KERN_SUCCESS) {
					panic("%s: failed to alloc page for kernel, ret=%d, "
					    "pmap=%p, pai=%u, pvepp=%p",
					    __FUNCTION__, ret, pmap, pai, pvepp);
				}

				pv_page_count++;

				pv_e = (pv_entry_t *)phystokv(pa);
				pv_cnt = 0;
				pv_eh = pv_et = PV_ENTRY_NULL;
				*pvepp = pv_e;
				pv_e++;

				for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
					pv_e->pve_next = pv_eh;
					pv_eh = pv_e;

					if (pv_et == PV_ENTRY_NULL) {
						pv_et = pv_e;
					}
					pv_cnt++;
					pv_e++;
				}
				PV_FREE_LIST(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);
			}
		} else {
			pv_entry_t      *pv_e;
			pv_entry_t      *pv_eh;
			pv_entry_t      *pv_et;
			int             pv_cnt;
			unsigned        j;
			pmap_paddr_t    pa;
			kern_return_t   ret;

			ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);

			if (ret != KERN_SUCCESS) {
				panic("%s: failed to alloc page, ret=%d, "
				    "pmap=%p, pai=%u, pvepp=%p",
				    __FUNCTION__, ret, pmap, pai, pvepp);
			}

			pv_page_count++;

			pv_e = (pv_entry_t *)phystokv(pa);
			pv_cnt = 0;
			pv_eh = pv_et = PV_ENTRY_NULL;
			*pvepp = pv_e;
			pv_e++;

			for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
				pv_e->pve_next = pv_eh;
				pv_eh = pv_e;

				if (pv_et == PV_ENTRY_NULL) {
					pv_et = pv_e;
				}
				pv_cnt++;
				pv_e++;
			}
			PV_FREE_LIST(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);
		}
	}
	assert(PV_ENTRY_NULL != *pvepp);
	return TRUE;
}

static void
pv_free(
	pv_entry_t *pvep)
{
	PV_FREE_LIST(pvep, pvep, 1, pv_kern_low_water_mark);
}

static void
pv_list_free(
	pv_entry_t *pvehp,
	pv_entry_t *pvetp,
	unsigned int cnt)
{
	PV_FREE_LIST(pvehp, pvetp, cnt, pv_kern_low_water_mark);
}

static inline void
pv_water_mark_check(void)
{
	if (__improbable((pv_free_count < pv_low_water_mark) || (pv_kern_free_count < pv_kern_low_water_mark))) {
		if (!mappingrecurse && os_atomic_cmpxchg(&mappingrecurse, 0, 1, acq_rel)) {
			thread_wakeup(&mapping_replenish_event);
		}
	}
}
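/*
 * Illustrative usage (an assumption about intended call sites, not taken from the
 * original source): mapping paths that consume a PV entry are expected to follow the
 * pattern
 *
 *     pv_alloc(pmap, pai, &pve);   // take an entry from one of the free lists
 *     ...                          // link it into the pv_head_table chain for the page
 *     pv_water_mark_check();       // wake mapping_replenish() if either list ran low
 *
 * The cmpxchg on mappingrecurse ensures only one caller posts the wakeup per depletion
 * episode, so the replenish thread is not flooded with redundant events.
 */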
static inline void
PV_ALLOC(pv_entry_t **pv_ep)
{
	assert(*pv_ep == PV_ENTRY_NULL);
	if (pv_kern_free_count < pv_kern_low_water_mark) {
		/*
		 * If the kernel reserved pool is low, let non-kernel mappings wait for a page
		 * from the VM.
		 */
		return;
	}
	pmap_simple_lock(&pv_free_list_lock);

	if ((*pv_ep = pv_free_list) != 0) {
		pv_free_list = (pv_entry_t *)(*pv_ep)->pve_next;
		(*pv_ep)->pve_next = PV_ENTRY_NULL;
		pv_free_count--;
	}

	pmap_simple_unlock(&pv_free_list_lock);
}
static inline void
PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target)
{
	bool use_kernel_list = false;
	pmap_simple_lock(&pv_kern_free_list_lock);
	if (pv_kern_free_count < kern_target) {
		pv_et->pve_next = pv_kern_free_list;
		pv_kern_free_list = pv_eh;
		pv_kern_free_count += pv_cnt;
		use_kernel_list = true;
	}
	pmap_simple_unlock(&pv_kern_free_list_lock);

	if (!use_kernel_list) {
		pmap_simple_lock(&pv_free_list_lock);
		pv_et->pve_next = (pv_entry_t *)pv_free_list;
		pv_free_list = pv_eh;
		pv_free_count += pv_cnt;
		pmap_simple_unlock(&pv_free_list_lock);
	}
}
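/*
 * Illustrative note (assumption, not from the original source): PV entries live on two
 * singly linked free lists threaded through pve_next.  Freed chains are preferentially
 * routed to the kernel-reserved list until it holds kern_target entries, and only then
 * to the general list, e.g.:
 *
 *     PV_FREE_LIST(entry, entry, 1, pv_kern_low_water_mark);   // single entry (pv_free())
 *     PV_FREE_LIST(head, tail, n, kern_target_count);          // a whole page worth of entries
 *
 * Keeping the kernel list topped up first means kernel mappings can always make progress
 * even when the general pool is exhausted and user mappings must wait for the VM.
 */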
static inline void
PV_KERN_ALLOC(pv_entry_t **pv_e)
{
	assert(*pv_e == PV_ENTRY_NULL);
	pmap_simple_lock(&pv_kern_free_list_lock);

	if ((*pv_e = pv_kern_free_list) != 0) {
		pv_kern_free_list = (pv_entry_t *)(*pv_e)->pve_next;
		(*pv_e)->pve_next = PV_ENTRY_NULL;
		pv_kern_free_count--;
		pmap_kern_reserve_alloc_stat++;
	}

	pmap_simple_unlock(&pv_kern_free_list_lock);
}
/*
 * Creates a target number of free pv_entry_t objects for the kernel free list
 * and the general free list.
 */
MARK_AS_PMAP_TEXT static kern_return_t
mapping_free_prime_internal(void)
{
	SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_called = FALSE;
	SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_done = FALSE;

	if (mapping_free_prime_internal_done) {
		return KERN_FAILURE;
	}

	if (!mapping_free_prime_internal_called) {
		mapping_free_prime_internal_called = TRUE;

		pv_low_water_mark = PV_LOW_WATER_MARK_DEFAULT;

		/* Alterable via sysctl */
		pv_kern_low_water_mark = PV_KERN_LOW_WATER_MARK_DEFAULT;

		pv_kern_alloc_chunk = PV_KERN_ALLOC_CHUNK_INITIAL;
		pv_alloc_chunk = PV_ALLOC_CHUNK_INITIAL;
	}

	return mapping_replenish_internal(PV_KERN_ALLOC_INITIAL_TARGET, PV_ALLOC_INITIAL_TARGET);
}
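/*
 * Worked example (derived from the defaults above, with PMAP_PV_LOAD_FACTOR == 1):
 * PV_ALLOC_CHUNK_INITIAL is 0x200 (512) entries, so the initial priming targets are
 * PV_ALLOC_INITIAL_TARGET = 512 * 5 = 2560 general entries and
 * PV_KERN_ALLOC_INITIAL_TARGET = 512 kernel-reserved entries; larger load factors
 * scale both targets linearly.
 */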
void
mapping_free_prime(void)
{
	kern_return_t kr = KERN_FAILURE;

	kr = mapping_free_prime_internal();

	if (kr != KERN_SUCCESS) {
		panic("%s: failed, kr=%d",
		    __FUNCTION__, kr);
	}
}

void mapping_replenish(void);

void
mapping_adjust(void)
{
	kern_return_t mres;

	mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
	if (mres != KERN_SUCCESS) {
		panic("%s: mapping_replenish thread creation failed",
		    __FUNCTION__);
	}
	thread_deallocate(mapping_replenish_thread);
}
/*
 * Fills the kernel and general PV free lists back up to their low watermarks.
 */
MARK_AS_PMAP_TEXT static kern_return_t
mapping_replenish_internal(uint32_t kern_target_count, uint32_t user_target_count)
{
	pv_entry_t    *pv_e;
	pv_entry_t    *pv_eh;
	pv_entry_t    *pv_et;
	int            pv_cnt;
	unsigned       j;
	pmap_paddr_t   pa;
	kern_return_t  ret = KERN_SUCCESS;

	while ((pv_free_count < user_target_count) || (pv_kern_free_count < kern_target_count)) {
		pv_cnt = 0;
		pv_eh = pv_et = PV_ENTRY_NULL;

		ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
		assert(ret == KERN_SUCCESS);

		pv_page_count++;

		pv_e = (pv_entry_t *)phystokv(pa);

		for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
			pv_e->pve_next = pv_eh;
			pv_eh = pv_e;

			if (pv_et == PV_ENTRY_NULL) {
				pv_et = pv_e;
			}
			pv_cnt++;
			pv_e++;
		}
		pmap_reserve_replenish_stat += pv_cnt;
		PV_FREE_LIST(pv_eh, pv_et, pv_cnt, kern_target_count);
	}

	return ret;
}
/*
 * Continuation function that keeps the PV free lists from running out of free
 * entries.
 */
__attribute__((noreturn))
void
mapping_replenish(void)
{
	kern_return_t kr;

	/* We qualify for VM privileges...*/
	current_thread()->options |= TH_OPT_VMPRIV;

	for (;;) {
		kr = mapping_replenish_internal(pv_kern_low_water_mark, pv_low_water_mark);

		if (kr != KERN_SUCCESS) {
			panic("%s: failed, kr=%d", __FUNCTION__, kr);
		}

		/* Check if the kernel pool has been depleted since the
		 * first pass, to reduce refill latency.
		 */
		if (pv_kern_free_count < pv_kern_low_water_mark) {
			continue;
		}
		/* Block sans continuation to avoid yielding kernel stack */
		assert_wait(&mapping_replenish_event, THREAD_UNINT);
		mappingrecurse = 0;
		thread_block(THREAD_CONTINUE_NULL);
		pmap_mapping_thread_wakeups++;
	}
}
static void
ptd_bootstrap(
	pt_desc_t *ptdp,
	unsigned int ptd_cnt)
{
	simple_lock_init(&ptd_free_list_lock, 0);
	while (ptd_cnt != 0) {
		(*(void **)ptdp) = (void *)ptd_free_list;
		ptd_free_list = ptdp;
		ptdp++;
		ptd_cnt--;
		ptd_free_count++;
	}
	ptd_preboot = FALSE;
}
static pt_desc_t*
ptd_alloc_unlinked(bool reclaim)
{
	pt_desc_t       *ptdp;
	unsigned        i;

	if (!ptd_preboot) {
		pmap_simple_lock(&ptd_free_list_lock);
	}

	if (ptd_free_count == 0) {
		unsigned int    ptd_cnt;
		pt_desc_t       *ptdp_next;

		if (ptd_preboot) {
			ptdp = (pt_desc_t *)avail_start;
			avail_start += ARM_PGBYTES;
			ptdp_next = ptdp;
			ptd_cnt = ARM_PGBYTES / sizeof(pt_desc_t);
		} else {
			pmap_paddr_t    pa;
			kern_return_t   ret;

			pmap_simple_unlock(&ptd_free_list_lock);

			if (pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
				if (reclaim) {
					ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
					assert(ret == KERN_SUCCESS);
				} else {
					return NULL;
				}
			}
			ptdp = (pt_desc_t *)phystokv(pa);

			pmap_simple_lock(&ptd_free_list_lock);
			ptdp_next = ptdp;
			ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
		}

		while (ptd_cnt != 0) {
			(*(void **)ptdp_next) = (void *)ptd_free_list;
			ptd_free_list = ptdp_next;
			ptdp_next++;
			ptd_cnt--;
			ptd_free_count++;
		}
	}

	if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
		ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
		ptd_free_count--;
	} else {
		panic("%s: out of ptd entry",
		    __FUNCTION__);
	}

	if (!ptd_preboot) {
		pmap_simple_unlock(&ptd_free_list_lock);
	}

	ptdp->pt_page.next = NULL;
	ptdp->pt_page.prev = NULL;

	for (i = 0; i < PT_INDEX_MAX; i++) {
		ptdp->ptd_info[i].va = (vm_offset_t)-1;
		ptdp->ptd_info[i].refcnt = 0;
		ptdp->ptd_info[i].wiredcnt = 0;
	}

	return ptdp;
}
static inline pt_desc_t*
ptd_alloc(pmap_t pmap, bool reclaim)
{
	pt_desc_t *ptdp = ptd_alloc_unlinked(reclaim);

	if (ptdp == NULL) {
		return NULL;
	}

	ptdp->pmap = pmap;
	if (pmap != kernel_pmap) {
		/* We should never try to reclaim kernel pagetable pages in
		 * pmap_pages_reclaim(), so don't enter them into the list. */
		pmap_simple_lock(&pt_pages_lock);
		queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
		pmap_simple_unlock(&pt_pages_lock);
	}

	pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
	return ptdp;
}
static void
ptd_deallocate(pt_desc_t *ptdp)
{
	pmap_t pmap = ptdp->pmap;

	if (ptd_preboot) {
		panic("%s: early boot, "
		    "ptdp=%p",
		    __FUNCTION__, ptdp);
	}

	if (ptdp->pt_page.next != NULL) {
		pmap_simple_lock(&pt_pages_lock);
		queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
		pmap_simple_unlock(&pt_pages_lock);
	}
	pmap_simple_lock(&ptd_free_list_lock);
	(*(void **)ptdp) = (void *)ptd_free_list;
	ptd_free_list = (pt_desc_t *)ptdp;
	ptd_free_count++;
	pmap_simple_unlock(&ptd_free_list_lock);
	if (pmap != NULL) {
		pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
	}
}
static void
ptd_init(
	pt_desc_t *ptdp,
	pmap_t pmap,
	vm_map_address_t va,
	unsigned int level,
	pt_entry_t *pte_p)
{
	if (ptdp->pmap != pmap) {
		panic("%s: pmap mismatch, "
		    "ptdp=%p, pmap=%p, va=%p, level=%u, pte_p=%p",
		    __FUNCTION__,
		    ptdp, pmap, (void*)va, level, pte_p);
	}

#if (__ARM_VMSA__ == 7)
	ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(ARM_TT_L1_PT_OFFMASK);
#else
	assert(level > pt_attr_root_level(pmap_get_pt_attr(pmap)));
	ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(pt_attr_ln_offmask(pmap_get_pt_attr(pmap), level - 1));
#endif
	if (level < PMAP_TT_MAX_LEVEL) {
		ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt = PT_DESC_REFCOUNT;
	}
}

boolean_t
pmap_valid_address(
	pmap_paddr_t addr)
{
	return pa_valid(addr);
}
#if (__ARM_VMSA__ == 7)

/*
 *	Given an offset and a map, compute the address of the
 *	corresponding translation table entry.
 */
static inline tt_entry_t *
pmap_tte(pmap_t pmap,
    vm_map_address_t addr)
{
	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	if (!(tte_index(pmap, pt_attr, addr) < pmap->tte_index_max)) {
		return (tt_entry_t *)NULL;
	}
	return &pmap->tte[tte_index(pmap, pt_attr, addr)];
}
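/*
 * Illustrative usage (assumption, not from the original source): a caller that wants the
 * L1 entry covering a VA simply does
 *
 *     tt_entry_t *ttep = pmap_tte(pmap, va);
 *     if (ttep == (tt_entry_t *)NULL) {
 *         // va lies beyond this pmap's translation table, so there is no entry to inspect
 *     }
 *
 * pmap_pte() below builds on this by descending one more level when the L1 entry points
 * to a page table.
 */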
/*
 *	Given an offset and a map, compute the address of the
 *	pte.  If the address is invalid with respect to the map
 *	then PT_ENTRY_NULL is returned (and the map may need to grow).
 *
 *	This is only used internally.
 */
static inline pt_entry_t *
pmap_pte(
	pmap_t pmap,
	vm_map_address_t addr)
{
	pt_entry_t     *ptp;
	tt_entry_t     *ttp;
	tt_entry_t      tte;

	ttp = pmap_tte(pmap, addr);
	if (ttp == (tt_entry_t *)NULL) {
		return PT_ENTRY_NULL;
	}
	tte = *ttp;
	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
		panic("%s: Attempt to demote L1 block, tte=0x%lx, "
		    "pmap=%p, addr=%p",
		    __FUNCTION__, (unsigned long)tte,
		    pmap, (void*)addr);
	}
	if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
		return PT_ENTRY_NULL;
	}
	ptp = (pt_entry_t *) ttetokv(tte) + ptenum(addr);
	return ptp;
}
__unused static inline tt_entry_t *
pmap_ttne(pmap_t pmap,
    unsigned int target_level,
    vm_map_address_t addr)
{
	tt_entry_t * ret_ttep = NULL;

	switch (target_level) {
	case 1:
		ret_ttep = pmap_tte(pmap, addr);
		break;
	case 2:
		ret_ttep = (tt_entry_t *)pmap_pte(pmap, addr);
		break;
	default:
		panic("%s: bad level, "
		    "pmap=%p, target_level=%u, addr=%p",
		    __FUNCTION__,
		    pmap, target_level, (void *)addr);
	}

	return ret_ttep;
}
#else

static inline tt_entry_t *
pmap_ttne(pmap_t pmap,
    unsigned int target_level,
    vm_map_address_t addr)
{
	tt_entry_t * ttp = NULL;
	tt_entry_t * ttep = NULL;
	tt_entry_t   tte = ARM_TTE_EMPTY;
	unsigned int cur_level;

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	ttp = pmap->tte;

	assert(target_level <= pt_attr->pta_max_level);

	for (cur_level = pt_attr->pta_root_level; cur_level <= target_level; cur_level++) {
		ttep = &ttp[ttn_index(pmap, pt_attr, addr, cur_level)];

		if (cur_level == target_level) {
			break;
		}

		tte = *ttep;

		if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
			panic("%s: Attempt to demote L%u block, tte=0x%llx, "
			    "pmap=%p, target_level=%u, addr=%p",
			    __FUNCTION__, cur_level, tte,
			    pmap, target_level, (void*)addr);
		}

		if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
			return TT_ENTRY_NULL;
		}

		ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
	}

	return ttep;
}
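/*
 * Illustrative note (assumption, not from the original source): with a 3-level
 * configuration the loop above visits L1 -> L2 -> L3.  Asking for the L3 entry of a VA
 * proceeds roughly as
 *
 *     ttep = &root[L1 index];          // cur_level == root, not target: descend
 *     ttp  = phystokv(*ttep & ARM_TTE_TABLE_MASK);
 *     ttep = &ttp[L2 index];           // still not target: descend again
 *     ttp  = phystokv(*ttep & ARM_TTE_TABLE_MASK);
 *     ttep = &ttp[L3 index];           // cur_level == target_level: return this pointer
 *
 * and returns TT_ENTRY_NULL as soon as an intermediate entry is not a valid table
 * descriptor.
 */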
/*
 *	Given an offset and a map, compute the address of level 1 translation table entry.
 *	If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline tt_entry_t *
pmap_tt1e(pmap_t pmap,
    vm_map_address_t addr)
{
	return pmap_ttne(pmap, PMAP_TT_L1_LEVEL, addr);
}

/*
 *	Given an offset and a map, compute the address of level 2 translation table entry.
 *	If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline tt_entry_t *
pmap_tt2e(pmap_t pmap,
    vm_map_address_t addr)
{
	return pmap_ttne(pmap, PMAP_TT_L2_LEVEL, addr);
}

/*
 *	Given an offset and a map, compute the address of level 3 translation table entry.
 *	If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline pt_entry_t *
pmap_tt3e(
	pmap_t pmap,
	vm_map_address_t addr)
{
	return (pt_entry_t *)pmap_ttne(pmap, PMAP_TT_L3_LEVEL, addr);
}

static inline tt_entry_t *
pmap_tte(
	pmap_t pmap,
	vm_map_address_t addr)
{
	return pmap_tt2e(pmap, addr);
}

static inline pt_entry_t *
pmap_pte(
	pmap_t pmap,
	vm_map_address_t addr)
{
	return pmap_tt3e(pmap, addr);
}

#endif
/*
 *      Map memory at initialization.  The physical addresses being
 *      mapped are not managed and are never unmapped.
 *
 *      For now, VM is already on, we only need to map the
 *      specified memory.
 */
vm_map_address_t
pmap_map(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot,
	unsigned int flags)
{
	kern_return_t   kr;
	vm_size_t       ps;

	ps = PAGE_SIZE;
	while (start < end) {
		kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
		    prot, VM_PROT_NONE, flags, FALSE);

		if (kr != KERN_SUCCESS) {
			panic("%s: failed pmap_enter, "
			    "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
			    __FUNCTION__,
			    (void *) virt, (void *) start, (void *) end, prot, flags);
		}

		virt += ps;
		start += ps;
	}
	return virt;
}
vm_map_address_t
pmap_map_bd_with_options(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot,
	int32_t options)
{
	pt_entry_t       tmplate;
	pt_entry_t      *ptep;
	vm_map_address_t vaddr;
	vm_offset_t      paddr;
	pt_entry_t       mem_attr;

	switch (options & PMAP_MAP_BD_MASK) {
	case PMAP_MAP_BD_WCOMB:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
#if     (__ARM_VMSA__ > 7)
		mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		mem_attr |= ARM_PTE_SH;
#endif
		break;
	case PMAP_MAP_BD_POSTED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
		break;
	case PMAP_MAP_BD_POSTED_REORDERED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
		break;
	case PMAP_MAP_BD_POSTED_COMBINED_REORDERED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
		break;
	default:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
		break;
	}

	tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
	    mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
#if __ARM_KERNEL_PROTECT__
	tmplate |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	vaddr = virt;
	paddr = start;
	while (paddr < end) {
		ptep = pmap_pte(kernel_pmap, vaddr);
		if (ptep == PT_ENTRY_NULL) {
			panic("%s: no PTE for vaddr=%p, "
			    "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
			    __FUNCTION__, (void*)vaddr,
			    (void*)virt, (void*)start, (void*)end, prot, options);
		}

		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		WRITE_PTE_STRONG(ptep, tmplate);

		pte_increment_pa(tmplate);
		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	if (end >= start) {
		flush_mmu_tlb_region(virt, (unsigned)(end - start));
	}

	return vaddr;
}
/*
 *      Back-door routine for mapping kernel VM at initialization.
 *      Useful for mapping memory outside the range
 *      [vm_first_phys, vm_last_phys] (i.e., devices).
 *      Otherwise like pmap_map.
 */
vm_map_address_t
pmap_map_bd(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot)
{
	pt_entry_t       tmplate;
	pt_entry_t      *ptep;
	vm_map_address_t vaddr;
	vm_offset_t      paddr;

	/* not cacheable and not buffered */
	tmplate = pa_to_pte(start)
	    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
	    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
	    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
#if __ARM_KERNEL_PROTECT__
	tmplate |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	vaddr = virt;
	paddr = start;
	while (paddr < end) {
		ptep = pmap_pte(kernel_pmap, vaddr);
		if (ptep == PT_ENTRY_NULL) {
			panic("pmap_map_bd");
		}
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		WRITE_PTE_STRONG(ptep, tmplate);

		pte_increment_pa(tmplate);
		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	if (end >= start) {
		flush_mmu_tlb_region(virt, (unsigned)(end - start));
	}

	return vaddr;
}
/*
 *      Back-door routine for mapping kernel VM at initialization.
 *      Useful for mapping memory specific physical addresses in early
 *      boot (i.e., before kernel_map is initialized).
 *
 *      Maps are in the VM_HIGH_KERNEL_WINDOW area.
 */
vm_map_address_t
pmap_map_high_window_bd(
	vm_offset_t pa_start,
	vm_size_t len,
	vm_prot_t prot)
{
	pt_entry_t              *ptep, pte;
#if (__ARM_VMSA__ == 7)
	vm_map_address_t        va_start = VM_HIGH_KERNEL_WINDOW;
	vm_map_address_t        va_max = VM_MAX_KERNEL_ADDRESS;
#else
	vm_map_address_t        va_start = VREGION1_START;
	vm_map_address_t        va_max = VREGION1_START + VREGION1_SIZE;
#endif
	vm_map_address_t        va_end;
	vm_map_address_t        va;
	vm_size_t               offset;

	offset = pa_start & PAGE_MASK;
	pa_start -= offset;
	len += offset;

	if (len > (va_max - va_start)) {
		panic("%s: area too large, "
		    "pa_start=%p, len=%p, prot=0x%x",
		    __FUNCTION__,
		    (void*)pa_start, (void*)len, prot);
	}

scan:
	for (; va_start < va_max; va_start += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va_start);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		if (*ptep == ARM_PTE_TYPE_FAULT) {
			break;
		}
	}
	if (va_start > va_max) {
		panic("%s: insufficient pages, "
		    "pa_start=%p, len=%p, prot=0x%x",
		    __FUNCTION__,
		    (void*)pa_start, (void*)len, prot);
	}

	for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va_end);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		if (*ptep != ARM_PTE_TYPE_FAULT) {
			va_start = va_end + PAGE_SIZE;
			goto scan;
		}
	}

	for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va);
		pte = pa_to_pte(pa_start)
		    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
		    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
		    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
#if     (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
#if __ARM_KERNEL_PROTECT__
		pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
		WRITE_PTE_STRONG(ptep, pte);
	}
	PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len, false);
	kasan_notify_address(va_start, len);
	return va_start;
}
#define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))
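/*
 * Worked example (illustrative): PMAP_ALIGN(0x1234, 0x40) evaluates to
 * (0x1234 + 0x3f) & ~0x3f == 0x1240, i.e. the address rounded up to the next 64-byte
 * boundary ('+' binds tighter than '&', so the parentheses above are sufficient).
 * pmap_bootstrap() uses this to pack the pp_attr/io_attr/pv_head tables with the
 * alignment each element type requires.
 */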
static vm_size_t
pmap_compute_io_rgns(void)
{
	DTEntry entry;
	pmap_io_range_t *ranges;
	uint64_t rgn_end;
	void *prop = NULL;
	int err;
	unsigned int prop_size;

	err = DTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	if (kSuccess != DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
		return 0;
	}

	ranges = prop;
	for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
		if (ranges[i].addr & PAGE_MASK) {
			panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
		}
		if (ranges[i].len & PAGE_MASK) {
			panic("pmap I/O region %u length 0x%llx is not page-aligned", i, ranges[i].len);
		}
		if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end)) {
			panic("pmap I/O region %u addr 0x%llx length 0x%llx wraps around", i, ranges[i].addr, ranges[i].len);
		}
		if (((ranges[i].addr <= gPhysBase) && (rgn_end > gPhysBase)) ||
		    ((ranges[i].addr < avail_end) && (rgn_end >= avail_end)) ||
		    ((ranges[i].addr > gPhysBase) && (rgn_end < avail_end))) {
			panic("pmap I/O region %u addr 0x%llx length 0x%llx overlaps physical memory", i, ranges[i].addr, ranges[i].len);
		}

		++num_io_rgns;
	}

	return num_io_rgns * sizeof(*ranges);
}
/*
 * return < 0 for a < b
 *          0 for a == b
 *        > 0 for a > b
 */
typedef int (*cmpfunc_t)(const void *a, const void *b);

extern void
qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

static int
cmp_io_rgns(const void *a, const void *b)
{
	const pmap_io_range_t *range_a = a;
	const pmap_io_range_t *range_b = b;
	if ((range_b->addr + range_b->len) <= range_a->addr) {
		return 1;
	} else if ((range_a->addr + range_a->len) <= range_b->addr) {
		return -1;
	} else {
		return 0;
	}
}
static void
pmap_load_io_rgns(void)
{
	DTEntry entry;
	pmap_io_range_t *ranges;
	void *prop = NULL;
	int err;
	unsigned int prop_size;

	if (num_io_rgns == 0) {
		return;
	}

	err = DTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	err = DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
	assert(err == kSuccess);

	ranges = prop;
	for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
		io_attr_table[i] = ranges[i];
	}

	qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
}
#if __arm64__
/*
 * pmap_get_arm64_prot
 *
 * return effective armv8 VMSA block protections including
 * table AP/PXN/XN overrides of a pmap entry
 *
 */
uint64_t
pmap_get_arm64_prot(
	pmap_t pmap,
	vm_offset_t addr)
{
	tt_entry_t tte = 0;
	unsigned int level = 0;
	uint64_t tte_type = 0;
	uint64_t effective_prot_bits = 0;
	uint64_t aggregate_tte = 0;
	uint64_t table_ap_bits = 0, table_xn = 0, table_pxn = 0;
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
		tte = *pmap_ttne(pmap, level, addr);

		if (!(tte & ARM_TTE_VALID)) {
			return 0;
		}

		tte_type = tte & ARM_TTE_TYPE_MASK;

		if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
		    (level == pt_attr->pta_max_level)) {
			/* Block or page mapping; both have the same protection bit layout. */
			break;
		} else if (tte_type == ARM_TTE_TYPE_TABLE) {
			/* All of the table bits we care about are overrides, so just OR them together. */
			aggregate_tte |= tte;
		}
	}

	table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
	table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
	table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);

	/* Start with the PTE bits. */
	effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);

	/* Table AP bits mask out block/page AP bits */
	effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));

	/* XN/PXN bits can be OR'd in. */
	effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
	effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);

	return effective_prot_bits;
}
#endif /* __arm64__ */
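/*
 * Illustrative example (assumption, not from the original source): if one of the table
 * descriptors visited above carries an XNTable override and another restricts access via
 * its APTable bits, then even an L3 page entry that is itself writable and executable is
 * reported here with the corresponding permissions reduced: the table overrides are OR'd
 * into aggregate_tte and applied on top of the leaf entry's own AP/XN/PXN bits, so the
 * result reflects the most restrictive combination along the walk.
 */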
/*
 *	Bootstrap the system enough to run with virtual memory.
 *
 *	The early VM initialization code has already allocated
 *	the first CPU's translation table and made entries for
 *	all the one-to-one mappings to be found there.
 *
 *	We must set up the kernel pmap structures, the
 *	physical-to-virtual translation lookup tables for the
 *	physical memory to be managed (between avail_start and
 *	avail_end).
 *
 *	Map the kernel's code and data, and allocate the system page table.
 *	Page_size must already be set.
 *
 *	Parameters:
 *	first_avail	first available physical page -
 *			   after kernel page tables
 *	avail_start	PA of first managed physical page
 *	avail_end	PA of last managed physical page
 */
void
pmap_bootstrap(
	vm_offset_t vstart)
{
	pmap_paddr_t    pmap_struct_start;
	vm_size_t       pv_head_size;
	vm_size_t       ptd_root_table_size;
	vm_size_t       pp_attr_table_size;
	vm_size_t       io_attr_table_size;
	unsigned int    npages;
	vm_map_offset_t maxoffset;

	lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);

#if DEVELOPMENT || DEBUG
	if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
		kprintf("Kernel traces for pmap operations enabled\n");
	}
#endif

	/*
	 *	Initialize the kernel pmap.
	 */
#if ARM_PARAMETERIZED_PMAP
	kernel_pmap->pmap_pt_attr = native_pt_attr;
#endif /* ARM_PARAMETERIZED_PMAP */
#if HAS_APPLE_PAC
	kernel_pmap->disable_jop = 0;
#endif /* HAS_APPLE_PAC */
	kernel_pmap->tte = cpu_tte;
	kernel_pmap->ttep = cpu_ttep;
#if (__ARM_VMSA__ > 7)
	kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
#else
	kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
#endif
	kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
	os_atomic_init(&kernel_pmap->ref_count, 1);
	kernel_pmap->gc_status = 0;
	kernel_pmap->nx_enabled = TRUE;
#if defined(__arm64__)
	kernel_pmap->is_64bit = TRUE;
#else
	kernel_pmap->is_64bit = FALSE;
#endif
	kernel_pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);

	kernel_pmap->nested_region_grand_addr = 0x0ULL;
	kernel_pmap->nested_region_subord_addr = 0x0ULL;
	kernel_pmap->nested_region_size = 0x0ULL;
	kernel_pmap->nested_region_asid_bitmap = NULL;
	kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;

#if (__ARM_VMSA__ == 7)
	kernel_pmap->tte_index_max = 4 * NTTES;
#endif
	kernel_pmap->hw_asid = 0;
	kernel_pmap->sw_asid = 0;

	PMAP_LOCK_INIT(kernel_pmap);
	memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));

	/* allocate space for and initialize the bookkeeping structures */
	io_attr_table_size = pmap_compute_io_rgns();
	npages = (unsigned int)atop(mem_size);
	pp_attr_table_size = npages * sizeof(pp_attr_t);
	pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
	// allocate enough initial PTDs to map twice the available physical memory
	ptd_root_table_size = sizeof(pt_desc_t) * (mem_size / ((PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES)) * 2;

	pmap_struct_start = avail_start;

	pp_attr_table = (pp_attr_t *) phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
	io_attr_table = (pmap_io_range_t *) phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + io_attr_table_size, __alignof(pv_entry_t *));
	pv_head_table = (pv_entry_t **) phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
	ptd_root_table = (pt_desc_t *)phystokv(avail_start);
	avail_start = round_page(avail_start + ptd_root_table_size);

	memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);

	pmap_load_io_rgns();
	ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));

	pmap_cpu_data_array_init();

	vm_first_phys = gPhysBase;
	vm_last_phys = trunc_page(avail_end);

	simple_lock_init(&pmaps_lock, 0);
	simple_lock_init(&asid_lock, 0);
	simple_lock_init(&tt1_lock, 0);
	queue_init(&map_pmap_list);
	queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
	free_page_size_tt_list = TT_FREE_ENTRY_NULL;
	free_page_size_tt_count = 0;
	free_page_size_tt_max = 0;
	free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
	free_two_page_size_tt_count = 0;
	free_two_page_size_tt_max = 0;
	free_tt_list = TT_FREE_ENTRY_NULL;
	free_tt_count = 0;
	free_tt_max = 0;

	simple_lock_init(&pt_pages_lock, 0);
	queue_init(&pt_page_list);

	simple_lock_init(&pmap_pages_lock, 0);
	pmap_pages_request_count = 0;
	pmap_pages_request_acum = 0;
	pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;

	virtual_space_start = vstart;
	virtual_space_end = VM_MAX_KERNEL_ADDRESS;

	bitmap_full(&asid_bitmap[0], MAX_ASID);

	if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
		maxoffset = trunc_page(maxoffset);
		if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
		    && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
			arm_pmap_max_offset_default = maxoffset;
		}
	}
#if defined(__arm64__)
	if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
		maxoffset = trunc_page(maxoffset);
		if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
		    && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
			arm64_pmap_max_offset_default = maxoffset;
		}
	}
#endif

#if DEVELOPMENT || DEBUG
	PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute, sizeof(panic_on_unsigned_execute));
#endif /* DEVELOPMENT || DEBUG */

	pmap_nesting_size_min = ARM_NESTING_SIZE_MIN;
	pmap_nesting_size_max = ARM_NESTING_SIZE_MAX;

	simple_lock_init(&phys_backup_lock, 0);

#if MACH_ASSERT
	PE_parse_boot_argn("pmap_stats_assert",
	    &pmap_stats_assert,
	    sizeof(pmap_stats_assert));
	PE_parse_boot_argn("vm_footprint_suspend_allowed",
	    &vm_footprint_suspend_allowed,
	    sizeof(vm_footprint_suspend_allowed));
#endif /* MACH_ASSERT */

#if KASAN
	/* Shadow the CPU copy windows, as they fall outside of the physical aperture */
	kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
#endif /* KASAN */
}
void
pmap_virtual_space(
	vm_offset_t *startp,
	vm_offset_t *endp)
{
	*startp = virtual_space_start;
	*endp = virtual_space_end;
}
boolean_t
pmap_virtual_region(
	unsigned int region_select,
	vm_map_offset_t *startp,
	vm_map_size_t *size)
{
	boolean_t ret = FALSE;
#if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
	if (region_select == 0) {
		/*
		 * In this config, the bootstrap mappings should occupy their own L2
		 * TTs, as they should be immutable after boot.  Having the associated
		 * TTEs and PTEs in their own pages allows us to lock down those pages,
		 * while allowing the rest of the kernel address range to be remapped.
		 */
#if     (__ARM_VMSA__ > 7)
		*startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
#else
#error Unsupported configuration
#endif
		*size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
		ret = TRUE;
	}
#else
#if     (__ARM_VMSA__ > 7)
	unsigned long low_global_vr_mask = 0;
	vm_map_size_t low_global_vr_size = 0;
#endif

	if (region_select == 0) {
#if     (__ARM_VMSA__ == 7)
		*startp = gVirtBase & 0xFFC00000;
		*size = ((virtual_space_start - (gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
#else
		/* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
		if (!TEST_PAGE_SIZE_4K) {
			*startp = gVirtBase & 0xFFFFFFFFFE000000;
			*size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
		} else {
			*startp = gVirtBase & 0xFFFFFFFFFF800000;
			*size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
		}
#endif
		ret = TRUE;
	}

	if (region_select == 1) {
		*startp = VREGION1_START;
		*size = VREGION1_SIZE;
		ret = TRUE;
	}
#if     (__ARM_VMSA__ > 7)
	/* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
	if (!TEST_PAGE_SIZE_4K) {
		low_global_vr_mask = 0xFFFFFFFFFE000000;
		low_global_vr_size = 0x2000000;
	} else {
		low_global_vr_mask = 0xFFFFFFFFFF800000;
		low_global_vr_size = 0x800000;
	}

	if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
		*startp = LOW_GLOBAL_BASE_ADDRESS;
		*size = low_global_vr_size;
		ret = TRUE;
	}

	if (region_select == 3) {
		/* In this config, we allow the bootstrap mappings to occupy the same
		 * page table pages as the heap.
		 */
		*startp = VM_MIN_KERNEL_ADDRESS;
		*size = LOW_GLOBAL_BASE_ADDRESS - *startp;
		ret = TRUE;
	}
#endif
#endif
	return ret;
}

unsigned int
pmap_free_pages(
	void)
{
	return (unsigned int)atop(avail_end - first_avail);
}
boolean_t
pmap_next_page_hi(
	ppnum_t            *pnum,
	__unused boolean_t might_free)
{
	return pmap_next_page(pnum);
}

boolean_t
pmap_next_page(
	ppnum_t *pnum)
{
	if (first_avail != avail_end) {
		*pnum = (ppnum_t)atop(first_avail);
		first_avail += PAGE_SIZE;
		return TRUE;
	}
	return FALSE;
}
/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
void
pmap_init(
	void)
{
	/*
	 *	Protect page zero in the kernel map.
	 *	(can be overruled by permanent translation
	 *	table entries at page zero - see arm_vm_init).
	 */
	vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);

	pmap_initialized = TRUE;

	pmap_zone_init();

	/*
	 *	Initialize the pmap object (for tracking the vm_page_t
	 *	structures for pages we allocate to be page tables in
	 *	pmap_expand()).
	 */
	_vm_object_allocate(mem_size, pmap_object);
	pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	pv_init();

	/*
	 * The values of [hard_]maxproc may have been scaled, make sure
	 * they are still less than the value of MAX_ASID.
	 */
	if (maxproc > MAX_ASID) {
		maxproc = MAX_ASID;
	}
	if (hard_maxproc > MAX_ASID) {
		hard_maxproc = MAX_ASID;
	}

	pmap_pgtrace_init();
}

boolean_t
pmap_verify_free(
	ppnum_t ppnum)
{
	pv_entry_t      **pv_h;
	int             pai;
	pmap_paddr_t    phys = ptoa(ppnum);

	assert(phys != vm_page_fictitious_addr);

	if (!pa_valid(phys)) {
		return FALSE;
	}

	pai = (int)pa_index(phys);
	pv_h = pai_to_pvh(pai);

	return pvh_test_type(pv_h, PVH_TYPE_NULL);
}

void
pmap_assert_free(ppnum_t ppnum)
{
	assertf(pmap_verify_free(ppnum), "page = 0x%x", ppnum);
}
/*
 *	Initialize zones used by pmap.
 */
static void
pmap_zone_init(
	void)
{
	/*
	 *	Create the zone of physical maps
	 *	and the physical-to-virtual entries.
	 */
	pmap_zone = zinit((vm_size_t) sizeof(struct pmap), (vm_size_t) sizeof(struct pmap) * 256,
	    PAGE_SIZE, "pmap");
}
void
pmap_ledger_alloc_init(size_t size)
{
	panic("%s: unsupported, "
	    "size=%lu",
	    __func__, size);
}

ledger_t
pmap_ledger_alloc(void)
{
	panic("%s: unsupported",
	    __func__);

	return NULL;
}

void
pmap_ledger_free(ledger_t ledger)
{
	panic("%s: unsupported, "
	    "ledger=%p",
	    __func__, ledger);
}
/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *	hardware.
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
MARK_AS_PMAP_TEXT static pmap_t
pmap_create_options_internal(
	ledger_t ledger,
	vm_map_size_t size,
	unsigned int flags)
{
	unsigned        i;
	unsigned        tte_index_max;
	pmap_t          p;
	bool is_64bit = flags & PMAP_CREATE_64BIT;
#if defined(HAS_APPLE_PAC)
	bool disable_jop = flags & PMAP_CREATE_DISABLE_JOP;
#endif /* defined(HAS_APPLE_PAC) */

	/*
	 *	A software use-only map doesn't even need a pmap.
	 */
	if (size != 0) {
		return PMAP_NULL;
	}

	/*
	 *	Allocate a pmap struct from the pmap_zone.  Then allocate
	 *	the translation table of the right size for the pmap.
	 */
	if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
		return PMAP_NULL;
	}

	if (flags & PMAP_CREATE_64BIT) {
		p->min = MACH_VM_MIN_ADDRESS;
		p->max = MACH_VM_MAX_ADDRESS;
	} else {
		p->min = VM_MIN_ADDRESS;
		p->max = VM_MAX_ADDRESS;
	}

#if defined(HAS_APPLE_PAC)
	p->disable_jop = disable_jop;
#endif /* defined(HAS_APPLE_PAC) */

	p->nested_region_true_start = 0;
	p->nested_region_true_end = ~0;

	os_atomic_init(&p->ref_count, 1);
	p->ledger = ledger;
	p->stamp = os_atomic_inc(&pmap_stamp, relaxed);
	p->nx_enabled = TRUE;
	p->is_64bit = is_64bit;
	p->nested = FALSE;
	p->nested_pmap = PMAP_NULL;

#if ARM_PARAMETERIZED_PMAP
	p->pmap_pt_attr = native_pt_attr;
#endif /* ARM_PARAMETERIZED_PMAP */

	if (!pmap_get_pt_ops(p)->alloc_id(p)) {
		goto id_alloc_fail;
	}

	PMAP_LOCK_INIT(p);
	memset((void *) &p->stats, 0, sizeof(p->stats));

	p->tt_entry_free = (tt_entry_t *)0;
	tte_index_max = PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t);

#if     (__ARM_VMSA__ == 7)
	p->tte_index_max = tte_index_max;
#endif

	p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
	if (!(p->tte)) {
		goto tt1_alloc_fail;
	}

	p->ttep = ml_static_vtop((vm_offset_t)p->tte);
	PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);

	/* nullify the translation table */
	for (i = 0; i < tte_index_max; i++) {
		p->tte[i] = ARM_TTE_TYPE_FAULT;
	}

	FLUSH_PTE_RANGE(p->tte, p->tte + tte_index_max);

	/*
	 *  initialize the rest of the structure
	 */
	p->nested_region_grand_addr = 0x0ULL;
	p->nested_region_subord_addr = 0x0ULL;
	p->nested_region_size = 0x0ULL;
	p->nested_region_asid_bitmap = NULL;
	p->nested_region_asid_bitmap_size = 0x0UL;

	p->nested_has_no_bounds_ref = false;
	p->nested_no_bounds_refcnt = 0;
	p->nested_bounds_set = false;

#if MACH_ASSERT
	p->pmap_stats_assert = TRUE;
	p->pmap_pid = 0;
	strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
#endif /* MACH_ASSERT */
#if DEVELOPMENT || DEBUG
	p->footprint_was_suspended = FALSE;
#endif /* DEVELOPMENT || DEBUG */

	pmap_simple_lock(&pmaps_lock);
	queue_enter(&map_pmap_list, p, pmap_t, pmaps);
	pmap_simple_unlock(&pmaps_lock);

	return p;

tt1_alloc_fail:
	pmap_get_pt_ops(p)->free_id(p);
id_alloc_fail:
	zfree(pmap_zone, p);
	return PMAP_NULL;
}
pmap_t
pmap_create_options(
	ledger_t ledger,
	vm_map_size_t size,
	unsigned int flags)
{
	pmap_t pmap;

	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);

	ledger_reference(ledger);

	pmap = pmap_create_options_internal(ledger, size, flags);

	if (pmap == PMAP_NULL) {
		ledger_dereference(ledger);
	}

	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);

	return pmap;
}
#if MACH_ASSERT
MARK_AS_PMAP_TEXT static void
pmap_set_process_internal(
	__unused pmap_t pmap,
	__unused int pid,
	__unused char *procname)
{
#if MACH_ASSERT
	if (pmap == NULL) {
		return;
	}

	VALIDATE_PMAP(pmap);

	pmap->pmap_pid = pid;
	strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
	if (pmap_ledgers_panic_leeway) {
		/*
		 * Some processes somehow trigger some issues that make
		 * the pmap stats and ledgers go off track, causing
		 * some assertion failures and ledger panics.
		 * Turn off the sanity checks if we allow some ledger leeway
		 * because of that.  We'll still do a final check in
		 * pmap_check_ledgers() for discrepancies larger than the
		 * allowed leeway after the address space has been fully
		 * cleaned up.
		 */
		pmap->pmap_stats_assert = FALSE;
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.phys_footprint);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.internal);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.internal_compressed);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.iokit_mapped);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.alternate_accounting);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.alternate_accounting_compressed);
	}
#endif /* MACH_ASSERT */
}
#endif /* MACH_ASSERT*/

void
pmap_set_process(
	pmap_t pmap,
	int pid,
	char *procname)
{
#if MACH_ASSERT
	pmap_set_process_internal(pmap, pid, procname);
#endif /* MACH_ASSERT */
}
/*
 * We maintain stats and ledgers so that a task's physical footprint is:
 * phys_footprint = ((internal - alternate_accounting)
 *                   + (internal_compressed - alternate_accounting_compressed)
 *                   + iokit_mapped
 *                   + purgeable_nonvolatile
 *                   + purgeable_nonvolatile_compressed
 *                   + page_table)
 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
 */
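/*
 * Worked example (illustrative numbers, not from the original source): a task with
 * 100 MB internal, 20 MB of that counted as alternate_accounting, 30 MB
 * internal_compressed with 5 MB alternate_accounting_compressed, 8 MB iokit_mapped,
 * no purgeable memory and 2 MB of page tables would report
 *
 *     phys_footprint = (100 - 20) + (30 - 5) + 8 + 0 + 0 + 2 = 115 MB
 *
 * which is why pmap_tt_ledger_credit()/debit() above feed page-table allocations into
 * both the page_table and phys_footprint ledger entries.
 */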
/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */
MARK_AS_PMAP_TEXT static void
pmap_destroy_internal(
	pmap_t pmap)
{
	if (pmap == PMAP_NULL) {
		return;
	}

	VALIDATE_PMAP(pmap);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	int32_t ref_count = os_atomic_dec(&pmap->ref_count, relaxed);
	if (ref_count > 0) {
		return;
	} else if (ref_count < 0) {
		panic("pmap %p: refcount underflow", pmap);
	} else if (pmap == kernel_pmap) {
		panic("pmap %p: attempt to destroy kernel pmap", pmap);
	}

	tt_entry_t     *ttep;

#if (__ARM_VMSA__ > 7)
	pmap_unmap_sharedpage(pmap);
#endif /* (__ARM_VMSA__ > 7) */

	pmap_simple_lock(&pmaps_lock);
	while (pmap->gc_status & PMAP_GC_INFLIGHT) {
		pmap->gc_status |= PMAP_GC_WAIT;
		assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
		pmap_simple_unlock(&pmaps_lock);
		(void) thread_block(THREAD_CONTINUE_NULL);
		pmap_simple_lock(&pmaps_lock);
	}
	queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
	pmap_simple_unlock(&pmaps_lock);

	pmap_trim_self(pmap);

	/*
	 *	Free the memory maps, then the
	 *	pmap structure.
	 */
#if (__ARM_VMSA__ == 7)
	unsigned int i = 0;

	for (i = 0; i < pmap->tte_index_max; i++) {
		ttep = &pmap->tte[i];
		if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
			pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
		}
	}
#else /* (__ARM_VMSA__ == 7) */
	vm_map_address_t c;
	unsigned int level;

	for (level = pt_attr->pta_max_level - 1; level >= pt_attr->pta_root_level; level--) {
		for (c = pmap->min; c < pmap->max; c += pt_attr_ln_size(pt_attr, level)) {
			ttep = pmap_ttne(pmap, level, c);

			if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
				pmap_tte_deallocate(pmap, ttep, level);
			}
		}
	}
#endif /* (__ARM_VMSA__ == 7) */

#if (__ARM_VMSA__ == 7)
	pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
	pmap->tte_index_max = 0;
#else /* (__ARM_VMSA__ == 7) */
	pmap_tt1_deallocate(pmap, pmap->tte, PMAP_ROOT_ALLOC_SIZE, 0);
#endif /* (__ARM_VMSA__ == 7) */
	pmap->tte = (tt_entry_t *) NULL;

	assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);

	pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);

	/* return its asid to the pool */
	pmap_get_pt_ops(pmap)->free_id(pmap);
	pmap_check_ledgers(pmap);

	if (pmap->nested_region_asid_bitmap) {
		kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
	}

	zfree(pmap_zone, pmap);
}

void
pmap_destroy(
	pmap_t pmap)
{
	ledger_t ledger;

	PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);

	ledger = pmap->ledger;

	pmap_destroy_internal(pmap);

	ledger_dereference(ledger);

	PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
}
/*
 *	Add a reference to the specified pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_reference_internal(
	pmap_t pmap)
{
	if (pmap != PMAP_NULL) {
		VALIDATE_PMAP(pmap);
		os_atomic_inc(&pmap->ref_count, relaxed);
	}
}

void
pmap_reference(
	pmap_t pmap)
{
	pmap_reference_internal(pmap);
}
static tt_entry_t *
pmap_tt1_allocate(
	pmap_t          pmap,
	vm_size_t       size,
	unsigned        option)
{
	tt_entry_t      *tt1 = NULL;
	tt_free_entry_t *tt1_free;
	pmap_paddr_t    pa;
	vm_address_t    va;
	vm_address_t    va_end;
	kern_return_t   ret;

	pmap_simple_lock(&tt1_lock);
	if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
		free_page_size_tt_count--;
		tt1 = (tt_entry_t *)free_page_size_tt_list;
		free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
	} else if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
		free_two_page_size_tt_count--;
		tt1 = (tt_entry_t *)free_two_page_size_tt_list;
		free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
	} else if ((size < PAGE_SIZE) && (free_tt_count != 0)) {
		free_tt_count--;
		tt1 = (tt_entry_t *)free_tt_list;
		free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
	}

	pmap_simple_unlock(&tt1_lock);

	if (tt1 != NULL) {
		pmap_tt_ledger_credit(pmap, size);
		return (tt_entry_t *)tt1;
	}

	ret = pmap_pages_alloc(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));

	if (ret == KERN_RESOURCE_SHORTAGE) {
		return (tt_entry_t *)0;
	}

	if (size < PAGE_SIZE) {
		va = phystokv(pa) + size;
		tt_free_entry_t *local_free_list = (tt_free_entry_t*)va;
		tt_free_entry_t *next_free = NULL;
		for (va_end = phystokv(pa) + PAGE_SIZE; va < va_end; va = va + size) {
			tt1_free = (tt_free_entry_t *)va;
			tt1_free->next = next_free;
			next_free = tt1_free;
		}
		pmap_simple_lock(&tt1_lock);
		local_free_list->next = free_tt_list;
		free_tt_list = next_free;
		free_tt_count += ((PAGE_SIZE / size) - 1);
		if (free_tt_count > free_tt_max) {
			free_tt_max = free_tt_count;
		}
		pmap_simple_unlock(&tt1_lock);
	}

	/* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
	 * Depending on the device, this can vary between 512b and 16K. */
	OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
	OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
	pmap_tt_ledger_credit(pmap, size);

	return (tt_entry_t *) phystokv(pa);
}
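/*
 * Worked example (illustrative, derived from the comment above): on a device where
 * PMAP_ROOT_ALLOC_SIZE is 4K and PAGE_SIZE is 16K, a sub-page root-table allocation of
 * size 4K carves one 16K page into four 4K slots, returns the first and chains the other
 * three onto free_tt_list (free_tt_count += 3), while the accounting above records
 * size / PMAP_ROOT_ALLOC_SIZE == 1 root-table unit against the owning pmap.
 */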
static void
pmap_tt1_deallocate(
	pmap_t pmap,
	tt_entry_t *tt,
	vm_size_t size,
	unsigned option)
{
	tt_free_entry_t *tt_entry;

	tt_entry = (tt_free_entry_t *)tt;
	pmap_simple_lock(&tt1_lock);

	if (size < PAGE_SIZE) {
		free_tt_count++;
		if (free_tt_count > free_tt_max) {
			free_tt_max = free_tt_count;
		}
		tt_entry->next = free_tt_list;
		free_tt_list = tt_entry;
	}

	if (size == PAGE_SIZE) {
		free_page_size_tt_count++;
		if (free_page_size_tt_count > free_page_size_tt_max) {
			free_page_size_tt_max = free_page_size_tt_count;
		}
		tt_entry->next = free_page_size_tt_list;
		free_page_size_tt_list = tt_entry;
	}

	if (size == 2 * PAGE_SIZE) {
		free_two_page_size_tt_count++;
		if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
			free_two_page_size_tt_max = free_two_page_size_tt_count;
		}
		tt_entry->next = free_two_page_size_tt_list;
		free_two_page_size_tt_list = tt_entry;
	}

	if (option & PMAP_TT_DEALLOCATE_NOBLOCK) {
		pmap_simple_unlock(&tt1_lock);
		pmap_tt_ledger_debit(pmap, size);
		return;
	}

	while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
		free_page_size_tt_count--;
		tt = (tt_entry_t *)free_page_size_tt_list;
		free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;

		pmap_simple_unlock(&tt1_lock);

		pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);

		OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));

		pmap_simple_lock(&tt1_lock);
	}

	while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
		free_two_page_size_tt_count--;
		tt = (tt_entry_t *)free_two_page_size_tt_list;
		free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;

		pmap_simple_unlock(&tt1_lock);

		pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);

		OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));

		pmap_simple_lock(&tt1_lock);
	}

	pmap_simple_unlock(&tt1_lock);
	pmap_tt_ledger_debit(pmap, size);
}
static kern_return_t
pmap_tt_allocate(
	pmap_t pmap,
	tt_entry_t **ttp,
	unsigned int level,
	unsigned int options)
{
	pmap_paddr_t pa;
	*ttp = NULL;

	PMAP_LOCK(pmap);
	if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
		tt_free_entry_t *tt_free_next;

		tt_free_next = ((tt_free_entry_t *)pmap->tt_entry_free)->next;
		*ttp = (tt_entry_t *)pmap->tt_entry_free;
		pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
	}
	PMAP_UNLOCK(pmap);

	if (*ttp == NULL) {
		pt_desc_t *ptdp;

		/*
		 *  Allocate a VM page for the level x page table entries.
		 */
		while (pmap_pages_alloc(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
			if (options & PMAP_OPTIONS_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}
			VM_PAGE_WAIT();
		}

		while ((ptdp = ptd_alloc(pmap, false)) == NULL) {
			if (options & PMAP_OPTIONS_NOWAIT) {
				pmap_pages_free(pa, PAGE_SIZE);
				return KERN_RESOURCE_SHORTAGE;
			}
			VM_PAGE_WAIT();
		}

		if (level < PMAP_TT_MAX_LEVEL) {
			OSAddAtomic64(1, &alloc_ttepages_count);
			OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
		} else {
			OSAddAtomic64(1, &alloc_ptepages_count);
			OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
		}

		pmap_tt_ledger_credit(pmap, PAGE_SIZE);

		PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);

		pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);

		__unreachable_ok_push
		if (TEST_PAGE_RATIO_4) {
			vm_address_t    va;
			vm_address_t    va_end;

			PMAP_LOCK(pmap);

			for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + ARM_PGBYTES; va < va_end; va = va + ARM_PGBYTES) {
				((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
				pmap->tt_entry_free = (tt_entry_t *)va;
			}
			PMAP_UNLOCK(pmap);
		}
		__unreachable_ok_pop

		*ttp = (tt_entry_t *)phystokv(pa);
	}

	return KERN_SUCCESS;
}
static void
pmap_tt_deallocate(
	pmap_t pmap,
	tt_entry_t *ttp,
	unsigned int level)
{
	pt_desc_t *ptdp;
	unsigned pt_acc_cnt;
	unsigned i, max_pt_index = PAGE_RATIO;
	vm_offset_t     free_page = 0;

	PMAP_LOCK(pmap);

	ptdp = ptep_get_ptd((vm_offset_t)ttp);

	ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].va = (vm_offset_t)-1;

	if ((level < PMAP_TT_MAX_LEVEL) && (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt == PT_DESC_REFCOUNT)) {
		ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
	}

	if (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt != 0) {
		panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt);
	}

	ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;

	for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
		pt_acc_cnt += ptdp->ptd_info[i].refcnt;
	}

	if (pt_acc_cnt == 0) {
		tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
		unsigned pt_free_entry_cnt = 1;

		while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
			tt_free_entry_t *tt_free_list_next;

			tt_free_list_next = tt_free_list->next;
			if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
				pt_free_entry_cnt++;
			}
			tt_free_list = tt_free_list_next;
		}
		if (pt_free_entry_cnt == max_pt_index) {
			tt_free_entry_t *tt_free_list_cur;

			free_page = (vm_offset_t)ttp & ~PAGE_MASK;
			tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
			tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;

			while (tt_free_list_cur) {
				tt_free_entry_t *tt_free_list_next;

				tt_free_list_next = tt_free_list_cur->next;
				if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
					tt_free_list->next = tt_free_list_next->next;
				} else {
					tt_free_list = tt_free_list_next;
				}
				tt_free_list_cur = tt_free_list_next;
			}
		} else {
			((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
			pmap->tt_entry_free = ttp;
		}
	} else {
		((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
		pmap->tt_entry_free = ttp;
	}

	PMAP_UNLOCK(pmap);

	if (free_page != 0) {
		ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
		*(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
		pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
		if (level < PMAP_TT_MAX_LEVEL) {
			OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
		} else {
			OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
		}
		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
		pmap_tt_ledger_debit(pmap, PAGE_SIZE);
	}
}
static void
pmap_tte_remove(
	pmap_t pmap,
	tt_entry_t *ttep,
	unsigned int level)
{
	tt_entry_t tte = *ttep;

	if (tte == 0) {
		panic("pmap_tte_deallocate(): null tt_entry ttep==%p\n", ttep);
	}

	if (((level + 1) == PMAP_TT_MAX_LEVEL) && (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt != 0)) {
		panic("pmap_tte_deallocate(): pmap=%p ttep=%p ptd=%p refcnt=0x%x \n", pmap, ttep,
		    tte_get_ptd(tte), (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt));
	}

#if (__ARM_VMSA__ == 7)
	{
		tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
		unsigned i;

		for (i = 0; i < 4; i++, ttep_4M++) {
			*ttep_4M = (tt_entry_t) 0;
		}
		FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
	}
#else
	*ttep = (tt_entry_t) 0;
	FLUSH_PTE_STRONG(ttep);
#endif /* (__ARM_VMSA__ == 7) */
}
static void
pmap_tte_deallocate(
	pmap_t pmap,
	tt_entry_t *ttep,
	unsigned int level)
{
	pmap_paddr_t pa;
	tt_entry_t tte;

	PMAP_ASSERT_LOCKED(pmap);

	tte = *ttep;

	if (tte_get_ptd(tte)->pmap != pmap) {
		panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
		    tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
	}

	pmap_tte_remove(pmap, ttep, level);

	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		unsigned i;
		pt_entry_t      *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~ARM_PGMASK));

		for (i = 0; i < (ARM_PGBYTES / sizeof(*pte_p)); i++, pte_p++) {
			if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
				panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
				    (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
			} else if (((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT) {
				panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
				    (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
			}
		}

		PMAP_UNLOCK(pmap);

		/* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
		 * aligned on 1K boundaries.  We clear the surrounding "chunk" of 4 TTEs above. */
		pa = tte_to_pa(tte) & ~ARM_PGMASK;
		pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
		PMAP_LOCK(pmap);
	}
}
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 *
 *	Returns the number of PTE changed, and sets *rmv_cnt
 *	to the number of SPTE changed.
 */
static int
pmap_remove_range(
	pmap_t pmap,
	vm_map_address_t va,
	pt_entry_t *bpte,
	pt_entry_t *epte,
	uint32_t *rmv_cnt)
{
	bool need_strong_sync = false;
	int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
	    &need_strong_sync, PMAP_OPTIONS_REMOVE);
	if (num_changed > 0) {
		PMAP_UPDATE_TLBS(pmap, va, va + (PAGE_SIZE * (epte - bpte)), need_strong_sync);
	}
	return num_changed;
}
#ifdef PVH_FLAG_EXEC

/*
 *	Update the access protection bits of the physical aperture mapping for a page.
 *	This is useful, for example, in guaranteeing that a verified executable page
 *	has no writable mappings anywhere in the system, including the physical
 *	aperture.  flush_tlb_async can be set to true to avoid unnecessary TLB
 *	synchronization overhead in cases where the call to this function is
 *	guaranteed to be followed by other TLB operations.
 */
static void
pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
{
#if __ARM_PTE_PHYSMAP__
	ASSERT_PVH_LOCKED(pai);
	vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
	pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);

	pt_entry_t tmplate = *pte_p;
	if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
		return;
	}
	tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
#if (__ARM_VMSA__ > 7)
	if (tmplate & ARM_PTE_HINT_MASK) {
		panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
		    __func__, pte_p, (void *)kva, tmplate);
	}
#endif
	WRITE_PTE_STRONG(pte_p, tmplate);
	flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
	if (!flush_tlb_async) {
		sync_tlb_flush();
	}
#endif
}

#endif /* defined(PVH_FLAG_EXEC) */
static void
pmap_remove_pv(
    pmap_t pmap,
    pt_entry_t *cpte,
    int pai,
    int *num_internal,
    int *num_alt_internal,
    int *num_reusable,
    int *num_external)
{
    pv_entry_t    **pv_h, **pve_pp;
    pv_entry_t     *pve_p;

    ASSERT_PVH_LOCKED(pai);
    pv_h = pai_to_pvh(pai);
    vm_offset_t pvh_flags = pvh_get_flags(pv_h);

    if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
        if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0)) {
            panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
        }
        if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
            assert(IS_INTERNAL_PAGE(pai));
            (*num_internal)++;
            (*num_alt_internal)++;
            CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
        } else if (IS_INTERNAL_PAGE(pai)) {
            if (IS_REUSABLE_PAGE(pai)) {
                (*num_reusable)++;
            } else {
                (*num_internal)++;
            }
        } else {
            (*num_external)++;
        }
        pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
    } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
        pve_pp = pv_h;
        pve_p = pvh_list(pv_h);

        while (pve_p != PV_ENTRY_NULL &&
            (pve_get_ptep(pve_p) != cpte)) {
            pve_pp = pve_link_field(pve_p);
            pve_p = PVE_NEXT_PTR(pve_next(pve_p));
        }

        if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0)) {
            panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
        }

#if MACH_ASSERT
        if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
            pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
            while (check_pve_p != PV_ENTRY_NULL) {
                if (pve_get_ptep(check_pve_p) == cpte) {
                    panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
                        __func__, cpte, pmap, pv_h, pve_p, pai);
                }
                check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
            }
        }
#endif

        if (IS_ALTACCT_PAGE(pai, pve_p)) {
            assert(IS_INTERNAL_PAGE(pai));
            (*num_internal)++;
            (*num_alt_internal)++;
            CLR_ALTACCT_PAGE(pai, pve_p);
        } else if (IS_INTERNAL_PAGE(pai)) {
            if (IS_REUSABLE_PAGE(pai)) {
                (*num_reusable)++;
            } else {
                (*num_internal)++;
            }
        } else {
            (*num_external)++;
        }

        pvh_remove(pv_h, pve_pp, pve_p);
        pv_free(pve_p);
        if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
            pvh_set_flags(pv_h, pvh_flags);
        }
    } else {
        panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
            __func__, *pv_h, cpte, pmap, pv_h, pai);
    }

#ifdef PVH_FLAG_EXEC
    if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
        pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
    }
#endif
}
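
/*
 * Core removal loop: clears every valid or compressed PTE in [bpte, epte),
 * releasing page-table refcounts and PV entries as it goes, then settles the
 * pmap's resident/wired/internal/external statistics and ledgers.  TLB
 * invalidation is left to the caller; only the written PTE range is flushed
 * here via FLUSH_PTE_RANGE_STRONG.
 */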
static int
pmap_remove_range_options(
    pmap_t pmap,
    vm_map_address_t va,
    pt_entry_t *bpte,
    pt_entry_t *epte,
    uint32_t *rmv_cnt,
    bool *need_strong_sync __unused,
    int options)
{
    pt_entry_t     *cpte;
    int             num_removed, num_unwired;
    int             num_pte_changed;
    int             pai = 0;
    pmap_paddr_t    pa;
    int             num_external, num_internal, num_reusable;
    int             num_alt_internal;
    uint64_t        num_compressed, num_alt_compressed;

    PMAP_ASSERT_LOCKED(pmap);

    num_removed = 0;
    num_unwired = 0;
    num_pte_changed = 0;
    num_external = 0;
    num_internal = 0;
    num_reusable = 0;
    num_compressed = 0;
    num_alt_internal = 0;
    num_alt_compressed = 0;

    for (cpte = bpte; cpte < epte;
        cpte += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
        pt_entry_t      spte;
        boolean_t       managed = FALSE;

        spte = *cpte;

#if CONFIG_PGTRACE
        if (pgtrace_enabled) {
            pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
        }
#endif

        while (!managed) {
            if (pmap != kernel_pmap &&
                (options & PMAP_OPTIONS_REMOVE) &&
                (ARM_PTE_IS_COMPRESSED(spte, cpte))) {
                /*
                 * "pmap" must be locked at this point,
                 * so this should not race with another
                 * pmap_remove_range() or pmap_enter().
                 */

                /* one less "compressed"... */
                num_compressed++;
                if (spte & ARM_PTE_COMPRESSED_ALT) {
                    /* ... but it used to be "ALTACCT" */
                    num_alt_compressed++;
                }

                /* clear marker(s) */
                WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
                /*
                 * "refcnt" also accounts for
                 * our "compressed" markers,
                 * so let's update it here.
                 */
                if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
                    panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
                }
                spte = *cpte;
            }
            /*
             * It may be possible for the pte to transition from managed
             * to unmanaged in this timeframe; for now, elide the assert.
             * We should break out as a consequence of checking pa_valid.
             */
            //assert(!ARM_PTE_IS_COMPRESSED(spte));
            pa = pte_to_pa(spte);
            if (!pa_valid(pa)) {
                break;
            }
            pai = (int)pa_index(pa);
            LOCK_PVH(pai);
            spte = *cpte;
            pa = pte_to_pa(spte);
            if (pai == (int)pa_index(pa)) {
                managed = TRUE;
                break; // Leave pai locked as we will unlock it after we free the PV entry
            }
            UNLOCK_PVH(pai);
        }

        if (ARM_PTE_IS_COMPRESSED(*cpte, cpte)) {
            /*
             * There used to be a valid mapping here but it
             * has already been removed when the page was
             * sent to the VM compressor, so nothing left to
             * remove now...
             */
            continue;
        }

        /* remove the translation, do not flush the TLB */
        if (*cpte != ARM_PTE_TYPE_FAULT) {
            assertf(!ARM_PTE_IS_COMPRESSED(*cpte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
            assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
            if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
                panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
                    cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
            }
            WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
            num_pte_changed++;
        }

        if ((spte != ARM_PTE_TYPE_FAULT) &&
            (pmap != kernel_pmap)) {
            assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
            assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
            if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
                panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
            }
            if (rmv_cnt) {
                (*rmv_cnt)++;
            }
        }

        if (pte_is_wired(spte)) {
            pte_set_wired(cpte, 0);
            num_unwired++;
        }
        /*
         * if not managed, we're done
         */
        if (!managed) {
            continue;
        }
        /*
         * find and remove the mapping from the chain for this
         * physical address.
         */

        pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);

        UNLOCK_PVH(pai);
        num_removed++;
    }

    /*
     *	Update the counts
     */
    OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
    pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));

    if (pmap != kernel_pmap) {
        /* sanity checks... */
#if MACH_ASSERT
        if (pmap->stats.internal < num_internal) {
            if ((!pmap_stats_assert ||
                !pmap->pmap_stats_assert)) {
                printf("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
                    pmap->pmap_pid, pmap->pmap_procname,
                    pmap, (uint64_t) va, bpte, epte, options,
                    num_internal, num_removed, num_unwired,
                    num_external, num_reusable, num_compressed,
                    num_alt_internal, num_alt_compressed, num_pte_changed,
                    pmap->stats.internal, pmap->stats.reusable);
            } else {
                panic("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
                    pmap->pmap_pid, pmap->pmap_procname,
                    pmap, (uint64_t) va, bpte, epte, options,
                    num_internal, num_removed, num_unwired,
                    num_external, num_reusable, num_compressed,
                    num_alt_internal, num_alt_compressed, num_pte_changed,
                    pmap->stats.internal, pmap->stats.reusable);
            }
        }
#endif /* MACH_ASSERT */
        PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
            pmap,
            "pmap=%p num_external=%d stats.external=%d",
            pmap, num_external, pmap->stats.external);
        PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
            pmap,
            "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
            pmap,
            num_internal, pmap->stats.internal,
            num_reusable, pmap->stats.reusable);
        PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
            pmap,
            "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
            pmap,
            num_internal, pmap->stats.internal,
            num_reusable, pmap->stats.reusable);
        PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
            pmap,
            "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
            pmap, num_compressed, num_alt_compressed,
            pmap->stats.compressed);

        /* update pmap stats... */
        OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
        if (num_external) {
            OSAddAtomic(-num_external, &pmap->stats.external);
        }
        if (num_internal) {
            OSAddAtomic(-num_internal, &pmap->stats.internal);
        }
        if (num_reusable) {
            OSAddAtomic(-num_reusable, &pmap->stats.reusable);
        }
        if (num_compressed) {
            OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
        }
        /* ... and ledgers */
        pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
        pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
        pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
        pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
        pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
        /* make needed adjustments to phys_footprint */
        pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
            machine_ptob((num_internal -
            num_alt_internal +
            num_compressed -
            num_alt_compressed)));
    }

    /* flush the ptable entries we have written */
    if (num_pte_changed > 0) {
        FLUSH_PTE_RANGE_STRONG(bpte, epte);
    }

    return num_pte_changed;
}
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
void
pmap_remove(
    pmap_t pmap,
    vm_map_address_t start,
    vm_map_address_t end)
{
    pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
}
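
/*
 * Remove the mappings in [start, end), which must lie within a single twig
 * (last-level) page table.  If this empties a user page table and the pmap is
 * not nested, the table itself is deallocated as well.
 */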
MARK_AS_PMAP_TEXT static int
pmap_remove_options_internal(
    pmap_t pmap,
    vm_map_address_t start,
    vm_map_address_t end,
    int options)
{
    int             remove_count = 0;
    pt_entry_t     *bpte, *epte;
    pt_entry_t     *pte_p;
    tt_entry_t     *tte_p;
    uint32_t        rmv_spte = 0;
    bool            need_strong_sync = false;
    bool            flush_tte = false;

    if (__improbable(end < start)) {
        panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
    }

    VALIDATE_PMAP(pmap);

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    PMAP_LOCK(pmap);

    tte_p = pmap_tte(pmap, start);

    if (tte_p == (tt_entry_t *) NULL) {
        goto done;
    }

    if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
        pte_p = (pt_entry_t *) ttetokv(*tte_p);
        bpte = &pte_p[ptenum(start)];
        epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));

        remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
            &rmv_spte, &need_strong_sync, options);

        if (rmv_spte && (ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
            (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
            pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
            flush_tte = true;
        }
    }

done:
    PMAP_UNLOCK(pmap);

    if (remove_count > 0) {
        PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
    } else if (flush_tte > 0) {
        pmap_get_pt_ops(pmap)->flush_tlb_tte_async(start, pmap);
        sync_tlb_flush();
    }
    return remove_count;
}
void
pmap_remove_options(
    pmap_t pmap,
    vm_map_address_t start,
    vm_map_address_t end,
    int options)
{
    int             remove_count = 0;
    vm_map_address_t va;

    if (pmap == PMAP_NULL) {
        return;
    }

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
        VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
        VM_KERNEL_ADDRHIDE(end));

    if ((start | end) & PAGE_MASK) {
        panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
            pmap, (uint64_t)start, (uint64_t)end);
    }
    if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
        panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
            pmap, (uint64_t)start, (uint64_t)end);
    }

    /*
     *      Invalidate the translation buffer first
     */
    va = start;
    while (va < end) {
        vm_map_address_t l;

        l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
        if (l > end) {
            l = end;
        }

        remove_count += pmap_remove_options_internal(pmap, va, l, options);

        va = l;
    }

    PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
}
/*
 *	Remove phys addr if mapped in specified map
 */
void
pmap_remove_some_phys(
    __unused pmap_t map,
    __unused ppnum_t pn)
{
    /* Implement to support working set code */
}
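
/*
 * Bind the given pmap to the current thread.  On __ARM_USER_PROTECT__
 * configurations the thread's machine state additionally caches the user
 * TTB/TTC and ASID so they can be reloaded on context switch.
 */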
void
pmap_set_pmap(
    pmap_t pmap,
#if     !__ARM_USER_PROTECT__
    __unused
#endif
    thread_t        thread)
{
    pmap_switch(pmap);
#if __ARM_USER_PROTECT__
    if (pmap->tte_index_max == NTTES) {
        thread->machine.uptw_ttc = 2;
    } else {
        thread->machine.uptw_ttc = 1;
    }
    thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
    thread->machine.asid = pmap->hw_asid;
#endif
}
static void
pmap_flush_core_tlb_asid(pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
    flush_core_tlb_asid(pmap->hw_asid);
#else
    flush_core_tlb_asid(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
#endif
}
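
/*
 * Switch this CPU to the given pmap's address space.  When more software
 * ASIDs exist than hardware ASIDs, the hardware ASID is flushed if its
 * "virtual" high bits changed since this CPU last used it, to avoid aliasing
 * stale TLB entries.
 */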
MARK_AS_PMAP_TEXT static void
pmap_switch_internal(
    pmap_t pmap)
{
    VALIDATE_PMAP(pmap);
    pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
    uint16_t        asid_index = pmap->hw_asid;
    boolean_t       do_asid_flush = FALSE;

#if __ARM_KERNEL_PROTECT__
    asid_index >>= 1;
#endif

#if (__ARM_VMSA__ > 7)
    pmap_t          last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
#endif

#if MAX_ASID > MAX_HW_ASID
    if (asid_index > 0) {
        asid_index -= 1;
        assert(asid_index < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));

        /* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
        uint8_t asid_high_bits = pmap->sw_asid;
        uint8_t last_asid_high_bits = cpu_data_ptr->cpu_asid_high_bits[asid_index];

        if (asid_high_bits != last_asid_high_bits) {
            /*
             * If the virtual ASID of the new pmap does not match the virtual ASID
             * last seen on this CPU for the physical ASID (that was a mouthful),
             * then this switch runs the risk of aliasing.  We need to flush the
             * TLB for this physical ASID in this case.
             */
            cpu_data_ptr->cpu_asid_high_bits[asid_index] = asid_high_bits;
            do_asid_flush = TRUE;
        }
    }
#endif /* MAX_ASID > MAX_HW_ASID */

    pmap_switch_user_ttb_internal(pmap);

#if (__ARM_VMSA__ > 7)
    /* If we're switching to a different nested pmap (i.e. shared region), we'll need
     * to flush the userspace mappings for that region.  Those mappings are global
     * and will not be protected by the ASID.  It should also be cheaper to flush the
     * entire local TLB rather than to do a broadcast MMU flush by VA region. */
    if ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap)) {
        flush_core_tlb();
    } else
#endif
    if (do_asid_flush) {
        pmap_flush_core_tlb_asid(pmap);
#if DEVELOPMENT || DEBUG
        os_atomic_inc(&pmap_asid_flushes, relaxed);
#endif
    }
}

void
pmap_switch(
    pmap_t pmap)
{
    PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
    pmap_switch_internal(pmap);
    PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
}
void
pmap_page_protect(
    ppnum_t ppnum,
    vm_prot_t prot)
{
    pmap_page_protect_options(ppnum, prot, 0, NULL);
}
/*
 *	Routine:	pmap_page_protect_options
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
MARK_AS_PMAP_TEXT static void
pmap_page_protect_options_internal(
    ppnum_t ppnum,
    vm_prot_t prot,
    unsigned int options)
{
    pmap_paddr_t    phys = ptoa(ppnum);
    pv_entry_t    **pv_h;
    pv_entry_t    **pve_pp;
    pv_entry_t     *pve_p;
    pv_entry_t     *pveh_p;
    pv_entry_t     *pvet_p;
    pt_entry_t     *pte_p;
    pv_entry_t     *new_pve_p;
    pt_entry_t     *new_pte_p;
    vm_offset_t     pvh_flags;
    int             pai;
    boolean_t       remove;
    boolean_t       set_NX;
    boolean_t       tlb_flush_needed = FALSE;
    unsigned int    pvh_cnt = 0;

    assert(ppnum != vm_page_fictitious_addr);

    /* Only work with managed pages. */
    if (!pa_valid(phys)) {
        return;
    }

    /*
     * Determine the new protection.
     */
    switch (prot) {
    case VM_PROT_ALL:
        return;         /* nothing to do */
    case VM_PROT_READ:
    case VM_PROT_READ | VM_PROT_EXECUTE:
        remove = FALSE;
        break;
    default:
        remove = TRUE;
        break;
    }

    pai = (int)pa_index(phys);
    LOCK_PVH(pai);
    pv_h = pai_to_pvh(pai);
    pvh_flags = pvh_get_flags(pv_h);

    pte_p = PT_ENTRY_NULL;
    pve_p = PV_ENTRY_NULL;
    pve_pp = pv_h;
    pveh_p = PV_ENTRY_NULL;
    pvet_p = PV_ENTRY_NULL;
    new_pve_p = PV_ENTRY_NULL;
    new_pte_p = PT_ENTRY_NULL;
    if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
        pte_p = pvh_ptep(pv_h);
    } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
        pve_p = pvh_list(pv_h);
        pveh_p = pve_p;
    }

    while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
        vm_map_address_t va;
        pmap_t          pmap;
        pt_entry_t      tmplate;
        boolean_t       update = FALSE;

        if (pve_p != PV_ENTRY_NULL) {
            pte_p = pve_get_ptep(pve_p);
        }

#ifdef PVH_FLAG_IOMMU
        if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
            if (remove) {
                if (options & PMAP_OPTIONS_COMPRESSOR) {
                    panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
                        ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
                }
                if (pve_p != PV_ENTRY_NULL) {
                    pv_entry_t *temp_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
                    pvh_remove(pv_h, pve_pp, pve_p);
                    pveh_p = pvh_list(pv_h);
                    pve_next(pve_p) = new_pve_p;
                    new_pve_p = pve_p;
                    pve_p = temp_pve_p;
                    continue;
                } else {
                    new_pte_p = pte_p;
                    break;
                }
            }
            goto protect_skip_pve;
        }
#endif
        pmap = ptep_get_pmap(pte_p);
        va = ptep_get_va(pte_p);

        if (pte_p == PT_ENTRY_NULL) {
            panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
                pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
        } else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
#if MACH_ASSERT
            if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
                pv_entry_t *check_pve_p = pveh_p;
                while (check_pve_p != PV_ENTRY_NULL) {
                    if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
                        panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
                            pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
                    }
                    check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
                }
            }
#endif
            panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
                pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
        }

#if DEVELOPMENT || DEBUG
        if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
        if ((prot & VM_PROT_EXECUTE))
#endif
        { set_NX = FALSE;} else {
            set_NX = TRUE;
        }

        /* Remove the mapping if new protection is NONE */
        if (remove) {
            boolean_t is_altacct = FALSE;

            if (IS_ALTACCT_PAGE(pai, pve_p)) {
                is_altacct = TRUE;
            } else {
                is_altacct = FALSE;
            }

            if (pte_is_wired(*pte_p)) {
                pte_set_wired(pte_p, 0);
                if (pmap != kernel_pmap) {
                    pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
                    OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
                }
            }

            if (*pte_p != ARM_PTE_TYPE_FAULT &&
                pmap != kernel_pmap &&
                (options & PMAP_OPTIONS_COMPRESSOR) &&
                IS_INTERNAL_PAGE(pai)) {
                assert(!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p));
                /* mark this PTE as having been "compressed" */
                tmplate = ARM_PTE_COMPRESSED;
                if (is_altacct) {
                    tmplate |= ARM_PTE_COMPRESSED_ALT;
                }
            } else {
                tmplate = ARM_PTE_TYPE_FAULT;
            }

            if ((*pte_p != ARM_PTE_TYPE_FAULT) &&
                tmplate == ARM_PTE_TYPE_FAULT &&
                (pmap != kernel_pmap)) {
                if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt)) <= 0) {
                    panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
                }
            }

            if (*pte_p != tmplate) {
                WRITE_PTE_STRONG(pte_p, tmplate);
                update = TRUE;
            }
            pvh_cnt++;
            pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
            OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);

#if MACH_ASSERT
            /*
             * We only ever compress internal pages.
             */
            if (options & PMAP_OPTIONS_COMPRESSOR) {
                assert(IS_INTERNAL_PAGE(pai));
            }
#endif

            if (pmap != kernel_pmap) {
                if (IS_REUSABLE_PAGE(pai) &&
                    IS_INTERNAL_PAGE(pai) &&
                    !is_altacct) {
                    PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
                    OSAddAtomic(-1, &pmap->stats.reusable);
                } else if (IS_INTERNAL_PAGE(pai)) {
                    PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
                    OSAddAtomic(-1, &pmap->stats.internal);
                } else {
                    PMAP_STATS_ASSERTF(pmap->stats.external > 0, pmap, "stats.external %d", pmap->stats.external);
                    OSAddAtomic(-1, &pmap->stats.external);
                }
                if ((options & PMAP_OPTIONS_COMPRESSOR) &&
                    IS_INTERNAL_PAGE(pai)) {
                    /* adjust "compressed" stats */
                    OSAddAtomic64(+1, &pmap->stats.compressed);
                    PMAP_STATS_PEAK(pmap->stats.compressed);
                    pmap->stats.compressed_lifetime++;
                }

                if (IS_ALTACCT_PAGE(pai, pve_p)) {
                    assert(IS_INTERNAL_PAGE(pai));
                    pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
                    pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
                    if (options & PMAP_OPTIONS_COMPRESSOR) {
                        pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
                        pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
                    }

                    /*
                     * Cleanup our marker before
                     * we free this pv_entry.
                     */
                    CLR_ALTACCT_PAGE(pai, pve_p);
                } else if (IS_REUSABLE_PAGE(pai)) {
                    assert(IS_INTERNAL_PAGE(pai));
                    if (options & PMAP_OPTIONS_COMPRESSOR) {
                        pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
                        /* was not in footprint, but is now */
                        pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
                    }
                } else if (IS_INTERNAL_PAGE(pai)) {
                    pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);

                    /*
                     * Update all stats related to physical footprint, which only
                     * deals with internal pages.
                     */
                    if (options & PMAP_OPTIONS_COMPRESSOR) {
                        /*
                         * This removal is only being done so we can send this page to
                         * the compressor; therefore it mustn't affect total task footprint.
                         */
                        pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
                    } else {
                        /*
                         * This internal page isn't going to the compressor, so adjust stats to keep
                         * phys_footprint up to date.
                         */
                        pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
                    }
                } else {
                    /* external page: no impact on ledgers */
                }
            }

            if (pve_p != PV_ENTRY_NULL) {
                assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
            }
        } else {
            pt_entry_t      spte;
            const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);

            spte = *pte_p;

            if (pmap == kernel_pmap) {
                tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
            } else {
                tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
            }

            pte_set_was_writeable(tmplate, false);
            /*
             * While the naive implementation of this would serve to add execute
             * permission, this is not how the VM uses this interface, or how
             * x86_64 implements it.  So ignore requests to add execute permissions.
             */
            if (set_NX) {
                tmplate |= pt_attr_leaf_xn(pt_attr);
            }

            if (*pte_p != ARM_PTE_TYPE_FAULT &&
                !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p) &&
                *pte_p != tmplate) {
                WRITE_PTE_STRONG(pte_p, tmplate);
                update = TRUE;
            }
        }

        /* Invalidate TLBs for all CPUs using it */
        if (update) {
            tlb_flush_needed = TRUE;
            pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
        }

#ifdef PVH_FLAG_IOMMU
protect_skip_pve:
#endif
        pte_p = PT_ENTRY_NULL;
        pvet_p = pve_p;
        if (pve_p != PV_ENTRY_NULL) {
            if (remove) {
                assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
            }
            pve_pp = pve_link_field(pve_p);
            pve_p = PVE_NEXT_PTR(pve_next(pve_p));
        }
    }

#ifdef PVH_FLAG_EXEC
    if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
        pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
    }
#endif
    /* if we removed a bunch of entries, take care of them now */
    if (remove) {
        if (new_pve_p != PV_ENTRY_NULL) {
            pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
            pvh_set_flags(pv_h, pvh_flags);
        } else if (new_pte_p != PT_ENTRY_NULL) {
            pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
            pvh_set_flags(pv_h, pvh_flags);
        } else {
            pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
        }
    }

    if (tlb_flush_needed) {
        sync_tlb_flush();
    }

    UNLOCK_PVH(pai);

    if (remove && (pvet_p != PV_ENTRY_NULL)) {
        pv_list_free(pveh_p, pvet_p, pvh_cnt);
    }
}
void
pmap_page_protect_options(
    ppnum_t ppnum,
    vm_prot_t prot,
    unsigned int options,
    __unused void *arg)
{
    pmap_paddr_t    phys = ptoa(ppnum);

    assert(ppnum != vm_page_fictitious_addr);

    /* Only work with managed pages. */
    if (!pa_valid(phys)) {
        return;
    }

    /*
     * Determine the new protection.
     */
    if (prot == VM_PROT_ALL) {
        return;         /* nothing to do */
    }

    PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);

    pmap_page_protect_options_internal(ppnum, prot, options);

    PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
}
 * Indicates if the pmap layer enforces some additional restrictions on the
 * given set of protections.
 */
bool
pmap_has_prot_policy(__unused vm_prot_t prot)
{
    return FALSE;
}
/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *	VERY IMPORTANT: Will not increase permissions.
 *	VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
 */
void
pmap_protect(
    pmap_t pmap,
    vm_map_address_t b,
    vm_map_address_t e,
    vm_prot_t prot)
{
    pmap_protect_options(pmap, b, e, prot, 0, NULL);
}
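
/*
 * Apply a (never-increasing) protection change to each valid, non-compressed
 * PTE in [start, end), which must fall within a single leaf table.  The
 * PMAP_OPTIONS_PROTECT_IMMEDIATE path, available on DEVELOPMENT/DEBUG kernels
 * only, is the one case that may set rather than clear write permission.
 */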
MARK_AS_PMAP_TEXT static void
pmap_protect_options_internal(
    pmap_t pmap,
    vm_map_address_t start,
    vm_map_address_t end,
    vm_prot_t prot,
    unsigned int options,
    __unused void *args)
{
    const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
    tt_entry_t      *tte_p;
    pt_entry_t      *bpte_p, *epte_p;
    pt_entry_t      *pte_p;
    boolean_t        set_NX = TRUE;
#if (__ARM_VMSA__ > 7)
    boolean_t        set_XO = FALSE;
#endif
    boolean_t        should_have_removed = FALSE;
    bool             need_strong_sync = false;

    if (__improbable(end < start)) {
        panic("%s called with bogus range: %p, %p", __func__, (void*)start, (void*)end);
    }

#if DEVELOPMENT || DEBUG
    if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
        if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
            should_have_removed = TRUE;
        }
    } else
#endif
    {
        /* Determine the new protection. */
        switch (prot) {
#if (__ARM_VMSA__ > 7)
        case VM_PROT_EXECUTE:
            set_XO = TRUE;
            /* fall through */
#endif
        case VM_PROT_READ:
        case VM_PROT_READ | VM_PROT_EXECUTE:
            break;
        case VM_PROT_READ | VM_PROT_WRITE:
        case VM_PROT_ALL:
            return;         /* nothing to do */
        default:
            should_have_removed = TRUE;
        }
    }

    if (should_have_removed) {
        panic("%s: should have been a remove operation, "
            "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
            __func__,
            pmap, (void *)start, (void *)end, prot, options, args);
    }

#if DEVELOPMENT || DEBUG
    if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
    if ((prot & VM_PROT_EXECUTE))
#endif
    {
        set_NX = FALSE;
    } else {
        set_NX = TRUE;
    }

    VALIDATE_PMAP(pmap);
    PMAP_LOCK(pmap);
    tte_p = pmap_tte(pmap, start);

    if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
        bpte_p = (pt_entry_t *) ttetokv(*tte_p);
        bpte_p = &bpte_p[ptenum(start)];
        epte_p = bpte_p + arm_atop(end - start);
        pte_p = bpte_p;

        for (pte_p = bpte_p;
            pte_p < epte_p;
            pte_p += PAGE_SIZE / ARM_PGBYTES) {
            pt_entry_t spte;
#if DEVELOPMENT || DEBUG
            boolean_t  force_write = FALSE;
#endif

            spte = *pte_p;

            if ((spte == ARM_PTE_TYPE_FAULT) ||
                ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
                continue;
            }

            pmap_paddr_t    pa;
            int             pai = 0;
            boolean_t       managed = FALSE;

            while (!managed) {
                /*
                 * It may be possible for the pte to transition from managed
                 * to unmanaged in this timeframe; for now, elide the assert.
                 * We should break out as a consequence of checking pa_valid.
                 */
                // assert(!ARM_PTE_IS_COMPRESSED(spte));
                pa = pte_to_pa(spte);
                if (!pa_valid(pa)) {
                    break;
                }
                pai = (int)pa_index(pa);
                LOCK_PVH(pai);
                spte = *pte_p;
                pa = pte_to_pa(spte);
                if (pai == (int)pa_index(pa)) {
                    managed = TRUE;
                    break; // Leave the PVH locked as we will unlock it after we free the PTE
                }
                UNLOCK_PVH(pai);
            }

            if ((spte == ARM_PTE_TYPE_FAULT) ||
                ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
                continue;
            }

            pt_entry_t      tmplate;

            if (pmap == kernel_pmap) {
#if DEVELOPMENT || DEBUG
                if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
                    force_write = TRUE;
                    tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
                } else
#endif
                {
                    tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
                }
            } else {
#if DEVELOPMENT || DEBUG
                if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
                    force_write = TRUE;
                    tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pt_attr));
                } else
#endif
                {
                    tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
                }
            }

            /*
             * XXX Removing "NX" would
             * grant "execute" access
             * immediately, bypassing any
             * checks VM might want to do
             * in its soft fault path.
             * pmap_protect() and co. are
             * not allowed to increase
             * access permissions.
             */
            if (set_NX) {
                tmplate |= pt_attr_leaf_xn(pt_attr);
            } else {
#if (__ARM_VMSA__ > 7)
                if (pmap == kernel_pmap) {
                    /* do NOT clear "PNX"! */
                    tmplate |= ARM_PTE_NX;
                } else {
                    /* do NOT clear "NX"! */
                    tmplate |= pt_attr_leaf_x(pt_attr);
                    if (set_XO) {
                        tmplate &= ~ARM_PTE_APMASK;
                        tmplate |= pt_attr_leaf_rona(pt_attr);
                    }
                }
#endif
            }

#if DEVELOPMENT || DEBUG
            if (force_write) {
                /*
                 * TODO: Run CS/Monitor checks here.
                 */
                if (managed) {
                    /*
                     * We are marking the page as writable,
                     * so we consider it to be modified and
                     * referenced.
                     */
                    pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
                    tmplate |= ARM_PTE_AF;

                    if (IS_REFFAULT_PAGE(pai)) {
                        CLR_REFFAULT_PAGE(pai);
                    }

                    if (IS_MODFAULT_PAGE(pai)) {
                        CLR_MODFAULT_PAGE(pai);
                    }
                }
            } else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
                /*
                 * An immediate request for anything other than
                 * write should still mark the page as
                 * referenced if managed.
                 */
                if (managed) {
                    pa_set_bits(pa, PP_ATTR_REFERENCED);
                    tmplate |= ARM_PTE_AF;

                    if (IS_REFFAULT_PAGE(pai)) {
                        CLR_REFFAULT_PAGE(pai);
                    }
                }
            }
#endif

            /* We do not expect to write fast fault the entry. */
            pte_set_was_writeable(tmplate, false);

            WRITE_PTE_FAST(pte_p, tmplate);

            if (managed) {
                ASSERT_PVH_LOCKED(pai);
                UNLOCK_PVH(pai);
            }
        }
        FLUSH_PTE_RANGE_STRONG(bpte_p, epte_p);
        PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
    }

    PMAP_UNLOCK(pmap);
}
void
pmap_protect_options(
    pmap_t pmap,
    vm_map_address_t b,
    vm_map_address_t e,
    vm_prot_t prot,
    unsigned int options,
    __unused void *args)
{
    vm_map_address_t l, beg;

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    if ((b | e) & PAGE_MASK) {
        panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
            pmap, (uint64_t)b, (uint64_t)e);
    }

#if DEVELOPMENT || DEBUG
    if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
        if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
            pmap_remove_options(pmap, b, e, options);
            return;
        }
    } else
#endif
    {
        /* Determine the new protection. */
        switch (prot) {
        case VM_PROT_EXECUTE:
        case VM_PROT_READ:
        case VM_PROT_READ | VM_PROT_EXECUTE:
            break;
        case VM_PROT_READ | VM_PROT_WRITE:
        case VM_PROT_ALL:
            return;         /* nothing to do */
        default:
            pmap_remove_options(pmap, b, e, options);
            return;
        }
    }

    PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
        VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
        VM_KERNEL_ADDRHIDE(e));

    beg = b;

    while (beg < e) {
        l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));

        if (l > e) {
            l = e;
        }

        pmap_protect_options_internal(pmap, beg, l, prot, options, args);

        beg = l;
    }

    PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
}
/* Map a (possibly) autogenned block */
kern_return_t
pmap_map_block(
    pmap_t pmap,
    addr64_t va,
    ppnum_t pa,
    uint32_t size,
    vm_prot_t prot,
    int attr,
    __unused unsigned int flags)
{
    kern_return_t   kr;
    addr64_t        original_va = va;
    uint32_t        page;

    for (page = 0; page < size; page++) {
        kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);

        if (kr != KERN_SUCCESS) {
            /*
             * This will panic for now, as it is unclear that
             * removing the mappings is correct.
             */
            panic("%s: failed pmap_enter, "
                "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
                __func__,
                pmap, va, pa, size, prot, flags);

            pmap_remove(pmap, original_va, va - original_va);
            return kr;
        }

        va += PAGE_SIZE;
        pa++;
    }

    return KERN_SUCCESS;
}
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map eventually (must make
 *	forward progress eventually).
 */
kern_return_t
pmap_enter(
    pmap_t pmap,
    vm_map_address_t v,
    ppnum_t pn,
    vm_prot_t prot,
    vm_prot_t fault_type,
    unsigned int flags,
    boolean_t wired)
{
    return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
}
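
/*
 * Install a fully-formed PTE value, keeping the page-table descriptor's wired
 * count and the pmap's wired statistics in sync with the ARM_PTE_WIRED bit.
 * A strong, TLB-invalidating write is used only when an existing valid
 * mapping is being replaced.
 */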
static inline void
pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
{
    if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
        SInt16  *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
        if (pte & ARM_PTE_WIRED) {
            OSAddAtomic16(1, ptd_wiredcnt_ptr);
            pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
            OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
        } else {
            OSAddAtomic16(-1, ptd_wiredcnt_ptr);
            pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
            OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
        }
    }
    if (*pte_p != ARM_PTE_TYPE_FAULT &&
        !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
        WRITE_PTE_STRONG(pte_p, pte);
        PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE, false);
    } else {
        WRITE_PTE(pte_p, pte);
        __builtin_arm_isb(ISB_SY);
    }

    PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + PAGE_SIZE), pte);
}
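
/*
 * Translate VM_WIMG_* cacheability flags into the corresponding PTE attribute
 * index and shareability bits.  Device-like memory types are additionally
 * marked non-executable (NX/PNX).
 */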
MARK_AS_PMAP_TEXT static pt_entry_t
wimg_to_pte(unsigned int wimg)
{
    pt_entry_t pte;

    switch (wimg & (VM_WIMG_MASK)) {
    case VM_WIMG_IO:
    case VM_WIMG_RT:
        pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
        pte |= ARM_PTE_NX | ARM_PTE_PNX;
        break;
    case VM_WIMG_POSTED:
        pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
        pte |= ARM_PTE_NX | ARM_PTE_PNX;
        break;
    case VM_WIMG_POSTED_REORDERED:
        pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
        pte |= ARM_PTE_NX | ARM_PTE_PNX;
        break;
    case VM_WIMG_POSTED_COMBINED_REORDERED:
        pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
        pte |= ARM_PTE_NX | ARM_PTE_PNX;
        break;
    case VM_WIMG_WCOMB:
        pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
        pte |= ARM_PTE_NX | ARM_PTE_PNX;
        break;
    case VM_WIMG_WTHRU:
        pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
#if (__ARM_VMSA__ > 7)
        pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
        pte |= ARM_PTE_SH;
#endif
        break;
    case VM_WIMG_COPYBACK:
        pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
#if (__ARM_VMSA__ > 7)
        pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
        pte |= ARM_PTE_SH;
#endif
        break;
    case VM_WIMG_INNERWBACK:
        pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
#if (__ARM_VMSA__ > 7)
        pte |= ARM_PTE_SH(SH_INNER_MEMORY);
#else
        pte |= ARM_PTE_SH;
#endif
        break;
    default:
        pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
#if (__ARM_VMSA__ > 7)
        pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
        pte |= ARM_PTE_SH;
#endif
        break;
    }

    return pte;
}
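
/*
 * Record a new CPU mapping of physical page 'pai' in its PV list, converting
 * the head from a single-PTE entry to a chained list on the second mapping.
 * Returns FALSE if a PV entry allocation is still pending (caller retries);
 * *is_altacct reports whether the mapping uses alternate accounting.
 */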
static boolean_t
pmap_enter_pv(
    pmap_t pmap,
    pt_entry_t *pte_p,
    int pai,
    unsigned int options,
    pv_entry_t **pve_p,
    boolean_t *is_altacct)
{
    pv_entry_t    **pv_h;
    pv_h = pai_to_pvh(pai);
    boolean_t first_cpu_mapping;

    ASSERT_PVH_LOCKED(pai);

    vm_offset_t pvh_flags = pvh_get_flags(pv_h);

#ifdef PVH_FLAG_CPU
    /* An IOMMU mapping may already be present for a page that hasn't yet
     * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
     * if this is the first CPU mapping.  We base internal/reusable
     * accounting on the options specified for the first CPU mapping.
     * PVH_FLAG_CPU, and thus this accounting, will then persist as long
     * as there are *any* mappings of the page.  The accounting for a
     * page should not need to change until the page is recycled by the
     * VM layer, and we assert that there are no mappings when a page
     * is recycled.  An IOMMU mapping of a freed/recycled page is
     * considered a security violation & potential DMA corruption path.*/
    first_cpu_mapping = ((pmap != NULL) && !(pvh_flags & PVH_FLAG_CPU));
    if (first_cpu_mapping) {
        pvh_flags |= PVH_FLAG_CPU;
    }
#else
    first_cpu_mapping = pvh_test_type(pv_h, PVH_TYPE_NULL);
#endif

    if (first_cpu_mapping) {
        if (options & PMAP_OPTIONS_INTERNAL) {
            SET_INTERNAL_PAGE(pai);
        } else {
            CLR_INTERNAL_PAGE(pai);
        }
        if ((options & PMAP_OPTIONS_INTERNAL) &&
            (options & PMAP_OPTIONS_REUSABLE)) {
            SET_REUSABLE_PAGE(pai);
        } else {
            CLR_REUSABLE_PAGE(pai);
        }
    }
    if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
        pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
        if (pmap != NULL && pmap != kernel_pmap &&
            ((options & PMAP_OPTIONS_ALT_ACCT) ||
            PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
            IS_INTERNAL_PAGE(pai)) {
            /*
             * Make a note to ourselves that this mapping is using alternative
             * accounting.  We'll need this in order to know which ledger to
             * debit when the mapping is removed.
             *
             * The altacct bit must be set while the pv head is locked.  Defer
             * the ledger accounting until after we've dropped the lock.
             */
            SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
            *is_altacct = TRUE;
        } else {
            CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
        }
    } else {
        if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
            pt_entry_t      *pte1_p;

            /*
             * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
             */
            pte1_p = pvh_ptep(pv_h);
            pvh_set_flags(pv_h, pvh_flags);
            if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
                return FALSE;
            }

            pve_set_ptep(*pve_p, pte1_p);
            (*pve_p)->pve_next = PV_ENTRY_NULL;

            if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
                /*
                 * transfer "altacct" from
                 * pp_attr to this pve
                 */
                CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
                SET_ALTACCT_PAGE(pai, *pve_p);
            }
            pvh_update_head(pv_h, *pve_p, PVH_TYPE_PVEP);
            *pve_p = PV_ENTRY_NULL;
        } else if (!pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
            panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
                __func__, *pv_h, pte_p, pmap, pv_h);
        }
        /*
         * Set up pv_entry for this new mapping and then
         * add it to the list for this physical page.
         */
        pvh_set_flags(pv_h, pvh_flags);
        if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
            return FALSE;
        }

        pve_set_ptep(*pve_p, pte_p);
        (*pve_p)->pve_next = PV_ENTRY_NULL;

        pvh_add(pv_h, *pve_p);

        if (pmap != NULL && pmap != kernel_pmap &&
            ((options & PMAP_OPTIONS_ALT_ACCT) ||
            PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
            IS_INTERNAL_PAGE(pai)) {
            /*
             * Make a note to ourselves that this
             * mapping is using alternative
             * accounting.  We'll need this in order
             * to know which ledger to debit when
             * the mapping is removed.
             *
             * The altacct bit must be set while
             * the pv head is locked.  Defer the
             * ledger accounting until after we've
             * dropped the lock.
             */
            SET_ALTACCT_PAGE(pai, *pve_p);
            *is_altacct = TRUE;
        }

        *pve_p = PV_ENTRY_NULL;
    }

    pvh_set_flags(pv_h, pvh_flags);

    return TRUE;
}
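
/*
 * Worker for pmap_enter_options(): expands the pmap if necessary, builds the
 * PTE template (permissions, cacheability, global/non-global, wired), links
 * the mapping into the page's PV list and updates statistics and ledgers.
 * Retries internally if another thread changes the PTE while the PVH lock is
 * dropped.
 */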
MARK_AS_PMAP_TEXT static kern_return_t
pmap_enter_options_internal(
    pmap_t pmap,
    vm_map_address_t v,
    ppnum_t pn,
    vm_prot_t prot,
    vm_prot_t fault_type,
    unsigned int flags,
    boolean_t wired,
    unsigned int options)
{
    pmap_paddr_t    pa = ptoa(pn);
    pt_entry_t      pte;
    pt_entry_t      spte;
    pt_entry_t      *pte_p;
    pv_entry_t      *pve_p;
    boolean_t       set_NX;
    boolean_t       set_XO = FALSE;
    boolean_t       refcnt_updated;
    boolean_t       wiredcnt_updated;
    unsigned int    wimg_bits;
    boolean_t       was_compressed, was_alt_compressed;
    kern_return_t   kr = KERN_SUCCESS;

    VALIDATE_PMAP(pmap);

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    if ((v) & PAGE_MASK) {
        panic("pmap_enter_options() pmap %p v 0x%llx\n",
            pmap, (uint64_t)v);
    }

    if ((prot & VM_PROT_EXECUTE) && (prot & VM_PROT_WRITE) && (pmap == kernel_pmap)) {
        panic("pmap_enter_options(): WX request on kernel_pmap");
    }

#if DEVELOPMENT || DEBUG
    if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
    if ((prot & VM_PROT_EXECUTE))
#endif
    { set_NX = FALSE;} else {
        set_NX = TRUE;
    }

#if (__ARM_VMSA__ > 7)
    if (prot == VM_PROT_EXECUTE) {
        set_XO = TRUE;
    }
#endif

    assert(pn != vm_page_fictitious_addr);

    refcnt_updated = FALSE;
    wiredcnt_updated = FALSE;
    pve_p = PV_ENTRY_NULL;
    was_compressed = FALSE;
    was_alt_compressed = FALSE;

    PMAP_LOCK(pmap);

    /*
     *	Expand pmap to include this pte.  Assume that
     *	pmap is always expanded to include enough hardware
     *	pages to map one VM page.
     */
    while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
        /* Must unlock to expand the pmap. */
        PMAP_UNLOCK(pmap);

        kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);

        if (kr != KERN_SUCCESS) {
            return kr;
        }

        PMAP_LOCK(pmap);
    }

    if (options & PMAP_OPTIONS_NOENTER) {
        PMAP_UNLOCK(pmap);
        return KERN_SUCCESS;
    }

Pmap_enter_retry:

    spte = *pte_p;

    if (ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
        /*
         * "pmap" should be locked at this point, so this should
         * not race with another pmap_enter() or pmap_remove_range().
         */
        assert(pmap != kernel_pmap);

        /* one less "compressed" */
        OSAddAtomic64(-1, &pmap->stats.compressed);
        pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
            PAGE_SIZE);

        was_compressed = TRUE;
        if (spte & ARM_PTE_COMPRESSED_ALT) {
            was_alt_compressed = TRUE;
            pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
        } else {
            /* was part of the footprint */
            pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
        }

        /* clear "compressed" marker */
        /* XXX is it necessary since we're about to overwrite it ? */
        WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
        spte = ARM_PTE_TYPE_FAULT;

        /*
         * We're replacing a "compressed" marker with a valid PTE,
         * so no change for "refcnt".
         */
        refcnt_updated = TRUE;
    }

    if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
        pmap_remove_range(pmap, v, pte_p, pte_p + 1, 0);
    }

    pte = pa_to_pte(pa) | ARM_PTE_TYPE;

    /* Don't bother tracking wiring for kernel PTEs.  We use ARM_PTE_WIRED to track
     * wired memory statistics for user pmaps, but kernel PTEs are assumed
     * to be wired in nearly all cases.  For VM layer functionality, the wired
     * count in vm_page_t is sufficient. */
    if (wired && pmap != kernel_pmap) {
        pte |= ARM_PTE_WIRED;
    }

    if (set_NX) {
        pte |= pt_attr_leaf_xn(pt_attr);
    } else {
#if (__ARM_VMSA__ > 7)
        if (pmap == kernel_pmap) {
            pte |= ARM_PTE_NX;
        } else {
            pte |= pt_attr_leaf_x(pt_attr);
        }
#endif
    }

    if (pmap == kernel_pmap) {
#if __ARM_KERNEL_PROTECT__
        pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
        if (prot & VM_PROT_WRITE) {
            pte |= ARM_PTE_AP(AP_RWNA);
            pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
        } else {
            pte |= ARM_PTE_AP(AP_RONA);
            pa_set_bits(pa, PP_ATTR_REFERENCED);
        }
#if (__ARM_VMSA__ == 7)
        if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH)) {
            pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
        }
#endif
    } else {
        if (!pmap->nested) {
            pte |= ARM_PTE_NG;
        } else if ((pmap->nested_region_asid_bitmap)
            && (v >= pmap->nested_region_subord_addr)
            && (v < (pmap->nested_region_subord_addr + pmap->nested_region_size))) {
            unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));

            if ((pmap->nested_region_asid_bitmap)
                && testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
                pte |= ARM_PTE_NG;
            }
        }
        if (pmap->nested_pmap != NULL) {
            vm_map_address_t nest_vaddr;
            pt_entry_t              *nest_pte_p;

            nest_vaddr = v - pmap->nested_region_grand_addr + pmap->nested_region_subord_addr;

            if ((nest_vaddr >= pmap->nested_region_subord_addr)
                && (nest_vaddr < (pmap->nested_region_subord_addr + pmap->nested_region_size))
                && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
                && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
                && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p, nest_pte_p))
                && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
                unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));

                if ((pmap->nested_pmap->nested_region_asid_bitmap)
                    && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
                    panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
                        nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
                }
            }
        }

        if (prot & VM_PROT_WRITE) {
            if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
                if (fault_type & VM_PROT_WRITE) {
                    if (set_XO) {
                        pte |= pt_attr_leaf_rwna(pt_attr);
                    } else {
                        pte |= pt_attr_leaf_rw(pt_attr);
                    }
                    pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
                } else {
                    if (set_XO) {
                        pte |= pt_attr_leaf_rona(pt_attr);
                    } else {
                        pte |= pt_attr_leaf_ro(pt_attr);
                    }
                    pa_set_bits(pa, PP_ATTR_REFERENCED);
                    pte_set_was_writeable(pte, true);
                }
            } else {
                if (set_XO) {
                    pte |= pt_attr_leaf_rwna(pt_attr);
                } else {
                    pte |= pt_attr_leaf_rw(pt_attr);
                }
                pa_set_bits(pa, PP_ATTR_REFERENCED);
            }
        } else {
            if (set_XO) {
                pte |= pt_attr_leaf_rona(pt_attr);
            } else {
                pte |= pt_attr_leaf_ro(pt_attr);
            }
            pa_set_bits(pa, PP_ATTR_REFERENCED);
        }
    }

    volatile uint16_t *refcnt = NULL;
    volatile uint16_t *wiredcnt = NULL;
    if (pmap != kernel_pmap) {
        refcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
        wiredcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
        /* Bump the wired count to keep the PTE page from being reclaimed.  We need this because
         * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
         * a new PV entry. */
        if (!wiredcnt_updated) {
            OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
            wiredcnt_updated = TRUE;
        }
        if (!refcnt_updated) {
            OSAddAtomic16(1, (volatile int16_t*)refcnt);
            refcnt_updated = TRUE;
        }
    }

    if (pa_valid(pa)) {
        int             pai;
        boolean_t       is_altacct, is_internal;

        is_internal = FALSE;
        is_altacct = FALSE;

        pai = (int)pa_index(pa);

        LOCK_PVH(pai);

Pmap_enter_loop:
        if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
            wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
        } else {
            wimg_bits = pmap_cache_attributes(pn);
        }

        /* We may be retrying this operation after dropping the PVH lock.
         * Cache attributes for the physical page may have changed while the lock
         * was dropped, so clear any cache attributes we may have previously set
         * in the PTE template. */
        pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
        pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);

        if (pte == *pte_p) {
            /*
             * This pmap_enter operation has been completed by another thread
             * undo refcnt on pt and return
             */
            UNLOCK_PVH(pai);
            goto Pmap_enter_cleanup;
        } else if (pte_to_pa(*pte_p) == pa) {
            pmap_enter_pte(pmap, pte_p, pte, v);
            UNLOCK_PVH(pai);
            goto Pmap_enter_cleanup;
        } else if (*pte_p != ARM_PTE_TYPE_FAULT) {
            /*
             * pte has been modified by another thread
             * hold refcnt on pt and retry pmap_enter operation
             */
            UNLOCK_PVH(pai);
            goto Pmap_enter_retry;
        }
        if (!pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct)) {
            goto Pmap_enter_loop;
        }

        pmap_enter_pte(pmap, pte_p, pte, v);

        if (pmap != kernel_pmap) {
            if (IS_REUSABLE_PAGE(pai) &&
                !is_altacct) {
                assert(IS_INTERNAL_PAGE(pai));
                OSAddAtomic(+1, &pmap->stats.reusable);
                PMAP_STATS_PEAK(pmap->stats.reusable);
            } else if (IS_INTERNAL_PAGE(pai)) {
                OSAddAtomic(+1, &pmap->stats.internal);
                PMAP_STATS_PEAK(pmap->stats.internal);
                is_internal = TRUE;
            } else {
                OSAddAtomic(+1, &pmap->stats.external);
                PMAP_STATS_PEAK(pmap->stats.external);
            }
        }

        UNLOCK_PVH(pai);

        if (pmap != kernel_pmap) {
            pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);

            if (is_internal) {
                /*
                 * Make corresponding adjustments to
                 * phys_footprint statistics.
                 */
                pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
                if (is_altacct) {
                    /*
                     * If this page is internal and
                     * in an IOKit region, credit
                     * the task's total count of
                     * dirty, internal IOKit pages.
                     * It should *not* count towards
                     * the task's total physical
                     * memory footprint, because
                     * this entire region was
                     * already billed to the task
                     * at the time the mapping was
                     * created.
                     *
                     * Put another way, this is
                     * internal++ and
                     * alternate_accounting++, so
                     * net effect on phys_footprint
                     * is 0.  That means: don't
                     * touch phys_footprint here.
                     */
                    pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
                } else {
                    pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
                }
            }
        }

        OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
        if (pmap->stats.resident_count > pmap->stats.resident_max) {
            pmap->stats.resident_max = pmap->stats.resident_count;
        }
    } else {
        if (prot & VM_PROT_EXECUTE) {
            kr = KERN_FAILURE;
            goto Pmap_enter_cleanup;
        }

        wimg_bits = pmap_cache_attributes(pn);
        if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
            wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
        }

        pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);

        pmap_enter_pte(pmap, pte_p, pte, v);
    }

    goto Pmap_enter_return;

Pmap_enter_cleanup:

    if (refcnt != NULL) {
        assert(refcnt_updated);
        if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
            panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
        }
    }

Pmap_enter_return:

#if CONFIG_PGTRACE
    if (pgtrace_enabled) {
        // Clone and invalidate original mapping if eligible
        for (int i = 0; i < PAGE_RATIO; i++) {
            pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES * i, 0, 0);
        }
    }
#endif

    if (pve_p != PV_ENTRY_NULL) {
        pv_free(pve_p);
    }

    if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
        panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
    }

    PMAP_UNLOCK(pmap);

    return kr;
}

kern_return_t
pmap_enter_options(
    pmap_t pmap,
    vm_map_address_t v,
    ppnum_t pn,
    vm_prot_t prot,
    vm_prot_t fault_type,
    unsigned int flags,
    boolean_t wired,
    unsigned int options,
    __unused void *arg)
{
    kern_return_t kr = KERN_FAILURE;

    PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
        VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);

    kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);
    pv_water_mark_check();

    PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);

    return kr;
}
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *		The mapping must already exist in the pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_change_wiring_internal(
    pmap_t pmap,
    vm_map_address_t v,
    boolean_t wired)
{
    pt_entry_t     *pte_p;
    pmap_paddr_t    pa;

    /* Don't bother tracking wiring for kernel PTEs.  We use ARM_PTE_WIRED to track
     * wired memory statistics for user pmaps, but kernel PTEs are assumed
     * to be wired in nearly all cases.  For VM layer functionality, the wired
     * count in vm_page_t is sufficient. */
    if (pmap == kernel_pmap) {
        return;
    }
    VALIDATE_USER_PMAP(pmap);

    PMAP_LOCK(pmap);
    pte_p = pmap_pte(pmap, v);
    assert(pte_p != PT_ENTRY_NULL);
    pa = pte_to_pa(*pte_p);

    while (pa_valid(pa)) {
        pmap_paddr_t new_pa;

        LOCK_PVH((int)pa_index(pa));
        new_pa = pte_to_pa(*pte_p);

        if (pa == new_pa) {
            break;
        }

        UNLOCK_PVH((int)pa_index(pa));
        pa = new_pa;
    }

    if (wired && !pte_is_wired(*pte_p)) {
        pte_set_wired(pte_p, wired);
        OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
        pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
    } else if (!wired && pte_is_wired(*pte_p)) {
        PMAP_STATS_ASSERTF(pmap->stats.wired_count >= 1, pmap, "stats.wired_count %d", pmap->stats.wired_count);
        pte_set_wired(pte_p, wired);
        OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
        pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
    }

    if (pa_valid(pa)) {
        UNLOCK_PVH((int)pa_index(pa));
    }

    PMAP_UNLOCK(pmap);
}

void
pmap_change_wiring(
    pmap_t pmap,
    vm_map_address_t v,
    boolean_t wired)
{
    pmap_change_wiring_internal(pmap, v, wired);
}
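
/*
 * pmap_find_phys() returns the physical page number backing 'va', using the
 * hardware address-translation fast path where possible and falling back to
 * a software table walk via pmap_vtophys().
 */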
MARK_AS_PMAP_TEXT static ppnum_t
pmap_find_phys_internal(
    pmap_t pmap,
    addr64_t va)
{
    ppnum_t ppn = 0;

    VALIDATE_PMAP(pmap);

    if (pmap != kernel_pmap) {
        PMAP_LOCK(pmap);
    }

    ppn = pmap_vtophys(pmap, va);

    if (pmap != kernel_pmap) {
        PMAP_UNLOCK(pmap);
    }

    return ppn;
}

ppnum_t
pmap_find_phys(
    pmap_t pmap,
    addr64_t va)
{
    pmap_paddr_t    pa = 0;

    if (pmap == kernel_pmap) {
        pa = mmu_kvtop(va);
    } else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
        pa = mmu_uvtop(va);
    }

    if (pa) {
        return (ppnum_t)(pa >> PAGE_SHIFT);
    }

    if (not_in_kdp) {
        return pmap_find_phys_internal(pmap, va);
    } else {
        return pmap_vtophys(pmap, va);
    }
}
pmap_paddr_t
kvtophys(
    vm_offset_t va)
{
    pmap_paddr_t pa;

    pa = mmu_kvtop(va);
    if (pa) {
        return pa;
    }
    pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va)) << PAGE_SHIFT;
    if (pa) {
        pa |= (va & PAGE_MASK);
    }

    return (pmap_paddr_t)pa;
}
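
/*
 * Software translation-table walk: returns the physical page number mapped at
 * 'va' in 'pmap', or 0 if there is no valid translation.  Handles both the
 * VMSAv7 section/supersection format and the VMSAv8 block/page formats.
 */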
ppnum_t
pmap_vtophys(
    pmap_t pmap,
    addr64_t va)
{
    if ((va < pmap->min) || (va >= pmap->max)) {
        return 0;
    }

#if (__ARM_VMSA__ == 7)
    tt_entry_t     *tte_p, tte;
    pt_entry_t     *pte_p;
    ppnum_t         ppn;

    tte_p = pmap_tte(pmap, va);
    if (tte_p == (tt_entry_t *) NULL) {
        return (ppnum_t) 0;
    }

    tte = *tte_p;
    if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
        pte_p = (pt_entry_t *) ttetokv(tte) + ptenum(va);
        ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
#if DEVELOPMENT || DEBUG
        if (ppn != 0 &&
            ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
            panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
                pmap, va, pte_p, (uint64_t) (*pte_p), ppn);
        }
#endif /* DEVELOPMENT || DEBUG */
    } else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
        if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER) {
            ppn = (ppnum_t) atop(suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK));
        } else {
            ppn = (ppnum_t) atop(sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK));
        }
    } else {
        ppn = 0;
    }
#else
    tt_entry_t     *ttp;
    tt_entry_t      tte;
    ppnum_t         ppn = 0;

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    /* Level 0 currently unused */

    /* Get first-level (1GB) entry */
    ttp = pmap_tt1e(pmap, va);
    tte = *ttp;
    if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
        return ppn;
    }

    tte = ((tt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, va)];

    if ((tte & ARM_TTE_VALID) != (ARM_TTE_VALID)) {
        return ppn;
    }

    if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
        ppn = (ppnum_t) atop((tte & ARM_TTE_BLOCK_L2_MASK) | (va & ARM_TT_L2_OFFMASK));
        return ppn;
    }
    tte = ((tt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, va)];
    ppn = (ppnum_t) atop((tte & ARM_PTE_MASK) | (va & ARM_TT_L3_OFFMASK));
#endif

    return ppn;
}
MARK_AS_PMAP_TEXT static vm_offset_t
pmap_extract_internal(
    pmap_t pmap,
    vm_map_address_t va)
{
    pmap_paddr_t    pa = 0;
    ppnum_t         ppn = 0;

    if (pmap == NULL) {
        return 0;
    }

    VALIDATE_PMAP(pmap);

    PMAP_LOCK(pmap);

    ppn = pmap_vtophys(pmap, va);

    if (ppn != 0) {
        pa = ptoa(ppn) | ((va) & PAGE_MASK);
    }

    PMAP_UNLOCK(pmap);

    return pa;
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_offset_t
pmap_extract(
    pmap_t pmap,
    vm_map_address_t va)
{
    pmap_paddr_t    pa = 0;

    if (pmap == kernel_pmap) {
        pa = mmu_kvtop(va);
    } else if (pmap == vm_map_pmap(current_thread()->map)) {
        pa = mmu_uvtop(va);
    }

    if (pa) {
        return pa;
    }

    return pmap_extract_internal(pmap, va);
}
/*
 *	pmap_init_pte_page - Initialize a page table page.
 */
void
pmap_init_pte_page(
    pmap_t pmap,
    pt_entry_t *pte_p,
    vm_offset_t va,
    unsigned int ttlevel,
    boolean_t alloc_ptd,
    boolean_t clear)
{
    pt_desc_t   *ptdp = NULL;
    vm_offset_t *pvh;

    pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));

    if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
        if (alloc_ptd) {
            /*
             * This path should only be invoked from arm_vm_init.  If we are emulating 16KB pages
             * on 4KB hardware, we may already have allocated a page table descriptor for a
             * bootstrap request, so we check for an existing PTD here.
             */
            ptdp = ptd_alloc(pmap, true);
            pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
        } else {
            panic("pmap_init_pte_page(): pte_p %p", pte_p);
        }
    } else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
        ptdp = (pt_desc_t *)(pvh_list(pvh));
    } else {
        panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
    }

    if (clear) {
        bzero(pte_p, ARM_PGBYTES);
        // below barrier ensures the page zeroing is visible to PTW before
        // it is linked to the PTE of previous level
        __builtin_arm_dmb(DMB_ISHST);
    }
    ptd_init(ptdp, pmap, va, ttlevel, pte_p);
}
/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new memory for the default (COARSE) translation table
 *	entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
 *	also allocates space for the corresponding pv entries.
 *
 *	Nothing should be locked.
 */
static kern_return_t
pmap_expand(
    pmap_t pmap,
    vm_map_address_t v,
    unsigned int options,
    unsigned int level)
{
    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

#if (__ARM_VMSA__ == 7)
    vm_offset_t     pa;
    tt_entry_t     *tte_p;
    tt_entry_t     *tt_p;
    unsigned int    i;

#if DEVELOPMENT || DEBUG
    /*
     * We no longer support root level expansion; panic in case something
     * still attempts to trigger it.
     */
    i = tte_index(pmap, pt_attr, v);

    if (i >= pmap->tte_index_max) {
        panic("%s: index out of range, index=%u, max=%u, "
            "pmap=%p, addr=%p, options=%u, level=%u",
            __func__, i, pmap->tte_index_max,
            pmap, (void *)v, options, level);
    }
#endif /* DEVELOPMENT || DEBUG */

    if (level == 1) {
        return KERN_SUCCESS;
    }

    {
        tt_entry_t     *tte_next_p;

        PMAP_LOCK(pmap);
        pa = 0;
        if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
            PMAP_UNLOCK(pmap);
            return KERN_SUCCESS;
        }
        tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
        for (i = 0, tte_next_p = tte_p; i < 4; i++) {
            if (tte_to_pa(*tte_next_p)) {
                pa = tte_to_pa(*tte_next_p);
                break;
            }
            tte_next_p++;
        }
        pa = pa & ~PAGE_MASK;
        if (pa) {
            tte_p = &pmap->tte[ttenum(v)];
            *tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
            FLUSH_PTE(tte_p);
            PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
                VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
            PMAP_UNLOCK(pmap);
            return KERN_SUCCESS;
        }
        PMAP_UNLOCK(pmap);
    }
    v = v & ~ARM_TT_L1_PT_OFFMASK;

    while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
        /*
         *	Allocate a VM page for the level 2 page table entries.
         */
        while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
            if (options & PMAP_OPTIONS_NOWAIT) {
                return KERN_RESOURCE_SHORTAGE;
            }
            VM_PAGE_WAIT();
        }

        PMAP_LOCK(pmap);
        /*
         *	See if someone else expanded us first
         */
        if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
            tt_entry_t     *tte_next_p;

            pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE, TRUE);
            pa = kvtophys((vm_offset_t)tt_p);
            tte_p = &pmap->tte[ttenum(v)];
            for (i = 0, tte_next_p = tte_p; i < 4; i++) {
                *tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
                PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
                    VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
                tte_next_p++;
                pa = pa + 0x400;
            }
            FLUSH_PTE_RANGE(tte_p, tte_p + 4);

            pa = 0x0ULL;
            tt_p = (tt_entry_t *)NULL;
        }
        PMAP_UNLOCK(pmap);
        if (tt_p != (tt_entry_t *)NULL) {
            pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
            tt_p = (tt_entry_t *)NULL;
        }
    }
    return KERN_SUCCESS;
#else
    pmap_paddr_t    pa;
    unsigned int    ttlevel = pt_attr_root_level(pt_attr);
    tt_entry_t     *tte_p;
    tt_entry_t     *tt_p;

    pa = 0x0ULL;
    tt_p = (tt_entry_t *)NULL;

    for (; ttlevel < level; ttlevel++) {
        PMAP_LOCK(pmap);

        if (pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL) {
            PMAP_UNLOCK(pmap);
            while (pmap_tt_allocate(pmap, &tt_p, ttlevel + 1, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
                if (options & PMAP_OPTIONS_NOWAIT) {
                    return KERN_RESOURCE_SHORTAGE;
                }
                VM_PAGE_WAIT();
            }
            PMAP_LOCK(pmap);
            if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
                pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE, TRUE);
                pa = kvtophys((vm_offset_t)tt_p);
                tte_p = pmap_ttne(pmap, ttlevel, v);
                *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
                PMAP_TRACE(ttlevel + 1, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
                    VM_KERNEL_ADDRHIDE((v & ~pt_attr_ln_offmask(pt_attr, ttlevel)) + pt_attr_ln_size(pt_attr, ttlevel)), *tte_p);
                pa = 0x0ULL;
                tt_p = (tt_entry_t *)NULL;
            }
        }

        PMAP_UNLOCK(pmap);

        if (tt_p != (tt_entry_t *)NULL) {
            pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
            tt_p = (tt_entry_t *)NULL;
        }
    }

    return KERN_SUCCESS;
#endif
}
/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 */
void
pmap_collect(pmap_t pmap)
{
    if (pmap == PMAP_NULL) {
        return;
    }

    if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
        /* TODO: Scan for vm page assigned to top level page tables with no reference */
    }

    return;
}
/*
 *	Pmap garbage collection
 *	Called by the pageout daemon when pages are scarce.
 *
 */
void
pmap_gc(
    void)
{
    pmap_t          pmap, pmap_next;
    boolean_t       gc_wait;

    if (pmap_gc_allowed &&
        (pmap_gc_allowed_by_time_throttle ||
        pmap_gc_forced)) {
        pmap_gc_forced = FALSE;
        pmap_gc_allowed_by_time_throttle = FALSE;
        pmap_simple_lock(&pmaps_lock);
        pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
        while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
            if (!(pmap->gc_status & PMAP_GC_INFLIGHT)) {
                pmap->gc_status |= PMAP_GC_INFLIGHT;
            }
            pmap_simple_unlock(&pmaps_lock);

            pmap_collect(pmap);

            pmap_simple_lock(&pmaps_lock);
            gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
            pmap->gc_status &= ~(PMAP_GC_INFLIGHT | PMAP_GC_WAIT);
            pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
            if (gc_wait) {
                if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next)) {
                    pmap_next->gc_status |= PMAP_GC_INFLIGHT;
                }
                pmap_simple_unlock(&pmaps_lock);
                thread_wakeup((event_t) &pmap->gc_status);
                pmap_simple_lock(&pmaps_lock);
            }
            pmap = pmap_next;
        }
        pmap_simple_unlock(&pmaps_lock);
    }
}
/*
 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
 */
uint64_t
pmap_release_pages_fast(void)
{
	return 0;
}

/*
 * By default, don't attempt pmap GC more frequently
 * than once per minute.
 */
void
compute_pmap_gc_throttle(
	void *arg __unused)
{
	pmap_gc_allowed_by_time_throttle = TRUE;
}
/*
 * pmap_attribute_cache_sync(vm_offset_t pa)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 */
kern_return_t
pmap_attribute_cache_sync(
	ppnum_t pp,
	vm_size_t size,
	__unused vm_machine_attribute_t attribute,
	__unused vm_machine_attribute_val_t * value)
{
	if (size > PAGE_SIZE) {
		panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
	} else {
		cache_sync_page(pp);
	}

	return KERN_SUCCESS;
}
/*
 * pmap_sync_page_data_phys(ppnum_t pp)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 */
void
pmap_sync_page_data_phys(
	ppnum_t pp)
{
	cache_sync_page(pp);
}

/*
 * pmap_sync_page_attributes_phys(ppnum_t pp)
 *
 * Write back and invalidate all cachelines on a physical page.
 */
void
pmap_sync_page_attributes_phys(
	ppnum_t pp)
{
	flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
}
7160 /* temporary workaround */
7169 pte_p
= pmap_pte(map
->pmap
, va
);
7174 return (spte
& ARM_PTE_ATTRINDXMASK
) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT
);
7186 addr
= (unsigned int *) phystokv(ptoa(pn
));
7187 count
= PAGE_SIZE
/ sizeof(unsigned int);
7193 extern void mapping_set_mod(ppnum_t pn
);
7199 pmap_set_modify(pn
);
7202 extern void mapping_set_ref(ppnum_t pn
);
7208 pmap_set_reference(pn
);
/*
 *	Clear specified attribute bits.
 *
 *	Try to force an arm_fast_fault() for all mappings of
 *	the page - to force attributes to be set again at fault time.
 *	If the forcing succeeds, clear the cached bits at the head.
 *	Otherwise, something must have been wired, so leave the cached
 *	attributes alone.
 */
MARK_AS_PMAP_TEXT static void
phys_attribute_clear_internal(
	ppnum_t         pn,
	unsigned int    bits,
	int             options,
	void            *arg)
{
	pmap_paddr_t    pa = ptoa(pn);
	vm_prot_t       allow_mode = VM_PROT_ALL;

	if ((bits & PP_ATTR_MODIFIED) &&
	    (options & PMAP_OPTIONS_NOFLUSH) &&
	    (arg == NULL)) {
		panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
		    "should not clear 'modified' without flushing TLBs\n",
		    pn, bits, options, arg);
	}

	assert(pn != vm_page_fictitious_addr);

	if (options & PMAP_OPTIONS_CLEAR_WRITE) {
		assert(bits == PP_ATTR_MODIFIED);

		pmap_page_protect_options_internal(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0);
		/*
		 * We short circuit this case; it should not need to
		 * invoke arm_force_fast_fault, so just clear the modified bit.
		 * pmap_page_protect has taken care of resetting
		 * the state so that we'll see the next write as a fault to
		 * the VM (i.e. we don't want a fast fault).
		 */
		pa_clear_bits(pa, bits);
		return;
	}
	if (bits & PP_ATTR_REFERENCED) {
		allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
	}
	if (bits & PP_ATTR_MODIFIED) {
		allow_mode &= ~VM_PROT_WRITE;
	}

	if (bits == PP_ATTR_NOENCRYPT) {
		/*
		 * We short circuit this case; it should not need to
		 * invoke arm_force_fast_fault, so just clear and
		 * return.  On ARM, this bit is just a debugging aid.
		 */
		pa_clear_bits(pa, bits);
		return;
	}

	if (arm_force_fast_fault_internal(pn, allow_mode, options)) {
		pa_clear_bits(pa, bits);
	}
	return;
}

static void
phys_attribute_clear(
	ppnum_t         pn,
	unsigned int    bits,
	int             options,
	void            *arg)
{
	/*
	 * Do we really want this tracepoint?  It will be extremely chatty.
	 * Also, should we have a corresponding trace point for the set path?
	 */
	PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);

	phys_attribute_clear_internal(pn, bits, options, arg);

	PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
}
/*
 *	Set specified attribute bits.
 *
 *	Set cached value in the pv head because we have
 *	no per-mapping hardware support for referenced and
 *	modify bits.
 */
MARK_AS_PMAP_TEXT static void
phys_attribute_set_internal(
	ppnum_t pn,
	unsigned int bits)
{
	pmap_paddr_t    pa = ptoa(pn);
	assert(pn != vm_page_fictitious_addr);

	pa_set_bits(pa, bits);

	return;
}

static void
phys_attribute_set(
	ppnum_t pn,
	unsigned int bits)
{
	phys_attribute_set_internal(pn, bits);
}

/*
 *	Check specified attribute bits.
 *
 *	use the software cached bits (since no hw support).
 */
static boolean_t
phys_attribute_test(
	ppnum_t pn,
	unsigned int bits)
{
	pmap_paddr_t    pa = ptoa(pn);
	assert(pn != vm_page_fictitious_addr);
	return pa_test_bits(pa, bits);
}
/*
 *	Set the modify/reference bits on the specified physical page.
 */
void
pmap_set_modify(ppnum_t pn)
{
	phys_attribute_set(pn, PP_ATTR_MODIFIED);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(
	ppnum_t pn)
{
	phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
 */
boolean_t
pmap_is_modified(
	ppnum_t pn)
{
	return phys_attribute_test(pn, PP_ATTR_MODIFIED);
}

/*
 *	Set the reference bit on the specified physical page.
 */
static void
pmap_set_reference(
	ppnum_t pn)
{
	phys_attribute_set(pn, PP_ATTR_REFERENCED);
}

/*
 *	Clear the reference bits on the specified physical page.
 */
void
pmap_clear_reference(
	ppnum_t pn)
{
	phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */
boolean_t
pmap_is_referenced(
	ppnum_t pn)
{
	return phys_attribute_test(pn, PP_ATTR_REFERENCED);
}

/*
 * pmap_get_refmod(phys)
 *  returns the referenced and modified bits of the specified
 *  physical page.
 */
unsigned int
pmap_get_refmod(
	ppnum_t pn)
{
	return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
	       | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
}

/*
 * pmap_clear_refmod(phys, mask)
 *  clears the referenced and modified bits as specified by the mask
 *  of the specified physical page.
 */
void
pmap_clear_refmod_options(
	ppnum_t         pn,
	unsigned int    mask,
	unsigned int    options,
	void            *arg)
{
	unsigned int    bits;

	bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
	    ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
	phys_attribute_clear(pn, bits, options, arg);
}

void
pmap_clear_refmod(
	ppnum_t pn,
	unsigned int mask)
{
	pmap_clear_refmod_options(pn, mask, 0, NULL);
}
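/*
 * Illustrative sketch (not part of the original source): because the
 * referenced/modified state lives only in the software pv-head cache,
 * a caller that wants a "read and reset" view of a page can combine the
 * two primitives above.  The wrapper name below is hypothetical.
 */
static inline unsigned int
pmap_refmod_snapshot_and_clear(ppnum_t pn)
{
	/* Read the cached VM_MEM_MODIFIED / VM_MEM_REFERENCED bits. */
	unsigned int refmod = pmap_get_refmod(pn);

	/* Clearing them re-arms the fast-fault machinery for this page. */
	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);

	return refmod;
}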
unsigned int
pmap_disconnect_options(
	ppnum_t pn,
	unsigned int options,
	void *arg)
{
	if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
		/*
		 * On ARM, the "modified" bit is managed by software, so
		 * we know up-front if the physical page is "modified",
		 * without having to scan all the PTEs pointing to it.
		 * The caller should have made the VM page "busy" so noone
		 * should be able to establish any new mapping and "modify"
		 * the page behind us.
		 */
		if (pmap_is_modified(pn)) {
			/*
			 * The page has been modified and will be sent to
			 * the VM compressor.
			 */
			options |= PMAP_OPTIONS_COMPRESSOR;
		} else {
			/*
			 * The page hasn't been modified and will be freed
			 * instead of compressed.
			 */
		}
	}

	/* disconnect the page */
	pmap_page_protect_options(pn, 0, options, arg);

	/* return ref/chg status */
	return pmap_get_refmod(pn);
}

/*
 * Disconnect all mappings for this page and return reference and change status
 * in generic format.
 */
unsigned int
pmap_disconnect(
	ppnum_t pn)
{
	pmap_page_protect(pn, 0);       /* disconnect the page */
	return pmap_get_refmod(pn);     /* return ref/chg status */
}

boolean_t
pmap_has_managed_page(ppnum_t first, ppnum_t last)
{
	if (ptoa(first) >= vm_last_phys) {
		return FALSE;
	}
	if (ptoa(last) < vm_first_phys) {
		return FALSE;
	}

	return TRUE;
}
7518 * The state maintained by the noencrypt functions is used as a
7519 * debugging aid on ARM. This incurs some overhead on the part
7520 * of the caller. A special case check in phys_attribute_clear
7521 * (the most expensive path) currently minimizes this overhead,
7522 * but stubbing these functions out on RELEASE kernels yields
7529 #if DEVELOPMENT || DEBUG
7530 boolean_t result
= FALSE
;
7532 if (!pa_valid(ptoa(pn
))) {
7536 result
= (phys_attribute_test(pn
, PP_ATTR_NOENCRYPT
));
7549 #if DEVELOPMENT || DEBUG
7550 if (!pa_valid(ptoa(pn
))) {
7554 phys_attribute_set(pn
, PP_ATTR_NOENCRYPT
);
7561 pmap_clear_noencrypt(
7564 #if DEVELOPMENT || DEBUG
7565 if (!pa_valid(ptoa(pn
))) {
7569 phys_attribute_clear(pn
, PP_ATTR_NOENCRYPT
, 0, NULL
);
void
pmap_lock_phys_page(ppnum_t pn)
{
	int             pai;
	pmap_paddr_t    phys = ptoa(pn);

	if (pa_valid(phys)) {
		pai = (int)pa_index(phys);
		LOCK_PVH(pai);
	} else {
		simple_lock(&phys_backup_lock, LCK_GRP_NULL);
	}
}

void
pmap_unlock_phys_page(ppnum_t pn)
{
	int             pai;
	pmap_paddr_t    phys = ptoa(pn);

	if (pa_valid(phys)) {
		pai = (int)pa_index(phys);
		UNLOCK_PVH(pai);
	} else {
		simple_unlock(&phys_backup_lock);
	}
}
7603 MARK_AS_PMAP_TEXT
static void
7604 pmap_switch_user_ttb_internal(
7607 VALIDATE_PMAP(pmap
);
7608 pmap_cpu_data_t
*cpu_data_ptr
;
7609 cpu_data_ptr
= pmap_get_cpu_data();
7611 #if (__ARM_VMSA__ == 7)
7612 cpu_data_ptr
->cpu_user_pmap
= pmap
;
7613 cpu_data_ptr
->cpu_user_pmap_stamp
= pmap
->stamp
;
7615 #if MACH_ASSERT && __ARM_USER_PROTECT__
7617 unsigned int ttbr0_val
, ttbr1_val
;
7618 __asm__
volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val
));
7619 __asm__
volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val
));
7620 if (ttbr0_val
!= ttbr1_val
) {
7621 panic("Misaligned ttbr0 %08X\n", ttbr0_val
);
7625 if (pmap
->tte_index_max
== NTTES
) {
7626 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x40000000 */
7627 __asm__
volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(2));
7628 __builtin_arm_isb(ISB_SY
);
7629 #if !__ARM_USER_PROTECT__
7630 set_mmu_ttb(pmap
->ttep
);
7633 #if !__ARM_USER_PROTECT__
7634 set_mmu_ttb(pmap
->ttep
);
7636 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x80000000 */
7637 __asm__
volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(1));
7638 __builtin_arm_isb(ISB_SY
);
7639 #if MACH_ASSERT && __ARM_USER_PROTECT__
7640 if (pmap
->ttep
& 0x1000) {
7641 panic("Misaligned ttbr0 %08X\n", pmap
->ttep
);
7646 #if !__ARM_USER_PROTECT__
7647 set_context_id(pmap
->hw_asid
);
7650 #else /* (__ARM_VMSA__ == 7) */
7652 if (pmap
!= kernel_pmap
) {
7653 cpu_data_ptr
->cpu_nested_pmap
= pmap
->nested_pmap
;
7656 if (pmap
== kernel_pmap
) {
7657 pmap_clear_user_ttb_internal();
7659 set_mmu_ttb((pmap
->ttep
& TTBR_BADDR_MASK
) | (((uint64_t)pmap
->hw_asid
) << TTBR_ASID_SHIFT
));
7662 #if defined(HAS_APPLE_PAC) && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__)
7663 if (!(BootArgs
->bootFlags
& kBootFlagsDisableJOP
) && !(BootArgs
->bootFlags
& kBootFlagsDisableUserJOP
)) {
7664 uint64_t sctlr
= __builtin_arm_rsr64("SCTLR_EL1");
7665 bool jop_enabled
= sctlr
& SCTLR_JOP_KEYS_ENABLED
;
7666 if (!jop_enabled
&& !pmap
->disable_jop
) {
7668 sctlr
|= SCTLR_JOP_KEYS_ENABLED
;
7669 __builtin_arm_wsr64("SCTLR_EL1", sctlr
);
7670 // no ISB necessary because this won't take effect until eret returns to EL0
7671 } else if (jop_enabled
&& pmap
->disable_jop
) {
7673 sctlr
&= ~SCTLR_JOP_KEYS_ENABLED
;
7674 __builtin_arm_wsr64("SCTLR_EL1", sctlr
);
7677 #endif /* HAS_APPLE_PAC && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__) */
7678 #endif /* (__ARM_VMSA__ == 7) */
void
pmap_switch_user_ttb(
	pmap_t pmap)
{
	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
	pmap_switch_user_ttb_internal(pmap);
	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
}
MARK_AS_PMAP_TEXT static void
pmap_clear_user_ttb_internal(void)
{
#if (__ARM_VMSA__ > 7)
	set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
#else
	set_mmu_ttb(kernel_pmap->ttep);
#endif
}

void
pmap_clear_user_ttb(void)
{
	pmap_clear_user_ttb_internal();
}
7707 * Routine: arm_force_fast_fault
7710 * Force all mappings for this page to fault according
7711 * to the access modes allowed, so we can gather ref/modify
7714 MARK_AS_PMAP_TEXT
static boolean_t
7715 arm_force_fast_fault_internal(
7717 vm_prot_t allow_mode
,
7720 pmap_paddr_t phys
= ptoa(ppnum
);
7726 boolean_t is_reusable
, is_internal
;
7727 boolean_t tlb_flush_needed
= FALSE
;
7728 boolean_t ref_fault
;
7729 boolean_t mod_fault
;
7731 assert(ppnum
!= vm_page_fictitious_addr
);
7733 if (!pa_valid(phys
)) {
7734 return FALSE
; /* Not a managed page. */
7740 pai
= (int)pa_index(phys
);
7742 pv_h
= pai_to_pvh(pai
);
7744 pte_p
= PT_ENTRY_NULL
;
7745 pve_p
= PV_ENTRY_NULL
;
7746 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
7747 pte_p
= pvh_ptep(pv_h
);
7748 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
7749 pve_p
= pvh_list(pv_h
);
7752 is_reusable
= IS_REUSABLE_PAGE(pai
);
7753 is_internal
= IS_INTERNAL_PAGE(pai
);
7755 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
7756 vm_map_address_t va
;
7760 boolean_t update_pte
;
7762 if (pve_p
!= PV_ENTRY_NULL
) {
7763 pte_p
= pve_get_ptep(pve_p
);
7766 if (pte_p
== PT_ENTRY_NULL
) {
7767 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p
, ppnum
);
7769 #ifdef PVH_FLAG_IOMMU
7770 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
7774 if (*pte_p
== ARM_PTE_EMPTY
) {
7775 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
7777 if (ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
)) {
7778 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
7781 pmap
= ptep_get_pmap(pte_p
);
7782 va
= ptep_get_va(pte_p
);
7784 assert(va
>= pmap
->min
&& va
< pmap
->max
);
7786 if (pte_is_wired(*pte_p
) || pmap
== kernel_pmap
) {
7795 if ((allow_mode
& VM_PROT_READ
) != VM_PROT_READ
) {
7796 /* read protection sets the pte to fault */
7797 tmplate
= tmplate
& ~ARM_PTE_AF
;
7801 if ((allow_mode
& VM_PROT_WRITE
) != VM_PROT_WRITE
) {
7802 /* take away write permission if set */
7803 if (pmap
== kernel_pmap
) {
7804 if ((tmplate
& ARM_PTE_APMASK
) == ARM_PTE_AP(AP_RWNA
)) {
7805 tmplate
= ((tmplate
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RONA
));
7806 pte_set_was_writeable(tmplate
, true);
7811 if ((tmplate
& ARM_PTE_APMASK
) == ARM_PTE_AP(AP_RWRW
)) {
7812 tmplate
= ((tmplate
& ~ARM_PTE_APMASK
) | pt_attr_leaf_ro(pmap_get_pt_attr(pmap
)));
7813 pte_set_was_writeable(tmplate
, true);
7822 if (*pte_p
!= ARM_PTE_TYPE_FAULT
&&
7823 !ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
)) {
7824 WRITE_PTE_STRONG(pte_p
, tmplate
);
7825 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
, PAGE_SIZE
, pmap
);
7826 tlb_flush_needed
= TRUE
;
7828 WRITE_PTE(pte_p
, tmplate
);
7829 __builtin_arm_isb(ISB_SY
);
7833 /* update pmap stats and ledgers */
7834 if (IS_ALTACCT_PAGE(pai
, pve_p
)) {
7836 * We do not track "reusable" status for
7837 * "alternate accounting" mappings.
7839 } else if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
7842 pmap
!= kernel_pmap
) {
7843 /* one less "reusable" */
7844 PMAP_STATS_ASSERTF(pmap
->stats
.reusable
> 0, pmap
, "stats.reusable %d", pmap
->stats
.reusable
);
7845 OSAddAtomic(-1, &pmap
->stats
.reusable
);
7846 /* one more "internal" */
7847 OSAddAtomic(+1, &pmap
->stats
.internal
);
7848 PMAP_STATS_PEAK(pmap
->stats
.internal
);
7849 PMAP_STATS_ASSERTF(pmap
->stats
.internal
> 0, pmap
, "stats.internal %d", pmap
->stats
.internal
);
7850 pmap_ledger_credit(pmap
, task_ledgers
.internal
, machine_ptob(1));
7851 assert(!IS_ALTACCT_PAGE(pai
, pve_p
));
7852 assert(IS_INTERNAL_PAGE(pai
));
7853 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, machine_ptob(1));
7856 * Avoid the cost of another trap to handle the fast
7857 * fault when we next write to this page: let's just
7858 * handle that now since we already have all the
7859 * necessary information.
7862 arm_clear_fast_fault(ppnum
, VM_PROT_WRITE
);
7864 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
7867 pmap
!= kernel_pmap
) {
7868 /* one more "reusable" */
7869 OSAddAtomic(+1, &pmap
->stats
.reusable
);
7870 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
7871 PMAP_STATS_ASSERTF(pmap
->stats
.reusable
> 0, pmap
, "stats.reusable %d", pmap
->stats
.reusable
);
7872 /* one less "internal" */
7873 PMAP_STATS_ASSERTF(pmap
->stats
.internal
> 0, pmap
, "stats.internal %d", pmap
->stats
.internal
);
7874 OSAddAtomic(-1, &pmap
->stats
.internal
);
7875 pmap_ledger_debit(pmap
, task_ledgers
.internal
, machine_ptob(1));
7876 assert(!IS_ALTACCT_PAGE(pai
, pve_p
));
7877 assert(IS_INTERNAL_PAGE(pai
));
7878 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, machine_ptob(1));
7881 #ifdef PVH_FLAG_IOMMU
7884 pte_p
= PT_ENTRY_NULL
;
7885 if (pve_p
!= PV_ENTRY_NULL
) {
7886 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
7890 if (tlb_flush_needed
) {
7894 /* update global "reusable" status for this page */
7896 if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
7898 CLR_REUSABLE_PAGE(pai
);
7899 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
7901 SET_REUSABLE_PAGE(pai
);
7906 SET_MODFAULT_PAGE(pai
);
7909 SET_REFFAULT_PAGE(pai
);
boolean_t
arm_force_fast_fault(
	ppnum_t         ppnum,
	vm_prot_t       allow_mode,
	int             options,
	__unused void   *arg)
{
	pmap_paddr_t    phys = ptoa(ppnum);

	assert(ppnum != vm_page_fictitious_addr);

	if (!pa_valid(phys)) {
		return FALSE;   /* Not a managed page. */
	}

	return arm_force_fast_fault_internal(ppnum, allow_mode, options);
}
7935 * Routine: arm_clear_fast_fault
7938 * Clear pending force fault for all mappings for this page based on
7939 * the observed fault type, update ref/modify bits.
7942 arm_clear_fast_fault(
7944 vm_prot_t fault_type
)
7946 pmap_paddr_t pa
= ptoa(ppnum
);
7951 boolean_t tlb_flush_needed
= FALSE
;
7954 assert(ppnum
!= vm_page_fictitious_addr
);
7956 if (!pa_valid(pa
)) {
7957 return FALSE
; /* Not a managed page. */
7961 pai
= (int)pa_index(pa
);
7962 ASSERT_PVH_LOCKED(pai
);
7963 pv_h
= pai_to_pvh(pai
);
7965 pte_p
= PT_ENTRY_NULL
;
7966 pve_p
= PV_ENTRY_NULL
;
7967 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
7968 pte_p
= pvh_ptep(pv_h
);
7969 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
7970 pve_p
= pvh_list(pv_h
);
7973 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
7974 vm_map_address_t va
;
7979 if (pve_p
!= PV_ENTRY_NULL
) {
7980 pte_p
= pve_get_ptep(pve_p
);
7983 if (pte_p
== PT_ENTRY_NULL
) {
7984 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p
, ppnum
);
7986 #ifdef PVH_FLAG_IOMMU
7987 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
7991 if (*pte_p
== ARM_PTE_EMPTY
) {
7992 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
7995 pmap
= ptep_get_pmap(pte_p
);
7996 va
= ptep_get_va(pte_p
);
7998 assert(va
>= pmap
->min
&& va
< pmap
->max
);
8003 if ((fault_type
& VM_PROT_WRITE
) && (pte_was_writeable(spte
))) {
8005 if (pmap
== kernel_pmap
) {
8006 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RWNA
));
8008 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap
)));
8012 tmplate
|= ARM_PTE_AF
;
8014 pte_set_was_writeable(tmplate
, false);
8015 pa_set_bits(pa
, PP_ATTR_REFERENCED
| PP_ATTR_MODIFIED
);
8016 } else if ((fault_type
& VM_PROT_READ
) && ((spte
& ARM_PTE_AF
) != ARM_PTE_AF
)) {
8017 tmplate
= spte
| ARM_PTE_AF
;
8020 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
8025 if (spte
!= tmplate
) {
8026 if (spte
!= ARM_PTE_TYPE_FAULT
) {
8027 WRITE_PTE_STRONG(pte_p
, tmplate
);
8028 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
, PAGE_SIZE
, pmap
);
8029 tlb_flush_needed
= TRUE
;
8031 WRITE_PTE(pte_p
, tmplate
);
8032 __builtin_arm_isb(ISB_SY
);
8037 #ifdef PVH_FLAG_IOMMU
8040 pte_p
= PT_ENTRY_NULL
;
8041 if (pve_p
!= PV_ENTRY_NULL
) {
8042 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
8045 if (tlb_flush_needed
) {
8052 * Determine if the fault was induced by software tracking of
8053 * modify/reference bits. If so, re-enable the mapping (and set
8054 * the appropriate bits).
8056 * Returns KERN_SUCCESS if the fault was induced and was
8057 * successfully handled.
8059 * Returns KERN_FAILURE if the fault was not induced and
8060 * the function was unable to deal with it.
8062 * Returns KERN_PROTECTION_FAILURE if the pmap layer explictly
8063 * disallows this type of access.
8065 MARK_AS_PMAP_TEXT
static kern_return_t
8066 arm_fast_fault_internal(
8068 vm_map_address_t va
,
8069 vm_prot_t fault_type
,
8070 __unused
bool was_af_fault
,
8071 __unused
bool from_user
)
8073 kern_return_t result
= KERN_FAILURE
;
8075 pt_entry_t spte
= ARM_PTE_TYPE_FAULT
;
8078 VALIDATE_PMAP(pmap
);
8083 * If the entry doesn't exist, is completely invalid, or is already
8084 * valid, we can't fix it here.
8087 ptep
= pmap_pte(pmap
, va
);
8088 if (ptep
!= PT_ENTRY_NULL
) {
8092 pa
= pte_to_pa(spte
);
8094 if ((spte
== ARM_PTE_TYPE_FAULT
) ||
8095 ARM_PTE_IS_COMPRESSED(spte
, ptep
)) {
8100 if (!pa_valid(pa
)) {
8104 pai
= (int)pa_index(pa
);
8114 if ((IS_REFFAULT_PAGE(pai
)) ||
8115 ((fault_type
& VM_PROT_WRITE
) && IS_MODFAULT_PAGE(pai
))) {
8117 * An attempted access will always clear ref/mod fault state, as
8118 * appropriate for the fault type. arm_clear_fast_fault will
8119 * update the associated PTEs for the page as appropriate; if
8120 * any PTEs are updated, we redrive the access. If the mapping
8121 * does not actually allow for the attempted access, the
8122 * following fault will (hopefully) fail to update any PTEs, and
8123 * thus cause arm_fast_fault to decide that it failed to handle
8126 if (IS_REFFAULT_PAGE(pai
)) {
8127 CLR_REFFAULT_PAGE(pai
);
8129 if ((fault_type
& VM_PROT_WRITE
) && IS_MODFAULT_PAGE(pai
)) {
8130 CLR_MODFAULT_PAGE(pai
);
8133 if (arm_clear_fast_fault((ppnum_t
)atop(pa
), fault_type
)) {
8135 * Should this preserve KERN_PROTECTION_FAILURE? The
8136 * cost of not doing so is a another fault in a case
8137 * that should already result in an exception.
8139 result
= KERN_SUCCESS
;
8151 vm_map_address_t va
,
8152 vm_prot_t fault_type
,
8154 __unused
bool from_user
)
8156 kern_return_t result
= KERN_FAILURE
;
8158 if (va
< pmap
->min
|| va
>= pmap
->max
) {
8162 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT
) | DBG_FUNC_START
,
8163 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(va
), fault_type
,
8166 #if (__ARM_VMSA__ == 7)
8167 if (pmap
!= kernel_pmap
) {
8168 pmap_cpu_data_t
*cpu_data_ptr
= pmap_get_cpu_data();
8170 pmap_t cur_user_pmap
;
8172 cur_pmap
= current_pmap();
8173 cur_user_pmap
= cpu_data_ptr
->cpu_user_pmap
;
8175 if ((cur_user_pmap
== cur_pmap
) && (cur_pmap
== pmap
)) {
8176 if (cpu_data_ptr
->cpu_user_pmap_stamp
!= pmap
->stamp
) {
8177 pmap_set_pmap(pmap
, current_thread());
8178 result
= KERN_SUCCESS
;
8185 result
= arm_fast_fault_internal(pmap
, va
, fault_type
, was_af_fault
, from_user
);
8187 #if (__ARM_VMSA__ == 7)
8191 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT
) | DBG_FUNC_END
, result
);
8201 bcopy_phys((addr64_t
) (ptoa(psrc
)),
8202 (addr64_t
) (ptoa(pdst
)),
8208 * pmap_copy_page copies the specified (machine independent) pages.
8211 pmap_copy_part_page(
8213 vm_offset_t src_offset
,
8215 vm_offset_t dst_offset
,
8218 bcopy_phys((addr64_t
) (ptoa(psrc
) + src_offset
),
8219 (addr64_t
) (ptoa(pdst
) + dst_offset
),
8225 * pmap_zero_page zeros the specified (machine independent) page.
8231 assert(pn
!= vm_page_fictitious_addr
);
8232 bzero_phys((addr64_t
) ptoa(pn
), PAGE_SIZE
);
8236 * pmap_zero_part_page
8237 * zeros the specified (machine independent) part of a page.
8240 pmap_zero_part_page(
8245 assert(pn
!= vm_page_fictitious_addr
);
8246 assert(offset
+ len
<= PAGE_SIZE
);
8247 bzero_phys((addr64_t
) (ptoa(pn
) + offset
), len
);
8252 * nop in current arm implementation
8256 __unused thread_t t
)
8264 pt_entry_t
*ptep
, pte
;
8266 ptep
= pmap_pte(kernel_pmap
, LOWGLOBAL_ALIAS
);
8267 assert(ptep
!= PT_ENTRY_NULL
);
8268 assert(*ptep
== ARM_PTE_EMPTY
);
8270 pte
= pa_to_pte(ml_static_vtop((vm_offset_t
)&lowGlo
)) | AP_RONA
| ARM_PTE_NX
| ARM_PTE_PNX
| ARM_PTE_AF
| ARM_PTE_TYPE
;
8271 #if __ARM_KERNEL_PROTECT__
8273 #endif /* __ARM_KERNEL_PROTECT__ */
8274 pte
|= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK
);
8275 #if (__ARM_VMSA__ > 7)
8276 pte
|= ARM_PTE_SH(SH_OUTER_MEMORY
);
8281 FLUSH_PTE_RANGE(ptep
, (ptep
+ 1));
8282 PMAP_UPDATE_TLBS(kernel_pmap
, LOWGLOBAL_ALIAS
, LOWGLOBAL_ALIAS
+ PAGE_SIZE
, false);
vm_offset_t
pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
{
	if (__improbable(index >= CPUWINDOWS_MAX)) {
		panic("%s: invalid index %u", __func__, index);
	}
	return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
}
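/*
 * Worked example (illustrative only; the concrete numbers below are assumed,
 * not taken from any particular configuration): with 4K pages and
 * CPUWINDOWS_MAX == 4, window 2 of CPU 3 would live at
 *
 *     CPUWINDOWS_BASE + 0x1000 * (4 * 3 + 2) == CPUWINDOWS_BASE + 0xE000
 *
 * i.e. each CPU owns a contiguous run of CPUWINDOWS_MAX page-sized mapping
 * slots, and slots never alias between CPUs.
 */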
8294 MARK_AS_PMAP_TEXT
static unsigned int
8295 pmap_map_cpu_windows_copy_internal(
8298 unsigned int wimg_bits
)
8300 pt_entry_t
*ptep
= NULL
, pte
;
8301 pmap_cpu_data_t
*pmap_cpu_data
= pmap_get_cpu_data();
8302 unsigned int cpu_num
;
8304 vm_offset_t cpu_copywindow_vaddr
= 0;
8305 bool need_strong_sync
= false;
8308 cpu_num
= pmap_cpu_data
->cpu_number
;
8310 for (i
= 0; i
< CPUWINDOWS_MAX
; i
++) {
8311 cpu_copywindow_vaddr
= pmap_cpu_windows_copy_addr(cpu_num
, i
);
8312 ptep
= pmap_pte(kernel_pmap
, cpu_copywindow_vaddr
);
8313 assert(!ARM_PTE_IS_COMPRESSED(*ptep
, ptep
));
8314 if (*ptep
== ARM_PTE_TYPE_FAULT
) {
8318 if (i
== CPUWINDOWS_MAX
) {
8319 panic("pmap_map_cpu_windows_copy: out of window\n");
8322 pte
= pa_to_pte(ptoa(pn
)) | ARM_PTE_TYPE
| ARM_PTE_AF
| ARM_PTE_NX
| ARM_PTE_PNX
;
8323 #if __ARM_KERNEL_PROTECT__
8325 #endif /* __ARM_KERNEL_PROTECT__ */
8327 pte
|= wimg_to_pte(wimg_bits
);
8329 if (prot
& VM_PROT_WRITE
) {
8330 pte
|= ARM_PTE_AP(AP_RWNA
);
8332 pte
|= ARM_PTE_AP(AP_RONA
);
8335 WRITE_PTE_FAST(ptep
, pte
);
8337 * Invalidate tlb. Cover nested cpu_copywindow_vaddr usage with the interrupted context
8338 * in pmap_unmap_cpu_windows_copy() after clearing the pte and before tlb invalidate.
8340 FLUSH_PTE_STRONG(ptep
);
8341 PMAP_UPDATE_TLBS(kernel_pmap
, cpu_copywindow_vaddr
, cpu_copywindow_vaddr
+ PAGE_SIZE
, pmap_cpu_data
->copywindow_strong_sync
[i
]);
8342 pmap_cpu_data
->copywindow_strong_sync
[i
] = need_strong_sync
;
8348 pmap_map_cpu_windows_copy(
8351 unsigned int wimg_bits
)
8353 return pmap_map_cpu_windows_copy_internal(pn
, prot
, wimg_bits
);
MARK_AS_PMAP_TEXT static void
pmap_unmap_cpu_windows_copy_internal(
	unsigned int index)
{
	pt_entry_t      *ptep;
	unsigned int    cpu_num;
	vm_offset_t     cpu_copywindow_vaddr = 0;
	pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();

	cpu_num = pmap_cpu_data->cpu_number;

	cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
	/* Issue full-system DSB to ensure prior operations on the per-CPU window
	 * (which are likely to have been on I/O memory) are complete before
	 * tearing down the mapping. */
	__builtin_arm_dsb(DSB_SY);
	ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
	WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
	PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index]);
}

void
pmap_unmap_cpu_windows_copy(
	unsigned int index)
{
	return pmap_unmap_cpu_windows_copy_internal(index);
}
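/*
 * Minimal usage sketch (not part of the original source): a physical page
 * can be temporarily mapped through the current CPU's copy window, touched,
 * and unmapped again.  The helper below is hypothetical and assumes the
 * caller runs with preemption disabled so it stays on one CPU.
 */
static inline void
pmap_poke_phys_page(ppnum_t pn, unsigned int wimg_bits)
{
	/* Claim a free window on this CPU and map the page through it. */
	unsigned int index = pmap_map_cpu_windows_copy(pn, VM_PROT_READ | VM_PROT_WRITE, wimg_bits);
	volatile uint32_t *p = (volatile uint32_t *)
	    pmap_cpu_windows_copy_addr(pmap_get_cpu_data()->cpu_number, index);

	p[0] = p[0];    /* touch the page through the per-CPU window */

	/* Tear the mapping down again; the TLB entry is invalidated here. */
	pmap_unmap_cpu_windows_copy(index);
}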
/*
 * Indicate that a pmap is intended to be used as a nested pmap
 * within one or more larger address spaces.  This must be set
 * before pmap_nest() is called with this pmap as the 'subordinate'.
 */
MARK_AS_PMAP_TEXT static void
pmap_set_nested_internal(
	pmap_t pmap)
{
	VALIDATE_PMAP(pmap);
	pmap->nested = TRUE;
}

void
pmap_set_nested(
	pmap_t pmap)
{
	pmap_set_nested_internal(pmap);
}
8405 * pmap_trim_range(pmap, start, end)
8407 * pmap = pmap to operate on
8408 * start = start of the range
8409 * end = end of the range
8411 * Attempts to deallocate TTEs for the given range in the nested range.
8413 MARK_AS_PMAP_TEXT
static void
8420 addr64_t nested_region_start
;
8421 addr64_t nested_region_end
;
8422 addr64_t adjusted_start
;
8423 addr64_t adjusted_end
;
8424 addr64_t adjust_offmask
;
8427 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
8429 if (__improbable(end
< start
)) {
8430 panic("%s: invalid address range, "
8431 "pmap=%p, start=%p, end=%p",
8433 pmap
, (void*)start
, (void*)end
);
8436 nested_region_start
= pmap
->nested
? pmap
->nested_region_subord_addr
: pmap
->nested_region_subord_addr
;
8437 nested_region_end
= nested_region_start
+ pmap
->nested_region_size
;
8439 if (__improbable((start
< nested_region_start
) || (end
> nested_region_end
))) {
8440 panic("%s: range outside nested region %p-%p, "
8441 "pmap=%p, start=%p, end=%p",
8442 __func__
, (void *)nested_region_start
, (void *)nested_region_end
,
8443 pmap
, (void*)start
, (void*)end
);
8446 /* Contract the range to TT page boundaries. */
8447 adjust_offmask
= pt_attr_leaf_table_offmask(pt_attr
);
8448 adjusted_start
= ((start
+ adjust_offmask
) & ~adjust_offmask
);
8449 adjusted_end
= end
& ~adjust_offmask
;
8450 bool modified
= false;
8452 /* Iterate over the range, trying to remove TTEs. */
8453 for (cur
= adjusted_start
; (cur
< adjusted_end
) && (cur
>= adjusted_start
); cur
+= pt_attr_twig_size(pt_attr
)) {
8456 tte_p
= pmap_tte(pmap
, cur
);
8458 if (tte_p
== (tt_entry_t
*) NULL
) {
8462 if ((*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
8463 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
8465 if ((ptep_get_ptd(pte_p
)->ptd_info
[ARM_PT_DESC_INDEX(pte_p
)].refcnt
== 0) &&
8466 (pmap
!= kernel_pmap
)) {
8467 if (pmap
->nested
== TRUE
) {
8468 /* Deallocate for the nested map. */
8469 pmap_tte_deallocate(pmap
, tte_p
, pt_attr_twig_level(pt_attr
));
8471 /* Just remove for the parent map. */
8472 pmap_tte_remove(pmap
, tte_p
, pt_attr_twig_level(pt_attr
));
8475 pmap_get_pt_ops(pmap
)->flush_tlb_tte_async(cur
, pmap
);
8488 #if (__ARM_VMSA__ > 7)
8489 /* Remove empty L2 TTs. */
8490 adjusted_start
= ((start
+ ARM_TT_L1_OFFMASK
) & ~ARM_TT_L1_OFFMASK
);
8491 adjusted_end
= end
& ~ARM_TT_L1_OFFMASK
;
8493 for (cur
= adjusted_start
; (cur
< adjusted_end
) && (cur
>= adjusted_start
); cur
+= ARM_TT_L1_SIZE
) {
8494 /* For each L1 entry in our range... */
8497 bool remove_tt1e
= true;
8498 tt_entry_t
* tt1e_p
= pmap_tt1e(pmap
, cur
);
8499 tt_entry_t
* tt2e_start
;
8500 tt_entry_t
* tt2e_end
;
8501 tt_entry_t
* tt2e_p
;
8504 if (tt1e_p
== NULL
) {
8511 if (tt1e
== ARM_TTE_TYPE_FAULT
) {
8516 tt2e_start
= &((tt_entry_t
*) phystokv(tt1e
& ARM_TTE_TABLE_MASK
))[0];
8517 tt2e_end
= &tt2e_start
[TTE_PGENTRIES
];
8519 for (tt2e_p
= tt2e_start
; tt2e_p
< tt2e_end
; tt2e_p
++) {
8520 if (*tt2e_p
!= ARM_TTE_TYPE_FAULT
) {
8522 * If any TTEs are populated, don't remove the
8525 remove_tt1e
= false;
8530 pmap_tte_deallocate(pmap
, tt1e_p
, PMAP_TT_L1_LEVEL
);
8531 PMAP_UPDATE_TLBS(pmap
, cur
, cur
+ PAGE_SIZE
, false);
8536 #endif /* (__ARM_VMSA__ > 7) */
8540 * pmap_trim_internal(grand, subord, vstart, nstart, size)
8542 * grand = pmap subord is nested in
8543 * subord = nested pmap
8544 * vstart = start of the used range in grand
8545 * nstart = start of the used range in nstart
8546 * size = size of the used range
8548 * Attempts to trim the shared region page tables down to only cover the given
8549 * range in subord and grand.
8551 MARK_AS_PMAP_TEXT
static void
8559 addr64_t vend
, nend
;
8560 addr64_t adjust_offmask
;
8562 if (__improbable(os_add_overflow(vstart
, size
, &vend
))) {
8563 panic("%s: grand addr wraps around, "
8564 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8565 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8568 if (__improbable(os_add_overflow(nstart
, size
, &nend
))) {
8569 panic("%s: nested addr wraps around, "
8570 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8571 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8574 VALIDATE_PMAP(grand
);
8575 VALIDATE_PMAP(subord
);
8577 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(grand
);
8581 if (!subord
->nested
) {
8582 panic("%s: subord is not nestable, "
8583 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8584 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8587 if (grand
->nested
) {
8588 panic("%s: grand is nestable, "
8589 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8590 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8593 if (grand
->nested_pmap
!= subord
) {
8594 panic("%s: grand->nested != subord, "
8595 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8596 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8600 if ((vstart
< grand
->nested_region_grand_addr
) || (vend
> (grand
->nested_region_grand_addr
+ grand
->nested_region_size
))) {
8601 panic("%s: grand range not in nested region, "
8602 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8603 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8606 if ((nstart
< grand
->nested_region_grand_addr
) || (nend
> (grand
->nested_region_grand_addr
+ grand
->nested_region_size
))) {
8607 panic("%s: subord range not in nested region, "
8608 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8609 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8614 if (!grand
->nested_has_no_bounds_ref
) {
8615 assert(subord
->nested_bounds_set
);
8617 if (!grand
->nested_bounds_set
) {
8618 /* Inherit the bounds from subord. */
8619 grand
->nested_region_true_start
= (subord
->nested_region_true_start
- grand
->nested_region_subord_addr
) + grand
->nested_region_grand_addr
;
8620 grand
->nested_region_true_end
= (subord
->nested_region_true_end
- grand
->nested_region_subord_addr
) + grand
->nested_region_grand_addr
;
8621 grand
->nested_bounds_set
= true;
8624 PMAP_UNLOCK(subord
);
8628 if ((!subord
->nested_bounds_set
) && size
) {
8629 adjust_offmask
= pt_attr_leaf_table_offmask(pt_attr
);
8631 subord
->nested_region_true_start
= nstart
;
8632 subord
->nested_region_true_end
= nend
;
8633 subord
->nested_region_true_start
&= ~adjust_offmask
;
8635 if (__improbable(os_add_overflow(subord
->nested_region_true_end
, adjust_offmask
, &subord
->nested_region_true_end
))) {
8636 panic("%s: padded true end wraps around, "
8637 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8638 __func__
, grand
, subord
, (void*)vstart
, (void*)nstart
, size
);
8641 subord
->nested_region_true_end
&= ~adjust_offmask
;
8642 subord
->nested_bounds_set
= true;
8645 if (subord
->nested_bounds_set
) {
8646 /* Inherit the bounds from subord. */
8647 grand
->nested_region_true_start
= (subord
->nested_region_true_start
- grand
->nested_region_subord_addr
) + grand
->nested_region_grand_addr
;
8648 grand
->nested_region_true_end
= (subord
->nested_region_true_end
- grand
->nested_region_subord_addr
) + grand
->nested_region_grand_addr
;
8649 grand
->nested_bounds_set
= true;
8651 /* If we know the bounds, we can trim the pmap. */
8652 grand
->nested_has_no_bounds_ref
= false;
8653 PMAP_UNLOCK(subord
);
8655 /* Don't trim if we don't know the bounds. */
8656 PMAP_UNLOCK(subord
);
8660 /* Trim grand to only cover the given range. */
8661 pmap_trim_range(grand
, grand
->nested_region_grand_addr
, grand
->nested_region_true_start
);
8662 pmap_trim_range(grand
, grand
->nested_region_true_end
, (grand
->nested_region_grand_addr
+ grand
->nested_region_size
));
8664 /* Try to trim subord. */
8665 pmap_trim_subord(subord
);
8668 MARK_AS_PMAP_TEXT
static void
8669 pmap_trim_self(pmap_t pmap
)
8671 if (pmap
->nested_has_no_bounds_ref
&& pmap
->nested_pmap
) {
8672 /* If we have a no bounds ref, we need to drop it. */
8673 PMAP_LOCK(pmap
->nested_pmap
);
8674 pmap
->nested_has_no_bounds_ref
= false;
8675 boolean_t nested_bounds_set
= pmap
->nested_pmap
->nested_bounds_set
;
8676 vm_map_offset_t nested_region_true_start
= (pmap
->nested_pmap
->nested_region_true_start
- pmap
->nested_region_subord_addr
) + pmap
->nested_region_grand_addr
;
8677 vm_map_offset_t nested_region_true_end
= (pmap
->nested_pmap
->nested_region_true_end
- pmap
->nested_region_subord_addr
) + pmap
->nested_region_grand_addr
;
8678 PMAP_UNLOCK(pmap
->nested_pmap
);
8680 if (nested_bounds_set
) {
8681 pmap_trim_range(pmap
, pmap
->nested_region_grand_addr
, nested_region_true_start
);
8682 pmap_trim_range(pmap
, nested_region_true_end
, (pmap
->nested_region_grand_addr
+ pmap
->nested_region_size
));
8685 * Try trimming the nested pmap, in case we had the
8688 pmap_trim_subord(pmap
->nested_pmap
);
8693 * pmap_trim_subord(grand, subord)
8695 * grand = pmap that we have nested subord in
8696 * subord = nested pmap we are attempting to trim
8698 * Trims subord if possible
8700 MARK_AS_PMAP_TEXT
static void
8701 pmap_trim_subord(pmap_t subord
)
8703 bool contract_subord
= false;
8707 subord
->nested_no_bounds_refcnt
--;
8709 if ((subord
->nested_no_bounds_refcnt
== 0) && (subord
->nested_bounds_set
)) {
8710 /* If this was the last no bounds reference, trim subord. */
8711 contract_subord
= true;
8714 PMAP_UNLOCK(subord
);
8716 if (contract_subord
) {
8717 pmap_trim_range(subord
, subord
->nested_region_subord_addr
, subord
->nested_region_true_start
);
8718 pmap_trim_range(subord
, subord
->nested_region_true_end
, subord
->nested_region_subord_addr
+ subord
->nested_region_size
);
8730 pmap_trim_internal(grand
, subord
, vstart
, nstart
, size
);
8735 * kern_return_t pmap_nest(grand, subord, vstart, size)
8737 * grand = the pmap that we will nest subord into
8738 * subord = the pmap that goes into the grand
8739 * vstart = start of range in pmap to be inserted
8740 * nstart = start of range in pmap nested pmap
8741 * size = Size of nest area (up to 16TB)
8743 * Inserts a pmap into another. This is used to implement shared segments.
8747 MARK_AS_PMAP_TEXT
static kern_return_t
8755 kern_return_t kr
= KERN_FAILURE
;
8756 vm_map_offset_t vaddr
, nvaddr
;
8760 unsigned int num_tte
;
8761 unsigned int nested_region_asid_bitmap_size
;
8762 unsigned int* nested_region_asid_bitmap
;
8763 int expand_options
= 0;
8765 addr64_t vend
, nend
;
8766 if (__improbable(os_add_overflow(vstart
, size
, &vend
))) {
8767 panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__
, grand
, vstart
, size
);
8769 if (__improbable(os_add_overflow(nstart
, size
, &nend
))) {
8770 panic("%s: %p nested addr wraps around: 0x%llx + 0x%llx", __func__
, subord
, nstart
, size
);
8773 VALIDATE_PMAP(grand
);
8774 VALIDATE_PMAP(subord
);
8776 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(grand
);
8777 assert(pmap_get_pt_attr(subord
) == pt_attr
);
8780 if (((size
| vstart
| nstart
) & (pt_attr_leaf_table_offmask(pt_attr
))) != 0x0ULL
) {
8781 panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx\n", grand
, vstart
, nstart
, size
);
8784 if (!subord
->nested
) {
8785 panic("%s: subordinate pmap %p is not nestable", __func__
, subord
);
8788 if ((grand
->nested_pmap
!= PMAP_NULL
) && (grand
->nested_pmap
!= subord
)) {
8789 panic("pmap_nest() pmap %p has a nested pmap\n", grand
);
8792 if (subord
->nested_region_asid_bitmap
== NULL
) {
8793 nested_region_asid_bitmap_size
= (unsigned int)(size
>> pt_attr_twig_shift(pt_attr
)) / (sizeof(unsigned int) * NBBY
);
8795 nested_region_asid_bitmap
= kalloc(nested_region_asid_bitmap_size
* sizeof(unsigned int));
8796 bzero(nested_region_asid_bitmap
, nested_region_asid_bitmap_size
* sizeof(unsigned int));
8799 if (subord
->nested_region_asid_bitmap
== NULL
) {
8800 subord
->nested_region_asid_bitmap
= nested_region_asid_bitmap
;
8801 subord
->nested_region_asid_bitmap_size
= nested_region_asid_bitmap_size
;
8802 subord
->nested_region_subord_addr
= nstart
;
8803 subord
->nested_region_size
= (mach_vm_offset_t
) size
;
8804 nested_region_asid_bitmap
= NULL
;
8806 PMAP_UNLOCK(subord
);
8807 if (nested_region_asid_bitmap
!= NULL
) {
8808 kfree(nested_region_asid_bitmap
, nested_region_asid_bitmap_size
* sizeof(unsigned int));
8811 if ((subord
->nested_region_subord_addr
+ subord
->nested_region_size
) < nend
) {
8813 unsigned int new_nested_region_asid_bitmap_size
;
8814 unsigned int* new_nested_region_asid_bitmap
;
8816 nested_region_asid_bitmap
= NULL
;
8817 nested_region_asid_bitmap_size
= 0;
8818 new_size
= nend
- subord
->nested_region_subord_addr
;
8820 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
8821 new_nested_region_asid_bitmap_size
= (unsigned int)((new_size
>> pt_attr_twig_shift(pt_attr
)) / (sizeof(unsigned int) * NBBY
)) + 1;
8823 new_nested_region_asid_bitmap
= kalloc(new_nested_region_asid_bitmap_size
* sizeof(unsigned int));
8825 if (subord
->nested_region_size
< new_size
) {
8826 bzero(new_nested_region_asid_bitmap
, new_nested_region_asid_bitmap_size
* sizeof(unsigned int));
8827 bcopy(subord
->nested_region_asid_bitmap
, new_nested_region_asid_bitmap
, subord
->nested_region_asid_bitmap_size
);
8828 nested_region_asid_bitmap_size
= subord
->nested_region_asid_bitmap_size
;
8829 nested_region_asid_bitmap
= subord
->nested_region_asid_bitmap
;
8830 subord
->nested_region_asid_bitmap
= new_nested_region_asid_bitmap
;
8831 subord
->nested_region_asid_bitmap_size
= new_nested_region_asid_bitmap_size
;
8832 subord
->nested_region_size
= new_size
;
8833 new_nested_region_asid_bitmap
= NULL
;
8835 PMAP_UNLOCK(subord
);
8836 if (nested_region_asid_bitmap
!= NULL
)
8837 { kfree(nested_region_asid_bitmap
, nested_region_asid_bitmap_size
* sizeof(unsigned int));}
8838 if (new_nested_region_asid_bitmap
!= NULL
)
8839 { kfree(new_nested_region_asid_bitmap
, new_nested_region_asid_bitmap_size
* sizeof(unsigned int));}
8843 if (grand
->nested_pmap
== PMAP_NULL
) {
8844 grand
->nested_pmap
= subord
;
8846 if (!subord
->nested_bounds_set
) {
8848 * We are nesting without the shared regions bounds
8849 * being known. We'll have to trim the pmap later.
8851 grand
->nested_has_no_bounds_ref
= true;
8852 subord
->nested_no_bounds_refcnt
++;
8855 grand
->nested_region_grand_addr
= vstart
;
8856 grand
->nested_region_subord_addr
= nstart
;
8857 grand
->nested_region_size
= (mach_vm_offset_t
) size
;
8859 if ((grand
->nested_region_grand_addr
> vstart
)) {
8860 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand
);
8861 } else if ((grand
->nested_region_grand_addr
+ grand
->nested_region_size
) < vend
) {
8862 grand
->nested_region_size
= (mach_vm_offset_t
)(vstart
- grand
->nested_region_grand_addr
+ size
);
8866 #if (__ARM_VMSA__ == 7)
8867 nvaddr
= (vm_map_offset_t
) nstart
;
8868 vaddr
= (vm_map_offset_t
) vstart
;
8869 num_tte
= size
>> ARM_TT_L1_SHIFT
;
8871 for (i
= 0; i
< num_tte
; i
++) {
8872 if (((subord
->nested_region_true_start
) > nvaddr
) || ((subord
->nested_region_true_end
) <= nvaddr
)) {
8876 stte_p
= pmap_tte(subord
, nvaddr
);
8877 if ((stte_p
== (tt_entry_t
*)NULL
) || (((*stte_p
) & ARM_TTE_TYPE_MASK
) != ARM_TTE_TYPE_TABLE
)) {
8878 PMAP_UNLOCK(subord
);
8879 kr
= pmap_expand(subord
, nvaddr
, expand_options
, PMAP_TT_L2_LEVEL
);
8881 if (kr
!= KERN_SUCCESS
) {
8888 PMAP_UNLOCK(subord
);
8890 stte_p
= pmap_tte(grand
, vaddr
);
8891 if (stte_p
== (tt_entry_t
*)NULL
) {
8893 kr
= pmap_expand(grand
, vaddr
, expand_options
, PMAP_TT_L1_LEVEL
);
8895 if (kr
!= KERN_SUCCESS
) {
8906 nvaddr
+= ARM_TT_L1_SIZE
;
8907 vaddr
+= ARM_TT_L1_SIZE
;
8911 nvaddr
= (vm_map_offset_t
) nstart
;
8912 num_tte
= (unsigned int)(size
>> pt_attr_twig_shift(pt_attr
));
8914 for (i
= 0; i
< num_tte
; i
++) {
8915 if (((subord
->nested_region_true_start
) > nvaddr
) || ((subord
->nested_region_true_end
) <= nvaddr
)) {
8919 stte_p
= pmap_tte(subord
, nvaddr
);
8920 if (stte_p
== PT_ENTRY_NULL
|| *stte_p
== ARM_TTE_EMPTY
) {
8921 PMAP_UNLOCK(subord
);
8922 kr
= pmap_expand(subord
, nvaddr
, expand_options
, PMAP_TT_LEAF_LEVEL
);
8924 if (kr
!= KERN_SUCCESS
) {
8932 nvaddr
+= pt_attr_twig_size(pt_attr
);
8935 PMAP_UNLOCK(subord
);
8938 * copy tte's from subord pmap into grand pmap
8942 nvaddr
= (vm_map_offset_t
) nstart
;
8943 vaddr
= (vm_map_offset_t
) vstart
;
8946 #if (__ARM_VMSA__ == 7)
8947 for (i
= 0; i
< num_tte
; i
++) {
8948 if (((subord
->nested_region_true_start
) > nvaddr
) || ((subord
->nested_region_true_end
) <= nvaddr
)) {
8952 stte_p
= pmap_tte(subord
, nvaddr
);
8953 gtte_p
= pmap_tte(grand
, vaddr
);
8957 nvaddr
+= ARM_TT_L1_SIZE
;
8958 vaddr
+= ARM_TT_L1_SIZE
;
8961 for (i
= 0; i
< num_tte
; i
++) {
8962 if (((subord
->nested_region_true_start
) > nvaddr
) || ((subord
->nested_region_true_end
) <= nvaddr
)) {
8966 stte_p
= pmap_tte(subord
, nvaddr
);
8967 gtte_p
= pmap_tte(grand
, vaddr
);
8968 if (gtte_p
== PT_ENTRY_NULL
) {
8970 kr
= pmap_expand(grand
, vaddr
, expand_options
, PMAP_TT_TWIG_LEVEL
);
8973 if (kr
!= KERN_SUCCESS
) {
8977 gtte_p
= pmap_tt2e(grand
, vaddr
);
8982 vaddr
+= pt_attr_twig_size(pt_attr
);
8983 nvaddr
+= pt_attr_twig_size(pt_attr
);
8990 stte_p
= pmap_tte(grand
, vstart
);
8991 FLUSH_PTE_RANGE_STRONG(stte_p
, stte_p
+ num_tte
);
8993 #if (__ARM_VMSA__ > 7)
8995 * check for overflow on LP64 arch
8997 assert((size
& 0xFFFFFFFF00000000ULL
) == 0);
8999 PMAP_UPDATE_TLBS(grand
, vstart
, vend
, false);
kern_return_t
pmap_nest(
	pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	addr64_t nstart,
	uint64_t size)
{
	kern_return_t kr = KERN_FAILURE;

	PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
	    VM_KERNEL_ADDRHIDE(vstart));

	kr = pmap_nest_internal(grand, subord, vstart, nstart, size);

	PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);

	return kr;
}
/*
 *	kern_return_t pmap_unnest(grand, vaddr)
 *
 *	grand = the pmap that will have the virtual range unnested
 *	vaddr = start of range in pmap to be unnested
 *	size  = size of range in pmap to be unnested
 */
kern_return_t
pmap_unnest(
	pmap_t grand,
	addr64_t vaddr,
	uint64_t size)
{
	return pmap_unnest_options(grand, vaddr, size, 0);
}
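/*
 * Illustrative sketch (not part of the original source): the nesting API is
 * driven by the VM shared-region code roughly as follows.  The region base
 * and size names below are placeholders; only the call sequence is the point.
 *
 *     pmap_set_nested(shared_region_pmap);            // mark as nestable
 *     kr = pmap_nest(task_pmap, shared_region_pmap,
 *                    region_base,                     // vstart in the task
 *                    region_base,                     // nstart in the subord
 *                    region_size);                    // twig-aligned size
 *     ...
 *     kr = pmap_unnest(task_pmap, region_base, region_size);
 *
 * Both ranges must be aligned to the twig (table) size, or pmap_nest_internal()
 * above panics with an "unaligned nesting request".
 */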
9044 MARK_AS_PMAP_TEXT
static kern_return_t
9045 pmap_unnest_options_internal(
9049 unsigned int option
)
9051 vm_map_offset_t start
;
9052 vm_map_offset_t addr
;
9054 unsigned int current_index
;
9055 unsigned int start_index
;
9056 unsigned int max_index
;
9057 unsigned int num_tte
;
9061 if (__improbable(os_add_overflow(vaddr
, size
, &vend
))) {
9062 panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__
, grand
, vaddr
, size
);
9065 VALIDATE_PMAP(grand
);
9067 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(grand
);
9069 if (((size
| vaddr
) & pt_attr_twig_offmask(pt_attr
)) != 0x0ULL
) {
9070 panic("pmap_unnest(): unaligned request");
9073 if ((option
& PMAP_UNNEST_CLEAN
) == 0) {
9074 if (grand
->nested_pmap
== NULL
) {
9075 panic("%s: %p has no nested pmap", __func__
, grand
);
9078 if ((vaddr
< grand
->nested_region_grand_addr
) || (vend
> (grand
->nested_region_grand_addr
+ grand
->nested_region_size
))) {
9079 panic("%s: %p: unnest request to region not-fully-nested region [%p, %p)", __func__
, grand
, (void*)vaddr
, (void*)vend
);
9082 PMAP_LOCK(grand
->nested_pmap
);
9084 start
= vaddr
- grand
->nested_region_grand_addr
+ grand
->nested_region_subord_addr
;
9085 start_index
= (unsigned int)((vaddr
- grand
->nested_region_grand_addr
) >> pt_attr_twig_shift(pt_attr
));
9086 max_index
= (unsigned int)(start_index
+ (size
>> pt_attr_twig_shift(pt_attr
)));
9087 num_tte
= (unsigned int)(size
>> pt_attr_twig_shift(pt_attr
));
9089 for (current_index
= start_index
, addr
= start
; current_index
< max_index
; current_index
++, addr
+= pt_attr_twig_size(pt_attr
)) {
9090 pt_entry_t
*bpte
, *epte
, *cpte
;
9092 if (addr
< grand
->nested_pmap
->nested_region_true_start
) {
9093 /* We haven't reached the interesting range. */
9097 if (addr
>= grand
->nested_pmap
->nested_region_true_end
) {
9098 /* We're done with the interesting range. */
9102 bpte
= pmap_pte(grand
->nested_pmap
, addr
);
9103 epte
= bpte
+ (pt_attr_leaf_index_mask(pt_attr
) >> pt_attr_leaf_shift(pt_attr
));
9105 if (!testbit(current_index
, (int *)grand
->nested_pmap
->nested_region_asid_bitmap
)) {
9106 setbit(current_index
, (int *)grand
->nested_pmap
->nested_region_asid_bitmap
);
9108 for (cpte
= bpte
; cpte
<= epte
; cpte
++) {
9111 boolean_t managed
= FALSE
;
9114 if ((*cpte
!= ARM_PTE_TYPE_FAULT
)
9115 && (!ARM_PTE_IS_COMPRESSED(*cpte
, cpte
))) {
9118 pa
= pte_to_pa(spte
);
9119 if (!pa_valid(pa
)) {
9122 pai
= (int)pa_index(pa
);
9125 pa
= pte_to_pa(spte
);
9126 if (pai
== (int)pa_index(pa
)) {
9128 break; // Leave the PVH locked as we'll unlock it after we update the PTE
9133 if (((spte
& ARM_PTE_NG
) != ARM_PTE_NG
)) {
9134 WRITE_PTE_FAST(cpte
, (spte
| ARM_PTE_NG
));
9138 ASSERT_PVH_LOCKED(pai
);
9145 FLUSH_PTE_RANGE_STRONG(bpte
, epte
);
9146 flush_mmu_tlb_region_asid_async(start
, (unsigned)size
, grand
->nested_pmap
);
9151 PMAP_UNLOCK(grand
->nested_pmap
);
9157 * invalidate all pdes for segment at vaddr in pmap grand
9162 num_tte
= (unsigned int)(size
>> pt_attr_twig_shift(pt_attr
));
9164 for (i
= 0; i
< num_tte
; i
++, addr
+= pt_attr_twig_size(pt_attr
)) {
9165 if (addr
< grand
->nested_pmap
->nested_region_true_start
) {
9166 /* We haven't reached the interesting range. */
9170 if (addr
>= grand
->nested_pmap
->nested_region_true_end
) {
9171 /* We're done with the interesting range. */
9175 tte_p
= pmap_tte(grand
, addr
);
9176 *tte_p
= ARM_TTE_TYPE_FAULT
;
9179 tte_p
= pmap_tte(grand
, start
);
9180 FLUSH_PTE_RANGE_STRONG(tte_p
, tte_p
+ num_tte
);
9181 PMAP_UPDATE_TLBS(grand
, start
, vend
, false);
9185 return KERN_SUCCESS
;
kern_return_t
pmap_unnest_options(
	pmap_t grand,
	addr64_t vaddr,
	uint64_t size,
	unsigned int option)
{
	kern_return_t kr = KERN_FAILURE;

	PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));

	kr = pmap_unnest_options_internal(grand, vaddr, size, option);

	PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);

	return kr;
}
9208 pmap_adjust_unnest_parameters(
9210 __unused vm_map_offset_t
*s
,
9211 __unused vm_map_offset_t
*e
)
9213 return TRUE
; /* to get to log_unnest_badness()... */
9217 * disable no-execute capability on
9218 * the specified pmap
9220 #if DEVELOPMENT || DEBUG
9225 pmap
->nx_enabled
= FALSE
;
9230 __unused pmap_t pmap
)
9239 pt_fake_zone_index
= zone_index
;
9245 vm_size_t
*cur_size
, vm_size_t
*max_size
, vm_size_t
*elem_size
, vm_size_t
*alloc_size
,
9246 uint64_t *sum_size
, int *collectable
, int *exhaustable
, int *caller_acct
)
9248 *count
= inuse_pmap_pages_count
;
9249 *cur_size
= PAGE_SIZE
* (inuse_pmap_pages_count
);
9250 *max_size
= PAGE_SIZE
* (inuse_pmap_pages_count
+ vm_page_inactive_count
+ vm_page_active_count
+ vm_page_free_count
);
9251 *elem_size
= PAGE_SIZE
;
9252 *alloc_size
= PAGE_SIZE
;
9253 *sum_size
= (alloc_pmap_pages_count
) * PAGE_SIZE
;
/*
 *	flush a range of hardware TLB entries.
 *	NOTE: assumes the smallest TLB entry in use will be for
 *	an ARM small page (4K).
 */

#define ARM_FULL_TLB_FLUSH_THRESHOLD    64

#if __ARM_RANGE_TLBI__
#define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
#define ARM64_FULL_TLB_FLUSH_THRESHOLD  ARM64_16K_TLB_RANGE_PAGES
#else
#define ARM64_FULL_TLB_FLUSH_THRESHOLD  256
#endif // __ARM_RANGE_TLBI__
9276 flush_mmu_tlb_region_asid_async(
9281 #if (__ARM_VMSA__ == 7)
9282 vm_offset_t end
= va
+ length
;
9285 asid
= pmap
->hw_asid
;
9287 if (length
/ ARM_SMALL_PAGE_SIZE
> ARM_FULL_TLB_FLUSH_THRESHOLD
) {
9288 boolean_t flush_all
= FALSE
;
9290 if ((asid
== 0) || (pmap
->nested
== TRUE
)) {
9294 flush_mmu_tlb_async();
9296 flush_mmu_tlb_asid_async(asid
);
9301 if (pmap
->nested
== TRUE
) {
9305 va
= arm_trunc_page(va
);
9307 flush_mmu_tlb_mva_entries_async(va
);
9308 va
+= ARM_SMALL_PAGE_SIZE
;
9313 va
= arm_trunc_page(va
) | (asid
& 0xff);
9314 flush_mmu_tlb_entries_async(va
, end
);
9317 unsigned npages
= length
>> pt_attr_leaf_shift(pmap_get_pt_attr(pmap
));
9320 asid
= pmap
->hw_asid
;
9322 if (npages
> ARM64_FULL_TLB_FLUSH_THRESHOLD
) {
9323 boolean_t flush_all
= FALSE
;
9325 if ((asid
== 0) || (pmap
->nested
== TRUE
)) {
9329 flush_mmu_tlb_async();
9331 flush_mmu_tlb_asid_async((uint64_t)asid
<< TLBI_ASID_SHIFT
);
9335 #if __ARM_RANGE_TLBI__
9336 if (npages
> ARM64_RANGE_TLB_FLUSH_THRESHOLD
) {
9337 va
= generate_rtlbi_param(npages
, asid
, va
);
9338 if (pmap
->nested
== TRUE
) {
9339 flush_mmu_tlb_allrange_async(va
);
9341 flush_mmu_tlb_range_async(va
);
9346 vm_offset_t end
= tlbi_asid(asid
) | tlbi_addr(va
+ length
);
9347 va
= tlbi_asid(asid
) | tlbi_addr(va
);
9348 if (pmap
->nested
== TRUE
) {
9349 flush_mmu_tlb_allentries_async(va
, end
);
9351 flush_mmu_tlb_entries_async(va
, end
);
MARK_AS_PMAP_TEXT static void
flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
	flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
	flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
	flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
#else
	flush_mmu_tlb_entry_async(tlbi_addr(va & ~pt_attr_twig_offmask(pmap_get_pt_attr(pmap))) | tlbi_asid(pmap->hw_asid));
#endif
}

MARK_AS_PMAP_TEXT static void
flush_mmu_tlb_full_asid_async(pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	flush_mmu_tlb_asid_async(pmap->hw_asid);
#else /* (__ARM_VMSA__ == 7) */
	flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
#endif /* (__ARM_VMSA__ == 7) */
}

void
flush_mmu_tlb_region(
	vm_offset_t va,
	unsigned length)
{
	flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
	sync_tlb_flush();
}
9389 static pmap_io_range_t
*
9390 pmap_find_io_attr(pmap_paddr_t paddr
)
9392 pmap_io_range_t find_range
= {.addr
= paddr
& ~PAGE_MASK
, .len
= PAGE_SIZE
};
9393 unsigned int begin
= 0, end
= num_io_rgns
- 1;
9394 if ((num_io_rgns
== 0) || (paddr
< io_attr_table
[begin
].addr
) ||
9395 (paddr
>= (io_attr_table
[end
].addr
+ io_attr_table
[end
].len
))) {
9400 unsigned int middle
= (begin
+ end
) / 2;
9401 int cmp
= cmp_io_rgns(&find_range
, &io_attr_table
[middle
]);
9403 return &io_attr_table
[middle
];
9404 } else if (begin
== end
) {
9406 } else if (cmp
> 0) {
9417 pmap_cache_attributes(
9422 unsigned int result
;
9423 pp_attr_t pp_attr_current
;
9427 assert(vm_last_phys
> vm_first_phys
); // Check that pmap has been bootstrapped
9429 if (!pa_valid(paddr
)) {
9430 pmap_io_range_t
*io_rgn
= pmap_find_io_attr(paddr
);
9431 return (io_rgn
== NULL
) ? VM_WIMG_IO
: io_rgn
->wimg
;
9434 result
= VM_WIMG_DEFAULT
;
9436 pai
= (int)pa_index(paddr
);
9438 pp_attr_current
= pp_attr_table
[pai
];
9439 if (pp_attr_current
& PP_ATTR_WIMG_MASK
) {
9440 result
= pp_attr_current
& PP_ATTR_WIMG_MASK
;
MARK_AS_PMAP_TEXT static void
pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
{
	if ((wimg_bits_prev != wimg_bits_new)
	    && ((wimg_bits_prev == VM_WIMG_COPYBACK)
	    || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
	    && (wimg_bits_new != VM_WIMG_COPYBACK))
	    || ((wimg_bits_prev == VM_WIMG_WTHRU)
	    && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
		pmap_sync_page_attributes_phys(pn);
	}

	if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
		pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
	}
}
9462 MARK_AS_PMAP_TEXT
static __unused
void
9463 pmap_update_compressor_page_internal(ppnum_t pn
, unsigned int prev_cacheattr
, unsigned int new_cacheattr
)
9465 pmap_paddr_t paddr
= ptoa(pn
);
9466 int pai
= (int)pa_index(paddr
);
9468 if (__improbable(!pa_valid(paddr
))) {
9469 panic("%s called on non-managed page 0x%08x", __func__
, pn
);
9475 pmap_update_cache_attributes_locked(pn
, new_cacheattr
);
9479 pmap_sync_wimg(pn
, prev_cacheattr
& VM_WIMG_MASK
, new_cacheattr
& VM_WIMG_MASK
);
9483 pmap_map_compressor_page(ppnum_t pn
)
9485 #if __ARM_PTE_PHYSMAP__
9486 unsigned int cacheattr
= pmap_cache_attributes(pn
) & VM_WIMG_MASK
;
9487 if (cacheattr
!= VM_WIMG_DEFAULT
) {
9488 pmap_update_compressor_page_internal(pn
, cacheattr
, VM_WIMG_DEFAULT
);
9491 return (void*)phystokv(ptoa(pn
));
9495 pmap_unmap_compressor_page(ppnum_t pn __unused
, void *kva __unused
)
9497 #if __ARM_PTE_PHYSMAP__
9498 unsigned int cacheattr
= pmap_cache_attributes(pn
) & VM_WIMG_MASK
;
9499 if (cacheattr
!= VM_WIMG_DEFAULT
) {
9500 pmap_update_compressor_page_internal(pn
, VM_WIMG_DEFAULT
, cacheattr
);
MARK_AS_PMAP_TEXT static boolean_t
pmap_batch_set_cache_attributes_internal(
	ppnum_t pn,
	unsigned int cacheattr,
	unsigned int page_cnt,
	unsigned int page_index,
	boolean_t doit,
	unsigned int *res)
{
	pmap_paddr_t	paddr;
	int		pai;
	pp_attr_t	pp_attr_current;
	pp_attr_t	pp_attr_template;
	unsigned int	wimg_bits_prev, wimg_bits_new;

	if (cacheattr & VM_WIMG_USE_DEFAULT) {
		cacheattr = VM_WIMG_DEFAULT;
	}

	if ((doit == FALSE) && (*res == 0)) {
		pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
		*res = page_cnt;
		pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
		if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt << PAGE_SHIFT) == FALSE) {
			return FALSE;
		}
	}

	paddr = ptoa(pn);

	if (!pa_valid(paddr)) {
		panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn);
	}

	pai = (int)pa_index(paddr);

	if (doit) {
		LOCK_PVH(pai);
	}

	do {
		pp_attr_current = pp_attr_table[pai];
		wimg_bits_prev = VM_WIMG_DEFAULT;
		if (pp_attr_current & PP_ATTR_WIMG_MASK) {
			wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
		}

		pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));

		if (!doit) {
			break;
		}

		/* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
		 * to avoid losing simultaneous updates to other bits like refmod. */
	} while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));

	wimg_bits_new = VM_WIMG_DEFAULT;
	if (pp_attr_template & PP_ATTR_WIMG_MASK) {
		wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
	}

	if (doit) {
		if (wimg_bits_new != wimg_bits_prev) {
			pmap_update_cache_attributes_locked(pn, cacheattr);
		}
		UNLOCK_PVH(pai);
		if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
			pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
		}
	} else {
		if (wimg_bits_new == VM_WIMG_COPYBACK) {
			return FALSE;
		}
		if (wimg_bits_prev == wimg_bits_new) {
			pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
			*res = *res - 1;
			pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
			if (!platform_cache_batch_wimg(wimg_bits_new, (*res) << PAGE_SHIFT)) {
				return FALSE;
			}
		}
		return TRUE;
	}

	if (page_cnt == (page_index + 1)) {
		wimg_bits_prev = VM_WIMG_COPYBACK;
		if (((wimg_bits_prev != wimg_bits_new))
		    && ((wimg_bits_prev == VM_WIMG_COPYBACK)
		    || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
		    && (wimg_bits_new != VM_WIMG_COPYBACK))
		    || ((wimg_bits_prev == VM_WIMG_WTHRU)
		    && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
			platform_cache_flush_wimg(wimg_bits_new);
		}
	}

	return TRUE;
}

boolean_t
pmap_batch_set_cache_attributes(
	ppnum_t pn,
	unsigned int cacheattr,
	unsigned int page_cnt,
	unsigned int page_index,
	boolean_t doit,
	unsigned int *res)
{
	return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
}
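
/*
 * Hedged usage sketch (not from the original source): the batch interface
 * appears to be driven in two passes -- first with doit == FALSE to size the
 * batch via *res and decide whether a whole-cache operation is cheaper, then
 * with doit == TRUE to apply the change page by page. The loop and the
 * pages[] array below are hypothetical.
 *
 *	unsigned int res = 0;
 *	for (unsigned int i = 0; i < cnt; i++)
 *		pmap_batch_set_cache_attributes(pages[i], attr, cnt, i, FALSE, &res);
 *	for (unsigned int i = 0; i < cnt; i++)
 *		pmap_batch_set_cache_attributes(pages[i], attr, cnt, i, TRUE, &res);
 */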
MARK_AS_PMAP_TEXT static void
pmap_set_cache_attributes_priv(
	ppnum_t pn,
	unsigned int cacheattr,
	boolean_t external __unused)
{
	pmap_paddr_t	paddr;
	int		pai;
	pp_attr_t	pp_attr_current;
	pp_attr_t	pp_attr_template;
	unsigned int	wimg_bits_prev, wimg_bits_new;

	paddr = ptoa(pn);

	if (!pa_valid(paddr)) {
		return;                         /* Not a managed page. */
	}

	if (cacheattr & VM_WIMG_USE_DEFAULT) {
		cacheattr = VM_WIMG_DEFAULT;
	}

	pai = (int)pa_index(paddr);

	LOCK_PVH(pai);

	do {
		pp_attr_current = pp_attr_table[pai];
		wimg_bits_prev = VM_WIMG_DEFAULT;
		if (pp_attr_current & PP_ATTR_WIMG_MASK) {
			wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
		}

		pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));

		/* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
		 * to avoid losing simultaneous updates to other bits like refmod. */
	} while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));

	wimg_bits_new = VM_WIMG_DEFAULT;
	if (pp_attr_template & PP_ATTR_WIMG_MASK) {
		wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
	}

	if (wimg_bits_new != wimg_bits_prev) {
		pmap_update_cache_attributes_locked(pn, cacheattr);
	}

	UNLOCK_PVH(pai);

	pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
}

MARK_AS_PMAP_TEXT static void
pmap_set_cache_attributes_internal(
	ppnum_t pn,
	unsigned int cacheattr)
{
	pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
}

void
pmap_set_cache_attributes(
	ppnum_t pn,
	unsigned int cacheattr)
{
	pmap_set_cache_attributes_internal(pn, cacheattr);
}
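
/*
 * Illustrative note on the update pattern used above (a sketch, not an
 * authoritative contract): the WIMG field of pp_attr_table[pai] is changed
 * with a compare-and-swap loop so that concurrent updates to other bits in
 * the same 16-bit word (e.g. refmod) are never lost:
 *
 *	do {
 *		old = pp_attr_table[pai];
 *		new = (old & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(attr);
 *	} while (!OSCompareAndSwap16(old, new, &pp_attr_table[pai]));
 */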
MARK_AS_PMAP_TEXT void
pmap_update_cache_attributes_locked(
	ppnum_t ppnum,
	unsigned attributes)
{
	pmap_paddr_t	phys = ptoa(ppnum);
	pv_entry_t	*pve_p;
	pt_entry_t	*pte_p;
	pv_entry_t	**pv_h;
	pt_entry_t	tmplate;
	unsigned int	pai;
	boolean_t	tlb_flush_needed = FALSE;

	PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);

#if __ARM_PTE_PHYSMAP__
	vm_offset_t kva = phystokv(phys);
	pte_p = pmap_pte(kernel_pmap, kva);

	tmplate = *pte_p;
	tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
	tmplate |= wimg_to_pte(attributes);
#if (__ARM_VMSA__ > 7)
	if (tmplate & ARM_PTE_HINT_MASK) {
		panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
		    __FUNCTION__, pte_p, (void *)kva, tmplate);
	}
#endif
	WRITE_PTE_STRONG(pte_p, tmplate);
	flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
	tlb_flush_needed = TRUE;
#endif

	pai = (unsigned int)pa_index(phys);

	pv_h = pai_to_pvh(pai);

	pte_p = PT_ENTRY_NULL;
	pve_p = PV_ENTRY_NULL;
	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		pte_p = pvh_ptep(pv_h);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_p = pvh_list(pv_h);
		pte_p = PT_ENTRY_NULL;
	}

	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		vm_map_address_t va;
		pmap_t		pmap;

		if (pve_p != PV_ENTRY_NULL) {
			pte_p = pve_get_ptep(pve_p);
		}
#ifdef PVH_FLAG_IOMMU
		if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
			goto cache_skip_pve;
		}
#endif
		pmap = ptep_get_pmap(pte_p);
		va = ptep_get_va(pte_p);

		tmplate = *pte_p;
		tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
		tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes);

		WRITE_PTE_STRONG(pte_p, tmplate);
		pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
		tlb_flush_needed = TRUE;

#ifdef PVH_FLAG_IOMMU
cache_skip_pve:
#endif
		pte_p = PT_ENTRY_NULL;
		if (pve_p != PV_ENTRY_NULL) {
			pve_p = PVE_NEXT_PTR(pve_next(pve_p));
		}
	}
	if (tlb_flush_needed) {
		sync_tlb_flush();
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);
}
#if (__ARM_VMSA__ == 7)
vm_map_address_t
pmap_create_sharedpage(
	void)
{
	pmap_paddr_t	pa;
	kern_return_t	kr;

	(void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
	memset((char *) phystokv(pa), 0, PAGE_SIZE);

	kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);

	return (vm_map_address_t)phystokv(pa);
}
#else /* __ARM_VMSA__ == 7 */

static void
pmap_update_tt3e(
	pmap_t pmap,
	vm_address_t address,
	tt_entry_t template)
{
	tt_entry_t *ptep, pte;

	ptep = pmap_tt3e(pmap, address);
	if (ptep == NULL) {
		panic("%s: no ptep?\n", __FUNCTION__);
	}

	pte = *ptep;
	pte = tte_to_pa(pte) | template;
	WRITE_PTE_STRONG(ptep, pte);
}

/* Note absence of non-global bit */
#define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
	| ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
	| ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
	| ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)

vm_map_address_t
pmap_create_sharedpage(
	void)
{
	kern_return_t	kr;
	pmap_paddr_t	pa = 0;

	(void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);

	memset((char *) phystokv(pa), 0, PAGE_SIZE);

#ifdef CONFIG_XNUPOST
	/*
	 * The kernel pmap maintains a user accessible mapping of the commpage
	 * for testing purposes.
	 */
	kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);

	/*
	 * This mapping should not be global (as we only expect to reference it
	 * during testing).
	 */
	pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);

#if KASAN
	kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
#endif
#endif /* CONFIG_XNUPOST */

	/*
	 * In order to avoid burning extra pages on mapping the shared page, we
	 * create a dedicated pmap for the shared page.  We forcibly nest the
	 * translation tables from this pmap into other pmaps.  The level we
	 * will nest at depends on the MMU configuration (page size, TTBR range,
	 * etc).
	 *
	 * Note that this is NOT "the nested pmap" (which is used to nest the
	 * shared cache).
	 *
	 * Note that we update parameters of the entry for our unique needs (NG
	 * entry, etc.).
	 */
	sharedpage_pmap = pmap_create_options(NULL, 0x0, 0);
	assert(sharedpage_pmap != NULL);

	/* The user 64-bit mapping... */
	kr = pmap_enter(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

	/* ...and the user 32-bit mapping. */
	kr = pmap_enter(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

	/* For manipulation in kernel, go straight to physical page */
	return (vm_map_address_t)phystokv(pa);
}
#endif /* __ARM_VMSA__ == 7 */
/*
 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
 * with user controlled TTEs.
 */
#if (ARM_PGSHIFT == 14)
static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= MACH_VM_MAX_ADDRESS);
static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
#elif (ARM_PGSHIFT == 12)
static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
#else
#error Nested shared page mapping is unsupported on this config
#endif
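
/*
 * Worked example of the granule arithmetic behind the asserts above (an
 * informal sketch): with 4KB pages (ARM_PGSHIFT == 12) each L1 entry spans
 * 512 * 512 * 4KB = 1GB, so the commpage can be nested at L1 without
 * overlapping user TTEs; with 16KB pages (ARM_PGSHIFT == 14) each L1 entry
 * spans 2048 * 2048 * 16KB = 64GB, so nesting drops to an L2 entry, which
 * spans 2048 * 16KB = 32MB.
 */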
MARK_AS_PMAP_TEXT static kern_return_t
pmap_insert_sharedpage_internal(
	pmap_t pmap)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_offset_t sharedpage_vaddr;
	pt_entry_t *ttep, *src_ttep;
	int options = 0;

	VALIDATE_PMAP(pmap);

#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
#endif

	if (pmap_is_64bit(pmap)) {
		sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
	} else {
		sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
	}

	PMAP_LOCK(pmap);

	/*
	 * For 4KB pages, we can force the commpage to nest at the level one
	 * page table, as each entry is 1GB (i.e, there will be no overlap
	 * with regular userspace mappings).  For 16KB pages, each level one
	 * entry is 64GB, so we must go to the second level entry (32MB) in
	 * order to nest.
	 */
#if (ARM_PGSHIFT == 12)
	(void) options;

	/* Just slam in the L1 entry. */
	ttep = pmap_tt1e(pmap, sharedpage_vaddr);

	if (*ttep != ARM_PTE_EMPTY) {
		panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
	}

	src_ttep = pmap_tt1e(sharedpage_pmap, sharedpage_vaddr);
#elif (ARM_PGSHIFT == 14)
	/* Allocate for the L2 entry if necessary, and slam it into place. */
	/*
	 * As long as we are use a three level page table, the first level
	 * should always exist, so we don't need to check for it.
	 */
	while (*pmap_tt1e(pmap, sharedpage_vaddr) == ARM_PTE_EMPTY) {
		PMAP_UNLOCK(pmap);

		kr = pmap_expand(pmap, sharedpage_vaddr, options, PMAP_TT_L2_LEVEL);

		if (kr != KERN_SUCCESS) {
			panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
		}

		PMAP_LOCK(pmap);
	}

	ttep = pmap_tt2e(pmap, sharedpage_vaddr);

	if (*ttep != ARM_PTE_EMPTY) {
		panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
	}

	src_ttep = pmap_tt2e(sharedpage_pmap, sharedpage_vaddr);
#endif

	*ttep = *src_ttep;
	FLUSH_PTE_STRONG(ttep);

	/* TODO: Should we flush in the 64-bit case? */
	flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);

#if (ARM_PGSHIFT == 12)
	flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
#elif (ARM_PGSHIFT == 14)
	flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
#endif
	sync_tlb_flush();

	PMAP_UNLOCK(pmap);

	return kr;
}
static void
pmap_unmap_sharedpage(
	pmap_t pmap)
{
	pt_entry_t *ttep;
	vm_offset_t sharedpage_vaddr;

#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
#endif

	if (pmap_is_64bit(pmap)) {
		sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
	} else {
		sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
	}

#if (ARM_PGSHIFT == 12)
	ttep = pmap_tt1e(pmap, sharedpage_vaddr);

	if (ttep == NULL) {
		return;
	}

	/* It had better be mapped to the shared page */
	if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt1e(sharedpage_pmap, sharedpage_vaddr)) {
		panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
	}
#elif (ARM_PGSHIFT == 14)
	ttep = pmap_tt2e(pmap, sharedpage_vaddr);

	if (ttep == NULL) {
		return;
	}

	/* It had better be mapped to the shared page */
	if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt2e(sharedpage_pmap, sharedpage_vaddr)) {
		panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
	}
#endif

	*ttep = ARM_TTE_EMPTY;
	flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);

#if (ARM_PGSHIFT == 12)
	flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
#elif (ARM_PGSHIFT == 14)
	flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
#endif
	sync_tlb_flush();
}

void
pmap_insert_sharedpage(
	pmap_t pmap)
{
	pmap_insert_sharedpage_internal(pmap);
}

static boolean_t
pmap_is_64bit(
	pmap_t pmap)
{
	return pmap->is_64bit;
}

#endif /* __ARM_VMSA__ > 7 */

/* ARMTODO -- an implementation that accounts for
 * holes in the physical map, if any.
 */
boolean_t
pmap_valid_page(
	ppnum_t pn)
{
	return pa_valid(ptoa(pn));
}

boolean_t
pmap_bootloader_page(
	ppnum_t pn)
{
	pmap_paddr_t paddr = ptoa(pn);

	if (pa_valid(paddr)) {
		return FALSE;
	}
	pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
	return (io_rgn != NULL) && (io_rgn->wimg & PMAP_IO_RANGE_CARVEOUT);
}
MARK_AS_PMAP_TEXT static boolean_t
pmap_is_empty_internal(
	pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end)
{
	vm_map_offset_t block_start, block_end;
	tt_entry_t *tte_p;

	if (pmap == NULL) {
		return TRUE;
	}

	VALIDATE_PMAP(pmap);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	unsigned int initial_not_in_kdp = not_in_kdp;

	if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
		PMAP_LOCK(pmap);
	}

#if (__ARM_VMSA__ == 7)
	if (tte_index(pmap, pt_attr, va_end) >= pmap->tte_index_max) {
		if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
			PMAP_UNLOCK(pmap);
		}
		return TRUE;
	}
#endif

	/* TODO: This will be faster if we increment ttep at each level. */
	block_start = va_start;

	while (block_start < va_end) {
		pt_entry_t *bpte_p, *epte_p;
		pt_entry_t *pte_p;

		block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
		if (block_end > va_end) {
			block_end = va_end;
		}

		tte_p = pmap_tte(pmap, block_start);
		if ((tte_p != PT_ENTRY_NULL)
		    && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
			pte_p = (pt_entry_t *) ttetokv(*tte_p);
			bpte_p = &pte_p[pte_index(pmap, pt_attr, block_start)];
			epte_p = &pte_p[pte_index(pmap, pt_attr, block_end)];

			for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
				if (*pte_p != ARM_PTE_EMPTY) {
					if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
						PMAP_UNLOCK(pmap);
					}
					return FALSE;
				}
			}
		}
		block_start = block_end;
	}

	if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
		PMAP_UNLOCK(pmap);
	}

	return TRUE;
}

boolean_t
pmap_is_empty(
	pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end)
{
	return pmap_is_empty_internal(pmap, va_start, va_end);
}
vm_map_offset_t
pmap_max_offset(
	boolean_t is64,
	unsigned int option)
{
	return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
}

vm_map_offset_t
pmap_max_64bit_offset(
	__unused unsigned int option)
{
	vm_map_offset_t max_offset_ret = 0;

#if defined(__arm64__)
	const vm_map_offset_t min_max_offset = SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000; // end of shared region + 512MB for various purposes
	if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
		max_offset_ret = arm64_pmap_max_offset_default;
	} else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
		max_offset_ret = min_max_offset;
	} else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
		max_offset_ret = MACH_VM_MAX_ADDRESS;
	} else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
		if (arm64_pmap_max_offset_default) {
			max_offset_ret = arm64_pmap_max_offset_default;
		} else if (max_mem > 0xC0000000) {
			max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
		} else if (max_mem > 0x40000000) {
			max_offset_ret = min_max_offset + 0x38000000;  // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
		} else {
			max_offset_ret = min_max_offset;
		}
	} else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
		if (arm64_pmap_max_offset_default) {
			// Allow the boot-arg to override jumbo size
			max_offset_ret = arm64_pmap_max_offset_default;
		} else {
			max_offset_ret = MACH_VM_MAX_ADDRESS;     // Max offset is 64GB for pmaps with special "jumbo" blessing
		}
	} else {
		panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
	}

	assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
	assert(max_offset_ret >= min_max_offset);
#else
	panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
#endif

	return max_offset_ret;
}
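
/*
 * Illustrative mapping of options to results (a sketch based on the code
 * above, not a guarantee): ARM_PMAP_MAX_OFFSET_MIN yields min_max_offset,
 * i.e. the end of the arm64 shared region plus 512MB; ARM_PMAP_MAX_OFFSET_DEVICE
 * adds 0x138000000 (4.875GB) on devices with more than 3GB of memory or
 * 0x38000000 (0.875GB) on 1-3GB devices; ARM_PMAP_MAX_OFFSET_MAX and a
 * "jumbo" pmap get MACH_VM_MAX_ADDRESS. A hypothetical caller:
 *
 *	vm_map_offset_t max = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
 */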
vm_map_offset_t
pmap_max_32bit_offset(
	unsigned int option)
{
	vm_map_offset_t max_offset_ret = 0;

	if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
		max_offset_ret = arm_pmap_max_offset_default;
	} else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
		max_offset_ret = 0x80000000;
	} else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
		max_offset_ret = VM_MAX_ADDRESS;
	} else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
		if (arm_pmap_max_offset_default) {
			max_offset_ret = arm_pmap_max_offset_default;
		} else if (max_mem > 0x20000000) {
			max_offset_ret = 0x80000000;
		} else {
			max_offset_ret = 0x80000000;
		}
	} else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
		max_offset_ret = 0x80000000;
	} else {
		panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
	}

	assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
	return max_offset_ret;
}
#if CONFIG_DTRACE
/*
 * Constrain DTrace copyin/copyout actions
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

kern_return_t
dtrace_copyio_preflight(
	__unused addr64_t va)
{
	if (current_map() == kernel_map) {
		return KERN_FAILURE;
	} else {
		return KERN_SUCCESS;
	}
}

kern_return_t
dtrace_copyio_postflight(
	__unused addr64_t va)
{
	return KERN_SUCCESS;
}
#endif /* CONFIG_DTRACE */
void
pmap_flush_context_init(__unused pmap_flush_context *pfc)
{
}

void
pmap_flush(
	__unused pmap_flush_context *cpus_to_flush)
{
	/* not implemented yet */
	return;
}

static void __unused
pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}

static void __unused
pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}
#define PMAP_RESIDENT_INVALID	((mach_vm_size_t)-1)

MARK_AS_PMAP_TEXT static mach_vm_size_t
pmap_query_resident_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	mach_vm_size_t *compressed_bytes_p)
{
	mach_vm_size_t	resident_bytes = 0;
	mach_vm_size_t	compressed_bytes = 0;

	pt_entry_t	*bpte, *epte;
	pt_entry_t	*pte_p;
	tt_entry_t	*tte_p;

	if (pmap == NULL) {
		return PMAP_RESIDENT_INVALID;
	}

	VALIDATE_PMAP(pmap);

	/* Ensure that this request is valid, and addresses exactly one TTE. */
	if (__improbable((start % ARM_PGBYTES) || (end % ARM_PGBYTES))) {
		panic("%s: address range %p, %p not page-aligned", __func__, (void*)start, (void*)end);
	}

	if (__improbable((end < start) || ((end - start) > (PTE_PGENTRIES * ARM_PGBYTES)))) {
		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
	}

	PMAP_LOCK(pmap);
	tte_p = pmap_tte(pmap, start);
	if (tte_p == (tt_entry_t *) NULL) {
		PMAP_UNLOCK(pmap);
		return PMAP_RESIDENT_INVALID;
	}
	if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
		pte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte = &pte_p[pte_index(pmap, pt_attr, start)];
		epte = &pte_p[pte_index(pmap, pt_attr, end)];

		for (; bpte < epte; bpte++) {
			if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
				compressed_bytes += ARM_PGBYTES;
			} else if (pa_valid(pte_to_pa(*bpte))) {
				resident_bytes += ARM_PGBYTES;
			}
		}
	}
	PMAP_UNLOCK(pmap);

	if (compressed_bytes_p) {
		pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
		*compressed_bytes_p += compressed_bytes;
		pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
	}

	return resident_bytes;
}

mach_vm_size_t
pmap_query_resident(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	mach_vm_size_t *compressed_bytes_p)
{
	mach_vm_size_t total_resident_bytes;
	mach_vm_size_t compressed_bytes;
	vm_map_address_t va;

	if (pmap == PMAP_NULL) {
		if (compressed_bytes_p) {
			*compressed_bytes_p = 0;
		}
		return 0;
	}

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	total_resident_bytes = 0;
	compressed_bytes = 0;

	PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
	    VM_KERNEL_ADDRHIDE(end));

	va = start;
	while (va < end) {
		vm_map_address_t l;
		mach_vm_size_t resident_bytes;

		l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));

		if (l > end) {
			l = end;
		}
		resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
		if (resident_bytes == PMAP_RESIDENT_INVALID) {
			break;
		}

		total_resident_bytes += resident_bytes;

		va = l;
	}

	if (compressed_bytes_p) {
		*compressed_bytes_p = compressed_bytes;
	}

	PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
	    total_resident_bytes);

	return total_resident_bytes;
}
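
/*
 * Usage sketch (hypothetical caller, for illustration only): the public
 * routine walks the range one twig (leaf table) at a time so each internal
 * call touches exactly one page table page. The names vme_start/vme_end are
 * placeholders.
 *
 *	mach_vm_size_t compressed = 0;
 *	mach_vm_size_t resident =
 *	    pmap_query_resident(map->pmap, vme_start, vme_end, &compressed);
 */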
#if MACH_ASSERT
static void
pmap_check_ledgers(
	pmap_t pmap)
{
	int	pid;
	char	*procname;

	if (pmap->pmap_pid == 0) {
		/*
		 * This pmap was not or is no longer fully associated
		 * with a task (e.g. the old pmap after a fork()/exec() or
		 * spawn()).  Its "ledger" still points at a task that is
		 * now using a different (and active) address space, so
		 * we can't check that all the pmap ledgers are balanced here.
		 *
		 * If the "pid" is set, that means that we went through
		 * pmap_set_process() in task_terminate_internal(), so
		 * this task's ledger should not have been re-used and
		 * all the pmap ledgers should be back to 0.
		 */
		return;
	}

	pid = pmap->pmap_pid;
	procname = pmap->pmap_procname;

	vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);

	PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
	PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
	PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
	PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
	PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
	PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
	PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
}
#endif /* MACH_ASSERT */
void
pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
{
}

#define PROF_START  uint64_t t, nanot;\
	t = mach_absolute_time();

#define PROF_END    absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
	kprintf("%s: took %llu ns\n", __func__, nanot);

#define PMAP_PGTRACE_LOCK(p)                                \
	do {                                                \
		*(p) = ml_set_interrupts_enabled(false);    \
		if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
		ml_set_interrupts_enabled(*(p));            \
	} while (true)

#define PMAP_PGTRACE_UNLOCK(p)                      \
	do {                                        \
		simple_unlock(&(pmap_pgtrace.lock));\
		ml_set_interrupts_enabled(*(p));    \
	} while (0)

#define PGTRACE_WRITE_PTE(pte_p, pte_entry)         \
	do {                                        \
		*(pte_p) = (pte_entry);             \
		FLUSH_PTE(pte_p);                   \
	} while (0)

#define PGTRACE_MAX_MAP 16      // maximum supported va to same pa
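
/*
 * Sketch of how the macros above are meant to be used (illustrative only):
 * the lock macro disables interrupts and spins on the pgtrace simple lock,
 * so a critical section looks like
 *
 *	bool ints;
 *	PMAP_PGTRACE_LOCK(&ints);
 *	// ... inspect or modify pmap_pgtrace.pages ...
 *	PMAP_PGTRACE_UNLOCK(&ints);
 */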
typedef enum {
	UNDEFINED,
	PA_UNDEFINED,
	VA_UNDEFINED,
	DEFINED
} pmap_pgtrace_page_state_t;

typedef struct {
	queue_chain_t	chain;

	/*
	 *   pa             - pa
	 *   maps           - list of va maps to upper pa
	 *   map_pool       - map pool
	 *   map_waste      - waste can
	 *   state          - state
	 */
	pmap_paddr_t	pa;
	queue_head_t	maps;
	queue_head_t	map_pool;
	queue_head_t	map_waste;
	pmap_pgtrace_page_state_t	state;
} pmap_pgtrace_page_t;

static struct {
	/*
	 *   pages       - list of tracing page info
	 */
	queue_head_t	pages;
	decl_simple_lock_data(, lock);
} pmap_pgtrace = {};

static void
pmap_pgtrace_init(void)
{
	queue_init(&(pmap_pgtrace.pages));
	simple_lock_init(&(pmap_pgtrace.lock), 0);

	boolean_t enabled;

	if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
		pgtrace_enabled = enabled;
	}
}

// find a page with given pa - pmap_pgtrace should be locked
inline static pmap_pgtrace_page_t *
pmap_pgtrace_find_page(pmap_paddr_t pa)
{
	queue_head_t *q = &(pmap_pgtrace.pages);
	pmap_pgtrace_page_t *p;

	queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
		if (p->state == UNDEFINED) {
			continue;
		}
		if (p->state == PA_UNDEFINED) {
			continue;
		}
		if (p->pa == pa) {
			return p;
		}
	}

	return NULL;
}
// enter clone of given pmap, va page and range - pmap should be locked
static bool
pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
{
	bool ints;
	queue_head_t *q = &(pmap_pgtrace.pages);
	pmap_paddr_t pa_page;
	pt_entry_t *ptep, *cptep;
	pmap_pgtrace_page_t *p;
	bool found = false;

	PMAP_ASSERT_LOCKED(pmap);
	assert(va_page == arm_trunc_page(va_page));

	PMAP_PGTRACE_LOCK(&ints);

	ptep = pmap_pte(pmap, va_page);

	// target pte should exist
	if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
		PMAP_PGTRACE_UNLOCK(&ints);
		return false;
	}

	queue_head_t *mapq;
	queue_head_t *mappool;
	pmap_pgtrace_map_t *map = NULL;

	pa_page = pte_to_pa(*ptep);

	// find if we have a page info defined for this
	queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
		mapq = &(p->maps);
		mappool = &(p->map_pool);

		switch (p->state) {
		case PA_UNDEFINED:
			queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
				if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
					p->pa = pa_page;
					map->range.start = start;
					map->range.end = end;
					found = true;
					break;
				}
			}
			break;

		case VA_UNDEFINED:
			if (p->pa != pa_page) {
				break;
			}
			queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
				if (map->cloned == false) {
					map->pmap = pmap;
					map->ova = va_page;
					map->range.start = start;
					map->range.end = end;
					found = true;
					break;
				}
			}
			break;

		case DEFINED:
			if (p->pa != pa_page) {
				break;
			}
			queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
				if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
					kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
					break;
				} else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
					kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
					break;
				} else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
					// range should be already defined as well
					found = true;
					break;
				}
			}
			break;

		default:
			panic("invalid state p->state=%x\n", p->state);
		}

		if (found == true) {
			break;
		}
	}

	// do not clone if no page info found
	if (found == false) {
		PMAP_PGTRACE_UNLOCK(&ints);
		return false;
	}

	// copy pre, target and post ptes to clone ptes
	for (int i = 0; i < 3; i++) {
		ptep = pmap_pte(pmap, va_page + (i - 1) * ARM_PGBYTES);
		cptep = pmap_pte(kernel_pmap, map->cva[i]);
		assert(cptep != NULL);
		if (ptep == NULL) {
			PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
		} else {
			PGTRACE_WRITE_PTE(cptep, *ptep);
		}
		PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
	}

	// get ptes for original and clone
	ptep = pmap_pte(pmap, va_page);
	cptep = pmap_pte(kernel_pmap, map->cva[1]);

	// invalidate original pte and mark it as a pgtrace page
	PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
	PMAP_UPDATE_TLBS(pmap, map->ova, map->ova + ARM_PGBYTES, false);

	map->cloned = true;
	p->state = DEFINED;

	kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);

	PMAP_PGTRACE_UNLOCK(&ints);

	return true;
}
// This function removes trace bit and validate pte if applicable. Pmap must be locked.
static void
pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
{
	bool ints, found = false;
	pmap_pgtrace_page_t *p;
	pt_entry_t *ptep;

	PMAP_PGTRACE_LOCK(&ints);

	// we must have this page info
	p = pmap_pgtrace_find_page(pa);
	if (p == NULL) {
		PMAP_PGTRACE_UNLOCK(&ints);
		return;
	}

	// find matching map
	queue_head_t *mapq = &(p->maps);
	queue_head_t *mappool = &(p->map_pool);
	pmap_pgtrace_map_t *map;

	queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
		if (map->pmap == pmap && map->ova == va) {
			found = true;
			break;
		}
	}

	if (!found) {
		PMAP_PGTRACE_UNLOCK(&ints);
		return;
	}

	if (map->cloned == true) {
		// Restore back the pte to original state
		ptep = pmap_pte(pmap, map->ova);
		assert(ptep);
		PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
		PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);

		// revert clone pages
		for (int i = 0; i < 3; i++) {
			ptep = pmap_pte(kernel_pmap, map->cva[i]);
			assert(ptep != NULL);
			PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
			PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
		}
	}

	queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
	map->pmap = NULL;
	map->ova = (vm_map_offset_t)NULL;
	map->cloned = false;
	queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);

	kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);

	PMAP_PGTRACE_UNLOCK(&ints);
}
// remove all clones of given pa - pmap must be locked
static void
pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
{
	bool ints;
	pmap_pgtrace_page_t *p;
	pt_entry_t *ptep;

	PMAP_PGTRACE_LOCK(&ints);

	// we must have this page info
	p = pmap_pgtrace_find_page(pa);
	if (p == NULL) {
		PMAP_PGTRACE_UNLOCK(&ints);
		return;
	}

	queue_head_t *mapq = &(p->maps);
	queue_head_t *mappool = &(p->map_pool);
	queue_head_t *mapwaste = &(p->map_waste);
	pmap_pgtrace_map_t *map;

	// move maps to waste
	while (!queue_empty(mapq)) {
		queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
		queue_enter_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
	}

	PMAP_PGTRACE_UNLOCK(&ints);

	// sanitize maps in waste
	queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
		if (map->cloned == true) {
			PMAP_LOCK(map->pmap);

			// restore back original pte
			ptep = pmap_pte(map->pmap, map->ova);
			assert(ptep);
			PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
			PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova + ARM_PGBYTES, false);

			// revert clone ptes
			for (int i = 0; i < 3; i++) {
				ptep = pmap_pte(kernel_pmap, map->cva[i]);
				assert(ptep != NULL);
				PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
				PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
			}

			PMAP_UNLOCK(map->pmap);
		}

		map->pmap = NULL;
		map->ova = (vm_map_offset_t)NULL;
		map->cloned = false;
	}

	PMAP_PGTRACE_LOCK(&ints);

	// recycle maps back to map_pool
	while (!queue_empty(mapwaste)) {
		queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
		queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
	}

	PMAP_PGTRACE_UNLOCK(&ints);
}
static void
pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
{
	uint64_t tsz;
	vm_map_offset_t end;

	if (pmap == kernel_pmap) {
		tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
		*startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64 - tsz)) << (64 - tsz));
		*endp = VM_MAX_KERNEL_ADDRESS;
	} else {
		tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
		if (tsz == 64) {
			end = 0;
		} else {
			end = ((uint64_t)1 << (64 - tsz)) - 1;
		}

		*startp = 0;
		*endp = end;
	}

	assert(*endp > *startp);
}
// has pa mapped in given pmap? then clone it
static uint64_t
pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
{
	uint64_t ret = 0;
	vm_map_offset_t min, max;
	vm_map_offset_t cur_page, end_page;
	pt_entry_t *ptep;
	tt_entry_t *ttep;
	tt_entry_t tte;
	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	pmap_pgtrace_get_search_space(pmap, &min, &max);

	cur_page = arm_trunc_page(min);
	end_page = arm_trunc_page(max);
	while (cur_page <= end_page) {
		vm_map_offset_t add = 0;

		PMAP_LOCK(pmap);

		// skip uninterested space
		if (pmap == kernel_pmap &&
		    ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
		    (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
			add = ARM_PGBYTES;
			goto unlock_continue;
		}

		// check whether we can skip l1
		ttep = pmap_tt1e(pmap, cur_page);
		assert(ttep);
		tte = *ttep;
		if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
			add = ARM_TT_L1_SIZE;
			goto unlock_continue;
		}

		// how about l2
		tte = ((tt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, cur_page)];

		if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
			add = ARM_TT_L2_SIZE;
			goto unlock_continue;
		}

		// ptep finally
		ptep = &(((pt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, cur_page)]);
		if (ptep == PT_ENTRY_NULL) {
			add = ARM_TT_L3_SIZE;
			goto unlock_continue;
		}

		if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
			if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
				ret++;
			}
		}

		add = ARM_PGBYTES;

unlock_continue:
		PMAP_UNLOCK(pmap);

		// overflow
		if (cur_page + add < cur_page) {
			break;
		}

		cur_page += add;
	}

	return ret;
}
// search pv table and clone vas of given pa
static uint64_t
pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
{
	uint64_t ret = 0;
	unsigned long pai;
	pv_entry_t **pvh;
	pt_entry_t *ptep;
	pmap_t pmap;

	typedef struct {
		queue_chain_t chain;
		pmap_t pmap;
		vm_map_offset_t va;
	} pmap_va_t;

	queue_head_t pmapvaq;
	pmap_va_t *pmapva;

	queue_init(&pmapvaq);

	pai = pa_index(pa);
	LOCK_PVH(pai);
	pvh = pai_to_pvh(pai);

	// collect pmap/va pair from pvh
	if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
		ptep = pvh_ptep(pvh);
		pmap = ptep_get_pmap(ptep);

		pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
		pmapva->pmap = pmap;
		pmapva->va = ptep_get_va(ptep);

		queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
	} else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
		pv_entry_t *pvep;

		pvep = pvh_list(pvh);
		while (pvep) {
			ptep = pve_get_ptep(pvep);
			pmap = ptep_get_pmap(ptep);

			pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
			pmapva->pmap = pmap;
			pmapva->va = ptep_get_va(ptep);

			queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);

			pvep = PVE_NEXT_PTR(pve_next(pvep));
		}
	}

	UNLOCK_PVH(pai);

	// clone them while making sure mapping still exists
	queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
		PMAP_LOCK(pmapva->pmap);
		ptep = pmap_pte(pmapva->pmap, pmapva->va);
		if (pte_to_pa(*ptep) == pa) {
			if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
				ret++;
			}
		}
		PMAP_UNLOCK(pmapva->pmap);

		kfree(pmapva, sizeof(pmap_va_t));
	}

	return ret;
}
// allocate a page info
static pmap_pgtrace_page_t *
pmap_pgtrace_alloc_page(void)
{
	pmap_pgtrace_page_t *p;
	queue_head_t *mapq;
	queue_head_t *mappool;
	queue_head_t *mapwaste;
	pmap_pgtrace_map_t *map;

	p = kalloc(sizeof(pmap_pgtrace_page_t));
	assert(p);

	p->state = UNDEFINED;

	mapq = &(p->maps);
	mappool = &(p->map_pool);
	mapwaste = &(p->map_waste);
	queue_init(mapq);
	queue_init(mappool);
	queue_init(mapwaste);

	for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
		vm_map_offset_t newcva;
		pt_entry_t *cptep;
		kern_return_t kr;
		vm_map_entry_t entry;

		// get a clone va
		vm_object_reference(kernel_object);
		kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3 * ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
		if (kr != KERN_SUCCESS) {
			panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
		}
		VME_OBJECT_SET(entry, kernel_object);
		VME_OFFSET_SET(entry, newcva);
		vm_map_unlock(kernel_map);

		// fill default clone page info and add to pool
		map = kalloc(sizeof(pmap_pgtrace_map_t));
		for (int j = 0; j < 3; j++) {
			vm_map_offset_t addr = newcva + j * ARM_PGBYTES;

			// pre-expand pmap while preemption enabled
			kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
			if (kr != KERN_SUCCESS) {
				panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
			}

			cptep = pmap_pte(kernel_pmap, addr);
			assert(cptep != NULL);

			map->cva[j] = addr;
			map->cva_spte[j] = *cptep;
		}
		map->range.start = map->range.end = 0;
		map->cloned = false;
		queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
	}

	return p;
}
// free a page info
static void
pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
{
	queue_head_t *mapq;
	queue_head_t *mappool;
	queue_head_t *mapwaste;
	pmap_pgtrace_map_t *map;

	assert(p);

	mapq = &(p->maps);
	mappool = &(p->map_pool);
	mapwaste = &(p->map_waste);

	while (!queue_empty(mapq)) {
		queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
		kfree(map, sizeof(pmap_pgtrace_map_t));
	}

	while (!queue_empty(mappool)) {
		queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
		kfree(map, sizeof(pmap_pgtrace_map_t));
	}

	while (!queue_empty(mapwaste)) {
		queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
		kfree(map, sizeof(pmap_pgtrace_map_t));
	}

	kfree(p, sizeof(pmap_pgtrace_page_t));
}
// construct page infos with the given address range
int
pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
	int ret = 0;
	pt_entry_t *ptep;
	queue_head_t *q = &(pmap_pgtrace.pages);
	bool ints;
	vm_map_offset_t cur_page, end_page;

	if (start > end) {
		kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
		return -1;
	}

	PROF_START

	// add each page in given range
	cur_page = arm_trunc_page(start);
	end_page = arm_trunc_page(end);
	while (cur_page <= end_page) {
		pmap_paddr_t pa_page = 0;
		uint64_t num_cloned = 0;
		pmap_pgtrace_page_t *p = NULL, *newp;
		bool free_newp = true;
		pmap_pgtrace_page_state_t state;

		// do all allocations outside of spinlocks
		newp = pmap_pgtrace_alloc_page();

		// keep lock orders in pmap, kernel_pmap and pgtrace lock
		if (pmap != NULL) {
			PMAP_LOCK(pmap);
		}
		if (pmap != kernel_pmap) {
			PMAP_LOCK(kernel_pmap);
		}

		// addresses are physical if pmap is null
		if (pmap == NULL) {
			ptep = NULL;
			pa_page = cur_page;
			state = VA_UNDEFINED;
		} else {
			ptep = pmap_pte(pmap, cur_page);
			if (ptep != NULL) {
				pa_page = pte_to_pa(*ptep);
				state = DEFINED;
			} else {
				state = PA_UNDEFINED;
			}
		}

		// search if we have a page info already
		PMAP_PGTRACE_LOCK(&ints);
		if (state != PA_UNDEFINED) {
			p = pmap_pgtrace_find_page(pa_page);
		}

		// add pre-allocated page info if nothing found
		if (p == NULL) {
			queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
			p = newp;
			free_newp = false;
		}

		// now p points what we want
		p->state = state;

		queue_head_t *mapq = &(p->maps);
		queue_head_t *mappool = &(p->map_pool);
		pmap_pgtrace_map_t *map;
		vm_map_offset_t start_offset, end_offset;

		// calculate trace offsets in the page
		if (cur_page > start) {
			start_offset = 0;
		} else {
			start_offset = start - cur_page;
		}
		if (cur_page == end_page) {
			end_offset = end - end_page;
		} else {
			end_offset = ARM_PGBYTES - 1;
		}

		kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);

		// fill map info
		assert(!queue_empty(mappool));
		queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
		if (p->state == PA_UNDEFINED) {
			map->pmap = pmap;
			map->ova = cur_page;
			map->range.start = start_offset;
			map->range.end = end_offset;
		} else if (p->state == VA_UNDEFINED) {
			p->pa = pa_page;
			map->range.start = start_offset;
			map->range.end = end_offset;
		} else if (p->state == DEFINED) {
			p->pa = pa_page;
			map->pmap = pmap;
			map->ova = cur_page;
			map->range.start = start_offset;
			map->range.end = end_offset;
		} else {
			panic("invalid p->state=%d\n", p->state);
		}

		// not cloned yet
		map->cloned = false;
		queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);

		// unlock locks
		PMAP_PGTRACE_UNLOCK(&ints);
		if (pmap != kernel_pmap) {
			PMAP_UNLOCK(kernel_pmap);
		}
		if (pmap != NULL) {
			PMAP_UNLOCK(pmap);
		}

		// now clone it
		if (pa_valid(pa_page)) {
			num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
		}
		if (pmap == NULL) {
			num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
		} else {
			num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
		}

		// free pre-allocations if we didn't add it to the q
		if (free_newp) {
			pmap_pgtrace_free_page(newp);
		}

		if (num_cloned == 0) {
			kprintf("%s: no mapping found for pa_page=%llx but will be added when a page entered\n", __func__, pa_page);
		}

		ret += num_cloned;

		// overflow
		if (cur_page + ARM_PGBYTES < cur_page) {
			break;
		} else {
			cur_page += ARM_PGBYTES;
		}
	}

	PROF_END

	return ret;
}
// delete page infos for given address range
int
pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
	int ret = 0;
	bool ints;
	queue_head_t *q = &(pmap_pgtrace.pages);
	pmap_pgtrace_page_t *p;
	vm_map_offset_t cur_page, end_page;

	kprintf("%s start=%llx end=%llx\n", __func__, start, end);

	PROF_START

	pt_entry_t *ptep;
	pmap_paddr_t pa_page;

	// remove page info from start to end
	cur_page = arm_trunc_page(start);
	end_page = arm_trunc_page(end);
	while (cur_page <= end_page) {
		p = NULL;

		if (pmap == NULL) {
			pa_page = cur_page;
		} else {
			PMAP_LOCK(pmap);
			ptep = pmap_pte(pmap, cur_page);
			if (ptep == NULL) {
				PMAP_UNLOCK(pmap);
				goto cont;
			}
			pa_page = pte_to_pa(*ptep);
			PMAP_UNLOCK(pmap);
		}

		// remove all clones and validate
		pmap_pgtrace_remove_all_clone(pa_page);

		// find page info and delete
		PMAP_PGTRACE_LOCK(&ints);
		p = pmap_pgtrace_find_page(pa_page);
		if (p != NULL) {
			queue_remove(q, p, pmap_pgtrace_page_t *, chain);
			ret++;
		}
		PMAP_PGTRACE_UNLOCK(&ints);

		// free outside of locks
		if (p != NULL) {
			pmap_pgtrace_free_page(p);
		}

cont:
		// overflow
		if (cur_page + ARM_PGBYTES < cur_page) {
			break;
		} else {
			cur_page += ARM_PGBYTES;
		}
	}

	PROF_END

	return ret;
}
kern_return_t
pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
{
	pt_entry_t *ptep;
	pgtrace_run_result_t res;
	pmap_pgtrace_page_t *p;
	bool ints, found = false;
	pmap_paddr_t pa;

	// Quick check if we are interested
	ptep = pmap_pte(pmap, va);
	if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
		return KERN_FAILURE;
	}

	PMAP_PGTRACE_LOCK(&ints);

	// Check again since access is serialized
	ptep = pmap_pte(pmap, va);
	if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
		PMAP_PGTRACE_UNLOCK(&ints);
		return KERN_FAILURE;
	} else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
		// Somehow this cpu's tlb has not updated
		kprintf("%s Somehow this cpu's tlb has not updated?\n", __func__);
		PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);

		PMAP_PGTRACE_UNLOCK(&ints);
		return KERN_SUCCESS;
	}

	// Find if this pa is what we are tracing
	pa = pte_to_pa(*ptep);

	p = pmap_pgtrace_find_page(arm_trunc_page(pa));
	if (p == NULL) {
		panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
	}

	// find if pmap and va are also matching
	queue_head_t *mapq = &(p->maps);
	queue_head_t *mapwaste = &(p->map_waste);
	pmap_pgtrace_map_t *map;

	queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
		if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
			found = true;
			break;
		}
	}

	// if not found, search map waste as they are still valid
	if (!found) {
		queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
			if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
				found = true;
				break;
			}
		}
	}

	if (!found) {
		panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
	}

	// Decode and run it on the clone map
	bzero(&res, sizeof(res));
	pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss),     // instruction
	    va, map->cva,                                               // fault va and clone page vas
	    ss, &res);

	// write a log if in range
	vm_map_offset_t offset = va - map->ova;
	if (map->range.start <= offset && offset <= map->range.end) {
		pgtrace_write_log(res);
	}

	PMAP_PGTRACE_UNLOCK(&ints);

	// Return to next instruction
	add_saved_state_pc(ss, sizeof(uint32_t));

	return KERN_SUCCESS;
}
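
/*
 * Informal summary of the fault path above (a sketch of the observed flow,
 * not an authoritative specification): a traced page has its PTE invalidated
 * and tagged with ARM_PTE_PGTRACE, so the first access faults here; the
 * faulting instruction is decoded and emulated against the kernel clone
 * mapping (map->cva[1]), a log record is emitted if the access falls inside
 * map->range, and the saved PC is advanced past the instruction. A
 * hypothetical fault handler would do something like:
 *
 *	if (pmap_pgtrace_fault(pmap, fault_va, state) == KERN_SUCCESS) {
 *		return;     // access emulated, continue the thread
 *	}
 */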
boolean_t
pmap_enforces_execute_only(
#if (__ARM_VMSA__ == 7)
	__unused
#endif
	pmap_t pmap)
{
#if (__ARM_VMSA__ > 7)
	return pmap != kernel_pmap;
#else
	return FALSE;
#endif
}

MARK_AS_PMAP_TEXT void
pmap_set_jit_entitled_internal(
	__unused pmap_t pmap)
{
	return;
}

void
pmap_set_jit_entitled(
	pmap_t pmap)
{
	pmap_set_jit_entitled_internal(pmap);
}
MARK_AS_PMAP_TEXT static kern_return_t
pmap_query_page_info_internal(
	pmap_t		pmap,
	vm_map_offset_t	va,
	int		*disp_p)
{
	pmap_paddr_t	pa;
	int		disp;
	int		pai;
	pt_entry_t	*pte;
	pv_entry_t	**pv_h, *pve_p;

	if (pmap == PMAP_NULL || pmap == kernel_pmap) {
		pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
		*disp_p = 0;
		pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
		return KERN_INVALID_ARGUMENT;
	}

	disp = 0;

	VALIDATE_PMAP(pmap);
	PMAP_LOCK(pmap);

	pte = pmap_pte(pmap, va);
	if (pte == PT_ENTRY_NULL) {
		goto done;
	}

	pa = pte_to_pa(*pte);
	if (pa == 0) {
		if (ARM_PTE_IS_COMPRESSED(*pte, pte)) {
			disp |= PMAP_QUERY_PAGE_COMPRESSED;
			if (*pte & ARM_PTE_COMPRESSED_ALT) {
				disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
			}
		}
	} else {
		disp |= PMAP_QUERY_PAGE_PRESENT;
		pai = (int) pa_index(pa);
		if (!pa_valid(pa)) {
			goto done;
		}
		LOCK_PVH(pai);
		pv_h = pai_to_pvh(pai);
		pve_p = PV_ENTRY_NULL;
		if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
			pve_p = pvh_list(pv_h);
			while (pve_p != PV_ENTRY_NULL &&
			    pve_get_ptep(pve_p) != pte) {
				pve_p = PVE_NEXT_PTR(pve_next(pve_p));
			}
		}
		if (IS_ALTACCT_PAGE(pai, pve_p)) {
			disp |= PMAP_QUERY_PAGE_ALTACCT;
		} else if (IS_REUSABLE_PAGE(pai)) {
			disp |= PMAP_QUERY_PAGE_REUSABLE;
		} else if (IS_INTERNAL_PAGE(pai)) {
			disp |= PMAP_QUERY_PAGE_INTERNAL;
		}
		UNLOCK_PVH(pai);
	}

done:
	PMAP_UNLOCK(pmap);
	pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
	*disp_p = disp;
	pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
	return KERN_SUCCESS;
}

kern_return_t
pmap_query_page_info(
	pmap_t		pmap,
	vm_map_offset_t	va,
	int		*disp_p)
{
	return pmap_query_page_info_internal(pmap, va, disp_p);
}
MARK_AS_PMAP_TEXT kern_return_t
pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
{
	return KERN_SUCCESS;
}

kern_return_t
pmap_return(boolean_t do_panic, boolean_t do_recurse)
{
	return pmap_return_internal(do_panic, do_recurse);
}
MARK_AS_PMAP_TEXT static void
pmap_footprint_suspend_internal(
	vm_map_t	map,
	boolean_t	suspend)
{
#if DEVELOPMENT || DEBUG
	if (suspend) {
		current_thread()->pmap_footprint_suspended = TRUE;
		map->pmap->footprint_was_suspended = TRUE;
	} else {
		current_thread()->pmap_footprint_suspended = FALSE;
	}
#else /* DEVELOPMENT || DEBUG */
	(void) map;
	(void) suspend;
#endif /* DEVELOPMENT || DEBUG */
}

void
pmap_footprint_suspend(
	vm_map_t	map,
	boolean_t	suspend)
{
	pmap_footprint_suspend_internal(map, suspend);
}
#if defined(__arm64__) && (DEVELOPMENT || DEBUG)

struct page_table_dump_header {
	uint64_t pa;
	uint64_t num_entries;
	uint64_t start_va;
	uint64_t end_va;
};

static size_t
pmap_dump_page_tables_recurse(pmap_t pmap,
    const tt_entry_t *ttp,
    unsigned int cur_level,
    uint64_t start_va,
    void *bufp,
    void *buf_end)
{
	size_t bytes_used = 0;
	uint64_t num_entries = ARM_PGBYTES / sizeof(*ttp);
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	uint64_t size = pt_attr->pta_level_info[cur_level].size;
	uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
	uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
	uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;

	if (cur_level == arm64_root_pgtable_level) {
		num_entries = arm64_root_pgtable_num_ttes;
	}

	uint64_t tt_size = num_entries * sizeof(tt_entry_t);
	const tt_entry_t *tt_end = &ttp[num_entries];

	if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
		return 0;
	}

	struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
	header->pa = ml_static_vtop((vm_offset_t)ttp);
	header->num_entries = num_entries;
	header->start_va = start_va;
	header->end_va = start_va + (num_entries * size);

	bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
	bytes_used += (sizeof(*header) + tt_size);
	uint64_t current_va = start_va;

	for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
		tt_entry_t tte = *ttep;

		if (!(tte & valid_mask)) {
			continue;
		}

		if ((tte & type_mask) == type_block) {
			continue;
		} else {
			if (cur_level >= PMAP_TT_MAX_LEVEL) {
				panic("%s: corrupt entry %#llx at %p, "
				    "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
				    __FUNCTION__, tte, ttep,
				    ttp, cur_level, bufp, buf_end);
			}

			const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);

			size_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1, current_va, (uint8_t*)bufp + bytes_used, buf_end);

			if (recurse_result == 0) {
				return 0;
			}

			bytes_used += recurse_result;
		}
	}

	return bytes_used;
}

size_t
pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end)
{
	if (not_in_kdp) {
		panic("pmap_dump_page_tables must only be called from kernel debugger context");
	}
	return pmap_dump_page_tables_recurse(pmap, pmap->tte, arm64_root_pgtable_level, pmap->min, bufp, buf_end);
}

#else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */

size_t
pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)
{
	return 0;
}

#endif /* !defined(__arm64__) */
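
/*
 * Sketch of the dump layout produced above (an assumption based on the code,
 * not a formal format definition): the buffer is a sequence of
 * [page_table_dump_header][raw TT entries] records, one per visited table,
 * emitted depth-first. A hypothetical consumer could walk it as:
 *
 *	uint8_t *cur = buf;
 *	while (cur < buf + used) {
 *		struct page_table_dump_header *h = (void *)cur;
 *		// h->pa, h->start_va..h->end_va describe h->num_entries TTEs
 *		cur += sizeof(*h) + h->num_entries * sizeof(tt_entry_t);
 *	}
 */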