/*
 * Copyright (c) 2011-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

#include <mach_assert.h>
#include <mach_ldebug.h>

#include <mach/shared_region.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/vm_map.h>
#include <mach/machine/vm_param.h>
#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/bits.h>
#include <kern/thread.h>
#include <kern/sched.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>

#include <kern/trustcache.h>

#include <os/overflow.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#include <libkern/img4/interface.h>
#include <libkern/section_keywords.h>
#include <sys/errno.h>

#include <machine/atomic.h>
#include <machine/thread.h>
#include <machine/lowglobals.h>

#include <arm/caches_internal.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/cpu_capabilities.h>
#include <arm/cpu_number.h>
#include <arm/machine_cpu.h>
#include <arm/misc_protos.h>

#if (__ARM_VMSA__ > 7)
#include <arm64/proc_reg.h>
#include <pexpert/arm64/boot.h>
#endif

#if CONFIG_PGTRACE
#include <arm64/pgtrace.h>
#if CONFIG_PGTRACE_NONKEXT
#include <arm64/pgtrace_decoder.h>
#endif // CONFIG_PGTRACE_NONKEXT
#endif // CONFIG_PGTRACE

#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
#include <arm64/amcc_rorgn.h>
#endif // defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)

#include <pexpert/device_tree.h>

#include <san/kasan.h>
#include <sys/cdefs.h>

#if defined(HAS_APPLE_PAC)
#include <ptrauth.h>
#endif

#ifdef CONFIG_XNUPOST
#include <tests/xnupost.h>
#endif

#if HIBERNATION
#include <IOKit/IOHibernatePrivate.h>
#endif /* HIBERNATION */

#define PMAP_TT_L0_LEVEL        0x0
#define PMAP_TT_L1_LEVEL        0x1
#define PMAP_TT_L2_LEVEL        0x2
#define PMAP_TT_L3_LEVEL        0x3

#ifdef __ARM64_PMAP_SUBPAGE_L1__
#if (__ARM_VMSA__ <= 7)
#error This is not supported for old-style page tables
#endif
#define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
#else
#if (__ARM_VMSA__ <= 7)
#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
#else
#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
#endif
#endif

extern u_int32_t random(void); /* from <libkern/libkern.h> */

static bool alloc_asid(pmap_t pmap);
static void free_asid(pmap_t pmap);
static void flush_mmu_tlb_region_asid_async(vm_offset_t va, size_t length, pmap_t pmap);
static void flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap);
static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
static pt_entry_t wimg_to_pte(unsigned int wimg);

struct page_table_ops {
	bool (*alloc_id)(pmap_t pmap);
	void (*free_id)(pmap_t pmap);
	void (*flush_tlb_region_async)(vm_offset_t va, size_t length, pmap_t pmap);
	void (*flush_tlb_tte_async)(vm_offset_t va, pmap_t pmap);
	void (*flush_tlb_async)(pmap_t pmap);
	pt_entry_t (*wimg_to_pte)(unsigned int wimg);
};

static const struct page_table_ops native_pt_ops =
{
	.alloc_id = alloc_asid,
	.free_id = free_asid,
	.flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
	.flush_tlb_tte_async = flush_mmu_tlb_tte_asid_async,
	.flush_tlb_async = flush_mmu_tlb_full_asid_async,
	.wimg_to_pte = wimg_to_pte,
};

#if (__ARM_VMSA__ > 7)
const struct page_table_level_info pmap_table_level_info_16k[] =
{
	[0] = {
		.size       = ARM_16K_TT_L0_SIZE,
		.offmask    = ARM_16K_TT_L0_OFFMASK,
		.shift      = ARM_16K_TT_L0_SHIFT,
		.index_mask = ARM_16K_TT_L0_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[1] = {
		.size       = ARM_16K_TT_L1_SIZE,
		.offmask    = ARM_16K_TT_L1_OFFMASK,
		.shift      = ARM_16K_TT_L1_SHIFT,
		.index_mask = ARM_16K_TT_L1_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[2] = {
		.size       = ARM_16K_TT_L2_SIZE,
		.offmask    = ARM_16K_TT_L2_OFFMASK,
		.shift      = ARM_16K_TT_L2_SHIFT,
		.index_mask = ARM_16K_TT_L2_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[3] = {
		.size       = ARM_16K_TT_L3_SIZE,
		.offmask    = ARM_16K_TT_L3_OFFMASK,
		.shift      = ARM_16K_TT_L3_SHIFT,
		.index_mask = ARM_16K_TT_L3_INDEX_MASK,
		.valid_mask = ARM_PTE_TYPE_VALID,
		.type_mask  = ARM_PTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_L3BLOCK
	}
};

const struct page_table_level_info pmap_table_level_info_4k[] =
{
	[0] = {
		.size       = ARM_4K_TT_L0_SIZE,
		.offmask    = ARM_4K_TT_L0_OFFMASK,
		.shift      = ARM_4K_TT_L0_SHIFT,
		.index_mask = ARM_4K_TT_L0_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[1] = {
		.size       = ARM_4K_TT_L1_SIZE,
		.offmask    = ARM_4K_TT_L1_OFFMASK,
		.shift      = ARM_4K_TT_L1_SHIFT,
		.index_mask = ARM_4K_TT_L1_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[2] = {
		.size       = ARM_4K_TT_L2_SIZE,
		.offmask    = ARM_4K_TT_L2_OFFMASK,
		.shift      = ARM_4K_TT_L2_SHIFT,
		.index_mask = ARM_4K_TT_L2_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	[3] = {
		.size       = ARM_4K_TT_L3_SIZE,
		.offmask    = ARM_4K_TT_L3_OFFMASK,
		.shift      = ARM_4K_TT_L3_SHIFT,
		.index_mask = ARM_4K_TT_L3_INDEX_MASK,
		.valid_mask = ARM_PTE_TYPE_VALID,
		.type_mask  = ARM_PTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_L3BLOCK
	}
};

struct page_table_attr {
	const struct page_table_level_info * const pta_level_info;
	const struct page_table_ops * const pta_ops;
	const uintptr_t ap_ro;
	const uintptr_t ap_rw;
	const uintptr_t ap_rona;
	const uintptr_t ap_rwna;
	const uintptr_t ap_xn;
	const uintptr_t ap_x;
	const unsigned int pta_root_level;
	const unsigned int pta_sharedpage_level;
	const unsigned int pta_max_level;
#if __ARM_MIXED_PAGE_SIZE__
	const uint64_t pta_tcr_value;
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	const uint64_t pta_page_size;
	const uint64_t pta_page_shift;
};

const struct page_table_attr pmap_pt_attr_4k = {
	.pta_level_info = pmap_table_level_info_4k,
	.pta_root_level = (T0SZ_BOOT - 16) / 9,
#if __ARM_MIXED_PAGE_SIZE__
	.pta_sharedpage_level = PMAP_TT_L2_LEVEL,
#else /* __ARM_MIXED_PAGE_SIZE__ */
#if __ARM_16K_PG__
	.pta_sharedpage_level = PMAP_TT_L2_LEVEL,
#else /* __ARM_16K_PG__ */
	.pta_sharedpage_level = PMAP_TT_L1_LEVEL,
#endif /* __ARM_16K_PG__ */
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	.pta_max_level  = PMAP_TT_L3_LEVEL,
	.pta_ops = &native_pt_ops,
	.ap_ro = ARM_PTE_AP(AP_RORO),
	.ap_rw = ARM_PTE_AP(AP_RWRW),
	.ap_rona = ARM_PTE_AP(AP_RONA),
	.ap_rwna = ARM_PTE_AP(AP_RWNA),
	.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
	.ap_x = ARM_PTE_PNX,
#if __ARM_MIXED_PAGE_SIZE__
	.pta_tcr_value  = TCR_EL1_4KB,
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	.pta_page_size  = 4096,
	.pta_page_shift = 12,
};

const struct page_table_attr pmap_pt_attr_16k = {
	.pta_level_info = pmap_table_level_info_16k,
	.pta_root_level = PMAP_TT_L1_LEVEL,
	.pta_sharedpage_level = PMAP_TT_L2_LEVEL,
	.pta_max_level  = PMAP_TT_L3_LEVEL,
	.pta_ops = &native_pt_ops,
	.ap_ro = ARM_PTE_AP(AP_RORO),
	.ap_rw = ARM_PTE_AP(AP_RWRW),
	.ap_rona = ARM_PTE_AP(AP_RONA),
	.ap_rwna = ARM_PTE_AP(AP_RWNA),
	.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
	.ap_x = ARM_PTE_PNX,
#if __ARM_MIXED_PAGE_SIZE__
	.pta_tcr_value  = TCR_EL1_16KB,
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	.pta_page_size  = 16384,
	.pta_page_shift = 14,
};

#if __ARM_16K_PG__
const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
#else /* !__ARM_16K_PG__ */
const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
#endif /* !__ARM_16K_PG__ */


#else /* (__ARM_VMSA__ > 7) */
/*
 * We don't support pmap parameterization for VMSA7, so use an opaque
 * page_table_attr structure.
 */
const struct page_table_attr * const native_pt_attr = NULL;
#endif /* (__ARM_VMSA__ > 7) */

typedef struct page_table_attr pt_attr_t;

/* Macro for getting pmap attributes; not a function for const propagation. */
#if ARM_PARAMETERIZED_PMAP
/* The page table attributes are linked to the pmap */
#define pmap_get_pt_attr(pmap) ((pmap)->pmap_pt_attr)
#define pmap_get_pt_ops(pmap) ((pmap)->pmap_pt_attr->pta_ops)
#else /* !ARM_PARAMETERIZED_PMAP */
/* The page table attributes are fixed (to allow for const propagation) */
#define pmap_get_pt_attr(pmap) (native_pt_attr)
#define pmap_get_pt_ops(pmap) (&native_pt_ops)
#endif /* !ARM_PARAMETERIZED_PMAP */

#if (__ARM_VMSA__ > 7)
static inline uint64_t
pt_attr_page_size(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_page_size;
}

__unused static inline uint64_t
pt_attr_ln_size(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr->pta_level_info[level].size;
}

__unused static inline uint64_t
pt_attr_ln_shift(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr->pta_level_info[level].shift;
}

static inline uint64_t
pt_attr_ln_offmask(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr->pta_level_info[level].offmask;
}

__unused static inline uint64_t
pt_attr_ln_pt_offmask(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr_ln_offmask(pt_attr, level);
}

__unused static inline uint64_t
pt_attr_ln_index_mask(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr->pta_level_info[level].index_mask;
}

static inline unsigned int
pt_attr_twig_level(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_max_level - 1;
}

static inline unsigned int
pt_attr_root_level(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_root_level;
}

/*
 * This is the level at which to copy a pt_entry from the sharedpage_pmap into
 * the user pmap.  Typically L1 for 4K pages, and L2 for 16K pages.  In this way,
 * the sharedpage's L2/L3 page tables are reused in every 4K task, whereas only
 * the L3 page table is reused in 16K tasks.
 */
static inline unsigned int
pt_attr_sharedpage_level(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_sharedpage_level;
}
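
/*
 * Editor's illustrative sketch (not part of the original source): how the
 * sharedpage level combines with the generic level accessors above.  The
 * helper name is hypothetical; it simply reports how many bytes of VA a
 * single entry at the sharedpage copy level spans (one L1 entry for 4K
 * pages, one L2 entry for 16K pages).
 */
__unused static inline uint64_t
pt_attr_sharedpage_span_example(const pt_attr_t * const pt_attr)
{
	return pt_attr_ln_size(pt_attr, pt_attr_sharedpage_level(pt_attr));
}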

static __unused inline uint64_t
pt_attr_leaf_size(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level].size;
}

static __unused inline uint64_t
pt_attr_leaf_offmask(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level].offmask;
}

static inline uint64_t
pt_attr_leaf_shift(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level].shift;
}

static __unused inline uint64_t
pt_attr_leaf_index_mask(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level].index_mask;
}

static inline uint64_t
pt_attr_twig_size(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].size;
}

static inline uint64_t
pt_attr_twig_offmask(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].offmask;
}

static inline uint64_t
pt_attr_twig_shift(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].shift;
}

static __unused inline uint64_t
pt_attr_twig_index_mask(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].index_mask;
}

static inline uint64_t
pt_attr_leaf_table_size(const pt_attr_t * const pt_attr)
{
	return pt_attr_twig_size(pt_attr);
}

static inline uint64_t
pt_attr_leaf_table_offmask(const pt_attr_t * const pt_attr)
{
	return pt_attr_twig_offmask(pt_attr);
}

static inline uintptr_t
pt_attr_leaf_rw(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_rw;
}

static inline uintptr_t
pt_attr_leaf_ro(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_ro;
}

static inline uintptr_t
pt_attr_leaf_rona(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_rona;
}

static inline uintptr_t
pt_attr_leaf_rwna(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_rwna;
}

static inline uintptr_t
pt_attr_leaf_xn(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_xn;
}

static inline uintptr_t
pt_attr_leaf_x(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_x;
}

#else /* (__ARM_VMSA__ > 7) */

static inline uint64_t
pt_attr_page_size(__unused const pt_attr_t * const pt_attr)
{
	return PAGE_SIZE;
}

__unused static inline unsigned int
pt_attr_root_level(__unused const pt_attr_t * const pt_attr)
{
	return PMAP_TT_L1_LEVEL;
}

__unused static inline unsigned int
pt_attr_sharedpage_level(__unused const pt_attr_t * const pt_attr)
{
	return PMAP_TT_L1_LEVEL;
}

static inline unsigned int
pt_attr_twig_level(__unused const pt_attr_t * const pt_attr)
{
	return PMAP_TT_L1_LEVEL;
}

static inline uint64_t
pt_attr_twig_size(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_TWIG_SIZE;
}

static inline uint64_t
pt_attr_twig_offmask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_TWIG_OFFMASK;
}

static inline uint64_t
pt_attr_twig_shift(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_TWIG_SHIFT;
}

static __unused inline uint64_t
pt_attr_twig_index_mask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_TWIG_INDEX_MASK;
}

__unused static inline uint64_t
pt_attr_leaf_size(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_LEAF_SIZE;
}

__unused static inline uint64_t
pt_attr_leaf_offmask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_LEAF_OFFMASK;
}

static inline uint64_t
pt_attr_leaf_shift(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_LEAF_SHIFT;
}

static __unused inline uint64_t
pt_attr_leaf_index_mask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_LEAF_INDEX_MASK;
}

static inline uint64_t
pt_attr_leaf_table_size(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_L1_PT_SIZE;
}

static inline uint64_t
pt_attr_leaf_table_offmask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_L1_PT_OFFMASK;
}

static inline uintptr_t
pt_attr_leaf_rw(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_AP(AP_RWRW);
}

static inline uintptr_t
pt_attr_leaf_ro(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_AP(AP_RORO);
}

static inline uintptr_t
pt_attr_leaf_rona(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_AP(AP_RONA);
}

static inline uintptr_t
pt_attr_leaf_rwna(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_AP(AP_RWNA);
}

static inline uintptr_t
pt_attr_leaf_xn(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_NX;
}

__unused static inline uintptr_t
pt_attr_ln_offmask(__unused const pt_attr_t * const pt_attr, unsigned int level)
{
	if (level == PMAP_TT_L1_LEVEL) {
		return ARM_TT_L1_OFFMASK;
	} else if (level == PMAP_TT_L2_LEVEL) {
		return ARM_TT_L2_OFFMASK;
	}

	return 0;
}

static inline uintptr_t
pt_attr_ln_pt_offmask(__unused const pt_attr_t * const pt_attr, unsigned int level)
{
	if (level == PMAP_TT_L1_LEVEL) {
		return ARM_TT_L1_PT_OFFMASK;
	} else if (level == PMAP_TT_L2_LEVEL) {
		return ARM_TT_L2_OFFMASK;
	}

	return 0;
}

#endif /* (__ARM_VMSA__ > 7) */

static inline unsigned int
pt_attr_leaf_level(const pt_attr_t * const pt_attr)
{
	return pt_attr_twig_level(pt_attr) + 1;
}
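
/*
 * Editor's illustrative sketch (not part of the original source): a typical
 * use of the const-propagating pmap_get_pt_attr()/pt_attr_*() accessors.
 * The helper name is hypothetical; it computes how many leaf PTEs one twig
 * (next-to-last level) entry covers for the page-table geometry in use.
 */
__unused static inline uint64_t
pt_attr_ptes_per_leaf_table_example(const pt_attr_t * const pt_attr)
{
	/* VA span of one twig entry divided by the VA span of one leaf entry */
	return pt_attr_twig_size(pt_attr) >> pt_attr_leaf_shift(pt_attr);
}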

static inline void
pmap_sync_tlb(bool strong __unused)
{
	sync_tlb_flush();
}

#if MACH_ASSERT
int vm_footprint_suspend_allowed = 1;

extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

int pmap_stats_assert = 1;
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...) \
	MACRO_BEGIN \
	if (pmap_stats_assert && (pmap)->pmap_stats_assert) \
	        assertf(cond, fmt, ##__VA_ARGS__); \
	MACRO_END
#else /* MACH_ASSERT */
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
#endif /* MACH_ASSERT */

#if DEVELOPMENT || DEBUG
#define PMAP_FOOTPRINT_SUSPENDED(pmap) \
	(current_thread()->pmap_footprint_suspended)
#else /* DEVELOPMENT || DEBUG */
#define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
#endif /* DEVELOPMENT || DEBUG */

/*
 * Represents a TLB range that will be flushed before exiting the PPL.
 * Used by phys_attribute_clear_range to defer flushing pages in
 * this range until the end of the operation.
 */
typedef struct pmap_tlb_flush_range {
	vm_map_address_t ptfr_start;
	vm_map_address_t ptfr_end;
	bool ptfr_flush_needed;
} pmap_tlb_flush_range_t;
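
/*
 * Editor's illustrative sketch (not part of the original source): the
 * accumulate-then-flush pattern this structure supports.  The helper name is
 * hypothetical; callers widen the pending range as they visit mappings and
 * issue a single TLB flush for [ptfr_start, ptfr_end) once the walk is done.
 */
__unused static inline void
pmap_tlb_flush_range_add_example(pmap_tlb_flush_range_t *range,
    vm_map_address_t start, vm_map_address_t end)
{
	if (!range->ptfr_flush_needed || start < range->ptfr_start) {
		range->ptfr_start = start;
	}
	if (!range->ptfr_flush_needed || end > range->ptfr_end) {
		range->ptfr_end = end;
	}
	range->ptfr_flush_needed = true;
}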

#if XNU_MONITOR
/*
 * PPL External References.
 */
extern vm_offset_t   segPPLDATAB;
extern unsigned long segSizePPLDATA;
extern vm_offset_t   segPPLTEXTB;
extern unsigned long segSizePPLTEXT;
#if __APRR_SUPPORTED__
extern vm_offset_t   segPPLTRAMPB;
extern unsigned long segSizePPLTRAMP;
extern void ppl_trampoline_start;
extern void ppl_trampoline_end;
#endif /* __APRR_SUPPORTED__ */
extern vm_offset_t   segPPLDATACONSTB;
extern unsigned long segSizePPLDATACONST;

/*
 * PPL Global Variables
 */

#if (DEVELOPMENT || DEBUG) || CONFIG_CSR_FROM_DT
/* Indicates if the PPL will enforce mapping policies; set by -unsafe_kernel_text */
SECURITY_READ_ONLY_LATE(boolean_t) pmap_ppl_disable = FALSE;
#else
const boolean_t pmap_ppl_disable = FALSE;
#endif

/* Indicates if the PPL has started applying APRR. */
boolean_t pmap_ppl_locked_down MARK_AS_PMAP_DATA = FALSE;

/*
 * The PPL cannot invoke the kernel in order to allocate memory, so we must
 * maintain a list of free pages that the PPL owns.  The kernel can give the
 * PPL additional pages.
 */
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_ppl_free_page_lock, 0);
void ** pmap_ppl_free_page_list MARK_AS_PMAP_DATA = NULL;
uint64_t pmap_ppl_free_page_count MARK_AS_PMAP_DATA = 0;
uint64_t pmap_ppl_pages_returned_to_kernel_count_total = 0;

struct pmap_cpu_data_array_entry pmap_cpu_data_array[MAX_CPUS] MARK_AS_PMAP_DATA = {0};

extern void *pmap_stacks_start;
extern void *pmap_stacks_end;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_start_pa = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_end_pa = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_start = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_end = 0;

/* Allocation data/locks for pmap structures. */

MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_free_list_lock, 0);

SECURITY_READ_ONLY_LATE(unsigned long) pmap_array_count = 0;
SECURITY_READ_ONLY_LATE(void *) pmap_array_begin = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_array_end = NULL;
SECURITY_READ_ONLY_LATE(pmap_t) pmap_array = NULL;
pmap_t pmap_free_list MARK_AS_PMAP_DATA = NULL;

/* Allocation data/locks/structs for task ledger structures. */
#define PMAP_LEDGER_DATA_BYTES \
	(((sizeof(task_ledgers) / sizeof(int)) * sizeof(struct ledger_entry)) + sizeof(struct ledger))

/*
 * Maximum number of ledgers allowed are maximum number of tasks
 * allowed on system plus some more i.e. ~10% of total tasks = 200.
 */
#define MAX_PMAP_LEDGERS (pmap_max_asids + 200)
#define PMAP_ARRAY_SIZE (pmap_max_asids)

typedef struct pmap_ledger_data {
	char pld_data[PMAP_LEDGER_DATA_BYTES];
} pmap_ledger_data_t;

typedef struct pmap_ledger {
	union {
		struct pmap_ledger_data ple_data;
		struct pmap_ledger * next;
	};

	struct pmap_ledger ** back_ptr;
} pmap_ledger_t;

SECURITY_READ_ONLY_LATE(bool) pmap_ledger_alloc_initialized = false;
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_ledger_lock, 0);
SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_begin = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_end = NULL;
SECURITY_READ_ONLY_LATE(os_refcnt_t *) pmap_ledger_refcnt = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_begin = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_end = NULL;
SECURITY_READ_ONLY_LATE(pmap_ledger_t * *) pmap_ledger_ptr_array = NULL;
uint64_t pmap_ledger_ptr_array_free_index MARK_AS_PMAP_DATA = 0;
pmap_ledger_t * pmap_ledger_free_list MARK_AS_PMAP_DATA = NULL;

#define pmap_ledger_debit(p, e, a) ledger_debit_nocheck((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit_nocheck((p)->ledger, e, a)

static inline void
pmap_check_ledger_fields(ledger_t ledger)
{
	if (ledger == NULL) {
		return;
	}

	thread_t cur_thread = current_thread();
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting_compressed);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal_compressed);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.page_table);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_footprint);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_mem);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.tkm_private);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.wired_mem);
}

#define pmap_ledger_check_balance(p) pmap_check_ledger_fields((p)->ledger)

#else /* XNU_MONITOR */

#define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)

#endif /* !XNU_MONITOR */

/* Virtual memory region for early allocation */
#if (__ARM_VMSA__ == 7)
#define VREGION1_HIGH_WINDOW    (0)
#else
#define VREGION1_HIGH_WINDOW    (PE_EARLY_BOOT_VA)
#endif
#define VREGION1_START          ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
#define VREGION1_SIZE           (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))

extern uint8_t bootstrap_pagetables[];

extern unsigned int not_in_kdp;

extern vm_offset_t first_avail;

extern pmap_paddr_t avail_start;
extern pmap_paddr_t avail_end;

extern vm_offset_t     virtual_space_start;     /* Next available kernel VA */
extern vm_offset_t     virtual_space_end;       /* End of kernel address space */
extern vm_offset_t     static_memory_end;

extern const vm_map_address_t physmap_base;
extern const vm_map_address_t physmap_end;

extern int maxproc, hard_maxproc;

vm_address_t MARK_AS_PMAP_DATA image4_slab = 0;

#if (__ARM_VMSA__ > 7)
/* The number of address bits one TTBR can cover. */
#define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)

/*
 * The bounds on our TTBRs.  These are for sanity checking that
 * an address is accessible by a TTBR before we attempt to map it.
 */
#define ARM64_TTBR0_MIN_ADDR (0ULL)
#define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
#define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
#define ARM64_TTBR1_MAX_ADDR (~0ULL)

/* The level of the root of a page table. */
const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));

/* The number of entries in the root TT of a page table. */
const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
#else
const uint64_t arm64_root_pgtable_level = 0;
const uint64_t arm64_root_pgtable_num_ttes = 0;
#endif

struct pmap                     kernel_pmap_store MARK_AS_PMAP_DATA;
SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;

struct vm_object pmap_object_store VM_PAGE_PACKED_ALIGNED;      /* store pt pages */
vm_object_t     pmap_object = &pmap_object_store;

static SECURITY_READ_ONLY_LATE(zone_t) pmap_zone;  /* zone of pmap structures */

MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmaps_lock, 0);
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(tt1_lock, 0);
unsigned int    pmap_stamp MARK_AS_PMAP_DATA;
queue_head_t    map_pmap_list MARK_AS_PMAP_DATA;

MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pt_pages_lock, 0);
queue_head_t    pt_page_list MARK_AS_PMAP_DATA;         /* pt page ptd entries list */

MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_pages_lock, 0);

typedef struct page_free_entry {
	struct page_free_entry  *next;
} page_free_entry_t;

#define PAGE_FREE_ENTRY_NULL    ((page_free_entry_t *) 0)

page_free_entry_t       *pmap_pages_reclaim_list MARK_AS_PMAP_DATA;     /* Reclaimed pt page list */
unsigned int            pmap_pages_request_count MARK_AS_PMAP_DATA;     /* Pending requests to reclaim pt page */
unsigned long long      pmap_pages_request_acum MARK_AS_PMAP_DATA;

typedef struct tt_free_entry {
	struct tt_free_entry    *next;
} tt_free_entry_t;

#define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)

tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_PAGE_SIZE_TT_MAX   4
tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_two_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_two_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_TWO_PAGE_SIZE_TT_MAX       4
tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_tt_max MARK_AS_PMAP_DATA;

#define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)

boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
boolean_t pmap_gc_allowed_by_time_throttle = TRUE;

unsigned int    inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0;        /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0;        /* leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0;         /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int    inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0;      /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0;      /* leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0;       /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int    inuse_pmap_pages_count = 0;     /* debugging */

SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte  = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;

SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte  = 0;     /* set by arm_vm_init() - keep out of bss */
SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0;     /* set by arm_vm_init() - phys tte addr */

#if DEVELOPMENT || DEBUG
int nx_enabled = 1;                                     /* enable no-execute protection */
int allow_data_exec  = 0;                               /* No apps may execute data */
int allow_stack_exec = 0;                               /* No apps may execute from the stack */
unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
unsigned long pmap_asid_hits MARK_AS_PMAP_DATA = 0;
unsigned long pmap_asid_misses MARK_AS_PMAP_DATA = 0;
#else /* DEVELOPMENT || DEBUG */
const int nx_enabled = 1;                               /* enable no-execute protection */
const int allow_data_exec  = 0;                         /* No apps may execute data */
const int allow_stack_exec = 0;                         /* No apps may execute from the stack */
#endif /* DEVELOPMENT || DEBUG */

/*
 * This variable is set true during hibernation entry to protect pmap data structures
 * during image copying, and reset false on hibernation exit.
 */
bool hib_entry_pmap_lockdown MARK_AS_PMAP_DATA = false;

#if HIBERNATION
/* Macro used to ensure that pmap data structures aren't modified during hibernation image copying. */
#define ASSERT_NOT_HIBERNATING() (assertf(!hib_entry_pmap_lockdown, \
	"Attempted to modify PMAP data structures after hibernation image copying has begun."))
#else
#define ASSERT_NOT_HIBERNATING()
#endif /* HIBERNATION */

#define PV_ENTRY_NULL   ((pv_entry_t *) 0)

/*
 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
 * as a marker for pages mapped through an "alternate accounting" mapping.
 * These macros set, clear and test for this marker and extract the actual
 * value of the "pve_next" pointer.
 */
#define PVE_NEXT_ALTACCT        ((uintptr_t) 0x1)
#define PVE_NEXT_SET_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
	    PVE_NEXT_ALTACCT)
#define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
	    ~PVE_NEXT_ALTACCT)
#define PVE_NEXT_IS_ALTACCT(pve_next)   \
	((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
#define PVE_NEXT_PTR(pve_next) \
	((struct pv_entry *)(((uintptr_t) (pve_next)) & \
	    ~PVE_NEXT_ALTACCT))
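
/*
 * Editor's illustrative sketch (not part of the original source): how the
 * low-bit tag round-trips.  The helper name is hypothetical; it marks a
 * "pve_next" slot as alternate-accounting and then recovers the untagged
 * successor pointer with PVE_NEXT_PTR().
 */
__unused static inline struct pv_entry *
pve_next_altacct_example(struct pv_entry **pve_next_p)
{
	PVE_NEXT_SET_ALTACCT(pve_next_p);
	assert(PVE_NEXT_IS_ALTACCT(*pve_next_p));
	/* strip the marker bit to get the real successor pointer back */
	return PVE_NEXT_PTR(*pve_next_p);
}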

#if MACH_ASSERT
static void pmap_check_ledgers(pmap_t pmap);
#else
static void
pmap_check_ledgers(__unused pmap_t pmap)
{
}
#endif /* MACH_ASSERT */

SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table;           /* array of pv entry pointers */

pv_free_list_t pv_free MARK_AS_PMAP_DATA = {0};
pv_free_list_t pv_kern_free MARK_AS_PMAP_DATA = {0};
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pv_free_list_lock, 0);
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pv_kern_free_list_lock, 0);

SIMPLE_LOCK_DECLARE(phys_backup_lock, 0);

/*
 * pt_desc - structure to keep info on page assigned to page tables
 */
#if (__ARM_VMSA__ == 7)
#define PT_INDEX_MAX 1
#else /* (__ARM_VMSA__ != 7) */

#if __ARM_MIXED_PAGE_SIZE__
#define PT_INDEX_MAX (ARM_PGBYTES / 4096)
#elif (ARM_PGSHIFT == 14)
#define PT_INDEX_MAX 1
#elif (ARM_PGSHIFT == 12)
#define PT_INDEX_MAX 4
#else
#error Unsupported ARM_PGSHIFT
#endif /* (ARM_PGSHIFT != 14) */

#endif /* (__ARM_VMSA__ != 7) */

#define PT_DESC_REFCOUNT                0x4000U
#define PT_DESC_IOMMU_REFCOUNT          0x8000U

typedef struct {
	/*
	 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
	 * For leaf pagetables, should reflect the number of non-empty PTEs
	 * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
	 */
	unsigned short          refcnt;
	/*
	 * For non-leaf pagetables, should be 0
	 * For leaf pagetables, should reflect the number of wired entries
	 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
	 */
	unsigned short          wiredcnt;
	vm_offset_t             va;
} ptd_info_t;

typedef struct pt_desc {
	queue_chain_t           pt_page;
	union {
		struct pmap             *pmap;
	};
	ptd_info_t ptd_info[PT_INDEX_MAX];
} pt_desc_t;
1040 SECURITY_READ_ONLY_LATE(pt_desc_t
*) ptd_root_table
;
1042 pt_desc_t
*ptd_free_list MARK_AS_PMAP_DATA
= PTD_ENTRY_NULL
;
1043 SECURITY_READ_ONLY_LATE(boolean_t
) ptd_preboot
= TRUE
;
1044 unsigned int ptd_free_count MARK_AS_PMAP_DATA
= 0;
1045 decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA
);
1048 * physical page attribute
1050 typedef u_int16_t pp_attr_t
;
1052 #define PP_ATTR_WIMG_MASK 0x003F
1053 #define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
1055 #define PP_ATTR_REFERENCED 0x0040
1056 #define PP_ATTR_MODIFIED 0x0080
1058 #define PP_ATTR_INTERNAL 0x0100
1059 #define PP_ATTR_REUSABLE 0x0200
1060 #define PP_ATTR_ALTACCT 0x0400
1061 #define PP_ATTR_NOENCRYPT 0x0800
1063 #define PP_ATTR_REFFAULT 0x1000
1064 #define PP_ATTR_MODFAULT 0x2000
1068 * Denotes that a page is owned by the PPL. This is modified/checked with the
1069 * PVH lock held, to avoid ownership related races. This does not need to be a
1070 * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
1073 #define PP_ATTR_MONITOR 0x4000
1076 * Denotes that a page *cannot* be owned by the PPL. This is required in order
1077 * to temporarily 'pin' kernel pages that are used to store PPL output parameters.
1078 * Otherwise a malicious or buggy caller could pass PPL-owned memory for these
1079 * parameters and in so doing stage a write gadget against the PPL.
1081 #define PP_ATTR_NO_MONITOR 0x8000
1084 * All of the bits owned by the PPL; kernel requests to set or clear these bits
1087 #define PP_ATTR_PPL_OWNED_BITS (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
1090 SECURITY_READ_ONLY_LATE(volatile pp_attr_t
*) pp_attr_table
;
1093 * The layout of this structure needs to map 1-to-1 with the pmap-io-range device
1094 * tree nodes. Astris (through the LowGlobals) also depends on the consistency
1095 * of this structure.
1097 typedef struct pmap_io_range
{
1100 #define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31) // Strong DSB required for pages in this range
1101 #define PMAP_IO_RANGE_CARVEOUT (1UL << 30) // Corresponds to memory carved out by bootloader
1102 #define PMAP_IO_RANGE_NEEDS_HIBERNATING (1UL << 29) // Pages in this range need to be included in the hibernation image
1103 uint32_t wimg
; // lower 16 bits treated as pp_attr_t, upper 16 bits contain additional mapping flags
1104 uint32_t signature
; // 4CC
1105 } __attribute__((packed
)) pmap_io_range_t
;
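
/*
 * Editor's illustrative sketch (not part of the original source): how the
 * packed "wimg" field of a pmap_io_range decomposes.  The helper name is
 * hypothetical; the low 16 bits are read as a pp_attr_t-style WIMG value and
 * the high bits carry the PMAP_IO_RANGE_* mapping flags defined above.
 */
__unused static inline pp_attr_t
pmap_io_range_wimg_example(const pmap_io_range_t *range, bool *strong_sync)
{
	if (strong_sync != NULL) {
		*strong_sync = (range->wimg & PMAP_IO_RANGE_STRONG_SYNC) != 0;
	}
	/* lower 16 bits of wimg are interpreted like pp_attr_t WIMG bits */
	return (pp_attr_t)(range->wimg & 0xFFFF);
}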

SECURITY_READ_ONLY_LATE(pmap_io_range_t*)       io_attr_table = (pmap_io_range_t *)0;

SECURITY_READ_ONLY_LATE(pmap_paddr_t)   vm_first_phys = (pmap_paddr_t) 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t)   vm_last_phys = (pmap_paddr_t) 0;

SECURITY_READ_ONLY_LATE(unsigned int)   num_io_rgns = 0;

SECURITY_READ_ONLY_LATE(boolean_t)      pmap_initialized = FALSE;       /* Has pmap_init completed? */

SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default  = 0x0;
#if defined(__arm64__)
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
#endif

#if PMAP_PANIC_DEV_WIMG_ON_MANAGED && (DEVELOPMENT || DEBUG)
SECURITY_READ_ONLY_LATE(boolean_t) pmap_panic_dev_wimg_on_managed = TRUE;
#else
SECURITY_READ_ONLY_LATE(boolean_t) pmap_panic_dev_wimg_on_managed = FALSE;
#endif

MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(asid_lock, 0);
SECURITY_READ_ONLY_LATE(static uint32_t) pmap_max_asids = 0;
SECURITY_READ_ONLY_LATE(int) pmap_asid_plru = 1;
SECURITY_READ_ONLY_LATE(uint16_t) asid_chunk_size = 0;
SECURITY_READ_ONLY_LATE(static bitmap_t*) asid_bitmap;
static bitmap_t asid_plru_bitmap[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA;
static uint64_t asid_plru_generation[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA = {0};
static uint64_t asid_plru_gencount MARK_AS_PMAP_DATA = 0;


#if (__ARM_VMSA__ > 7)
#if __ARM_MIXED_PAGE_SIZE__
SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_4k;
#endif
SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_default;
#endif

#if XNU_MONITOR
/*
 * We define our target as 8 pages; enough for 2 page table pages, a PTD page,
 * and a PV page; in essence, twice as many pages as may be necessary to satisfy
 * a single pmap_enter request.
 */
#define PMAP_MIN_FREE_PPL_PAGES 8
#endif

#define pa_index(pa)                                                                    \
	(atop((pa) - vm_first_phys))

#define pai_to_pvh(pai)                                                                 \
	(&pv_head_table[pai])

#define pa_valid(x)                                                                     \
	((x) >= vm_first_phys && (x) < vm_last_phys)
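
/*
 * Editor's illustrative sketch (not part of the original source): the usual
 * composition of the three macros above.  The helper name is hypothetical;
 * it returns the PV head slot for a managed physical address, or NULL for
 * addresses outside the managed range (I/O regions, carveouts, etc.).
 */
__unused static inline pv_entry_t **
pa_to_pvh_example(pmap_paddr_t pa)
{
	if (!pa_valid(pa)) {
		return NULL;
	}
	return pai_to_pvh(pa_index(pa));
}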

/* PTE Define Macros */

#define pte_is_wired(pte)                                                               \
	(((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)

#define pte_was_writeable(pte) \
	(((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)

#define pte_set_was_writeable(pte, was_writeable) \
	do { \
		if ((was_writeable)) { \
			(pte) |= ARM_PTE_WRITEABLE; \
		} else { \
			(pte) &= ~ARM_PTE_WRITEABLE; \
		} \
	} while(0)

/* PVE Define Macros */

#define pve_next(pve) \
	((pve)->pve_next)

#define pve_link_field(pve) \
	(&pve_next(pve))

#define pve_link(pp, e) \
	((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))

#define pve_unlink(pp, e) \
	(pve_next(pp) = pve_next(e))

/* bits held in the ptep pointer field */

#define pve_get_ptep(pve) \
	((pve)->pve_ptep)

#define pve_set_ptep(pve, ptep_new) \
	do { \
		(pve)->pve_ptep = (ptep_new); \
	} while (0)

/* PTEP Define Macros */

/* mask for page descriptor index */
#define ARM_TT_PT_INDEX_MASK            ARM_PGMASK

#if (__ARM_VMSA__ == 7)

/*
 * Shift value used for reconstructing the virtual address for a PTE.
 */
#define ARM_TT_PT_ADDR_SHIFT            (10U)

#define ptep_get_pmap(ptep) \
	((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))

#else

#if (ARM_PGSHIFT == 12)
/*
 * Shift value used for reconstructing the virtual address for a PTE.
 */
#define ARM_TT_PT_ADDR_SHIFT            (9ULL)
#else
/*
 * Shift value used for reconstructing the virtual address for a PTE.
 */
#define ARM_TT_PT_ADDR_SHIFT            (11ULL)
#endif

#define ptep_get_pmap(ptep) \
	((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))

#endif

#define ptep_get_ptd(ptep) \
	((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))


/* PVH Define Macros */

#define PVH_TYPE_NULL        0x0UL
#define PVH_TYPE_PVEP        0x1UL
#define PVH_TYPE_PTEP        0x2UL
#define PVH_TYPE_PTDP        0x3UL

#define PVH_TYPE_MASK        (0x3UL)

#ifdef __arm64__

/* All flags listed below are stored in the PV head pointer unless otherwise noted */
#define PVH_FLAG_IOMMU       0x4UL /* Stored in each PTE, or in PV head for single-PTE PV heads */
#define PVH_FLAG_IOMMU_TABLE (1ULL << 63) /* Stored in each PTE, or in PV head for single-PTE PV heads */
#define PVH_FLAG_CPU         (1ULL << 62)
#define PVH_LOCK_BIT         61
#define PVH_FLAG_LOCK        (1ULL << PVH_LOCK_BIT)
#define PVH_FLAG_EXEC        (1ULL << 60)
#define PVH_FLAG_LOCKDOWN    (1ULL << 59)
#define PVH_FLAG_HASHED      (1ULL << 58) /* Used to mark that a page has been hashed into the hibernation image. */
#define PVH_HIGH_FLAGS       (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN | PVH_FLAG_HASHED)

#else  /* !__arm64__ */

#define PVH_LOCK_BIT         31
#define PVH_FLAG_LOCK        (1UL << PVH_LOCK_BIT)
#define PVH_HIGH_FLAGS       PVH_FLAG_LOCK

#endif

#define PVH_LIST_MASK   (~PVH_TYPE_MASK)

#define pvh_test_type(h, b)                                                     \
	((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))

#define pvh_ptep(h)                                                             \
	((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))

#define pvh_list(h)                                                             \
	((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))

#define pvh_get_flags(h)                                                        \
	(*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)

#define pvh_set_flags(h, f)                                                     \
	do {                                                                    \
		os_atomic_store((vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
		    relaxed);                                                   \
	} while (0)

#define pvh_update_head(h, e, t)                                                \
	do {                                                                    \
		assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK);                    \
		os_atomic_store((vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
		    relaxed);                                                   \
	} while (0)

#define pvh_update_head_unlocked(h, e, t)                                       \
	do {                                                                    \
		assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK));                 \
		*(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK; \
	} while (0)

#define pvh_add(h, e)                                                   \
	do {                                                            \
		assert(!pvh_test_type((h), PVH_TYPE_PTEP));             \
		pve_next(e) = pvh_list(h);                              \
		pvh_update_head((h), (e), PVH_TYPE_PVEP);               \
	} while (0)

#define pvh_remove(h, p, e)                                             \
	do {                                                            \
		assert(!PVE_NEXT_IS_ALTACCT(pve_next((e))));            \
		if ((p) == (h)) {                                       \
			if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
				pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
			} else {                                        \
				pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
			}                                               \
		} else {                                                \
			/*                                              \
			 * PMAP LEDGERS:                                \
			 * preserve the "alternate accounting" bit      \
			 * when updating "p" (the previous entry's      \
			 * "pve_next").                                 \
			 */                                             \
			boolean_t __is_altacct;                         \
			__is_altacct = PVE_NEXT_IS_ALTACCT(*(p));       \
			*(p) = PVE_NEXT_PTR(pve_next((e)));             \
			if (__is_altacct) {                             \
				PVE_NEXT_SET_ALTACCT((p));              \
			} else {                                        \
				PVE_NEXT_CLR_ALTACCT((p));              \
			}                                               \
		}                                                       \
	} while (0)
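
/*
 * Editor's illustrative sketch (not part of the original source): a pattern
 * the macros above are built for, shown with a hypothetical helper name.
 * The high PVH flags live in the head pointer itself, so code that rewrites
 * the head while holding the PVH lock typically snapshots the flags, updates
 * the head, and then restores the flags.
 */
__unused static inline void
pvh_preserve_flags_example(pv_entry_t **pv_h, pv_entry_t *new_head)
{
	/* assumes the PVH lock for this head is already held */
	vm_offset_t flags = pvh_get_flags(pv_h);
	pvh_update_head(pv_h, new_head, PVH_TYPE_PVEP);
	pvh_set_flags(pv_h, flags);
}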

/* PPATTR Define Macros */

#define ppattr_set_bits(h, b) os_atomic_or((h), (pp_attr_t)(b), acq_rel)
#define ppattr_clear_bits(h, b) os_atomic_andnot((h), (pp_attr_t)(b), acq_rel)

#define ppattr_test_bits(h, b)                                                          \
	((*(h) & (pp_attr_t)(b)) == (pp_attr_t)(b))

#define pa_set_bits(x, b)                                                               \
	do {                                                                            \
		if (pa_valid(x)) {                                                      \
			ppattr_set_bits(&pp_attr_table[pa_index(x)],                    \
			    (b));                                                       \
		}                                                                       \
	} while (0)

#define pa_test_bits(x, b)                                                              \
	(pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
	    (b)) : FALSE)

#define pa_clear_bits(x, b)                                                             \
	do {                                                                            \
		if (pa_valid(x)) {                                                      \
			ppattr_clear_bits(&pp_attr_table[pa_index(x)],                  \
			    (b));                                                       \
		}                                                                       \
	} while (0)

#define pa_set_modify(x)                                                                \
	pa_set_bits(x, PP_ATTR_MODIFIED)

#define pa_clear_modify(x)                                                              \
	pa_clear_bits(x, PP_ATTR_MODIFIED)

#define pa_set_reference(x)                                                             \
	pa_set_bits(x, PP_ATTR_REFERENCED)

#define pa_clear_reference(x)                                                           \
	pa_clear_bits(x, PP_ATTR_REFERENCED)
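
/*
 * Editor's illustrative sketch (not part of the original source): the
 * reference/modify bookkeeping these macros provide.  The helper name is
 * hypothetical; it records a write access to a physical page, and is a no-op
 * for addresses outside the managed range because pa_set_bits() checks
 * pa_valid() internally.
 */
__unused static inline void
pa_note_write_example(pmap_paddr_t pa)
{
	pa_set_reference(pa);
	pa_set_modify(pa);
}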

#if XNU_MONITOR
#define pa_set_monitor(x) \
	pa_set_bits((x), PP_ATTR_MONITOR)

#define pa_clear_monitor(x) \
	pa_clear_bits((x), PP_ATTR_MONITOR)

#define pa_test_monitor(x) \
	pa_test_bits((x), PP_ATTR_MONITOR)

#define pa_set_no_monitor(x) \
	pa_set_bits((x), PP_ATTR_NO_MONITOR)

#define pa_clear_no_monitor(x) \
	pa_clear_bits((x), PP_ATTR_NO_MONITOR)

#define pa_test_no_monitor(x) \
	pa_test_bits((x), PP_ATTR_NO_MONITOR)
#endif

#define IS_INTERNAL_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
#define SET_INTERNAL_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
#define CLR_INTERNAL_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)

#define IS_REUSABLE_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
#define SET_REUSABLE_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
#define CLR_REUSABLE_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)

#define IS_ALTACCT_PAGE(pai, pve_p) \
	(((pve_p) == NULL) \
	? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
	: PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
#define SET_ALTACCT_PAGE(pai, pve_p) \
	if ((pve_p) == NULL) { \
		ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
	} else { \
		PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
	}
#define CLR_ALTACCT_PAGE(pai, pve_p) \
	if ((pve_p) == NULL) { \
		ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
	} else { \
		PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
	}

#define IS_REFFAULT_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
#define SET_REFFAULT_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
#define CLR_REFFAULT_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)

#define IS_MODFAULT_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
#define SET_MODFAULT_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
#define CLR_MODFAULT_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)

#define tte_get_ptd(tte) \
	((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))

#if (__ARM_VMSA__ == 7)

#define tte_index(pmap, pt_attr, addr) \
	ttenum((addr))

#define pte_index(pmap, pt_attr, addr) \
	ptenum((addr))

#else /* (__ARM_VMSA__ == 7) */

#define ttn_index(pmap, pt_attr, addr, pt_level) \
	(((addr) & (pt_attr)->pta_level_info[(pt_level)].index_mask) >> (pt_attr)->pta_level_info[(pt_level)].shift)

#define tt0_index(pmap, pt_attr, addr) \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L0_LEVEL)

#define tt1_index(pmap, pt_attr, addr) \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L1_LEVEL)

#define tt2_index(pmap, pt_attr, addr) \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L2_LEVEL)

#define tt3_index(pmap, pt_attr, addr) \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L3_LEVEL)

#define tte_index(pmap, pt_attr, addr) \
	tt2_index((pmap), (pt_attr), (addr))

#define pte_index(pmap, pt_attr, addr) \
	tt3_index((pmap), (pt_attr), (addr))

#endif /* (__ARM_VMSA__ == 7) */
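
/*
 * Editor's illustrative sketch (not part of the original source): how a
 * virtual address decomposes with the index macros above.  The helper name
 * is hypothetical; it returns the index of the leaf PTE within its page
 * table for the given address and pmap.
 */
__unused static inline unsigned int
pte_index_example(pmap_t pmap, vm_map_address_t addr)
{
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	return (unsigned int)pte_index(pmap, pt_attr, addr);
}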

static inline ptd_info_t *
ptd_get_info(pt_desc_t *ptd, const tt_entry_t *ttep)
{
	assert(ptd->ptd_info[0].refcnt != PT_DESC_IOMMU_REFCOUNT);
#if PT_INDEX_MAX == 1
	#pragma unused(ttep)
	return &ptd->ptd_info[0];
#else
	uint64_t pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(ptd->pmap));
	vm_offset_t ttep_page = (vm_offset_t)ttep >> pmap_page_shift;
	unsigned int ttep_index = ttep_page & ((1U << (PAGE_SHIFT - pmap_page_shift)) - 1);
	assert(ttep_index < PT_INDEX_MAX);
	return &ptd->ptd_info[ttep_index];
#endif
}

static inline ptd_info_t *
ptep_get_info(const pt_entry_t *ptep)
{
	return ptd_get_info(ptep_get_ptd(ptep), ptep);
}

static inline vm_map_address_t
ptep_get_va(const pt_entry_t *ptep)
{
	pv_entry_t **pv_h;
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(ptep_get_pmap(ptep));
	pv_h = pai_to_pvh(pa_index(ml_static_vtop(((vm_offset_t)ptep))));

	assert(pvh_test_type(pv_h, PVH_TYPE_PTDP));
	pt_desc_t *ptdp = (pt_desc_t *)(pvh_list(pv_h));

	vm_map_address_t va = ptd_get_info(ptdp, ptep)->va;
	vm_offset_t ptep_index = ((vm_offset_t)ptep & pt_attr_leaf_offmask(pt_attr)) / sizeof(*ptep);

	va += (ptep_index << pt_attr_leaf_shift(pt_attr));

	return va;
}

static inline void
pte_set_wired(pmap_t pmap, pt_entry_t *ptep, boolean_t wired)
{
	if (wired) {
		*ptep |= ARM_PTE_WIRED;
	} else {
		*ptep &= ~ARM_PTE_WIRED;
	}
	/*
	 * Do not track wired page count for kernel pagetable pages.  Kernel mappings are
	 * not guaranteed to have PTDs in the first place, and kernel pagetable pages are
	 * not subject to this accounting anyway.
	 */
	if (pmap == kernel_pmap) {
		return;
	}
	unsigned short *ptd_wiredcnt_ptr;
	ptd_wiredcnt_ptr = &(ptep_get_info(ptep)->wiredcnt);
	if (wired) {
		os_atomic_add(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
	} else {
		unsigned short prev_wired = os_atomic_sub_orig(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
		if (__improbable(prev_wired == 0)) {
			panic("pmap %p (pte %p): wired count underflow", pmap, ptep);
		}
	}
}

/*
 * Lock on pmap system
 */

lck_grp_t pmap_lck_grp MARK_AS_PMAP_DATA;

static inline void
pmap_lock_init(pmap_t pmap)
{
	lck_rw_init(&pmap->rwlock, &pmap_lck_grp, 0);
	pmap->rwlock.lck_rw_can_sleep = FALSE;
}

static inline void
pmap_lock_destroy(pmap_t pmap)
{
	lck_rw_destroy(&pmap->rwlock, &pmap_lck_grp);
}

static inline void
pmap_lock(pmap_t pmap)
{
#if !XNU_MONITOR
	mp_disable_preemption();
#endif
	lck_rw_lock_exclusive(&pmap->rwlock);
}

static inline void
pmap_lock_ro(pmap_t pmap)
{
#if !XNU_MONITOR
	mp_disable_preemption();
#endif
	lck_rw_lock_shared(&pmap->rwlock);
}

static inline void
pmap_unlock(pmap_t pmap)
{
	lck_rw_unlock_exclusive(&pmap->rwlock);
#if !XNU_MONITOR
	mp_enable_preemption();
#endif
}

static inline void
pmap_unlock_ro(pmap_t pmap)
{
	lck_rw_unlock_shared(&pmap->rwlock);
#if !XNU_MONITOR
	mp_enable_preemption();
#endif
}

static inline bool
pmap_try_lock(pmap_t pmap)
{
	bool ret;

#if !XNU_MONITOR
	mp_disable_preemption();
#endif
	ret = lck_rw_try_lock_exclusive(&pmap->rwlock);
	if (!ret) {
#if !XNU_MONITOR
		mp_enable_preemption();
#endif
	}

	return ret;
}

//assert that ONLY READ lock is held
__unused static inline void
pmap_assert_locked_r(__unused pmap_t pmap)
{
	lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_SHARED);
}

//assert that ONLY WRITE lock is held
__unused static inline void
pmap_assert_locked_w(__unused pmap_t pmap)
{
	lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_EXCLUSIVE);
}

//assert that either READ or WRITE lock is held
__unused static inline void
pmap_assert_locked_any(__unused pmap_t pmap)
{
	lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_HELD);
}

#if defined(__arm64__)
#define PVH_LOCK_WORD 1 /* Assumes little-endian */
#else
#define PVH_LOCK_WORD 0
#endif

#define ASSERT_PVH_LOCKED(index)                                                        \
	do {                                                                            \
		assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK);            \
	} while (0)

#define LOCK_PVH(index)                                                                 \
	do {                                                                            \
		pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
	} while (0)

#define UNLOCK_PVH(index)                                                               \
	do {                                                                            \
		ASSERT_PVH_LOCKED(index);                                               \
		pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
	} while (0)

#define PMAP_UPDATE_TLBS(pmap, s, e, strong) {                                          \
	pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (size_t)((e) - (s)), pmap);    \
	pmap_sync_tlb(strong);                                                          \
}

#define FLUSH_PTE_RANGE(spte, epte)                                                     \
	__builtin_arm_dmb(DMB_ISH);

#define FLUSH_PTE(pte_p)                                                                \
	__builtin_arm_dmb(DMB_ISH);

#define FLUSH_PTE_STRONG(pte_p)                                                         \
	__builtin_arm_dsb(DSB_ISH);

#define FLUSH_PTE_RANGE_STRONG(spte, epte)                                              \
	__builtin_arm_dsb(DSB_ISH);

#define WRITE_PTE_FAST(pte_p, pte_entry)                                                \
	__unreachable_ok_push                                                           \
	if (TEST_PAGE_RATIO_4) {                                                        \
		if (((unsigned)(pte_p)) & 0x1f) {                                       \
			panic("%s: WRITE_PTE_FAST is unaligned, "                       \
			    "pte_p=%p, pte_entry=%p",                                   \
			    __func__,                                                   \
			    pte_p, (void*)pte_entry);                                   \
		}                                                                       \
		if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) {        \
			*(pte_p) = (pte_entry);                                         \
			*((pte_p)+1) = (pte_entry);                                     \
			*((pte_p)+2) = (pte_entry);                                     \
			*((pte_p)+3) = (pte_entry);                                     \
		} else {                                                                \
			*(pte_p) = (pte_entry);                                         \
			*((pte_p)+1) = (pte_entry) | 0x1000;                            \
			*((pte_p)+2) = (pte_entry) | 0x2000;                            \
			*((pte_p)+3) = (pte_entry) | 0x3000;                            \
		}                                                                       \
	} else {                                                                        \
		*(pte_p) = (pte_entry);                                                 \
	}                                                                               \
	__unreachable_ok_pop

#define WRITE_PTE(pte_p, pte_entry)                                                     \
	WRITE_PTE_FAST(pte_p, pte_entry);                                               \
	FLUSH_PTE(pte_p);

#define WRITE_PTE_STRONG(pte_p, pte_entry)                                              \
	WRITE_PTE_FAST(pte_p, pte_entry);                                               \
	FLUSH_PTE_STRONG(pte_p);
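
/*
 * Editor's illustrative sketch (not part of the original source): the
 * write-then-flush ordering these macros are meant to be used with.  The
 * helper name and parameters are hypothetical; a PTE is published with a
 * store plus barrier, and any stale TLB entries for the covered VA are then
 * invalidated.
 */
__unused static inline void
pmap_write_pte_and_flush_example(pmap_t pmap, pt_entry_t *ptep, pt_entry_t new_pte,
    vm_map_address_t va)
{
	WRITE_PTE_STRONG(ptep, new_pte);
	PMAP_UPDATE_TLBS(pmap, va, va + pt_attr_page_size(pmap_get_pt_attr(pmap)), false);
}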

/*
 * Other useful macros.
 */
#define current_pmap()                                                                  \
	(vm_map_pmap(current_thread()->map))

#if XNU_MONITOR
/*
 * PPL-related macros.
 */
#define ARRAY_ELEM_PTR_IS_VALID(_ptr_, _elem_size_, _array_begin_, _array_end_) \
	(((_ptr_) >= (typeof(_ptr_))_array_begin_) &&                           \
	((_ptr_) < (typeof(_ptr_))_array_end_) &&                               \
	!((((void *)(_ptr_)) - ((void *)_array_begin_)) % (_elem_size_)))

#define PMAP_PTR_IS_VALID(x) ARRAY_ELEM_PTR_IS_VALID(x, sizeof(struct pmap), pmap_array_begin, pmap_array_end)

#define USER_PMAP_IS_VALID(x) (PMAP_PTR_IS_VALID(x) && (os_atomic_load(&(x)->ref_count, relaxed) > 0))

#define VALIDATE_PMAP(x)                                                \
	if (__improbable(((x) != kernel_pmap) && !USER_PMAP_IS_VALID(x))) \
	        panic("%s: invalid pmap %p", __func__, (x));

#define VALIDATE_LEDGER_PTR(x) \
	if (__improbable(!ARRAY_ELEM_PTR_IS_VALID(x, sizeof(void *), pmap_ledger_ptr_array_begin, pmap_ledger_ptr_array_end))) \
	        panic("%s: invalid ledger ptr %p", __func__, (x));

#define ARRAY_ELEM_INDEX(x, _elem_size_, _array_begin_) ((uint64_t)((((void *)(x)) - (_array_begin_)) / (_elem_size_)))

static inline void
pmap_ledger_validate(void * ledger)
{
	uint64_t array_index;
	pmap_ledger_t ** ledger_ptr_array_ptr = ((pmap_ledger_t*)ledger)->back_ptr;
	VALIDATE_LEDGER_PTR(ledger_ptr_array_ptr);
	array_index = ARRAY_ELEM_INDEX(ledger_ptr_array_ptr, sizeof(pmap_ledger_t *), pmap_ledger_ptr_array_begin);

	if (array_index >= MAX_PMAP_LEDGERS) {
		panic("%s: ledger %p array index invalid, index was %#llx", __func__, ledger, array_index);
	}

	pmap_ledger_t *ledger_ptr = *ledger_ptr_array_ptr;

	if (__improbable(ledger_ptr != ledger)) {
		panic("%s: ledger pointer mismatch, %p != %p", __func__, ledger, ledger_ptr);
	}
}

#else /* XNU_MONITOR */

#define VALIDATE_PMAP(x) assert((x) != NULL);

#endif /* XNU_MONITOR */

#if DEVELOPMENT || DEBUG

/*
 * Trace levels are controlled by a bitmask in which each
 * level can be enabled/disabled by the (1<<level) position
 * in the mask.
 * Level 1: pmap lifecycle (create/destroy/switch)
 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
 * Level 3: internal state management (attributes/fast-fault)
 * Level 4-7: TTE traces for paging levels 0-3.  TTBs are traced at level 4.
 */

SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;

#define PMAP_TRACE(level, ...) \
	if (__improbable((1 << (level)) & pmap_trace_mask)) { \
		KDBG_RELEASE(__VA_ARGS__); \
	}
#else /* DEVELOPMENT || DEBUG */

#define PMAP_TRACE(level, ...)

#endif /* DEVELOPMENT || DEBUG */
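
/*
 * Editor's illustrative note (not part of the original source): with
 * pmap_trace_mask set to 0x6 (bits 1 and 2 set), PMAP_TRACE(1, ...) and
 * PMAP_TRACE(2, ...) emit kdebug events, while PMAP_TRACE(3, ...) and the
 * TTE-level traces remain disabled.
 */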

/*
 * Internal function prototypes (forward declarations).
 */

typedef enum {
	PV_ALLOC_SUCCESS,
	PV_ALLOC_RETRY,
	PV_ALLOC_FAIL
} pv_alloc_return_t;

static pv_alloc_return_t pv_alloc(
	pmap_t pmap,
	unsigned int pai,
	pv_entry_t **pvepp);

static void ptd_bootstrap(
	pt_desc_t *ptdp, unsigned int ptd_cnt);

static inline pt_desc_t *ptd_alloc_unlinked(void);

static pt_desc_t *ptd_alloc(pmap_t pmap);

static void ptd_deallocate(pt_desc_t *ptdp);

static void ptd_init(
	pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);

static void pmap_set_reference(
	ppnum_t pn);

pmap_paddr_t pmap_vtophys(
	pmap_t pmap, addr64_t va);

void pmap_switch_user_ttb(
	pmap_t pmap);

static kern_return_t pmap_expand(
	pmap_t, vm_map_address_t, unsigned int options, unsigned int level);

static int pmap_remove_range(
	pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);

static int pmap_remove_range_options(
	pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, bool *, int);

static tt_entry_t *pmap_tt1_allocate(
	pmap_t, vm_size_t, unsigned int);

#define PMAP_TT_ALLOCATE_NOWAIT         0x1

static void pmap_tt1_deallocate(
	pmap_t, tt_entry_t *, vm_size_t, unsigned int);

#define PMAP_TT_DEALLOCATE_NOBLOCK      0x1

static kern_return_t pmap_tt_allocate(
	pmap_t, tt_entry_t **, unsigned int, unsigned int);

#define PMAP_TT_ALLOCATE_NOWAIT         0x1

static void pmap_tte_deallocate(
	pmap_t, tt_entry_t *, unsigned int);

const unsigned int arm_hardware_page_size = ARM_PGBYTES;
const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;

#define PMAP_TT_DEALLOCATE_NOBLOCK      0x1

#if (__ARM_VMSA__ > 7)

static inline tt_entry_t *pmap_tt1e(
	pmap_t, vm_map_address_t);

static inline tt_entry_t *pmap_tt2e(
	pmap_t, vm_map_address_t);

static inline pt_entry_t *pmap_tt3e(
	pmap_t, vm_map_address_t);

static inline pt_entry_t *pmap_ttne(
	pmap_t, unsigned int, vm_map_address_t);

static void pmap_unmap_sharedpage(
	pmap_t pmap);

static boolean_t
pmap_is_64bit(pmap_t);

#endif /* (__ARM_VMSA__ > 7) */

static inline tt_entry_t *pmap_tte(
	pmap_t, vm_map_address_t);

static inline pt_entry_t *pmap_pte(
	pmap_t, vm_map_address_t);

static void pmap_update_cache_attributes_locked(
	ppnum_t, unsigned int);

static boolean_t arm_clear_fast_fault(
	ppnum_t ppnum,
	vm_prot_t fault_type);

static pmap_paddr_t pmap_pages_reclaim(
	void);

static kern_return_t pmap_pages_alloc_zeroed(
	pmap_paddr_t    *pa,
	unsigned        size,
	unsigned        option);

#define PMAP_PAGES_ALLOCATE_NOWAIT              0x1
#define PMAP_PAGES_RECLAIM_NOWAIT               0x2

static void pmap_pages_free(
	pmap_paddr_t    pa,
	unsigned        size);

static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);

static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);

static void pmap_trim_self(pmap_t pmap);
static void pmap_trim_subord(pmap_t subord);

#if __APRR_SUPPORTED__
static uint64_t pte_to_xprr_perm(pt_entry_t pte);
static pt_entry_t xprr_perm_to_pte(uint64_t perm);
#endif /* __APRR_SUPPORTED__*/

/*
 * Temporary prototypes, while we wait for pmap_enter to move to taking an
 * address instead of a page number.
 */
static kern_return_t
pmap_enter_addr(
	pmap_t pmap,
	vm_map_address_t v,
	pmap_paddr_t pa,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired);

kern_return_t
pmap_enter_options_addr(
	pmap_t pmap,
	vm_map_address_t v,
	pmap_paddr_t pa,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options,
	__unused void   *arg);

#ifdef CONFIG_XNUPOST
kern_return_t pmap_test(void);
#endif /* CONFIG_XNUPOST */
1970 static pmap_paddr_t
pmap_alloc_page_for_kern(unsigned int options
);
1971 static void pmap_alloc_page_for_ppl(unsigned int options
);
/*
 * This macro generates prototypes for the *_internal functions, which
 * represent the PPL interface.  When the PPL is enabled, this will also
 * generate prototypes for the PPL entrypoints (*_ppl), as well as generating
 * the assembly stubs used to enter the PPL.
 */
#if XNU_MONITOR
#define GEN_ASM_NAME(__function_name) _##__function_name##_ppl

#define PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
	static __return_type __function_name##_internal __function_args; \
	extern __return_type __function_name##_ppl __function_args; \
	__asm__ (".text \n" \
	         ".globl " #__assembly_function_name "\n" \
	         #__assembly_function_name ":\n" \
	         "mov x15, " #__function_index "\n" \
	         "b _aprr_ppl_enter\n")

#define PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
	PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name)

#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
	PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, GEN_ASM_NAME(__function_name))
#else /* XNU_MONITOR */
#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
	static __return_type __function_name##_internal __function_args
#endif /* XNU_MONITOR */
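
/*
 * For example, the arm_fast_fault invocation below declares
 * arm_fast_fault_internal(); when XNU_MONITOR is enabled it additionally
 * declares arm_fast_fault_ppl() and emits the _arm_fast_fault_ppl stub,
 * which loads ARM_FAST_FAULT_INDEX into x15 and branches to _aprr_ppl_enter.
 */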
2002 PMAP_SUPPORT_PROTOTYPES(
2004 arm_fast_fault
, (pmap_t pmap
,
2005 vm_map_address_t va
,
2006 vm_prot_t fault_type
,
2008 bool from_user
), ARM_FAST_FAULT_INDEX
);
2011 PMAP_SUPPORT_PROTOTYPES(
2013 arm_force_fast_fault
, (ppnum_t ppnum
,
2014 vm_prot_t allow_mode
,
2015 int options
), ARM_FORCE_FAST_FAULT_INDEX
);
MARK_AS_PMAP_TEXT static boolean_t
arm_force_fast_fault_with_flush_range(
	ppnum_t ppnum,
	vm_prot_t allow_mode,
	int options,
	pmap_tlb_flush_range_t *flush_range);
2024 PMAP_SUPPORT_PROTOTYPES(
2026 mapping_free_prime
, (void), MAPPING_FREE_PRIME_INDEX
);
2028 PMAP_SUPPORT_PROTOTYPES(
2030 pmap_batch_set_cache_attributes
, (ppnum_t pn
,
2031 unsigned int cacheattr
,
2032 unsigned int page_cnt
,
2033 unsigned int page_index
,
2035 unsigned int *res
), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX
);
2037 PMAP_SUPPORT_PROTOTYPES(
2039 pmap_change_wiring
, (pmap_t pmap
,
2041 boolean_t wired
), PMAP_CHANGE_WIRING_INDEX
);
2043 PMAP_SUPPORT_PROTOTYPES(
2045 pmap_create_options
, (ledger_t ledger
,
2048 kern_return_t
* kr
), PMAP_CREATE_INDEX
);
2050 PMAP_SUPPORT_PROTOTYPES(
2052 pmap_destroy
, (pmap_t pmap
), PMAP_DESTROY_INDEX
);
2054 PMAP_SUPPORT_PROTOTYPES(
2056 pmap_enter_options
, (pmap_t pmap
,
2060 vm_prot_t fault_type
,
2063 unsigned int options
), PMAP_ENTER_OPTIONS_INDEX
);
2065 PMAP_SUPPORT_PROTOTYPES(
2067 pmap_find_pa
, (pmap_t pmap
,
2068 addr64_t va
), PMAP_FIND_PA_INDEX
);
2070 #if (__ARM_VMSA__ > 7)
2071 PMAP_SUPPORT_PROTOTYPES(
2073 pmap_insert_sharedpage
, (pmap_t pmap
), PMAP_INSERT_SHAREDPAGE_INDEX
);
2077 PMAP_SUPPORT_PROTOTYPES(
2079 pmap_is_empty
, (pmap_t pmap
,
2080 vm_map_offset_t va_start
,
2081 vm_map_offset_t va_end
), PMAP_IS_EMPTY_INDEX
);
2084 PMAP_SUPPORT_PROTOTYPES(
2086 pmap_map_cpu_windows_copy
, (ppnum_t pn
,
2088 unsigned int wimg_bits
), PMAP_MAP_CPU_WINDOWS_COPY_INDEX
);
2090 PMAP_SUPPORT_PROTOTYPES(
2092 pmap_nest
, (pmap_t grand
,
2095 uint64_t size
), PMAP_NEST_INDEX
);
2097 PMAP_SUPPORT_PROTOTYPES(
2099 pmap_page_protect_options
, (ppnum_t ppnum
,
2101 unsigned int options
), PMAP_PAGE_PROTECT_OPTIONS_INDEX
);
2103 PMAP_SUPPORT_PROTOTYPES(
2105 pmap_protect_options
, (pmap_t pmap
,
2106 vm_map_address_t start
,
2107 vm_map_address_t end
,
2109 unsigned int options
,
2110 void *args
), PMAP_PROTECT_OPTIONS_INDEX
);
2112 PMAP_SUPPORT_PROTOTYPES(
2114 pmap_query_page_info
, (pmap_t pmap
,
2116 int *disp_p
), PMAP_QUERY_PAGE_INFO_INDEX
);
2118 PMAP_SUPPORT_PROTOTYPES(
2120 pmap_query_resident
, (pmap_t pmap
,
2121 vm_map_address_t start
,
2122 vm_map_address_t end
,
2123 mach_vm_size_t
* compressed_bytes_p
), PMAP_QUERY_RESIDENT_INDEX
);
2125 PMAP_SUPPORT_PROTOTYPES(
2127 pmap_reference
, (pmap_t pmap
), PMAP_REFERENCE_INDEX
);
2129 PMAP_SUPPORT_PROTOTYPES(
2131 pmap_remove_options
, (pmap_t pmap
,
2132 vm_map_address_t start
,
2133 vm_map_address_t end
,
2134 int options
), PMAP_REMOVE_OPTIONS_INDEX
);
2136 PMAP_SUPPORT_PROTOTYPES(
2138 pmap_return
, (boolean_t do_panic
,
2139 boolean_t do_recurse
), PMAP_RETURN_INDEX
);
2141 PMAP_SUPPORT_PROTOTYPES(
2143 pmap_set_cache_attributes
, (ppnum_t pn
,
2144 unsigned int cacheattr
), PMAP_SET_CACHE_ATTRIBUTES_INDEX
);
2146 PMAP_SUPPORT_PROTOTYPES(
2148 pmap_update_compressor_page
, (ppnum_t pn
,
2149 unsigned int prev_cacheattr
, unsigned int new_cacheattr
), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX
);
2151 PMAP_SUPPORT_PROTOTYPES(
2153 pmap_set_nested
, (pmap_t pmap
), PMAP_SET_NESTED_INDEX
);
2155 #if MACH_ASSERT || XNU_MONITOR
2156 PMAP_SUPPORT_PROTOTYPES(
2158 pmap_set_process
, (pmap_t pmap
,
2160 char *procname
), PMAP_SET_PROCESS_INDEX
);
2163 PMAP_SUPPORT_PROTOTYPES(
2165 pmap_unmap_cpu_windows_copy
, (unsigned int index
), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX
);
2167 PMAP_SUPPORT_PROTOTYPES(
2169 pmap_unnest_options
, (pmap_t grand
,
2172 unsigned int option
), PMAP_UNNEST_OPTIONS_INDEX
);
2175 PMAP_SUPPORT_PROTOTYPES(
2177 pmap_cpu_data_init
, (unsigned int cpu_number
), PMAP_CPU_DATA_INIT_INDEX
);
2180 PMAP_SUPPORT_PROTOTYPES(
2182 phys_attribute_set
, (ppnum_t pn
,
2183 unsigned int bits
), PHYS_ATTRIBUTE_SET_INDEX
);
2186 PMAP_SUPPORT_PROTOTYPES(
2188 pmap_mark_page_as_ppl_page
, (pmap_paddr_t pa
, bool initially_free
), PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX
);
2191 PMAP_SUPPORT_PROTOTYPES(
2193 phys_attribute_clear
, (ppnum_t pn
,
2196 void *arg
), PHYS_ATTRIBUTE_CLEAR_INDEX
);
2198 #if __ARM_RANGE_TLBI__
2199 PMAP_SUPPORT_PROTOTYPES(
2201 phys_attribute_clear_range
, (pmap_t pmap
,
2202 vm_map_address_t start
,
2203 vm_map_address_t end
,
2205 unsigned int options
), PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX
);
2206 #endif /* __ARM_RANGE_TLBI__ */
2209 PMAP_SUPPORT_PROTOTYPES(
2211 pmap_switch
, (pmap_t pmap
), PMAP_SWITCH_INDEX
);
2213 PMAP_SUPPORT_PROTOTYPES(
2215 pmap_switch_user_ttb
, (pmap_t pmap
), PMAP_SWITCH_USER_TTB_INDEX
);
2217 PMAP_SUPPORT_PROTOTYPES(
2219 pmap_clear_user_ttb
, (void), PMAP_CLEAR_USER_TTB_INDEX
);
2222 PMAP_SUPPORT_PROTOTYPES(
2224 pmap_release_ppl_pages_to_kernel
, (void), PMAP_RELEASE_PAGES_TO_KERNEL_INDEX
);
2227 PMAP_SUPPORT_PROTOTYPES(
2229 pmap_set_vm_map_cs_enforced
, (pmap_t pmap
, bool new_value
), PMAP_SET_VM_MAP_CS_ENFORCED_INDEX
);
2231 PMAP_SUPPORT_PROTOTYPES(
2233 pmap_set_jit_entitled
, (pmap_t pmap
), PMAP_SET_JIT_ENTITLED_INDEX
);
2235 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
2236 PMAP_SUPPORT_PROTOTYPES(
2238 pmap_disable_user_jop
, (pmap_t pmap
), PMAP_DISABLE_USER_JOP_INDEX
);
2239 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
2241 PMAP_SUPPORT_PROTOTYPES(
2243 pmap_trim
, (pmap_t grand
,
2246 uint64_t size
), PMAP_TRIM_INDEX
);
2248 #if HAS_APPLE_PAC && XNU_MONITOR
2249 PMAP_SUPPORT_PROTOTYPES(
2251 pmap_sign_user_ptr
, (void *value
, ptrauth_key key
, uint64_t discriminator
, uint64_t jop_key
), PMAP_SIGN_USER_PTR
);
2252 PMAP_SUPPORT_PROTOTYPES(
2254 pmap_auth_user_ptr
, (void *value
, ptrauth_key key
, uint64_t discriminator
, uint64_t jop_key
), PMAP_AUTH_USER_PTR
);
2255 #endif /* HAS_APPLE_PAC && XNU_MONITOR */
2261 static void pmap_mark_page_as_ppl_page(pmap_paddr_t pa
);
2264 void pmap_footprint_suspend(vm_map_t map
,
2266 PMAP_SUPPORT_PROTOTYPES(
2268 pmap_footprint_suspend
, (vm_map_t map
,
2270 PMAP_FOOTPRINT_SUSPEND_INDEX
);
2273 PMAP_SUPPORT_PROTOTYPES(
2275 pmap_ledger_alloc_init
, (size_t),
2276 PMAP_LEDGER_ALLOC_INIT_INDEX
);
2278 PMAP_SUPPORT_PROTOTYPES(
2280 pmap_ledger_alloc
, (void),
2281 PMAP_LEDGER_ALLOC_INDEX
);
2283 PMAP_SUPPORT_PROTOTYPES(
2285 pmap_ledger_free
, (ledger_t
),
2286 PMAP_LEDGER_FREE_INDEX
);
2293 boolean_t pgtrace_enabled
= 0;
2296 queue_chain_t chain
;
2299 * pmap - pmap for below addresses
2300 * ova - original va page address
2301 * cva - clone va addresses for pre, target and post pages
2302 * cva_spte - clone saved ptes
2303 * range - trace range in this map
2304 * cloned - has been cloned or not
2307 vm_map_offset_t ova
;
2308 vm_map_offset_t cva
[3];
2309 pt_entry_t cva_spte
[3];
2315 } pmap_pgtrace_map_t
;
2317 static void pmap_pgtrace_init(void);
2318 static bool pmap_pgtrace_enter_clone(pmap_t pmap
, vm_map_offset_t va_page
, vm_map_offset_t start
, vm_map_offset_t end
);
2319 static void pmap_pgtrace_remove_clone(pmap_t pmap
, pmap_paddr_t pa_page
, vm_map_offset_t va_page
);
2320 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa
);
#if (__ARM_VMSA__ > 7)
/*
 * The low global vector page is mapped at a fixed alias.
 * Since the page size is 16k for H8 and newer we map the globals to a 16k
 * aligned address.  Readers of the globals (e.g. lldb, panic server) need
 * to check both addresses anyway for backward compatibility.  So for now
 * we leave H6 and H7 where they were.
 */
#if (ARM_PGSHIFT == 14)
#define LOWGLOBAL_ALIAS         (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
#else
#define LOWGLOBAL_ALIAS         (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
#endif
#else
#define LOWGLOBAL_ALIAS         (0xFFFF1000)
#endif
long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;
#if __has_feature(ptrauth_calls)
#define __ptrauth_ppl_handler __ptrauth(ptrauth_key_function_pointer, true, 0)
#else
#define __ptrauth_ppl_handler
#endif

/*
 * Table of function pointers used for PPL dispatch.
 */
const void * __ptrauth_ppl_handler const ppl_handler_table[PMAP_COUNT] = {
	[ARM_FAST_FAULT_INDEX] = arm_fast_fault_internal,
	[ARM_FORCE_FAST_FAULT_INDEX] = arm_force_fast_fault_internal,
	[MAPPING_FREE_PRIME_INDEX] = mapping_free_prime_internal,
	[PHYS_ATTRIBUTE_CLEAR_INDEX] = phys_attribute_clear_internal,
	[PHYS_ATTRIBUTE_SET_INDEX] = phys_attribute_set_internal,
	[PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX] = pmap_batch_set_cache_attributes_internal,
	[PMAP_CHANGE_WIRING_INDEX] = pmap_change_wiring_internal,
	[PMAP_CREATE_INDEX] = pmap_create_options_internal,
	[PMAP_DESTROY_INDEX] = pmap_destroy_internal,
	[PMAP_ENTER_OPTIONS_INDEX] = pmap_enter_options_internal,
	[PMAP_FIND_PA_INDEX] = pmap_find_pa_internal,
	[PMAP_INSERT_SHAREDPAGE_INDEX] = pmap_insert_sharedpage_internal,
	[PMAP_IS_EMPTY_INDEX] = pmap_is_empty_internal,
	[PMAP_MAP_CPU_WINDOWS_COPY_INDEX] = pmap_map_cpu_windows_copy_internal,
	[PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX] = pmap_mark_page_as_ppl_page_internal,
	[PMAP_NEST_INDEX] = pmap_nest_internal,
	[PMAP_PAGE_PROTECT_OPTIONS_INDEX] = pmap_page_protect_options_internal,
	[PMAP_PROTECT_OPTIONS_INDEX] = pmap_protect_options_internal,
	[PMAP_QUERY_PAGE_INFO_INDEX] = pmap_query_page_info_internal,
	[PMAP_QUERY_RESIDENT_INDEX] = pmap_query_resident_internal,
	[PMAP_REFERENCE_INDEX] = pmap_reference_internal,
	[PMAP_REMOVE_OPTIONS_INDEX] = pmap_remove_options_internal,
	[PMAP_RETURN_INDEX] = pmap_return_internal,
	[PMAP_SET_CACHE_ATTRIBUTES_INDEX] = pmap_set_cache_attributes_internal,
	[PMAP_UPDATE_COMPRESSOR_PAGE_INDEX] = pmap_update_compressor_page_internal,
	[PMAP_SET_NESTED_INDEX] = pmap_set_nested_internal,
	[PMAP_SET_PROCESS_INDEX] = pmap_set_process_internal,
	[PMAP_SWITCH_INDEX] = pmap_switch_internal,
	[PMAP_SWITCH_USER_TTB_INDEX] = pmap_switch_user_ttb_internal,
	[PMAP_CLEAR_USER_TTB_INDEX] = pmap_clear_user_ttb_internal,
	[PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX] = pmap_unmap_cpu_windows_copy_internal,
	[PMAP_UNNEST_OPTIONS_INDEX] = pmap_unnest_options_internal,
	[PMAP_FOOTPRINT_SUSPEND_INDEX] = pmap_footprint_suspend_internal,
	[PMAP_CPU_DATA_INIT_INDEX] = pmap_cpu_data_init_internal,
	[PMAP_RELEASE_PAGES_TO_KERNEL_INDEX] = pmap_release_ppl_pages_to_kernel_internal,
	[PMAP_SET_VM_MAP_CS_ENFORCED_INDEX] = pmap_set_vm_map_cs_enforced_internal,
	[PMAP_SET_JIT_ENTITLED_INDEX] = pmap_set_jit_entitled_internal,
	[PMAP_TRIM_INDEX] = pmap_trim_internal,
	[PMAP_LEDGER_ALLOC_INIT_INDEX] = pmap_ledger_alloc_init_internal,
	[PMAP_LEDGER_ALLOC_INDEX] = pmap_ledger_alloc_internal,
	[PMAP_LEDGER_FREE_INDEX] = pmap_ledger_free_internal,
#if HAS_APPLE_PAC && XNU_MONITOR
	[PMAP_SIGN_USER_PTR] = pmap_sign_user_ptr_internal,
	[PMAP_AUTH_USER_PTR] = pmap_auth_user_ptr_internal,
#endif /* HAS_APPLE_PAC && XNU_MONITOR */
#if __ARM_RANGE_TLBI__
	[PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX] = phys_attribute_clear_range_internal,
#endif /* __ARM_RANGE_TLBI__ */
#if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
	[PMAP_DISABLE_USER_JOP_INDEX] = pmap_disable_user_jop_internal,
#endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
};
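
/*
 * The indices loaded into x15 by the *_ppl stubs correspond to the entries
 * in ppl_handler_table, so the PPL entry path can dispatch to the matching
 * *_internal handler.  When ptrauth_calls is available, each entry is stored
 * as a signed function pointer (__ptrauth_ppl_handler).
 */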
/*
 * Allocates and initializes a per-CPU data structure for the pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_cpu_data_init_internal(unsigned int cpu_number)
{
	pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();

	/* Verify cacheline-aligned */
	assert(((vm_offset_t)pmap_cpu_data & ((1 << MAX_L2_CLINE) - 1)) == 0);
	if (pmap_cpu_data->cpu_number != PMAP_INVALID_CPU_NUM) {
		panic("%s: pmap_cpu_data->cpu_number=%u, cpu_number=%u",
		    __FUNCTION__, pmap_cpu_data->cpu_number, cpu_number);
	}
	pmap_cpu_data->cpu_number = cpu_number;
}
void
pmap_cpu_data_init(void)
{
#if XNU_MONITOR
	pmap_cpu_data_init_ppl(cpu_number());
#else
	pmap_cpu_data_init_internal(cpu_number());
#endif
}
2445 pmap_cpu_data_array_init(void)
2449 pmap_paddr_t ppl_cpu_save_area_cur
= 0;
2450 pt_entry_t
template, *pte_p
;
2451 vm_offset_t stack_va
= (vm_offset_t
)pmap_stacks_start
+ ARM_PGBYTES
;
2452 assert((pmap_stacks_start
!= NULL
) && (pmap_stacks_end
!= NULL
));
2453 pmap_stacks_start_pa
= avail_start
;
2455 for (i
= 0; i
< MAX_CPUS
; i
++) {
2456 for (vm_offset_t cur_va
= stack_va
; cur_va
< (stack_va
+ PPL_STACK_SIZE
); cur_va
+= ARM_PGBYTES
) {
2457 assert(cur_va
< (vm_offset_t
)pmap_stacks_end
);
2458 pte_p
= pmap_pte(kernel_pmap
, cur_va
);
2459 assert(*pte_p
== ARM_PTE_EMPTY
);
2460 template = pa_to_pte(avail_start
) | ARM_PTE_AF
| ARM_PTE_SH(SH_OUTER_MEMORY
) | ARM_PTE_TYPE
|
2461 ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT
) | xprr_perm_to_pte(XPRR_PPL_RW_PERM
);
2462 #if __ARM_KERNEL_PROTECT__
2463 template |= ARM_PTE_NG
;
2464 #endif /* __ARM_KERNEL_PROTECT__ */
2465 WRITE_PTE(pte_p
, template);
2466 __builtin_arm_isb(ISB_SY
);
2467 avail_start
+= ARM_PGBYTES
;
2470 kasan_map_shadow(stack_va
, PPL_STACK_SIZE
, false);
2472 pmap_cpu_data_array
[i
].cpu_data
.cpu_number
= PMAP_INVALID_CPU_NUM
;
2473 pmap_cpu_data_array
[i
].cpu_data
.ppl_state
= PPL_STATE_KERNEL
;
2474 pmap_cpu_data_array
[i
].cpu_data
.ppl_stack
= (void*)(stack_va
+ PPL_STACK_SIZE
);
2475 stack_va
+= (PPL_STACK_SIZE
+ ARM_PGBYTES
);
2478 pmap_stacks_end_pa
= avail_start
;
2480 ppl_cpu_save_area_start
= avail_start
;
2481 ppl_cpu_save_area_end
= ppl_cpu_save_area_start
;
2482 ppl_cpu_save_area_cur
= ppl_cpu_save_area_start
;
2484 for (i
= 0; i
< MAX_CPUS
; i
++) {
2485 while ((ppl_cpu_save_area_end
- ppl_cpu_save_area_cur
) < sizeof(arm_context_t
)) {
2486 avail_start
+= PAGE_SIZE
;
2487 ppl_cpu_save_area_end
= avail_start
;
2490 pmap_cpu_data_array
[i
].cpu_data
.save_area
= (arm_context_t
*)phystokv(ppl_cpu_save_area_cur
);
2491 ppl_cpu_save_area_cur
+= sizeof(arm_context_t
);
2495 pmap_cpu_data_init();
pmap_cpu_data_t *
pmap_get_cpu_data(void)
{
	pmap_cpu_data_t * pmap_cpu_data = NULL;

#if XNU_MONITOR
	extern pmap_cpu_data_t * ml_get_ppl_cpu_data(void);
	pmap_cpu_data = ml_get_ppl_cpu_data();
#else
	pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
#endif

	return pmap_cpu_data;
}
/*
 * pmap_set_range_xprr_perm takes a range (specified using start and end) that
 * falls within the physical aperture.  All mappings within this range have
 * their protections changed from those specified by the expected_perm to those
 * specified by the new_perm.
 */
static void
pmap_set_range_xprr_perm(vm_address_t start,
    vm_address_t end,
    unsigned int expected_perm,
    unsigned int new_perm)
{
#if (__ARM_VMSA__ == 7)
#error This function is not supported on older ARM hardware
#else
	vm_address_t va = 0;
	vm_address_t tte_start = 0;
	vm_address_t tte_end = 0;

	tt_entry_t *tte_p = NULL;
	pt_entry_t *pte_p = NULL;
	pt_entry_t *cpte_p = NULL;
	pt_entry_t *bpte_p = NULL;
	pt_entry_t *epte_p = NULL;

	pt_entry_t cpte = 0;
	pt_entry_t template = 0;

	/*
	 * Validate our arguments; any invalid argument will be grounds for a
	 * panic.
	 */
2553 if ((start
| end
) % ARM_PGBYTES
) {
2554 panic("%s: start or end not page aligned, "
2555 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2557 (void *)start
, (void *)end
, new_perm
, expected_perm
);
2561 panic("%s: start > end, "
2562 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2564 (void *)start
, (void *)end
, new_perm
, expected_perm
);
2567 bool in_physmap
= (start
>= physmap_base
) && (end
< physmap_end
);
2568 bool in_static
= (start
>= gVirtBase
) && (end
< static_memory_end
);
2570 if (!(in_physmap
|| in_static
)) {
2571 panic("%s: address not in static region or physical aperture, "
2572 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2574 (void *)start
, (void *)end
, new_perm
, expected_perm
);
2577 if ((new_perm
> XPRR_MAX_PERM
) || (expected_perm
> XPRR_MAX_PERM
)) {
2578 panic("%s: invalid XPRR index, "
2579 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2581 (void *)start
, (void *)end
, new_perm
, expected_perm
);
2585 * Walk over the PTEs for the given range, and set the protections on
2590 tte_end
= ((va
+ pt_attr_twig_size(native_pt_attr
)) & ~pt_attr_twig_offmask(native_pt_attr
));
2592 if (tte_end
> end
) {
2596 tte_p
= pmap_tte(pmap
, va
);
2599 * The physical aperture should not have holes.
2600 * The physical aperture should be contiguous.
2601 * Do not make eye contact with the physical aperture.
2603 if (tte_p
== NULL
) {
2604 panic("%s: physical aperture tte is NULL, "
2605 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2607 (void *)start
, (void *)end
, new_perm
, expected_perm
);
2612 if ((tte
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
2614 * Walk over the given L3 page table page and update the
2617 pte_p
= (pt_entry_t
*)ttetokv(tte
);
2618 bpte_p
= &pte_p
[pte_index(pmap
, native_pt_attr
, va
)];
2619 epte_p
= bpte_p
+ ((tte_end
- va
) >> pt_attr_leaf_shift(native_pt_attr
));
2621 for (cpte_p
= bpte_p
; cpte_p
< epte_p
;
2622 cpte_p
+= PAGE_SIZE
/ ARM_PGBYTES
, va
+= PAGE_SIZE
) {
2623 int pai
= (int)pa_index(pte_to_pa(*cpte_p
));
2628 * Every PTE involved should be valid, should
2629 * not have the hint bit set, and should have
2630 * Every valid PTE involved should
2631 * not have the hint bit set and should have
2632 * the expected APRR index.
2634 if ((cpte
& ARM_PTE_TYPE_MASK
) ==
2635 ARM_PTE_TYPE_FAULT
) {
2636 panic("%s: physical aperture PTE is invalid, va=%p, "
2637 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2640 (void *)start
, (void *)end
, new_perm
, expected_perm
);
2645 if (cpte
& ARM_PTE_HINT_MASK
) {
2646 panic("%s: physical aperture PTE has hint bit set, va=%p, cpte=0x%llx, "
2647 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2650 (void *)start
, (void *)end
, new_perm
, expected_perm
);
2653 if (pte_to_xprr_perm(cpte
) != expected_perm
) {
2654 panic("%s: perm=%llu does not match expected_perm, cpte=0x%llx, "
2655 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2657 pte_to_xprr_perm(cpte
), cpte
,
2658 (void *)start
, (void *)end
, new_perm
, expected_perm
);
2662 template &= ~ARM_PTE_XPRR_MASK
;
2663 template |= xprr_perm_to_pte(new_perm
);
2665 WRITE_PTE_STRONG(cpte_p
, template);
2669 panic("%s: tte=0x%llx is not a table type entry, "
2670 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2673 (void *)start
, (void *)end
, new_perm
, expected_perm
);
2679 PMAP_UPDATE_TLBS(pmap
, start
, end
, false);
2680 #endif /* (__ARM_VMSA__ == 7) */
/*
 * A convenience function for setting protections on a single page.
 */
static inline void
pmap_set_xprr_perm(vm_address_t page_kva,
    unsigned int expected_perm,
    unsigned int new_perm)
{
	pmap_set_range_xprr_perm(page_kva, page_kva + PAGE_SIZE, expected_perm, new_perm);
}
#endif /* XNU_MONITOR */
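
/*
 * pmap_set_xprr_perm() is how pages change hands between the kernel and the
 * PPL: pmap_mark_page_as_ppl_page_internal() below moves a page from
 * XPRR_KERN_RW_PERM to XPRR_PPL_RW_PERM, and pmap_mark_page_as_kernel_page()
 * performs the reverse transition.
 */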
/*
 * pmap_pages_reclaim(): return a page by freeing an active pagetable page.
 * To be eligible, a pt page must be assigned to a non-kernel pmap.
 * It must not have any wired PTEs and must contain at least one valid PTE.
 * If no eligible page is found in the pt page list, return 0.
 */
2706 boolean_t found_page
;
2711 * In a loop, check for a page in the reclaimed pt page list.
2712 * if one is present, unlink that page and return the physical page address.
2713 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
2714 * If found, invoke pmap_remove_range() on its pmap and address range then
2715 * deallocates that pt page. This will end up adding the pt page to the
2716 * reclaimed pt page list.
2719 pmap_simple_lock(&pmap_pages_lock
);
2720 pmap_pages_request_count
++;
2721 pmap_pages_request_acum
++;
2724 if (pmap_pages_reclaim_list
!= (page_free_entry_t
*)NULL
) {
2725 page_free_entry_t
*page_entry
;
2727 page_entry
= pmap_pages_reclaim_list
;
2728 pmap_pages_reclaim_list
= pmap_pages_reclaim_list
->next
;
2729 pmap_simple_unlock(&pmap_pages_lock
);
2731 return (pmap_paddr_t
)ml_static_vtop((vm_offset_t
)page_entry
);
2734 pmap_simple_unlock(&pmap_pages_lock
);
2736 pmap_simple_lock(&pt_pages_lock
);
2737 ptdp
= (pt_desc_t
*)queue_first(&pt_page_list
);
2740 while (!queue_end(&pt_page_list
, (queue_entry_t
)ptdp
)) {
2741 if ((ptdp
->pmap
->nested
== FALSE
)
2742 && (pmap_try_lock(ptdp
->pmap
))) {
2743 assert(ptdp
->pmap
!= kernel_pmap
);
2744 unsigned refcnt_acc
= 0;
2745 unsigned wiredcnt_acc
= 0;
2747 for (i
= 0; i
< PT_INDEX_MAX
; i
++) {
2748 if (ptdp
->ptd_info
[i
].refcnt
== PT_DESC_REFCOUNT
) {
2749 /* Do not attempt to free a page that contains an L2 table */
2753 refcnt_acc
+= ptdp
->ptd_info
[i
].refcnt
;
2754 wiredcnt_acc
+= ptdp
->ptd_info
[i
].wiredcnt
;
2756 if ((wiredcnt_acc
== 0) && (refcnt_acc
!= 0)) {
2758 /* Leave ptdp->pmap locked here. We're about to reclaim
2759 * a tt page from it, so we don't want anyone else messing
2760 * with it while we do that. */
2763 pmap_unlock(ptdp
->pmap
);
2765 ptdp
= (pt_desc_t
*)queue_next((queue_t
)ptdp
);
2768 pmap_simple_unlock(&pt_pages_lock
);
2769 return (pmap_paddr_t
)0;
2771 int remove_count
= 0;
2772 bool need_strong_sync
= false;
2773 vm_map_address_t va
;
2775 pt_entry_t
*bpte
, *epte
;
2778 uint32_t rmv_spte
= 0;
2780 pmap_simple_unlock(&pt_pages_lock
);
2782 pmap_assert_locked_w(pmap
); // pmap write lock should be held from loop above
2784 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
2786 for (i
= 0; i
< (PAGE_SIZE
/ pt_attr_page_size(pt_attr
)); i
++) {
2787 va
= ptdp
->ptd_info
[i
].va
;
2789 /* If the VA is bogus, this may represent an unallocated region
2790 * or one which is in transition (already being freed or expanded).
2791 * Don't try to remove mappings here. */
2792 if (va
== (vm_offset_t
)-1) {
2796 tte_p
= pmap_tte(pmap
, va
);
2797 if ((tte_p
!= (tt_entry_t
*) NULL
)
2798 && ((*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
)) {
2799 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
2800 bpte
= &pte_p
[pte_index(pmap
, pt_attr
, va
)];
2801 epte
= bpte
+ pt_attr_page_size(pt_attr
) / sizeof(pt_entry_t
);
2803 * Use PMAP_OPTIONS_REMOVE to clear any
2804 * "compressed" markers and update the
2805 * "compressed" counter in pmap->stats.
2806 * This means that we lose accounting for
2807 * any compressed pages in this range
2808 * but the alternative is to not be able
2809 * to account for their future decompression,
2810 * which could cause the counter to drift
2813 remove_count
+= pmap_remove_range_options(
2814 pmap
, va
, bpte
, epte
,
2815 &rmv_spte
, &need_strong_sync
, PMAP_OPTIONS_REMOVE
);
2816 if (ptd_get_info(ptdp
, pte_p
)->refcnt
!= 0) {
2817 panic("%s: ptdp %p, count %d", __FUNCTION__
, ptdp
, ptd_get_info(ptdp
, pte_p
)->refcnt
);
2820 pmap_tte_deallocate(pmap
, tte_p
, pt_attr_twig_level(pt_attr
));
2822 if (remove_count
> 0) {
2823 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
, (size_t)pt_attr_leaf_table_size(pt_attr
), pmap
);
2825 pmap_get_pt_ops(pmap
)->flush_tlb_tte_async(va
, pmap
);
2829 // Undo the lock we grabbed when we found ptdp above
2831 pmap_sync_tlb(need_strong_sync
);
2833 pmap_simple_lock(&pmap_pages_lock
);
/*
 * Return a PPL page to the free list.
 */
MARK_AS_PMAP_TEXT static void
pmap_give_free_ppl_page(pmap_paddr_t paddr)
{
	assert((paddr & ARM_PGMASK) == 0);
	void ** new_head = (void **)phystokv(paddr);
	pmap_simple_lock(&pmap_ppl_free_page_lock);

	void * cur_head = pmap_ppl_free_page_list;
	*new_head = cur_head;
	pmap_ppl_free_page_list = new_head;
	pmap_ppl_free_page_count++;

	pmap_simple_unlock(&pmap_ppl_free_page_lock);
}
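
/*
 * The PPL free-page list is an intrusive singly-linked list: the first word
 * of each free page (accessed through its physical-aperture mapping) points
 * to the next free page, so no separate bookkeeping allocation is needed.
 */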
/*
 * Get a PPL page from the free list.
 */
MARK_AS_PMAP_TEXT static pmap_paddr_t
pmap_get_free_ppl_page(void)
{
	pmap_paddr_t result = 0;

	pmap_simple_lock(&pmap_ppl_free_page_lock);

	if (pmap_ppl_free_page_list != NULL) {
		void ** new_head = NULL;
		new_head = *((void**)pmap_ppl_free_page_list);
		result = kvtophys((vm_offset_t)pmap_ppl_free_page_list);
		pmap_ppl_free_page_list = new_head;
		pmap_ppl_free_page_count--;
	}

	pmap_simple_unlock(&pmap_ppl_free_page_lock);
	assert((result & ARM_PGMASK) == 0);

	return result;
}
2883 * pmap_mark_page_as_ppl_page claims a page on behalf of the PPL by marking it
2884 * as PPL-owned and only allowing the PPL to write to it.
2886 MARK_AS_PMAP_TEXT
static void
2887 pmap_mark_page_as_ppl_page_internal(pmap_paddr_t pa
, bool initially_free
)
2889 vm_offset_t kva
= 0;
2890 unsigned int pai
= 0;
2894 * Mark each page that we allocate as belonging to the monitor, as we
2895 * intend to use it for monitor-y stuff (page tables, table pages, that
2898 if (!pa_valid(pa
)) {
2899 panic("%s: bad address, "
2905 pai
= (unsigned int)pa_index(pa
);
2908 /* A page that the PPL already owns can't be given to the PPL. */
2909 if (pa_test_monitor(pa
)) {
2910 panic("%s: page already belongs to PPL, "
2915 /* The page cannot be mapped outside of the physical aperture. */
2916 if (!pmap_verify_free((ppnum_t
)atop(pa
))) {
2917 panic("%s: page is not free, "
2924 attr
= pp_attr_table
[pai
];
2925 if (attr
& PP_ATTR_NO_MONITOR
) {
2926 panic("%s: page excluded from PPL, "
2931 } while (!OSCompareAndSwap16(attr
, attr
| PP_ATTR_MONITOR
, &pp_attr_table
[pai
]));
2936 pmap_set_xprr_perm(kva
, XPRR_KERN_RW_PERM
, XPRR_PPL_RW_PERM
);
2938 if (initially_free
) {
2939 pmap_give_free_ppl_page(pa
);
2944 pmap_mark_page_as_ppl_page(pmap_paddr_t pa
)
2946 pmap_mark_page_as_ppl_page_ppl(pa
, true);
2949 MARK_AS_PMAP_TEXT
static void
2950 pmap_mark_page_as_kernel_page(pmap_paddr_t pa
)
2952 vm_offset_t kva
= 0;
2953 unsigned int pai
= 0;
2955 pai
= (unsigned int)pa_index(pa
);
2958 if (!pa_test_monitor(pa
)) {
2959 panic("%s: page is not a PPL page, "
2965 pa_clear_monitor(pa
);
2969 pmap_set_xprr_perm(kva
, XPRR_PPL_RW_PERM
, XPRR_KERN_RW_PERM
);
2972 MARK_AS_PMAP_TEXT
static pmap_paddr_t
2973 pmap_release_ppl_pages_to_kernel_internal(void)
2975 pmap_paddr_t pa
= 0;
2977 if (pmap_ppl_free_page_count
<= PMAP_MIN_FREE_PPL_PAGES
) {
2981 pa
= pmap_get_free_ppl_page();
2987 pmap_mark_page_as_kernel_page(pa
);
2994 pmap_release_ppl_pages_to_kernel(void)
2996 pmap_paddr_t pa
= 0;
2997 vm_page_t m
= VM_PAGE_NULL
;
2998 vm_page_t local_freeq
= VM_PAGE_NULL
;
2999 uint64_t pmap_ppl_pages_returned_to_kernel_count
= 0;
3001 while (pmap_ppl_free_page_count
> PMAP_MIN_FREE_PPL_PAGES
) {
3002 pa
= pmap_release_ppl_pages_to_kernel_ppl();
3008 /* If we retrieved a page, add it to the free queue. */
3009 vm_object_lock(pmap_object
);
3010 m
= vm_page_lookup(pmap_object
, (pa
- gPhysBase
));
3011 assert(m
!= VM_PAGE_NULL
);
3012 assert(VM_PAGE_WIRED(m
));
3015 m
->vmp_snext
= local_freeq
;
3017 pmap_ppl_pages_returned_to_kernel_count
++;
3018 pmap_ppl_pages_returned_to_kernel_count_total
++;
3020 vm_object_unlock(pmap_object
);
3024 /* We need to hold the object lock for freeing pages. */
3025 vm_object_lock(pmap_object
);
3026 vm_page_free_list(local_freeq
, TRUE
);
3027 vm_object_unlock(pmap_object
);
3030 return pmap_ppl_pages_returned_to_kernel_count
;
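
/*
 * pmap_enqueue_pages() records each page handed to the pmap as a wired entry
 * in pmap_object, keyed by its offset from gPhysBase; this is what allows
 * pmap_release_ppl_pages_to_kernel() above to find the pages again with
 * vm_page_lookup() when returning them to the VM free list.
 */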
3035 pmap_enqueue_pages(vm_page_t m
)
3038 vm_object_lock(pmap_object
);
3039 while (m
!= VM_PAGE_NULL
) {
3040 vm_page_insert_wired(m
, pmap_object
, (vm_object_offset_t
) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m
))) - gPhysBase
), VM_KERN_MEMORY_PTE
);
3042 m
= NEXT_PAGE(m_prev
);
3043 *(NEXT_PAGE_PTR(m_prev
)) = VM_PAGE_NULL
;
3045 vm_object_unlock(pmap_object
);
3048 static kern_return_t
3049 pmap_pages_alloc_zeroed(
3055 ASSERT_NOT_HIBERNATING();
3057 if (size
!= PAGE_SIZE
) {
3058 panic("%s: size != PAGE_SIZE, "
3059 "pa=%p, size=%u, option=%u",
3065 assert(option
& PMAP_PAGES_ALLOCATE_NOWAIT
);
3067 *pa
= pmap_get_free_ppl_page();
3069 if ((*pa
== 0) && (option
& PMAP_PAGES_RECLAIM_NOWAIT
)) {
3070 *pa
= pmap_pages_reclaim();
3074 return KERN_RESOURCE_SHORTAGE
;
3076 bzero((void*)phystokv(*pa
), size
);
3077 return KERN_SUCCESS
;
3080 vm_page_t m
= VM_PAGE_NULL
;
3082 thread_t self
= current_thread();
3083 // We qualify to allocate reserved memory
3084 uint16_t thread_options
= self
->options
;
3085 self
->options
|= TH_OPT_VMPRIV
;
3086 if (__probable(size
== PAGE_SIZE
)) {
3087 while ((m
= vm_page_grab()) == VM_PAGE_NULL
) {
3088 if (option
& PMAP_PAGES_ALLOCATE_NOWAIT
) {
3094 if (m
!= VM_PAGE_NULL
) {
3095 vm_page_lock_queues();
3096 vm_page_wire(m
, VM_KERN_MEMORY_PTE
, TRUE
);
3097 vm_page_unlock_queues();
3099 } else if (size
== 2 * PAGE_SIZE
) {
3100 while (cpm_allocate(size
, &m
, 0, 1, TRUE
, 0) != KERN_SUCCESS
) {
3101 if (option
& PMAP_PAGES_ALLOCATE_NOWAIT
) {
3108 panic("%s: invalid size %u", __func__
, size
);
3111 self
->options
= thread_options
;
3113 if ((m
== VM_PAGE_NULL
) && (option
& PMAP_PAGES_RECLAIM_NOWAIT
)) {
3114 assert(size
== PAGE_SIZE
);
3115 *pa
= pmap_pages_reclaim();
3117 bzero((void*)phystokv(*pa
), size
);
3118 return KERN_SUCCESS
;
3122 if (m
== VM_PAGE_NULL
) {
3123 return KERN_RESOURCE_SHORTAGE
;
3126 *pa
= (pmap_paddr_t
)ptoa(VM_PAGE_GET_PHYS_PAGE(m
));
3128 pmap_enqueue_pages(m
);
3130 OSAddAtomic(size
>> PAGE_SHIFT
, &inuse_pmap_pages_count
);
3131 OSAddAtomic64(size
>> PAGE_SHIFT
, &alloc_pmap_pages_count
);
3133 bzero((void*)phystokv(*pa
), size
);
3134 return KERN_SUCCESS
;
3140 pmap_alloc_page_for_kern(unsigned int options
)
3145 while ((m
= vm_page_grab()) == VM_PAGE_NULL
) {
3146 if (options
& PMAP_PAGES_ALLOCATE_NOWAIT
) {
3152 vm_page_lock_queues();
3153 vm_page_wire(m
, VM_KERN_MEMORY_PTE
, TRUE
);
3154 vm_page_unlock_queues();
3156 paddr
= (pmap_paddr_t
)ptoa(VM_PAGE_GET_PHYS_PAGE(m
));
3158 if (__improbable(paddr
== 0)) {
3159 panic("%s: paddr is 0", __func__
);
3162 pmap_enqueue_pages(m
);
3164 OSAddAtomic(1, &inuse_pmap_pages_count
);
3165 OSAddAtomic64(1, &alloc_pmap_pages_count
);
3171 pmap_alloc_page_for_ppl(unsigned int options
)
3173 thread_t self
= current_thread();
3174 // We qualify to allocate reserved memory
3175 uint16_t thread_options
= self
->options
;
3176 self
->options
|= TH_OPT_VMPRIV
;
3177 pmap_paddr_t paddr
= pmap_alloc_page_for_kern(options
);
3178 self
->options
= thread_options
;
3180 pmap_mark_page_as_ppl_page(paddr
);
3185 pmap_alloc_pmap(void)
3187 pmap_t pmap
= PMAP_NULL
;
3189 pmap_simple_lock(&pmap_free_list_lock
);
3191 if (pmap_free_list
!= PMAP_NULL
) {
3192 pmap
= pmap_free_list
;
3193 pmap_free_list
= *((pmap_t
*)pmap
);
3195 if (!PMAP_PTR_IS_VALID(pmap
)) {
3196 panic("%s: allocated pmap is not valid, pmap=%p",
3197 __FUNCTION__
, pmap
);
3201 pmap_simple_unlock(&pmap_free_list_lock
);
3207 pmap_free_pmap(pmap_t pmap
)
3209 if (!PMAP_PTR_IS_VALID(pmap
)) {
3210 panic("%s: pmap is not valid, "
3216 pmap_simple_lock(&pmap_free_list_lock
);
3217 *((pmap_t
*)pmap
) = pmap_free_list
;
3218 pmap_free_list
= pmap
;
3219 pmap_simple_unlock(&pmap_free_list_lock
);
3223 pmap_bootstrap_pmap_free_list(void)
3225 pmap_t cur_head
= PMAP_NULL
;
3226 unsigned long i
= 0;
3228 simple_lock_init(&pmap_free_list_lock
, 0);
3230 for (i
= 0; i
< pmap_array_count
; i
++) {
3231 *((pmap_t
*)(&pmap_array
[i
])) = cur_head
;
3232 cur_head
= &pmap_array
[i
];
3235 pmap_free_list
= cur_head
;
3244 pmap_simple_lock(&pmap_pages_lock
);
3246 if (pmap_pages_request_count
!= 0) {
3247 page_free_entry_t
*page_entry
;
3249 pmap_pages_request_count
--;
3250 page_entry
= (page_free_entry_t
*)phystokv(pa
);
3251 page_entry
->next
= pmap_pages_reclaim_list
;
3252 pmap_pages_reclaim_list
= page_entry
;
3253 pmap_simple_unlock(&pmap_pages_lock
);
3258 pmap_simple_unlock(&pmap_pages_lock
);
3263 pmap_give_free_ppl_page(pa
);
3266 pmap_paddr_t pa_max
;
3268 OSAddAtomic(-(size
>> PAGE_SHIFT
), &inuse_pmap_pages_count
);
3270 for (pa_max
= pa
+ size
; pa
< pa_max
; pa
= pa
+ PAGE_SIZE
) {
3271 vm_object_lock(pmap_object
);
3272 m
= vm_page_lookup(pmap_object
, (pa
- gPhysBase
));
3273 assert(m
!= VM_PAGE_NULL
);
3274 assert(VM_PAGE_WIRED(m
));
3275 vm_page_lock_queues();
3277 vm_page_unlock_queues();
3278 vm_object_unlock(pmap_object
);
3285 pmap_t pmap
, int bytes
)
3287 pmap_ledger_credit(pmap
, task_ledgers
.tkm_private
, bytes
);
3295 pmap_ledger_debit(pmap
, task_ledgers
.tkm_private
, bytes
);
3299 pmap_tt_ledger_credit(
3303 if (pmap
!= kernel_pmap
) {
3304 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, size
);
3305 pmap_ledger_credit(pmap
, task_ledgers
.page_table
, size
);
3310 pmap_tt_ledger_debit(
3314 if (pmap
!= kernel_pmap
) {
3315 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, size
);
3316 pmap_ledger_debit(pmap
, task_ledgers
.page_table
, size
);
3321 pmap_update_plru(uint16_t asid_index
)
3323 if (__probable(pmap_asid_plru
)) {
3324 unsigned plru_index
= asid_index
>> 6;
3325 if (__improbable(os_atomic_andnot(&asid_plru_bitmap
[plru_index
], (1ULL << (asid_index
& 63)), relaxed
) == 0)) {
3326 asid_plru_generation
[plru_index
] = ++asid_plru_gencount
;
3327 asid_plru_bitmap
[plru_index
] = ((plru_index
== (MAX_HW_ASIDS
>> 6)) ? ~(1ULL << 63) : UINT64_MAX
);
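
/*
 * ASID pseudo-LRU: each 64-entry group of hardware ASIDs has a bitmap of
 * not-recently-used candidates plus a generation counter.  pmap_update_plru()
 * clears an ASID's bit when it is handed out; once a group's bitmap is
 * exhausted, its generation is bumped and the bitmap refilled, and
 * alloc_asid() below prefers the group with the lowest generation.
 */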
3333 alloc_asid(pmap_t pmap
)
3338 pmap_simple_lock(&asid_lock
);
3340 if (__probable(pmap_asid_plru
)) {
3341 unsigned plru_index
= 0;
3342 uint64_t lowest_gen
= asid_plru_generation
[0];
3343 uint64_t lowest_gen_bitmap
= asid_plru_bitmap
[0];
3344 for (unsigned i
= 1; i
< (sizeof(asid_plru_generation
) / sizeof(asid_plru_generation
[0])); ++i
) {
3345 if (asid_plru_generation
[i
] < lowest_gen
) {
3347 lowest_gen
= asid_plru_generation
[i
];
3348 lowest_gen_bitmap
= asid_plru_bitmap
[i
];
3352 for (; plru_index
< BITMAP_LEN(pmap_max_asids
); plru_index
+= ((MAX_HW_ASIDS
+ 1) >> 6)) {
3353 uint64_t temp_plru
= lowest_gen_bitmap
& asid_bitmap
[plru_index
];
3355 vasid
= (plru_index
<< 6) + lsb_first(temp_plru
);
3356 #if DEVELOPMENT || DEBUG
3363 if (__improbable(vasid
< 0)) {
3364 // bitmap_first() returns highest-order bits first, but a 0-based scheme works
3365 // slightly better with the collision detection scheme used by pmap_switch_internal().
3366 vasid
= bitmap_lsb_first(&asid_bitmap
[0], pmap_max_asids
);
3367 #if DEVELOPMENT || DEBUG
3371 if (__improbable(vasid
< 0)) {
3372 pmap_simple_unlock(&asid_lock
);
3375 assert((uint32_t)vasid
< pmap_max_asids
);
3376 assert(bitmap_test(&asid_bitmap
[0], (unsigned int)vasid
));
3377 bitmap_clear(&asid_bitmap
[0], (unsigned int)vasid
);
3378 pmap_simple_unlock(&asid_lock
);
3379 hw_asid
= vasid
% asid_chunk_size
;
3380 pmap
->sw_asid
= (uint8_t)(vasid
/ asid_chunk_size
);
3381 if (__improbable(hw_asid
== MAX_HW_ASIDS
)) {
3382 /* If we took a PLRU "miss" and ended up with a hardware ASID we can't actually support,
3383 * reassign to a reserved VASID. */
3384 assert(pmap
->sw_asid
< UINT8_MAX
);
3385 pmap
->sw_asid
= UINT8_MAX
;
3386 /* Allocate from the high end of the hardware ASID range to reduce the likelihood of
3387 * aliasing with vital system processes, which are likely to have lower ASIDs. */
3388 hw_asid
= MAX_HW_ASIDS
- 1 - (uint16_t)(vasid
/ asid_chunk_size
);
3389 assert(hw_asid
< MAX_HW_ASIDS
);
3391 pmap_update_plru(hw_asid
);
3392 hw_asid
+= 1; // Account for ASID 0, which is reserved for the kernel
3393 #if __ARM_KERNEL_PROTECT__
3394 hw_asid
<<= 1; // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
3396 pmap
->hw_asid
= hw_asid
;
3401 free_asid(pmap_t pmap
)
3404 uint16_t hw_asid
= os_atomic_xchg(&pmap
->hw_asid
, 0, relaxed
);
3405 if (__improbable(hw_asid
== 0)) {
3409 #if __ARM_KERNEL_PROTECT__
3414 if (__improbable(pmap
->sw_asid
== UINT8_MAX
)) {
3415 vasid
= ((MAX_HW_ASIDS
- 1 - hw_asid
) * asid_chunk_size
) + MAX_HW_ASIDS
;
3417 vasid
= ((unsigned int)pmap
->sw_asid
* asid_chunk_size
) + hw_asid
;
3420 if (__probable(pmap_asid_plru
)) {
3421 os_atomic_or(&asid_plru_bitmap
[hw_asid
>> 6], (1ULL << (hw_asid
& 63)), relaxed
);
3423 pmap_simple_lock(&asid_lock
);
3424 assert(!bitmap_test(&asid_bitmap
[0], vasid
));
3425 bitmap_set(&asid_bitmap
[0], vasid
);
3426 pmap_simple_unlock(&asid_lock
);
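
/*
 * A virtual ASID encodes both identifiers handed out above:
 * hw_asid = vasid % asid_chunk_size and sw_asid = vasid / asid_chunk_size,
 * which is why free_asid() reconstructs vasid from the pmap's sw_asid and
 * hw_asid before setting its bit back in asid_bitmap.
 */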
3433 * Increase the padding for PPL devices to accommodate increased
3434 * mapping pressure from IOMMUs. This isn't strictly necessary, but
3435 * will reduce the need to retry mappings due to PV allocation failure.
3438 #define PV_LOW_WATER_MARK_DEFAULT (0x400)
3439 #define PV_KERN_LOW_WATER_MARK_DEFAULT (0x400)
3440 #define PV_ALLOC_CHUNK_INITIAL (0x400)
3441 #define PV_KERN_ALLOC_CHUNK_INITIAL (0x400)
3442 #define PV_CPU_MIN (0x80)
3443 #define PV_CPU_MAX (0x400)
3447 #define PV_LOW_WATER_MARK_DEFAULT (0x200)
3448 #define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200)
3449 #define PV_ALLOC_CHUNK_INITIAL (0x200)
3450 #define PV_KERN_ALLOC_CHUNK_INITIAL (0x200)
3451 #define PV_CPU_MIN (0x40)
3452 #define PV_CPU_MAX (0x200)
3456 #define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
3457 #define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
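
/*
 * These constants size the PV entry pools: the per-CPU caches are kept
 * between PV_CPU_MIN and PV_CPU_MAX entries, and pv_kern_low_water_mark is
 * the level below which freed entries are steered to the kernel-reserved
 * list and non-kernel allocations stop drawing from the shared pool (see
 * pv_list_alloc() and pv_list_free() below).
 */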
3459 uint32_t pv_page_count MARK_AS_PMAP_DATA
= 0;
3461 uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA
= PV_KERN_LOW_WATER_MARK_DEFAULT
;
3462 uint32_t pv_alloc_initial_target MARK_AS_PMAP_DATA
= PV_ALLOC_INITIAL_TARGET
;
3463 uint32_t pv_kern_alloc_initial_target MARK_AS_PMAP_DATA
= PV_KERN_ALLOC_INITIAL_TARGET
;
3465 unsigned pmap_reserve_replenish_stat MARK_AS_PMAP_DATA
;
3466 unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA
;
3468 static inline void pv_list_alloc(pv_entry_t
**pv_ep
);
3469 static inline void pv_list_kern_alloc(pv_entry_t
**pv_e
);
3470 static inline void pv_list_free(pv_entry_t
*pv_eh
, pv_entry_t
*pv_et
, int pv_cnt
, uint32_t kern_target
);
3472 static pv_alloc_return_t
3479 pmap_assert_locked_w(pmap
);
3481 ASSERT_PVH_LOCKED(pai
);
3482 pv_list_alloc(pvepp
);
3483 if (PV_ENTRY_NULL
!= *pvepp
) {
3484 return PV_ALLOC_SUCCESS
;
3487 unsigned alloc_flags
= PMAP_PAGES_ALLOCATE_NOWAIT
;
3489 unsigned alloc_flags
= 0;
3491 if ((pmap
== NULL
) || (kernel_pmap
== pmap
)) {
3492 pv_list_kern_alloc(pvepp
);
3494 if (PV_ENTRY_NULL
!= *pvepp
) {
3495 return PV_ALLOC_SUCCESS
;
3497 alloc_flags
= PMAP_PAGES_ALLOCATE_NOWAIT
| PMAP_PAGES_RECLAIM_NOWAIT
;
3505 pv_alloc_return_t pv_status
= PV_ALLOC_RETRY
;
3512 ret
= pmap_pages_alloc_zeroed(&pa
, PAGE_SIZE
, alloc_flags
);
3514 if (ret
!= KERN_SUCCESS
) {
3515 pv_status
= PV_ALLOC_FAIL
;
3516 goto pv_alloc_cleanup
;
3521 pv_e
= (pv_entry_t
*)phystokv(pa
);
3523 pv_cnt
= (PAGE_SIZE
/ sizeof(pv_entry_t
)) - 1;
3525 pv_et
= &pv_e
[pv_cnt
];
3527 pv_list_free(pv_eh
, pv_et
, pv_cnt
, pv_kern_low_water_mark
);
3540 pv_list_free(pvep
, pvep
, 1, pv_kern_low_water_mark
);
3544 pv_free_list_alloc(pv_free_list_t
*free_list
, pv_entry_t
**pv_ep
)
3546 assert(((free_list
->list
!= NULL
) && (free_list
->count
> 0)) ||
3547 ((free_list
->list
== NULL
) && (free_list
->count
== 0)));
3549 if ((*pv_ep
= free_list
->list
) != NULL
) {
3550 pv_entry_t
*pv_e
= *pv_ep
;
3551 if ((pv_e
->pve_next
== NULL
) && (free_list
->count
> 1)) {
3552 free_list
->list
= pv_e
+ 1;
3554 free_list
->list
= pv_e
->pve_next
;
3555 pv_e
->pve_next
= PV_ENTRY_NULL
;
3562 pv_list_alloc(pv_entry_t
**pv_ep
)
3564 assert(*pv_ep
== PV_ENTRY_NULL
);
3566 mp_disable_preemption();
3568 pmap_cpu_data_t
*pmap_cpu_data
= pmap_get_cpu_data();
3569 pv_free_list_alloc(&pmap_cpu_data
->pv_free
, pv_ep
);
3571 mp_enable_preemption();
3573 if (*pv_ep
!= PV_ENTRY_NULL
) {
3577 if (pv_kern_free
.count
< pv_kern_low_water_mark
) {
3579 * If the kernel reserved pool is low, let non-kernel mappings wait for a page
3585 pmap_simple_lock(&pv_free_list_lock
);
3586 pv_free_list_alloc(&pv_free
, pv_ep
);
3587 pmap_simple_unlock(&pv_free_list_lock
);
3591 pv_list_free(pv_entry_t
*pv_eh
, pv_entry_t
*pv_et
, int pv_cnt
, uint32_t kern_target
)
3594 bool limit_exceeded
= false;
3596 mp_disable_preemption();
3598 pmap_cpu_data_t
*pmap_cpu_data
= pmap_get_cpu_data();
3599 pv_et
->pve_next
= pmap_cpu_data
->pv_free
.list
;
3600 pmap_cpu_data
->pv_free
.list
= pv_eh
;
3601 if (pmap_cpu_data
->pv_free
.count
== PV_CPU_MIN
) {
3602 pmap_cpu_data
->pv_free_tail
= pv_et
;
3604 pmap_cpu_data
->pv_free
.count
+= pv_cnt
;
3605 if (__improbable(pmap_cpu_data
->pv_free
.count
> PV_CPU_MAX
)) {
3606 pv_et
= pmap_cpu_data
->pv_free_tail
;
3607 pv_cnt
= pmap_cpu_data
->pv_free
.count
- PV_CPU_MIN
;
3608 pmap_cpu_data
->pv_free
.list
= pmap_cpu_data
->pv_free_tail
->pve_next
;
3609 pmap_cpu_data
->pv_free
.count
= PV_CPU_MIN
;
3610 limit_exceeded
= true;
3613 mp_enable_preemption();
3615 if (__probable(!limit_exceeded
)) {
3619 if (__improbable(pv_kern_free
.count
< kern_target
)) {
3620 pmap_simple_lock(&pv_kern_free_list_lock
);
3621 pv_et
->pve_next
= pv_kern_free
.list
;
3622 pv_kern_free
.list
= pv_eh
;
3623 pv_kern_free
.count
+= pv_cnt
;
3624 pmap_simple_unlock(&pv_kern_free_list_lock
);
3626 pmap_simple_lock(&pv_free_list_lock
);
3627 pv_et
->pve_next
= pv_free
.list
;
3628 pv_free
.list
= pv_eh
;
3629 pv_free
.count
+= pv_cnt
;
3630 pmap_simple_unlock(&pv_free_list_lock
);
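
/*
 * PV entries are managed in three tiers: a per-CPU cache (bounded by
 * PV_CPU_MIN/PV_CPU_MAX), the global pv_free list, and the kernel-reserved
 * pv_kern_free list.  pv_list_free() above spills per-CPU overflow into the
 * kernel list while it is below its target, otherwise into the global list.
 */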
3635 pv_list_kern_alloc(pv_entry_t
**pv_ep
)
3637 assert(*pv_ep
== PV_ENTRY_NULL
);
3638 pmap_simple_lock(&pv_kern_free_list_lock
);
3639 if (pv_kern_free
.count
> 0) {
3640 pmap_kern_reserve_alloc_stat
++;
3642 pv_free_list_alloc(&pv_kern_free
, pv_ep
);
3643 pmap_simple_unlock(&pv_kern_free_list_lock
);
3647 mapping_adjust(void)
3649 // Not implemented for arm/arm64
3653 * Fills the kernel and general PV free lists back up to their low watermarks.
3655 MARK_AS_PMAP_TEXT
static kern_return_t
3656 mapping_replenish_internal(uint32_t kern_target_count
, uint32_t user_target_count
)
3662 kern_return_t ret
= KERN_SUCCESS
;
3664 while ((pv_free
.count
< user_target_count
) || (pv_kern_free
.count
< kern_target_count
)) {
3666 if ((ret
= pmap_pages_alloc_zeroed(&pa
, PAGE_SIZE
, PMAP_PAGES_ALLOCATE_NOWAIT
)) != KERN_SUCCESS
) {
3670 ret
= pmap_pages_alloc_zeroed(&pa
, PAGE_SIZE
, 0);
3671 assert(ret
== KERN_SUCCESS
);
3676 pv_eh
= (pv_entry_t
*)phystokv(pa
);
3677 pv_cnt
= PAGE_SIZE
/ sizeof(pv_entry_t
);
3678 pv_et
= &pv_eh
[pv_cnt
- 1];
3680 pmap_reserve_replenish_stat
+= pv_cnt
;
3681 pv_list_free(pv_eh
, pv_et
, pv_cnt
, kern_target_count
);
3688 * Creates a target number of free pv_entry_t objects for the kernel free list
3689 * and the general free list.
3691 MARK_AS_PMAP_TEXT
static kern_return_t
3692 mapping_free_prime_internal(void)
3694 return mapping_replenish_internal(pv_kern_alloc_initial_target
, pv_alloc_initial_target
);
3698 mapping_free_prime(void)
3700 kern_return_t kr
= KERN_FAILURE
;
3706 * Allocate the needed PPL pages up front, to minimize the chance that
3707 * we will need to call into the PPL multiple times.
3709 for (i
= 0; i
< pv_alloc_initial_target
; i
+= (PAGE_SIZE
/ sizeof(pv_entry_t
))) {
3710 pmap_alloc_page_for_ppl(0);
3713 for (i
= 0; i
< pv_kern_alloc_initial_target
; i
+= (PAGE_SIZE
/ sizeof(pv_entry_t
))) {
3714 pmap_alloc_page_for_ppl(0);
3717 while ((kr
= mapping_free_prime_ppl()) == KERN_RESOURCE_SHORTAGE
) {
3718 pmap_alloc_page_for_ppl(0);
3721 kr
= mapping_free_prime_internal();
3724 if (kr
!= KERN_SUCCESS
) {
3725 panic("%s: failed, kr=%d",
3733 unsigned int ptd_cnt
)
3735 simple_lock_init(&ptd_free_list_lock
, 0);
3736 // Region represented by ptdp should be cleared by pmap_bootstrap()
3737 *((void**)(&ptdp
[ptd_cnt
- 1])) = (void*)ptd_free_list
;
3738 ptd_free_list
= ptdp
;
3739 ptd_free_count
+= ptd_cnt
;
3740 ptd_preboot
= FALSE
;
3744 ptd_alloc_unlinked(void)
3750 pmap_simple_lock(&ptd_free_list_lock
);
3753 assert(((ptd_free_list
!= NULL
) && (ptd_free_count
> 0)) ||
3754 ((ptd_free_list
== NULL
) && (ptd_free_count
== 0)));
3756 if (ptd_free_count
== 0) {
3757 unsigned int ptd_cnt
= PAGE_SIZE
/ sizeof(pt_desc_t
);
3760 ptdp
= (pt_desc_t
*)avail_start
;
3761 avail_start
+= PAGE_SIZE
;
3762 bzero(ptdp
, PAGE_SIZE
);
3766 pmap_simple_unlock(&ptd_free_list_lock
);
3768 if (pmap_pages_alloc_zeroed(&pa
, PAGE_SIZE
, PMAP_PAGES_ALLOCATE_NOWAIT
) != KERN_SUCCESS
) {
3771 ptdp
= (pt_desc_t
*)phystokv(pa
);
3773 pmap_simple_lock(&ptd_free_list_lock
);
3776 *((void**)(&ptdp
[ptd_cnt
- 1])) = (void*)ptd_free_list
;
3777 ptd_free_list
= ptdp
;
3778 ptd_free_count
+= ptd_cnt
;
3781 if ((ptdp
= ptd_free_list
) != PTD_ENTRY_NULL
) {
3782 ptd_free_list
= (pt_desc_t
*)(*(void **)ptdp
);
3783 if ((ptd_free_list
== NULL
) && (ptd_free_count
> 1)) {
3784 ptd_free_list
= ptdp
+ 1;
3788 panic("%s: out of ptd entry",
3793 pmap_simple_unlock(&ptd_free_list_lock
);
3796 ptdp
->pt_page
.next
= NULL
;
3797 ptdp
->pt_page
.prev
= NULL
;
3800 for (i
= 0; i
< PT_INDEX_MAX
; i
++) {
3801 ptdp
->ptd_info
[i
].va
= (vm_offset_t
)-1;
3802 ptdp
->ptd_info
[i
].refcnt
= 0;
3803 ptdp
->ptd_info
[i
].wiredcnt
= 0;
3809 static inline pt_desc_t
*
3810 ptd_alloc(pmap_t pmap
)
3812 pt_desc_t
*ptdp
= ptd_alloc_unlinked();
3819 if (pmap
!= kernel_pmap
) {
3820 /* We should never try to reclaim kernel pagetable pages in
3821 * pmap_pages_reclaim(), so don't enter them into the list. */
3822 pmap_simple_lock(&pt_pages_lock
);
3823 queue_enter(&pt_page_list
, ptdp
, pt_desc_t
*, pt_page
);
3824 pmap_simple_unlock(&pt_pages_lock
);
3827 pmap_tt_ledger_credit(pmap
, sizeof(*ptdp
));
3832 ptd_deallocate(pt_desc_t
*ptdp
)
3834 pmap_t pmap
= ptdp
->pmap
;
3837 panic("%s: early boot, "
3843 if (ptdp
->pt_page
.next
!= NULL
) {
3844 pmap_simple_lock(&pt_pages_lock
);
3845 queue_remove(&pt_page_list
, ptdp
, pt_desc_t
*, pt_page
);
3846 pmap_simple_unlock(&pt_pages_lock
);
3848 pmap_simple_lock(&ptd_free_list_lock
);
3849 (*(void **)ptdp
) = (void *)ptd_free_list
;
3850 ptd_free_list
= (pt_desc_t
*)ptdp
;
3852 pmap_simple_unlock(&ptd_free_list_lock
);
3854 pmap_tt_ledger_debit(pmap
, sizeof(*ptdp
));
3862 vm_map_address_t va
,
3866 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
3868 if (ptdp
->pmap
!= pmap
) {
3869 panic("%s: pmap mismatch, "
3870 "ptdp=%p, pmap=%p, va=%p, level=%u, pte_p=%p",
3872 ptdp
, pmap
, (void*)va
, level
, pte_p
);
3875 assert(level
> pt_attr_root_level(pt_attr
));
3876 ptd_info_t
*ptd_info
= ptd_get_info(ptdp
, pte_p
);
3877 ptd_info
->va
= (vm_offset_t
) va
& ~pt_attr_ln_pt_offmask(pt_attr
, level
- 1);
3879 if (level
< pt_attr_leaf_level(pt_attr
)) {
3880 ptd_info
->refcnt
= PT_DESC_REFCOUNT
;
3889 return pa_valid(addr
);
3892 #if (__ARM_VMSA__ == 7)
3895 * Given an offset and a map, compute the address of the
3896 * corresponding translation table entry.
3898 static inline tt_entry_t
*
3899 pmap_tte(pmap_t pmap
,
3900 vm_map_address_t addr
)
3902 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
3904 if (!(tte_index(pmap
, pt_attr
, addr
) < pmap
->tte_index_max
)) {
3905 return (tt_entry_t
*)NULL
;
3907 return &pmap
->tte
[tte_index(pmap
, pt_attr
, addr
)];
3912 * Given an offset and a map, compute the address of the
3913 * pte. If the address is invalid with respect to the map
3914 * then PT_ENTRY_NULL is returned (and the map may need to grow).
3916 * This is only used internally.
3918 static inline pt_entry_t
*
3921 vm_map_address_t addr
)
3927 ttp
= pmap_tte(pmap
, addr
);
3928 if (ttp
== (tt_entry_t
*)NULL
) {
3929 return PT_ENTRY_NULL
;
3933 if ((tte
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_BLOCK
) {
3934 panic("%s: Attempt to demote L1 block, tte=0x%lx, "
3936 __FUNCTION__
, (unsigned long)tte
,
3940 if ((tte
& ARM_TTE_TYPE_MASK
) != ARM_TTE_TYPE_TABLE
) {
3941 return PT_ENTRY_NULL
;
3943 ptp
= (pt_entry_t
*) ttetokv(tte
) + pte_index(pmap
, pt_addr
, addr
);
3947 __unused
static inline tt_entry_t
*
3948 pmap_ttne(pmap_t pmap
,
3949 unsigned int target_level
,
3950 vm_map_address_t addr
)
3952 tt_entry_t
* ret_ttep
= NULL
;
3954 switch (target_level
) {
3956 ret_ttep
= pmap_tte(pmap
, addr
);
3959 ret_ttep
= (tt_entry_t
*)pmap_pte(pmap
, addr
);
3962 panic("%s: bad level, "
3963 "pmap=%p, target_level=%u, addr=%p",
3965 pmap
, target_level
, (void *)addr
);
3973 static inline tt_entry_t
*
3974 pmap_ttne(pmap_t pmap
,
3975 unsigned int target_level
,
3976 vm_map_address_t addr
)
3978 tt_entry_t
* ttp
= NULL
;
3979 tt_entry_t
* ttep
= NULL
;
3980 tt_entry_t tte
= ARM_TTE_EMPTY
;
3981 unsigned int cur_level
;
3983 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
3987 assert(target_level
<= pt_attr
->pta_max_level
);
3989 for (cur_level
= pt_attr
->pta_root_level
; cur_level
<= target_level
; cur_level
++) {
3990 ttep
= &ttp
[ttn_index(pmap
, pt_attr
, addr
, cur_level
)];
3992 if (cur_level
== target_level
) {
3999 if ((tte
& (ARM_TTE_TYPE_MASK
| ARM_TTE_VALID
)) == (ARM_TTE_TYPE_BLOCK
| ARM_TTE_VALID
)) {
4000 panic("%s: Attempt to demote L%u block, tte=0x%llx, "
4001 "pmap=%p, target_level=%u, addr=%p",
4002 __FUNCTION__
, cur_level
, tte
,
4003 pmap
, target_level
, (void*)addr
);
4006 if ((tte
& (ARM_TTE_TYPE_MASK
| ARM_TTE_VALID
)) != (ARM_TTE_TYPE_TABLE
| ARM_TTE_VALID
)) {
4007 return TT_ENTRY_NULL
;
4010 ttp
= (tt_entry_t
*)phystokv(tte
& ARM_TTE_TABLE_MASK
);
4017 * Given an offset and a map, compute the address of level 1 translation table entry.
4018 * If the tranlation is invalid then PT_ENTRY_NULL is returned.
4020 static inline tt_entry_t
*
4021 pmap_tt1e(pmap_t pmap
,
4022 vm_map_address_t addr
)
4024 return pmap_ttne(pmap
, PMAP_TT_L1_LEVEL
, addr
);
4028 * Given an offset and a map, compute the address of level 2 translation table entry.
4029 * If the tranlation is invalid then PT_ENTRY_NULL is returned.
4031 static inline tt_entry_t
*
4032 pmap_tt2e(pmap_t pmap
,
4033 vm_map_address_t addr
)
4035 return pmap_ttne(pmap
, PMAP_TT_L2_LEVEL
, addr
);
4040 * Given an offset and a map, compute the address of level 3 translation table entry.
4041 * If the tranlation is invalid then PT_ENTRY_NULL is returned.
4043 static inline pt_entry_t
*
4046 vm_map_address_t addr
)
4048 return (pt_entry_t
*)pmap_ttne(pmap
, PMAP_TT_L3_LEVEL
, addr
);
4051 static inline tt_entry_t
*
4054 vm_map_address_t addr
)
4056 return pmap_tt2e(pmap
, addr
);
4059 static inline pt_entry_t
*
4062 vm_map_address_t addr
)
4064 return pmap_tt3e(pmap
, addr
);
4069 #if __APRR_SUPPORTED__
4071 * Indicates whether the given PTE has special restrictions due to the current
4075 is_pte_aprr_protected(pt_entry_t pte
)
4077 uint64_t aprr_el0_value
;
4078 uint64_t aprr_el1_value
;
4079 uint64_t aprr_index
;
4081 MRS(aprr_el0_value
, APRR_EL0
);
4082 MRS(aprr_el1_value
, APRR_EL1
);
4083 aprr_index
= PTE_TO_APRR_INDEX(pte
);
4085 /* Check to see if this mapping had APRR restrictions. */
4086 if ((APRR_EXTRACT_IDX_ATTR(aprr_el0_value
, aprr_index
) != APRR_EXTRACT_IDX_ATTR(APRR_EL0_RESET
, aprr_index
)) ||
4087 (APRR_EXTRACT_IDX_ATTR(aprr_el1_value
, aprr_index
) != APRR_EXTRACT_IDX_ATTR(APRR_EL1_RESET
, aprr_index
))
4094 #endif /* __APRR_SUPPORTED__ */
4097 #if __APRR_SUPPORTED__
4099 is_pte_xprr_protected(pmap_t pmap __unused
, pt_entry_t pte
)
4101 #if __APRR_SUPPORTED__
4102 return is_pte_aprr_protected(pte
);
4103 #else /* __APRR_SUPPORTED__ */
4104 #error "XPRR configuration error"
4105 #endif /* __APRR_SUPPORTED__ */
4107 #endif /* __APRR_SUPPORTED__*/
4109 #if __APRR_SUPPORTED__
4111 __unused
pte_to_xprr_perm(pt_entry_t pte
)
4113 #if __APRR_SUPPORTED__
4114 switch (PTE_TO_APRR_INDEX(pte
)) {
4115 case APRR_FIRM_RX_INDEX
: return XPRR_FIRM_RX_PERM
;
4116 case APRR_FIRM_RO_INDEX
: return XPRR_FIRM_RO_PERM
;
4117 case APRR_PPL_RW_INDEX
: return XPRR_PPL_RW_PERM
;
4118 case APRR_KERN_RW_INDEX
: return XPRR_KERN_RW_PERM
;
4119 case APRR_FIRM_RW_INDEX
: return XPRR_FIRM_RW_PERM
;
4120 case APRR_KERN0_RW_INDEX
: return XPRR_KERN0_RW_PERM
;
4121 case APRR_USER_JIT_INDEX
: return XPRR_USER_JIT_PERM
;
4122 case APRR_USER_RW_INDEX
: return XPRR_USER_RW_PERM
;
4123 case APRR_PPL_RX_INDEX
: return XPRR_PPL_RX_PERM
;
4124 case APRR_KERN_RX_INDEX
: return XPRR_KERN_RX_PERM
;
4125 case APRR_USER_XO_INDEX
: return XPRR_USER_XO_PERM
;
4126 case APRR_KERN_RO_INDEX
: return XPRR_KERN_RO_PERM
;
4127 case APRR_KERN0_RX_INDEX
: return XPRR_KERN0_RO_PERM
;
4128 case APRR_KERN0_RO_INDEX
: return XPRR_KERN0_RO_PERM
;
4129 case APRR_USER_RX_INDEX
: return XPRR_USER_RX_PERM
;
4130 case APRR_USER_RO_INDEX
: return XPRR_USER_RO_PERM
;
4131 default: return XPRR_MAX_PERM
;
4134 #error "XPRR configuration error"
4138 #if __APRR_SUPPORTED__
4140 xprr_perm_to_aprr_index(uint64_t perm
)
4143 case XPRR_FIRM_RX_PERM
: return APRR_FIRM_RX_INDEX
;
4144 case XPRR_FIRM_RO_PERM
: return APRR_FIRM_RO_INDEX
;
4145 case XPRR_PPL_RW_PERM
: return APRR_PPL_RW_INDEX
;
4146 case XPRR_KERN_RW_PERM
: return APRR_KERN_RW_INDEX
;
4147 case XPRR_FIRM_RW_PERM
: return APRR_FIRM_RW_INDEX
;
4148 case XPRR_KERN0_RW_PERM
: return APRR_KERN0_RW_INDEX
;
4149 case XPRR_USER_JIT_PERM
: return APRR_USER_JIT_INDEX
;
4150 case XPRR_USER_RW_PERM
: return APRR_USER_RW_INDEX
;
4151 case XPRR_PPL_RX_PERM
: return APRR_PPL_RX_INDEX
;
4152 case XPRR_KERN_RX_PERM
: return APRR_KERN_RX_INDEX
;
4153 case XPRR_USER_XO_PERM
: return APRR_USER_XO_INDEX
;
4154 case XPRR_KERN_RO_PERM
: return APRR_KERN_RO_INDEX
;
4155 case XPRR_KERN0_RX_PERM
: return APRR_KERN0_RO_INDEX
;
4156 case XPRR_KERN0_RO_PERM
: return APRR_KERN0_RO_INDEX
;
4157 case XPRR_USER_RX_PERM
: return APRR_USER_RX_INDEX
;
4158 case XPRR_USER_RO_PERM
: return APRR_USER_RO_INDEX
;
4159 default: return APRR_MAX_INDEX
;
4162 #endif /* __APRR_SUPPORTED__ */
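
/*
 * Note that this mapping is not a strict inverse of pte_to_xprr_perm():
 * both XPRR_KERN0_RX_PERM and XPRR_KERN0_RO_PERM map to APRR_KERN0_RO_INDEX,
 * and APRR_KERN0_RX_INDEX likewise reports back as XPRR_KERN0_RO_PERM.
 */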
4165 __unused
xprr_perm_to_pte(uint64_t perm
)
4167 #if __APRR_SUPPORTED__
4168 return APRR_INDEX_TO_PTE(xprr_perm_to_aprr_index(perm
));
4170 #error "XPRR configuration error"
4173 #endif /* __APRR_SUPPORTED__*/
4177 * Map memory at initialization. The physical addresses being
4178 * mapped are not managed and are never unmapped.
4180 * For now, VM is already on, we only need to map the
4185 vm_map_address_t virt
,
4195 while (start
< end
) {
4196 kr
= pmap_enter(kernel_pmap
, virt
, (ppnum_t
)atop(start
),
4197 prot
, VM_PROT_NONE
, flags
, FALSE
);
4199 if (kr
!= KERN_SUCCESS
) {
4200 panic("%s: failed pmap_enter, "
4201 "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
4203 (void *) virt
, (void *) start
, (void *) end
, prot
, flags
);
4213 pmap_map_bd_with_options(
4214 vm_map_address_t virt
,
4222 vm_map_address_t vaddr
;
4224 pt_entry_t mem_attr
;
4226 switch (options
& PMAP_MAP_BD_MASK
) {
4227 case PMAP_MAP_BD_WCOMB
:
4228 mem_attr
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB
);
4229 #if (__ARM_VMSA__ > 7)
4230 mem_attr
|= ARM_PTE_SH(SH_OUTER_MEMORY
);
4232 mem_attr
|= ARM_PTE_SH
;
4235 case PMAP_MAP_BD_POSTED
:
4236 mem_attr
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED
);
4238 case PMAP_MAP_BD_POSTED_REORDERED
:
4239 mem_attr
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED
);
4241 case PMAP_MAP_BD_POSTED_COMBINED_REORDERED
:
4242 mem_attr
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED
);
4245 mem_attr
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE
);
4249 tmplate
= pa_to_pte(start
) | ARM_PTE_AP((prot
& VM_PROT_WRITE
) ? AP_RWNA
: AP_RONA
) |
4250 mem_attr
| ARM_PTE_TYPE
| ARM_PTE_NX
| ARM_PTE_PNX
| ARM_PTE_AF
;
4251 #if __ARM_KERNEL_PROTECT__
4252 tmplate
|= ARM_PTE_NG
;
4253 #endif /* __ARM_KERNEL_PROTECT__ */
4257 while (paddr
< end
) {
4258 ptep
= pmap_pte(kernel_pmap
, vaddr
);
4259 if (ptep
== PT_ENTRY_NULL
) {
4260 panic("%s: no PTE for vaddr=%p, "
4261 "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
4262 __FUNCTION__
, (void*)vaddr
,
4263 (void*)virt
, (void*)start
, (void*)end
, prot
, options
);
4266 assert(!ARM_PTE_IS_COMPRESSED(*ptep
, ptep
));
4267 WRITE_PTE_STRONG(ptep
, tmplate
);
4269 pte_increment_pa(tmplate
);
4275 flush_mmu_tlb_region(virt
, (unsigned)(end
- start
));
/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory outside the range
 *	[vm_first_phys, vm_last_phys] (i.e., devices).
 *	Otherwise like pmap_map.
 */
	vm_map_address_t virt,
	vm_map_address_t vaddr;

	/* not cacheable and not buffered */
	tmplate = pa_to_pte(start)
	    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
	    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
	    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
#if __ARM_KERNEL_PROTECT__
	tmplate |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	while (paddr < end) {
		ptep = pmap_pte(kernel_pmap, vaddr);
		if (ptep == PT_ENTRY_NULL) {
			panic("pmap_map_bd");
		}
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		WRITE_PTE_STRONG(ptep, tmplate);
		pte_increment_pa(tmplate);
	}

	flush_mmu_tlb_region(virt, (unsigned)(end - start));
/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory specific physical addresses in early
 *	boot (i.e., before kernel_map is initialized).
 *
 *	Maps are in the VM_HIGH_KERNEL_WINDOW area.
 */
pmap_map_high_window_bd(
	vm_offset_t pa_start,
	pt_entry_t *ptep, pte;
#if (__ARM_VMSA__ == 7)
	vm_map_address_t va_start = VM_HIGH_KERNEL_WINDOW;
	vm_map_address_t va_max = VM_MAX_KERNEL_ADDRESS;
#else
	vm_map_address_t va_start = VREGION1_START;
	vm_map_address_t va_max = VREGION1_START + VREGION1_SIZE;
#endif
	vm_map_address_t va_end;
	vm_map_address_t va;

	offset = pa_start & PAGE_MASK;

	if (len > (va_max - va_start)) {
		panic("%s: area too large, "
		    "pa_start=%p, len=%p, prot=0x%x",
		    __FUNCTION__,
		    (void*)pa_start, (void*)len, prot);
	}

	for (; va_start < va_max; va_start += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va_start);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		if (*ptep == ARM_PTE_TYPE_FAULT) {
			break;
		}
	}
	if (va_start > va_max) {
		panic("%s: insufficient pages, "
		    "pa_start=%p, len=%p, prot=0x%x",
		    __FUNCTION__,
		    (void*)pa_start, (void*)len, prot);
	}

	for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va_end);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		if (*ptep != ARM_PTE_TYPE_FAULT) {
			va_start = va_end + PAGE_SIZE;

	for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va);
		pte = pa_to_pte(pa_start)
		    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
		    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
		    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#endif
#if __ARM_KERNEL_PROTECT__
		pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
		WRITE_PTE_STRONG(ptep, pte);
	}
	PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len, false);

	kasan_notify_address(va_start, len);
#define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))
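
/*
 * Worked example: PMAP_ALIGN() rounds an address up to the next multiple of a
 * power-of-two alignment, leaving already-aligned addresses unchanged:
 *
 *	PMAP_ALIGN(0x1001, 0x40) == 0x1040
 *	PMAP_ALIGN(0x1040, 0x40) == 0x1040
 */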
pmap_compute_pv_targets(void)
{
	void const *prop = NULL;
	unsigned int prop_size;

	err = SecureDTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	if (kSuccess == SecureDTGetProperty(entry, "pmap-pv-count", &prop, &prop_size)) {
		if (prop_size != sizeof(pv_alloc_initial_target)) {
			panic("pmap-pv-count property is not a 32-bit integer");
		}
		pv_alloc_initial_target = *((uint32_t const *)prop);
	}

	if (kSuccess == SecureDTGetProperty(entry, "pmap-kern-pv-count", &prop, &prop_size)) {
		if (prop_size != sizeof(pv_kern_alloc_initial_target)) {
			panic("pmap-kern-pv-count property is not a 32-bit integer");
		}
		pv_kern_alloc_initial_target = *((uint32_t const *)prop);
	}

	if (kSuccess == SecureDTGetProperty(entry, "pmap-kern-pv-min", &prop, &prop_size)) {
		if (prop_size != sizeof(pv_kern_low_water_mark)) {
			panic("pmap-kern-pv-min property is not a 32-bit integer");
		}
		pv_kern_low_water_mark = *((uint32_t const *)prop);
	}
pmap_compute_max_asids(void)
{
	void const *prop = NULL;
	unsigned int prop_size;

	err = SecureDTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	if (kSuccess != SecureDTGetProperty(entry, "pmap-max-asids", &prop, &prop_size)) {
		/* TODO: consider allowing maxproc limits to be scaled earlier so that
		 * we can choose a more flexible default value here. */
	}

	if (prop_size != sizeof(max_asids)) {
		panic("pmap-max-asids property is not a 32-bit integer");
	}

	max_asids = *((uint32_t const *)prop);
	/* Round up to the nearest 64 to make things a bit easier for the Pseudo-LRU allocator. */
	max_asids = (max_asids + 63) & ~63UL;

	if (((max_asids + MAX_HW_ASIDS) / (MAX_HW_ASIDS + 1)) > MIN(MAX_HW_ASIDS, UINT8_MAX)) {
		/* currently capped by size of pmap->sw_asid */
		panic("pmap-max-asids too large");
	}

	if (max_asids == 0) {
		panic("pmap-max-asids cannot be zero");
	}
pmap_compute_io_rgns(void)
{
	pmap_io_range_t const *ranges;
	void const *prop = NULL;
	unsigned int prop_size;

	err = SecureDTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	if (kSuccess != SecureDTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
	}

	for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
		if (ranges[i].addr & PAGE_MASK) {
			panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
		}
		if (ranges[i].len & PAGE_MASK) {
			panic("pmap I/O region %u length 0x%llx is not page-aligned", i, ranges[i].len);
		}
		if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end)) {
			panic("pmap I/O region %u addr 0x%llx length 0x%llx wraps around", i, ranges[i].addr, ranges[i].len);
		}
		if (((ranges[i].addr <= gPhysBase) && (rgn_end > gPhysBase)) ||
		    ((ranges[i].addr < avail_end) && (rgn_end >= avail_end)) ||
		    ((ranges[i].addr > gPhysBase) && (rgn_end < avail_end))) {
			panic("pmap I/O region %u addr 0x%llx length 0x%llx overlaps physical memory", i, ranges[i].addr, ranges[i].len);
		}
	}

	return num_io_rgns * sizeof(*ranges);
/*
 * return < 0 for a < b
 */
typedef int (*cmpfunc_t)(const void *a, const void *b);

qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

cmp_io_rgns(const void *a, const void *b)
{
	const pmap_io_range_t *range_a = a;
	const pmap_io_range_t *range_b = b;
	if ((range_b->addr + range_b->len) <= range_a->addr) {
		return 1;
	} else if ((range_a->addr + range_a->len) <= range_b->addr) {
		return -1;
	}
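
/*
 * Illustrative sketch: cmp_io_rgns() is an ordinary qsort comparator over
 * pmap_io_range_t, ordering ranges by address, so sorting a local array
 * (NRGNS is hypothetical) looks like:
 *
 *	pmap_io_range_t rgns[NRGNS];
 *	qsort(rgns, NRGNS, sizeof(rgns[0]), cmp_io_rgns);
 *
 * pmap_load_io_rgns() below applies the same call to io_attr_table.
 */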
pmap_load_io_rgns(void)
{
	pmap_io_range_t const *ranges;
	void const *prop = NULL;
	unsigned int prop_size;

	if (num_io_rgns == 0) {
		return;
	}

	err = SecureDTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	err = SecureDTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
	assert(err == kSuccess);

	for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
		io_attr_table[i] = ranges[i];
	}

	qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
/*
 * pmap_get_arm64_prot
 *
 * return effective armv8 VMSA block protections including
 *	table AP/PXN/XN overrides of a pmap entry
 */
pmap_get_arm64_prot(
	unsigned int level = 0;
	uint64_t tte_type = 0;
	uint64_t effective_prot_bits = 0;
	uint64_t aggregate_tte = 0;
	uint64_t table_ap_bits = 0, table_xn = 0, table_pxn = 0;
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
		tte = *pmap_ttne(pmap, level, addr);

		if (!(tte & ARM_TTE_VALID)) {
		}

		tte_type = tte & ARM_TTE_TYPE_MASK;

		if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
		    (level == pt_attr->pta_max_level)) {
			/* Block or page mapping; both have the same protection bit layout. */
			break;
		} else if (tte_type == ARM_TTE_TYPE_TABLE) {
			/* All of the table bits we care about are overrides, so just OR them together. */
			aggregate_tte |= tte;
		}
	}

	table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
	table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
	table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);

	/* Start with the PTE bits. */
	effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);

	/* Table AP bits mask out block/page AP bits */
	effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));

	/* XN/PXN bits can be OR'd in. */
	effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
	effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);

	return effective_prot_bits;
#endif /* __arm64__ */
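
/*
 * Worked example of the combination rule above (illustrative values only):
 * if the leaf entry carries AP(AP_RWNA) with NX/PNX clear, but some table
 * entry on the walk had ARM_TTE_TABLE_XN set, the value returned is
 * ARM_PTE_AP(AP_RWNA) | ARM_PTE_NX, i.e. the table-level execute-never
 * override is folded into the effective protection.
 */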
/*
 *	Bootstrap the system enough to run with virtual memory.
 *
 *	The early VM initialization code has already allocated
 *	the first CPU's translation table and made entries for
 *	all the one-to-one mappings to be found there.
 *
 *	We must set up the kernel pmap structures, the
 *	physical-to-virtual translation lookup tables for the
 *	physical memory to be managed (between avail_start and
 *
 *	Map the kernel's code and data, and allocate the system page table.
 *	Page_size must already be set.
 *
 *	first_avail	first available physical page -
 *	                after kernel page tables
 *	avail_start	PA of first managed physical page
 *	avail_end	PA of last managed physical page
 */
	pmap_paddr_t	pmap_struct_start;
	vm_size_t	pv_head_size;
	vm_size_t	ptd_root_table_size;
	vm_size_t	pp_attr_table_size;
	vm_size_t	io_attr_table_size;
	vm_size_t	asid_table_size;
	unsigned int	npages;
	vm_map_offset_t	maxoffset;

	lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);

#if DEVELOPMENT || DEBUG
	PE_parse_boot_argn("-unsafe_kernel_text", &pmap_ppl_disable, sizeof(pmap_ppl_disable));
#endif

#if CONFIG_CSR_FROM_DT
	if (csr_unsafe_kernel_text) {
		pmap_ppl_disable = true;
	}
#endif /* CONFIG_CSR_FROM_DT */

#if __APRR_SUPPORTED__
	if (((uintptr_t)(&ppl_trampoline_start)) % PAGE_SIZE) {
		panic("%s: ppl_trampoline_start is not page aligned, "
	if (((uintptr_t)(&ppl_trampoline_end)) % PAGE_SIZE) {
		panic("%s: ppl_trampoline_end is not page aligned, "
#endif /* __APRR_SUPPORTED__ */
#endif /* XNU_MONITOR */

#if DEVELOPMENT || DEBUG
	if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
		kprintf("Kernel traces for pmap operations enabled\n");
	}
#endif

	/*
	 *	Initialize the kernel pmap.
	 */
#if ARM_PARAMETERIZED_PMAP
	kernel_pmap->pmap_pt_attr = native_pt_attr;
#endif /* ARM_PARAMETERIZED_PMAP */

	kernel_pmap->disable_jop = 0;
#endif /* HAS_APPLE_PAC */
	kernel_pmap->tte = cpu_tte;
	kernel_pmap->ttep = cpu_ttep;
#if (__ARM_VMSA__ > 7)
	kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
#else
	kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
#endif
	kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
	os_atomic_init(&kernel_pmap->ref_count, 1);
	kernel_pmap->gc_status = 0;
	kernel_pmap->nx_enabled = TRUE;
	kernel_pmap->is_64bit = TRUE;
	kernel_pmap->is_64bit = FALSE;
	kernel_pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);

#if ARM_PARAMETERIZED_PMAP
	kernel_pmap->pmap_pt_attr = native_pt_attr;
#endif /* ARM_PARAMETERIZED_PMAP */

	kernel_pmap->nested_region_addr = 0x0ULL;
	kernel_pmap->nested_region_size = 0x0ULL;
	kernel_pmap->nested_region_asid_bitmap = NULL;
	kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;

#if (__ARM_VMSA__ == 7)
	kernel_pmap->tte_index_max = 4 * NTTES;
#endif
	kernel_pmap->hw_asid = 0;
	kernel_pmap->sw_asid = 0;

	pmap_lock_init(kernel_pmap);
	memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));

	/* allocate space for and initialize the bookkeeping structures */
	io_attr_table_size = pmap_compute_io_rgns();
	npages = (unsigned int)atop(mem_size);
	pp_attr_table_size = npages * sizeof(pp_attr_t);
	pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
	// allocate enough initial PTDs to map twice the available physical memory
	ptd_root_table_size = sizeof(pt_desc_t) * (mem_size / ((PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES)) * 2;
	pmap_max_asids = pmap_compute_max_asids();
	pmap_asid_plru = (pmap_max_asids > MAX_HW_ASIDS);
	PE_parse_boot_argn("pmap_asid_plru", &pmap_asid_plru, sizeof(pmap_asid_plru));
	/* Align the range of available hardware ASIDs to a multiple of 64 to enable the
	 * masking used by the PLRU scheme.  This means we must handle the case in which
	 * the returned hardware ASID is MAX_HW_ASIDS, which we do in alloc_asid() and free_asid(). */
	_Static_assert(sizeof(asid_plru_bitmap[0]) == sizeof(uint64_t), "bitmap_t is not a 64-bit integer");
	_Static_assert(((MAX_HW_ASIDS + 1) % 64) == 0, "MAX_HW_ASIDS + 1 is not divisible by 64");
	asid_chunk_size = (pmap_asid_plru ? (MAX_HW_ASIDS + 1) : MAX_HW_ASIDS);

	asid_table_size = sizeof(*asid_bitmap) * BITMAP_LEN(pmap_max_asids);

	pmap_compute_pv_targets();

	pmap_struct_start = avail_start;

	pp_attr_table = (pp_attr_t *) phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
	io_attr_table = (pmap_io_range_t *) phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + io_attr_table_size, __alignof(pv_entry_t *));
	pv_head_table = (pv_entry_t **) phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
	ptd_root_table = (pt_desc_t *)phystokv(avail_start);
	avail_start = PMAP_ALIGN(avail_start + ptd_root_table_size, __alignof(bitmap_t));
	asid_bitmap = (bitmap_t *)phystokv(avail_start);
	avail_start = round_page(avail_start + asid_table_size);

	memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);

	pmap_load_io_rgns();
	ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));

	pmap_array_begin = (void *)phystokv(avail_start);
	pmap_array = pmap_array_begin;
	avail_start += round_page(PMAP_ARRAY_SIZE * sizeof(struct pmap));
	pmap_array_end = (void *)phystokv(avail_start);

	pmap_array_count = ((pmap_array_end - pmap_array_begin) / sizeof(struct pmap));

	pmap_bootstrap_pmap_free_list();

	pmap_ledger_ptr_array_begin = (void *)phystokv(avail_start);
	pmap_ledger_ptr_array = pmap_ledger_ptr_array_begin;
	avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(void*));
	pmap_ledger_ptr_array_end = (void *)phystokv(avail_start);

	pmap_ledger_refcnt_begin = (void *)phystokv(avail_start);
	pmap_ledger_refcnt = pmap_ledger_refcnt_begin;
	avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(os_refcnt_t));
	pmap_ledger_refcnt_end = (void *)phystokv(avail_start);

	pmap_cpu_data_array_init();

	vm_first_phys = gPhysBase;
	vm_last_phys = trunc_page(avail_end);

	queue_init(&map_pmap_list);
	queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
	free_page_size_tt_list = TT_FREE_ENTRY_NULL;
	free_page_size_tt_count = 0;
	free_page_size_tt_max = 0;
	free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
	free_two_page_size_tt_count = 0;
	free_two_page_size_tt_max = 0;
	free_tt_list = TT_FREE_ENTRY_NULL;

	queue_init(&pt_page_list);

	pmap_pages_request_count = 0;
	pmap_pages_request_acum = 0;
	pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;

	virtual_space_start = vstart;
	virtual_space_end = VM_MAX_KERNEL_ADDRESS;

	bitmap_full(&asid_bitmap[0], pmap_max_asids);
	bitmap_full(&asid_plru_bitmap[0], MAX_HW_ASIDS);
	// Clear the highest-order bit, which corresponds to MAX_HW_ASIDS + 1
	asid_plru_bitmap[MAX_HW_ASIDS >> 6] = ~(1ULL << 63);

	if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
		maxoffset = trunc_page(maxoffset);
		if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
		    && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
			arm_pmap_max_offset_default = maxoffset;
		}
	}
#if defined(__arm64__)
	if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
		maxoffset = trunc_page(maxoffset);
		if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
		    && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
			arm64_pmap_max_offset_default = maxoffset;
		}
	}

	PE_parse_boot_argn("pmap_panic_dev_wimg_on_managed", &pmap_panic_dev_wimg_on_managed, sizeof(pmap_panic_dev_wimg_on_managed));

	PE_parse_boot_argn("pmap_stats_assert",
	    &pmap_stats_assert,
	    sizeof(pmap_stats_assert));
	PE_parse_boot_argn("vm_footprint_suspend_allowed",
	    &vm_footprint_suspend_allowed,
	    sizeof(vm_footprint_suspend_allowed));
#endif /* MACH_ASSERT */

	/* Shadow the CPU copy windows, as they fall outside of the physical aperture */
	kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
pa_set_range_monitor(pmap_paddr_t start_pa, pmap_paddr_t end_pa)
{
	pmap_paddr_t cur_pa;
	for (cur_pa = start_pa; cur_pa < end_pa; cur_pa += ARM_PGBYTES) {
		assert(pa_valid(cur_pa));
		pa_set_monitor(cur_pa);
	}
}

pa_set_range_xprr_perm(pmap_paddr_t start_pa,
    pmap_paddr_t end_pa,
    unsigned int expected_perm,
    unsigned int new_perm)
{
	vm_offset_t start_va = phystokv(start_pa);
	vm_offset_t end_va = start_va + (end_pa - start_pa);

	pa_set_range_monitor(start_pa, end_pa);
	pmap_set_range_xprr_perm(start_va, end_va, expected_perm, new_perm);
}
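
/*
 * Illustrative sketch (page_pa is hypothetical): retype a single page from
 * the kernel-RW xPRR index to the PPL-RW index, the same pattern used for
 * the PPL data segments in pmap_static_allocations_done() below:
 *
 *	pa_set_range_xprr_perm(page_pa, page_pa + PAGE_SIZE,
 *	    XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
 */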
pmap_lockdown_kc(void)
{
	extern vm_offset_t vm_kernelcache_base;
	extern vm_offset_t vm_kernelcache_top;
	pmap_paddr_t start_pa = kvtophys(vm_kernelcache_base);
	pmap_paddr_t end_pa = start_pa + (vm_kernelcache_top - vm_kernelcache_base);
	pmap_paddr_t cur_pa = start_pa;
	vm_offset_t cur_va = vm_kernelcache_base;
	while (cur_pa < end_pa) {
		vm_size_t range_size = end_pa - cur_pa;
		vm_offset_t ptov_va = phystokv_range(cur_pa, &range_size);
		if (ptov_va != cur_va) {
			/*
			 * If the physical address maps back to a virtual address that is non-linear
			 * w.r.t. the kernelcache, that means it corresponds to memory that will be
			 * reclaimed by the OS and should therefore not be locked down.
			 */
			cur_pa += range_size;
			cur_va += range_size;
			continue;
		}
		unsigned int pai = (unsigned int)pa_index(cur_pa);
		pv_entry_t **pv_h = pai_to_pvh(pai);

		vm_offset_t pvh_flags = pvh_get_flags(pv_h);

		if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
			panic("pai %d already locked down", pai);
		}
		pvh_set_flags(pv_h, pvh_flags | PVH_FLAG_LOCKDOWN);
		cur_pa += ARM_PGBYTES;
		cur_va += ARM_PGBYTES;
	}
#if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
	extern uint64_t ctrr_ro_test;
	extern uint64_t ctrr_nx_test;
	pmap_paddr_t exclude_pages[] = {kvtophys((vm_offset_t)&ctrr_ro_test), kvtophys((vm_offset_t)&ctrr_nx_test)};
	for (unsigned i = 0; i < (sizeof(exclude_pages) / sizeof(exclude_pages[0])); ++i) {
		pv_entry_t **pv_h = pai_to_pvh(pa_index(exclude_pages[i]));
		pvh_set_flags(pv_h, pvh_get_flags(pv_h) & ~PVH_FLAG_LOCKDOWN);
	}
pmap_static_allocations_done(void)
{
	pmap_paddr_t monitor_start_pa;
	pmap_paddr_t monitor_end_pa;

	/*
	 * Protect the bootstrap (V=P and V->P) page tables.
	 *
	 * These bootstrap allocations will be used primarily for page tables.
	 * If we wish to secure the page tables, we need to start by marking
	 * these bootstrap allocations as pages that we want to protect.
	 */
	monitor_start_pa = kvtophys((vm_offset_t)&bootstrap_pagetables);
	monitor_end_pa = monitor_start_pa + BOOTSTRAP_TABLE_SIZE;

	/* The bootstrap page tables are mapped RW at bootstrap. */
	pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_KERN_RO_PERM);

	/*
	 * We use avail_start as a pointer to the first address that has not
	 * been reserved for bootstrap, so we know which pages to give to the
	 * virtual memory layer.
	 */
	monitor_start_pa = BootArgs->topOfKernelData;
	monitor_end_pa = avail_start;

	/* The other bootstrap allocations are mapped RW at bootstrap. */
	pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);

	/*
	 * The RO page tables are mapped RW in arm_vm_init() and later restricted
	 * to RO in arm_vm_prot_finalize(), which is called after this function.
	 * Here we only need to mark the underlying physical pages as PPL-owned to ensure
	 * they can't be allocated for other uses.  We don't need a special xPRR
	 * protection index, as there is no PPL_RO index, and these pages are ultimately
	 * protected by KTRR/CTRR.  Furthermore, use of PPL_RW for these pages would
	 * expose us to a functional issue on H11 devices where CTRR shifts the APRR
	 * lookup table index to USER_XO before APRR is applied, leading the hardware
	 * to believe we are dealing with a user XO page upon performing a translation.
	 */
	monitor_start_pa = kvtophys((vm_offset_t)&ropagetable_begin);
	monitor_end_pa = monitor_start_pa + ((vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin);
	pa_set_range_monitor(monitor_start_pa, monitor_end_pa);

	monitor_start_pa = kvtophys(segPPLDATAB);
	monitor_end_pa = monitor_start_pa + segSizePPLDATA;

	/* PPL data is RW for the PPL, RO for the kernel. */
	pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);

	monitor_start_pa = kvtophys(segPPLTEXTB);
	monitor_end_pa = monitor_start_pa + segSizePPLTEXT;

	/* PPL text is RX for the PPL, RO for the kernel. */
	pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);

#if __APRR_SUPPORTED__
	monitor_start_pa = kvtophys(segPPLTRAMPB);
	monitor_end_pa = monitor_start_pa + segSizePPLTRAMP;

	/*
	 * The PPLTRAMP pages will be a mix of PPL RX/kernel RO and
	 * PPL RX/kernel RX.  However, all of these pages belong to the PPL.
	 */
	pa_set_range_monitor(monitor_start_pa, monitor_end_pa);

	/*
	 * In order to support DTrace, the save areas for the PPL must be
	 * writable.  This is due to the fact that DTrace will try to update
	 */
	if (pmap_ppl_disable) {
		vm_offset_t monitor_start_va = phystokv(ppl_cpu_save_area_start);
		vm_offset_t monitor_end_va = monitor_start_va + (ppl_cpu_save_area_end - ppl_cpu_save_area_start);

		pmap_set_range_xprr_perm(monitor_start_va, monitor_end_va, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
	}

#if __APRR_SUPPORTED__
	/* The trampoline must also be specially protected. */
	pmap_set_range_xprr_perm((vm_offset_t)&ppl_trampoline_start, (vm_offset_t)&ppl_trampoline_end, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
#endif

	if (segSizePPLDATACONST > 0) {
		monitor_start_pa = kvtophys(segPPLDATACONSTB);
		monitor_end_pa = monitor_start_pa + segSizePPLDATACONST;

		pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
	}

	/*
	 * Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
	 * precaution.  The real RW mappings are at a different location with guard pages.
	 */
	pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_KERN_RO_PERM);

	/* Prevent remapping of the kernelcache */
}

pmap_lockdown_ppl(void)
{
	/* Mark the PPL as being locked down. */

#if __APRR_SUPPORTED__
	pmap_ppl_locked_down = TRUE;
	/* Force a trap into the PPL to update APRR_EL1. */
	pmap_return(FALSE, FALSE);
#else
#error "XPRR configuration error"
#endif /* __APRR_SUPPORTED__ */
}
#endif /* XNU_MONITOR */
	vm_offset_t *startp,
	*startp = virtual_space_start;
	*endp = virtual_space_end;

pmap_virtual_region(
	unsigned int region_select,
	vm_map_offset_t *startp,
	boolean_t ret = FALSE;
#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
	if (region_select == 0) {
		/*
		 * In this config, the bootstrap mappings should occupy their own L2
		 * TTs, as they should be immutable after boot.  Having the associated
		 * TTEs and PTEs in their own pages allows us to lock down those pages,
		 * while allowing the rest of the kernel address range to be remapped.
		 */
#if (__ARM_VMSA__ > 7)
		*startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
#else
#error Unsupported configuration
#endif
#if defined(ARM_LARGE_MEMORY)
		*size = ((KERNEL_PMAP_HEAP_RANGE_START - *startp) & ~PAGE_MASK);
#else
		*size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
#endif
	}
#if (__ARM_VMSA__ > 7)
	unsigned long low_global_vr_mask = 0;
	vm_map_size_t low_global_vr_size = 0;
#endif

	if (region_select == 0) {
#if (__ARM_VMSA__ == 7)
		*startp = gVirtBase & 0xFFC00000;
		*size = ((virtual_space_start - (gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
#else
		/* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
		if (!TEST_PAGE_SIZE_4K) {
			*startp = gVirtBase & 0xFFFFFFFFFE000000;
			*size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
		} else {
			*startp = gVirtBase & 0xFFFFFFFFFF800000;
			*size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
		}
#endif
	}
	if (region_select == 1) {
		*startp = VREGION1_START;
		*size = VREGION1_SIZE;
	}
#if (__ARM_VMSA__ > 7)
	/* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
	if (!TEST_PAGE_SIZE_4K) {
		low_global_vr_mask = 0xFFFFFFFFFE000000;
		low_global_vr_size = 0x2000000;
	} else {
		low_global_vr_mask = 0xFFFFFFFFFF800000;
		low_global_vr_size = 0x800000;
	}

	if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
		*startp = LOW_GLOBAL_BASE_ADDRESS;
		*size = low_global_vr_size;
	}

	if (region_select == 3) {
		/* In this config, we allow the bootstrap mappings to occupy the same
		 * page table pages as the heap.
		 */
		*startp = VM_MIN_KERNEL_ADDRESS;
		*size = LOW_GLOBAL_BASE_ADDRESS - *startp;
	}
	return (unsigned int)atop(avail_end - first_avail);

	__unused boolean_t might_free)
	return pmap_next_page(pnum);

	if (first_avail != avail_end) {
		*pnum = (ppnum_t)atop(first_avail);
		first_avail += PAGE_SIZE;
	}
/*
 *	Initialize the pmap module.
 *	Called by vm_init, to initialize any structures that the pmap
 *	system needs to map virtual memory.
 */
	/*
	 *	Protect page zero in the kernel map.
	 *	(can be overruled by permanent translation
	 *	table entries at page zero - see arm_vm_init).
	 */
	vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);

	pmap_initialized = TRUE;

	/*
	 *	Create the zone of physical maps
	 *	and the physical-to-virtual entries.
	 */
	pmap_zone = zone_create_ext("pmap", sizeof(struct pmap),
	    ZC_ZFREE_CLEARMEM, ZONE_ID_PMAP, NULL);

	/*
	 *	Initialize the pmap object (for tracking the vm_page_t
	 *	structures for pages we allocate to be page tables in
	 */
	_vm_object_allocate(mem_size, pmap_object);
	pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

	/*
	 * The values of [hard_]maxproc may have been scaled, make sure
	 * they are still less than the value of pmap_max_asids.
	 */
	if ((uint32_t)maxproc > pmap_max_asids) {
		maxproc = pmap_max_asids;
	}
	if ((uint32_t)hard_maxproc > pmap_max_asids) {
		hard_maxproc = pmap_max_asids;
	}

	pmap_pgtrace_init();
	pmap_paddr_t phys = ptoa(ppnum);

	assert(phys != vm_page_fictitious_addr);

	if (!pa_valid(phys)) {
	}

	pai = (int)pa_index(phys);
	pv_h = pai_to_pvh(pai);

	return pvh_test_type(pv_h, PVH_TYPE_NULL);

pmap_assert_free(ppnum_t ppnum)
{
	assertf(pmap_verify_free(ppnum), "page = 0x%x", ppnum);
}
MARK_AS_PMAP_TEXT static void
pmap_ledger_alloc_init_internal(size_t size)
{
	pmap_simple_lock(&pmap_ledger_lock);

	if (pmap_ledger_alloc_initialized) {
		panic("%s: already initialized, "
	}

	if ((size > sizeof(pmap_ledger_data_t)) ||
	    ((sizeof(pmap_ledger_data_t) - size) % sizeof(struct ledger_entry))) {
		panic("%s: size mismatch, expected %lu, "
		    __func__, PMAP_LEDGER_DATA_BYTES,
	}

	pmap_ledger_alloc_initialized = true;

	pmap_simple_unlock(&pmap_ledger_lock);
}

MARK_AS_PMAP_TEXT static ledger_t
pmap_ledger_alloc_internal(void)
{
	uint64_t vaddr, vstart, vend;

	ledger_t new_ledger;
	uint64_t array_index;

	pmap_simple_lock(&pmap_ledger_lock);
	if (pmap_ledger_free_list == NULL) {
		paddr = pmap_get_free_ppl_page();

		pmap_simple_unlock(&pmap_ledger_lock);

		vstart = phystokv(paddr);
		vend = vstart + PAGE_SIZE;

		for (vaddr = vstart; (vaddr < vend) && ((vaddr + sizeof(pmap_ledger_t)) <= vend); vaddr += sizeof(pmap_ledger_t)) {
			pmap_ledger_t *free_ledger;

			index = pmap_ledger_ptr_array_free_index++;

			if (index >= MAX_PMAP_LEDGERS) {
				panic("%s: pmap_ledger_ptr_array is full, index=%llu",
			}

			free_ledger = (pmap_ledger_t *)vaddr;

			pmap_ledger_ptr_array[index] = free_ledger;
			free_ledger->back_ptr = &pmap_ledger_ptr_array[index];

			free_ledger->next = pmap_ledger_free_list;
			pmap_ledger_free_list = free_ledger;
		}

		pa_set_range_xprr_perm(paddr, paddr + PAGE_SIZE, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
	}

	new_ledger = (ledger_t)pmap_ledger_free_list;
	pmap_ledger_free_list = pmap_ledger_free_list->next;

	array_index = pmap_ledger_validate(new_ledger);
	os_ref_init(&pmap_ledger_refcnt[array_index], NULL);

	pmap_simple_unlock(&pmap_ledger_lock);
}

MARK_AS_PMAP_TEXT static void
pmap_ledger_free_internal(ledger_t ledger)
{
	pmap_ledger_t *free_ledger;

	free_ledger = (pmap_ledger_t *)ledger;

	pmap_simple_lock(&pmap_ledger_lock);
	uint64_t array_index = pmap_ledger_validate(ledger);

	if (os_ref_release(&pmap_ledger_refcnt[array_index]) != 0) {
		panic("%s: ledger still referenced, "
	}

	free_ledger->next = pmap_ledger_free_list;
	pmap_ledger_free_list = free_ledger;
	pmap_simple_unlock(&pmap_ledger_lock);
}

pmap_ledger_retain(ledger_t ledger)
{
	pmap_simple_lock(&pmap_ledger_lock);
	uint64_t array_index = pmap_ledger_validate(ledger);
	os_ref_retain(&pmap_ledger_refcnt[array_index]);
	pmap_simple_unlock(&pmap_ledger_lock);
}

pmap_ledger_release(ledger_t ledger)
{
	pmap_simple_lock(&pmap_ledger_lock);
	uint64_t array_index = pmap_ledger_validate(ledger);
	os_ref_release_live(&pmap_ledger_refcnt[array_index]);
	pmap_simple_unlock(&pmap_ledger_lock);
}

pmap_ledger_alloc_init(size_t size)
{
	pmap_ledger_alloc_init_ppl(size);
}

pmap_ledger_alloc(void)
{
	ledger_t retval = NULL;

	while ((retval = pmap_ledger_alloc_ppl()) == NULL) {
		pmap_alloc_page_for_ppl(0);
	}
}

pmap_ledger_free(ledger_t ledger)
{
	pmap_ledger_free_ppl(ledger);
}
#else /* XNU_MONITOR */

pmap_ledger_alloc_init(size_t size)
{
	panic("%s: unsupported, "
}

pmap_ledger_alloc(void)
{
	panic("%s: unsupported",
}

pmap_ledger_free(ledger_t ledger)
{
	panic("%s: unsupported, "
}
#endif /* XNU_MONITOR */
pmap_root_alloc_size(pmap_t pmap)
{
#if (__ARM_VMSA__ > 7)
#pragma unused(pmap)
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	unsigned int root_level = pt_attr_root_level(pt_attr);
	return ((pt_attr_ln_index_mask(pt_attr, root_level) >> pt_attr_ln_shift(pt_attr, root_level)) + 1) * sizeof(tt_entry_t);
#else
	return PMAP_ROOT_ALLOC_SIZE;
#endif
}
/*
 *	Create and return a physical map.
 *
 *	If the size specified for the map
 *	is zero, the map is an actual physical
 *	map, and may be referenced by the
 *
 *	If the size specified is non-zero,
 *	the map will be used in software only, and
 *	is bounded by that size.
 */
MARK_AS_PMAP_TEXT static pmap_t
pmap_create_options_internal(
	unsigned tte_index_max;

	bool is_64bit = flags & PMAP_CREATE_64BIT;
#if defined(HAS_APPLE_PAC)
	bool disable_jop = flags & PMAP_CREATE_DISABLE_JOP;
#endif /* defined(HAS_APPLE_PAC) */
	kern_return_t local_kr = KERN_SUCCESS;

	/*
	 * A software use-only map doesn't even need a pmap.
	 */

	if (0 != (flags & ~PMAP_CREATE_KNOWN_FLAGS)) {
	}

	if ((p = pmap_alloc_pmap()) == PMAP_NULL) {
		local_kr = KERN_NO_SPACE;
		goto pmap_create_fail;
	}

	pmap_ledger_validate(ledger);
	pmap_ledger_retain(ledger);

	/*
	 *	Allocate a pmap struct from the pmap_zone.  Then allocate
	 *	the translation table of the right size for the pmap.
	 */
	if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
		local_kr = KERN_RESOURCE_SHORTAGE;
		goto pmap_create_fail;
	}

	p->pmap_vm_map_cs_enforced = false;

	if (flags & PMAP_CREATE_64BIT) {
		p->min = MACH_VM_MIN_ADDRESS;
		p->max = MACH_VM_MAX_ADDRESS;
	} else {
		p->min = VM_MIN_ADDRESS;
		p->max = VM_MAX_ADDRESS;
	}
#if defined(HAS_APPLE_PAC)
	p->disable_jop = disable_jop;
#endif /* defined(HAS_APPLE_PAC) */

	p->nested_region_true_start = 0;
	p->nested_region_true_end = ~0;

	os_atomic_init(&p->ref_count, 1);

	p->stamp = os_atomic_inc(&pmap_stamp, relaxed);
	p->nx_enabled = TRUE;
	p->is_64bit = is_64bit;

	p->nested_pmap = PMAP_NULL;

#if ARM_PARAMETERIZED_PMAP
	/* Default to the native pt_attr */
	p->pmap_pt_attr = native_pt_attr;
#endif /* ARM_PARAMETERIZED_PMAP */
#if __ARM_MIXED_PAGE_SIZE__
	if (flags & PMAP_CREATE_FORCE_4K_PAGES) {
		p->pmap_pt_attr = &pmap_pt_attr_4k;
	}
#endif /* __ARM_MIXED_PAGE_SIZE__ */

	if (!pmap_get_pt_ops(p)->alloc_id(p)) {
		local_kr = KERN_NO_SPACE;
	}

	memset((void *) &p->stats, 0, sizeof(p->stats));

	p->tt_entry_free = (tt_entry_t *)0;
	tte_index_max = ((unsigned)pmap_root_alloc_size(p) / sizeof(tt_entry_t));

#if (__ARM_VMSA__ == 7)
	p->tte_index_max = tte_index_max;
#endif

	p->tte = pmap_tt1_allocate(p, pmap_root_alloc_size(p), PMAP_TT_ALLOCATE_NOWAIT);
	p->tte = pmap_tt1_allocate(p, pmap_root_alloc_size(p), 0);
		local_kr = KERN_RESOURCE_SHORTAGE;
		goto tt1_alloc_fail;

	p->ttep = ml_static_vtop((vm_offset_t)p->tte);
	PMAP_TRACE(4, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);

	/* nullify the translation table */
	for (i = 0; i < tte_index_max; i++) {
		p->tte[i] = ARM_TTE_TYPE_FAULT;
	}

	FLUSH_PTE_RANGE(p->tte, p->tte + tte_index_max);

	/*
	 *  initialize the rest of the structure
	 */
	p->nested_region_addr = 0x0ULL;
	p->nested_region_size = 0x0ULL;
	p->nested_region_asid_bitmap = NULL;
	p->nested_region_asid_bitmap_size = 0x0UL;

	p->nested_has_no_bounds_ref = false;
	p->nested_no_bounds_refcnt = 0;
	p->nested_bounds_set = false;

	p->pmap_stats_assert = TRUE;
	strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
#endif /* MACH_ASSERT */
#if DEVELOPMENT || DEBUG
	p->footprint_was_suspended = FALSE;
#endif /* DEVELOPMENT || DEBUG */

	pmap_simple_lock(&pmaps_lock);
	queue_enter(&map_pmap_list, p, pmap_t, pmaps);
	pmap_simple_unlock(&pmaps_lock);

	pmap_get_pt_ops(p)->free_id(p);

	pmap_ledger_release(ledger);

	zfree(pmap_zone, p);

	pmap_pin_kernel_pages((vm_offset_t)kr, sizeof(*kr));
	pmap_unpin_kernel_pages((vm_offset_t)kr, sizeof(*kr));
pmap_create_options(
	kern_return_t kr = KERN_SUCCESS;

	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);

	ledger_reference(ledger);

	pmap = pmap_create_options_ppl(ledger, size, flags, &kr);
	if (kr != KERN_RESOURCE_SHORTAGE) {
	}
	assert(pmap == PMAP_NULL);
	pmap_alloc_page_for_ppl(0);

	pmap = pmap_create_options_internal(ledger, size, flags, &kr);

	if (pmap == PMAP_NULL) {
		ledger_dereference(ledger);
	}

	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
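
/*
 * Illustrative sketch (hypothetical caller; `task_ledger` is assumed):
 * create a 64-bit user pmap against an existing ledger; PMAP_NULL indicates
 * the ASID or memory shortage paths seen in pmap_create_options_internal().
 *
 *	pmap_t p = pmap_create_options(task_ledger, 0, PMAP_CREATE_64BIT);
 *	if (p == PMAP_NULL) {
 *		// retry or fail address-space setup
 *	}
 */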
/*
 * This symbol remains in place when the PPL is enabled so that the dispatch
 * table does not change from development to release configurations.
 */
#if MACH_ASSERT || XNU_MONITOR
MARK_AS_PMAP_TEXT static void
pmap_set_process_internal(
	__unused pmap_t pmap,
	__unused char *procname)
{
	VALIDATE_PMAP(pmap);

	pmap->pmap_pid = pid;
	strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
	if (pmap_ledgers_panic_leeway) {
		/*
		 * Some processes somehow trigger some issues that make
		 * the pmap stats and ledgers go off track, causing
		 * some assertion failures and ledger panics.
		 * Turn off the sanity checks if we allow some ledger leeway
		 * because of that.  We'll still do a final check in
		 * pmap_check_ledgers() for discrepancies larger than the
		 * allowed leeway after the address space has been fully
		 */
		pmap->pmap_stats_assert = FALSE;
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.phys_footprint);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.internal);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.internal_compressed);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.iokit_mapped);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.alternate_accounting);
		ledger_disable_panic_on_negative(pmap->ledger,
		    task_ledgers.alternate_accounting_compressed);
	}
#endif /* MACH_ASSERT */
}
#endif /* MACH_ASSERT || XNU_MONITOR */

	pmap_set_process_ppl(pmap, pid, procname);
	pmap_set_process_internal(pmap, pid, procname);
#endif /* MACH_ASSERT */
#if (__ARM_VMSA__ > 7)
/*
 * pmap_deallocate_all_leaf_tts:
 *
 * Recursive function for deallocating all leaf TTEs.  Walks the given TT,
 * removing and deallocating all TTEs.
 */
MARK_AS_PMAP_TEXT static void
pmap_deallocate_all_leaf_tts(pmap_t pmap, tt_entry_t * first_ttep, unsigned level)
{
	tt_entry_t tte = ARM_TTE_EMPTY;
	tt_entry_t * ttep = NULL;
	tt_entry_t * last_ttep = NULL;

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	assert(level < pt_attr_leaf_level(pt_attr));

	last_ttep = &first_ttep[ttn_index(pmap, pt_attr, ~0, level)];

	for (ttep = first_ttep; ttep <= last_ttep; ttep++) {
		tte = *ttep;

		if (!(tte & ARM_TTE_VALID)) {
			continue;
		}

		if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
			panic("%s: found block mapping, ttep=%p, tte=%p, "
			    "pmap=%p, first_ttep=%p, level=%u",
			    __FUNCTION__, ttep, (void *)tte,
			    pmap, first_ttep, level);
		}

		/* Must be valid, type table */
		if (level < pt_attr_twig_level(pt_attr)) {
			/* If we haven't reached the twig level, recurse to the next level. */
			pmap_deallocate_all_leaf_tts(pmap, (tt_entry_t *)phystokv((tte) & ARM_TTE_TABLE_MASK), level + 1);
		}

		/* Remove the TTE. */
		pmap_tte_deallocate(pmap, ttep, level);
	}
}
#endif /* (__ARM_VMSA__ > 7) */
/*
 * We maintain stats and ledgers so that a task's physical footprint is:
 * phys_footprint = ((internal - alternate_accounting)
 *                   + (internal_compressed - alternate_accounting_compressed)
 *                   + purgeable_nonvolatile
 *                   + purgeable_nonvolatile_compressed
 *
 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
 */

/*
 *	Retire the given physical map from service.
 *	Should only be called if the map contains
 *	no valid mappings.
 */
MARK_AS_PMAP_TEXT static void
pmap_destroy_internal(
	if (pmap == PMAP_NULL) {
	}

	VALIDATE_PMAP(pmap);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	int32_t ref_count = os_atomic_dec(&pmap->ref_count, relaxed);
	if (ref_count > 0) {
	} else if (ref_count < 0) {
		panic("pmap %p: refcount underflow", pmap);
	} else if (pmap == kernel_pmap) {
		panic("pmap %p: attempt to destroy kernel pmap", pmap);
	}

#if (__ARM_VMSA__ > 7)
	pmap_unmap_sharedpage(pmap);
#endif /* (__ARM_VMSA__ > 7) */

	pmap_simple_lock(&pmaps_lock);
	while (pmap->gc_status & PMAP_GC_INFLIGHT) {
		pmap->gc_status |= PMAP_GC_WAIT;
		assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
		pmap_simple_unlock(&pmaps_lock);
		(void) thread_block(THREAD_CONTINUE_NULL);
		pmap_simple_lock(&pmaps_lock);
	}
	queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
	pmap_simple_unlock(&pmaps_lock);

	pmap_trim_self(pmap);

	/*
	 *	Free the memory maps, then the
	 */
#if (__ARM_VMSA__ == 7)
	for (i = 0; i < pmap->tte_index_max; i++) {
		ttep = &pmap->tte[i];
		if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
			pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
		}
	}
#else /* (__ARM_VMSA__ == 7) */
	pmap_deallocate_all_leaf_tts(pmap, pmap->tte, pt_attr_root_level(pt_attr));
#endif /* (__ARM_VMSA__ == 7) */

#if (__ARM_VMSA__ == 7)
	pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
	pmap->tte_index_max = 0;
#else /* (__ARM_VMSA__ == 7) */
	pmap_tt1_deallocate(pmap, pmap->tte, pmap_root_alloc_size(pmap), 0);
#endif /* (__ARM_VMSA__ == 7) */
	pmap->tte = (tt_entry_t *) NULL;

	assert((tt_free_entry_t *)pmap->tt_entry_free == NULL);

	if (__improbable(pmap->nested)) {
		pmap_get_pt_ops(pmap)->flush_tlb_region_async(pmap->nested_region_addr, pmap->nested_region_size, pmap);
	} else {
		pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
		/* return its asid to the pool */
		pmap_get_pt_ops(pmap)->free_id(pmap);
		/* release the reference we hold on the nested pmap */
		pmap_destroy_internal(pmap->nested_pmap);
	}

	pmap_check_ledgers(pmap);

	if (pmap->nested_region_asid_bitmap) {
		pmap_pages_free(kvtophys((vm_offset_t)(pmap->nested_region_asid_bitmap)), PAGE_SIZE);
		kheap_free(KHEAP_DATA_BUFFERS, pmap->nested_region_asid_bitmap,
		    pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
	}

	pmap_ledger_release(pmap->ledger);

	pmap_lock_destroy(pmap);
	pmap_free_pmap(pmap);
	pmap_lock_destroy(pmap);
	zfree(pmap_zone, pmap);

	PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);

	ledger = pmap->ledger;

	pmap_destroy_ppl(pmap);

	pmap_check_ledger_fields(ledger);
	pmap_destroy_internal(pmap);

	ledger_dereference(ledger);

	PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
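
/*
 * Illustrative lifecycle sketch (hypothetical caller): each reference taken
 * with pmap_reference() must be balanced by a pmap_destroy(); only the final
 * pmap_destroy() tears down the translation tables and returns the ASID.
 *
 *	pmap_reference(p);      // extra reference for a borrower
 *	...
 *	pmap_destroy(p);        // borrower drops its reference
 *	pmap_destroy(p);        // creator's reference: tables freed here
 */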
/*
 *	Add a reference to the specified pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_reference_internal(
	if (pmap != PMAP_NULL) {
		VALIDATE_PMAP(pmap);
		os_atomic_inc(&pmap->ref_count, relaxed);
	}

	pmap_reference_ppl(pmap);
	pmap_reference_internal(pmap);
	tt_entry_t *tt1 = NULL;
	tt_free_entry_t *tt1_free;

	vm_address_t va_end;

	if ((size < PAGE_SIZE) && (size != PMAP_ROOT_ALLOC_SIZE)) {
	}

	pmap_simple_lock(&tt1_lock);
	if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
		free_page_size_tt_count--;
		tt1 = (tt_entry_t *)free_page_size_tt_list;
		free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
	} else if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
		free_two_page_size_tt_count--;
		tt1 = (tt_entry_t *)free_two_page_size_tt_list;
		free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
	} else if ((size < PAGE_SIZE) && (free_tt_count != 0)) {
		tt1 = (tt_entry_t *)free_tt_list;
		free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
	}

	pmap_simple_unlock(&tt1_lock);

	pmap_tt_ledger_credit(pmap, size);
	return (tt_entry_t *)tt1;

	ret = pmap_pages_alloc_zeroed(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));

	if (ret == KERN_RESOURCE_SHORTAGE) {
		return (tt_entry_t *)0;
	}

	if (size < PAGE_SIZE) {
		va = phystokv(pa) + size;
		tt_free_entry_t *local_free_list = (tt_free_entry_t *)va;
		tt_free_entry_t *next_free = NULL;
		for (va_end = phystokv(pa) + PAGE_SIZE; va < va_end; va = va + size) {
			tt1_free = (tt_free_entry_t *)va;
			tt1_free->next = next_free;
			next_free = tt1_free;
		}
		pmap_simple_lock(&tt1_lock);
		local_free_list->next = free_tt_list;
		free_tt_list = next_free;
		free_tt_count += ((PAGE_SIZE / size) - 1);
		if (free_tt_count > free_tt_max) {
			free_tt_max = free_tt_count;
		}
		pmap_simple_unlock(&tt1_lock);
	}

	/* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
	 * Depending on the device, this can vary between 512b and 16K. */
	OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
	OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
	pmap_tt_ledger_credit(pmap, size);

	return (tt_entry_t *) phystokv(pa);
pmap_tt1_deallocate(
	tt_free_entry_t *tt_entry;

	if ((size < PAGE_SIZE) && (size != PMAP_ROOT_ALLOC_SIZE)) {
	}

	tt_entry = (tt_free_entry_t *)tt;

	pmap_simple_lock(&tt1_lock);

	if (size < PAGE_SIZE) {
		if (free_tt_count > free_tt_max) {
			free_tt_max = free_tt_count;
		}
		tt_entry->next = free_tt_list;
		free_tt_list = tt_entry;
	}

	if (size == PAGE_SIZE) {
		free_page_size_tt_count++;
		if (free_page_size_tt_count > free_page_size_tt_max) {
			free_page_size_tt_max = free_page_size_tt_count;
		}
		tt_entry->next = free_page_size_tt_list;
		free_page_size_tt_list = tt_entry;
	}

	if (size == 2 * PAGE_SIZE) {
		free_two_page_size_tt_count++;
		if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
			free_two_page_size_tt_max = free_two_page_size_tt_count;
		}
		tt_entry->next = free_two_page_size_tt_list;
		free_two_page_size_tt_list = tt_entry;
	}

	if (option & PMAP_TT_DEALLOCATE_NOBLOCK) {
		pmap_simple_unlock(&tt1_lock);
		pmap_tt_ledger_debit(pmap, size);
	}

	while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
		free_page_size_tt_count--;
		tt = (tt_entry_t *)free_page_size_tt_list;
		free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;

		pmap_simple_unlock(&tt1_lock);

		pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);

		OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));

		pmap_simple_lock(&tt1_lock);
	}

	while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
		free_two_page_size_tt_count--;
		tt = (tt_entry_t *)free_two_page_size_tt_list;
		free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;

		pmap_simple_unlock(&tt1_lock);

		pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);

		OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));

		pmap_simple_lock(&tt1_lock);
	}

	pmap_simple_unlock(&tt1_lock);
	pmap_tt_ledger_debit(pmap, size);
static kern_return_t
	unsigned int options)
	if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
		tt_free_entry_t *tt_free_cur, *tt_free_next;

		tt_free_cur = ((tt_free_entry_t *)pmap->tt_entry_free);
		tt_free_next = tt_free_cur->next;
		tt_free_cur->next = NULL;
		*ttp = (tt_entry_t *)tt_free_cur;
		pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
	}

	/*
	 *  Allocate a VM page for the level x page table entries.
	 */
	while (pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
		if (options & PMAP_OPTIONS_NOWAIT) {
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	while ((ptdp = ptd_alloc(pmap)) == NULL) {
		if (options & PMAP_OPTIONS_NOWAIT) {
			pmap_pages_free(pa, PAGE_SIZE);
			return KERN_RESOURCE_SHORTAGE;
		}
	}

	if (level < pt_attr_leaf_level(pmap_get_pt_attr(pmap))) {
		OSAddAtomic64(1, &alloc_ttepages_count);
		OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
	} else {
		OSAddAtomic64(1, &alloc_ptepages_count);
		OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
	}

	pmap_tt_ledger_credit(pmap, PAGE_SIZE);

	PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);

	pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);

	uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));
	if (PAGE_SIZE > pmap_page_size) {
		vm_address_t va_end;

		for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + pmap_page_size; va < va_end; va = va + pmap_page_size) {
			((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
			pmap->tt_entry_free = (tt_entry_t *)va;
		}
	}

	*ttp = (tt_entry_t *)phystokv(pa);

	return KERN_SUCCESS;
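
/*
 * Worked example of the carve-up above (illustrative configuration): with a
 * 16K kernel PAGE_SIZE and a 4K pmap_page_size, one allocated VM page yields
 * the requested table at phystokv(pa) plus three spare 4K chunks chained
 * onto pmap->tt_entry_free for later pmap_tt_allocate() calls.
 */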
	ptd_info_t *ptd_info;
	unsigned pt_acc_cnt;

	vm_offset_t free_page = 0;
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	unsigned max_pt_index = PAGE_SIZE / pt_attr_page_size(pt_attr);

	ptdp = ptep_get_ptd((vm_offset_t)ttp);
	ptd_info = ptd_get_info(ptdp, ttp);

	ptd_info->va = (vm_offset_t)-1;

	if ((level < pt_attr_leaf_level(pt_attr)) && (ptd_info->refcnt == PT_DESC_REFCOUNT)) {
		ptd_info->refcnt = 0;
	}

	if (ptd_info->refcnt != 0) {
		panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptd_info->refcnt);
	}

	ptd_info->refcnt = 0;

	for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
		pt_acc_cnt += ptdp->ptd_info[i].refcnt;
	}

	if (pt_acc_cnt == 0) {
		tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
		unsigned pt_free_entry_cnt = 1;

		while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
			tt_free_entry_t *tt_free_list_next;

			tt_free_list_next = tt_free_list->next;
			if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
				pt_free_entry_cnt++;
			}
			tt_free_list = tt_free_list_next;
		}
		if (pt_free_entry_cnt == max_pt_index) {
			tt_free_entry_t *tt_free_list_cur;

			free_page = (vm_offset_t)ttp & ~PAGE_MASK;
			tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
			tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;

			while (tt_free_list_cur) {
				tt_free_entry_t *tt_free_list_next;

				tt_free_list_next = tt_free_list_cur->next;
				if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
					tt_free_list->next = tt_free_list_next->next;
				} else {
					tt_free_list = tt_free_list_next;
				}
				tt_free_list_cur = tt_free_list_next;
			}
		} else {
			((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
			pmap->tt_entry_free = ttp;
		}
	} else {
		((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
		pmap->tt_entry_free = ttp;
	}

	if (free_page != 0) {
		ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
		*(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
		pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
		if (level < pt_attr_leaf_level(pt_attr)) {
			OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
		} else {
			OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
		}
		PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
		pmap_tt_ledger_debit(pmap, PAGE_SIZE);
	}
/**
 * Safely clear out a translation table entry.
 *
 * @note If the TTE to clear out points to a leaf table, then that leaf table
 *       must have a refcnt of zero before the TTE can be removed.
 *
 * @param pmap The pmap containing the page table whose TTE is being removed.
 * @param ttep Pointer to the TTE that should be cleared out.
 * @param level The level of the page table that contains the TTE to be removed.
 */
	tt_entry_t tte = *ttep;

	if (__improbable(tte == 0)) {
		panic("%s: null tt_entry ttep==%p", __func__, ttep);
	}

	if (__improbable((level == pt_attr_twig_level(pmap_get_pt_attr(pmap))) &&
	    (ptep_get_info((pt_entry_t *)ttetokv(tte))->refcnt != 0))) {
		panic("%s: non-zero pagetable refcount: pmap=%p ttep=%p ptd=%p refcnt=0x%x", __func__,
		    pmap, ttep, tte_get_ptd(tte), ptep_get_info((pt_entry_t *)ttetokv(tte))->refcnt);
	}

#if (__ARM_VMSA__ == 7)
	tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);

	for (i = 0; i < 4; i++, ttep_4M++) {
		*ttep_4M = (tt_entry_t) 0;
	}
	FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
#else
	*ttep = (tt_entry_t) 0;
	FLUSH_PTE_STRONG(ttep);
#endif /* (__ARM_VMSA__ == 7) */
/**
 * Given a pointer to an entry within a `level` page table, delete the
 * page table at `level` + 1 that is represented by that entry.  For instance,
 * to delete an unused L3 table, `ttep` would be a pointer to the L2 entry that
 * contains the PA of the L3 table, and `level` would be "2".
 *
 * @note If the table getting deallocated is a leaf table, then that leaf table
 *       must have a refcnt of zero before getting deallocated.  All other levels
 *       must have a refcnt of PT_DESC_REFCOUNT in their page table descriptor.
 *
 * @param pmap The pmap that owns the page table to be deallocated.
 * @param ttep Pointer to the `level` TTE to remove.
 * @param level The level of the table that contains an entry pointing to the
 *              table to be removed.  The deallocated page table will be a
 *              `level` + 1 table (so if `level` is 2, then an L3 table will be
 */
pmap_tte_deallocate(
	pmap_assert_locked_w(pmap);

	if (tte_get_ptd(tte)->pmap != pmap) {
		panic("%s: Passed in pmap doesn't own the page table to be deleted ptd=%p ptd->pmap=%p pmap=%p",
		    __func__, tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
	}
#endif /* MACH_ASSERT */

	pmap_tte_remove(pmap, ttep, level);

	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));

		pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~(pmap_page_size - 1)));

		for (unsigned i = 0; i < (pmap_page_size / sizeof(*pte_p)); i++, pte_p++) {
			if (__improbable(ARM_PTE_IS_COMPRESSED(*pte_p, pte_p))) {
				panic_plain("%s: Found compressed mapping in soon to be deleted "
				    "L%d table tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx",
				    __func__, level + 1, (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
			} else if (__improbable(((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT)) {
				panic_plain("%s: Found valid mapping in soon to be deleted L%d "
				    "table tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx",
				    __func__, level + 1, (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
			}
		}
#endif /* MACH_ASSERT */

		/* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
		 * aligned on 1K boundaries.  We clear the surrounding "chunk" of 4 TTEs above. */
		pa = tte_to_pa(tte) & ~(pmap_page_size - 1);
		pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
	}
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 *
 *	Returns the number of PTE changed, and sets *rmv_cnt
 *	to the number of SPTE changed.
 */
	vm_map_address_t va,
	bool need_strong_sync = false;
	int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
	    &need_strong_sync, PMAP_OPTIONS_REMOVE);
	if (num_changed > 0) {
		PMAP_UPDATE_TLBS(pmap, va,
		    va + (pt_attr_page_size(pmap_get_pt_attr(pmap)) * (epte - bpte)), need_strong_sync);
	}
6445 #ifdef PVH_FLAG_EXEC
6448 * Update the access protection bits of the physical aperture mapping for a page.
6449 * This is useful, for example, in guranteeing that a verified executable page
6450 * has no writable mappings anywhere in the system, including the physical
6451 * aperture. flush_tlb_async can be set to true to avoid unnecessary TLB
6452 * synchronization overhead in cases where the call to this function is
6453 * guaranteed to be followed by other TLB operations.
6456 pmap_set_ptov_ap(unsigned int pai __unused
, unsigned int ap __unused
, boolean_t flush_tlb_async __unused
)
6458 #if __ARM_PTE_PHYSMAP__
6459 ASSERT_PVH_LOCKED(pai
);
6460 vm_offset_t kva
= phystokv(vm_first_phys
+ (pmap_paddr_t
)ptoa(pai
));
6461 pt_entry_t
*pte_p
= pmap_pte(kernel_pmap
, kva
);
6463 pt_entry_t tmplate
= *pte_p
;
6464 if ((tmplate
& ARM_PTE_APMASK
) == ARM_PTE_AP(ap
)) {
6467 tmplate
= (tmplate
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(ap
);
6468 #if (__ARM_VMSA__ > 7)
6469 if (tmplate
& ARM_PTE_HINT_MASK
) {
6470 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
6471 __func__
, pte_p
, (void *)kva
, tmplate
);
6474 WRITE_PTE_STRONG(pte_p
, tmplate
);
6475 flush_mmu_tlb_region_asid_async(kva
, PAGE_SIZE
, kernel_pmap
);
6476 if (!flush_tlb_async
) {
6482 #endif /* defined(PVH_FLAG_EXEC) */
6490 int *num_alt_internal
,
6494 pv_entry_t
**pv_h
, **pve_pp
;
6497 ASSERT_NOT_HIBERNATING();
6498 ASSERT_PVH_LOCKED(pai
);
6499 pv_h
= pai_to_pvh(pai
);
6500 vm_offset_t pvh_flags
= pvh_get_flags(pv_h
);
6503 if (__improbable(pvh_flags
& PVH_FLAG_LOCKDOWN
)) {
6504 panic("%d is locked down (%#lx), cannot remove", pai
, pvh_flags
);
6508 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
6509 if (__improbable((cpte
!= pvh_ptep(pv_h
)))) {
6510 panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__
, cpte
, pv_h
, pvh_ptep(pv_h
), pai
);
6512 if (IS_ALTACCT_PAGE(pai
, PV_ENTRY_NULL
)) {
6513 assert(IS_INTERNAL_PAGE(pai
));
6515 (*num_alt_internal
)++;
6516 CLR_ALTACCT_PAGE(pai
, PV_ENTRY_NULL
);
6517 } else if (IS_INTERNAL_PAGE(pai
)) {
6518 if (IS_REUSABLE_PAGE(pai
)) {
6526 pvh_update_head(pv_h
, PV_ENTRY_NULL
, PVH_TYPE_NULL
);
6527 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
6529 pve_p
= pvh_list(pv_h
);
6531 while (pve_p
!= PV_ENTRY_NULL
&&
6532 (pve_get_ptep(pve_p
) != cpte
)) {
6533 pve_pp
= pve_link_field(pve_p
);
6534 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
6537 if (__improbable((pve_p
== PV_ENTRY_NULL
))) {
6538 panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__
, cpte
, pai
, pv_h
);
6542 if ((pmap
!= NULL
) && (kern_feature_override(KF_PMAPV_OVRD
) == FALSE
)) {
6543 pv_entry_t
*check_pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
6544 while (check_pve_p
!= PV_ENTRY_NULL
) {
6545 if (pve_get_ptep(check_pve_p
) == cpte
) {
6546 panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
6547 __func__
, cpte
, pmap
, pv_h
, pve_p
, pai
);
6549 check_pve_p
= PVE_NEXT_PTR(pve_next(check_pve_p
));
6554 if (IS_ALTACCT_PAGE(pai
, pve_p
)) {
6555 assert(IS_INTERNAL_PAGE(pai
));
6557 (*num_alt_internal
)++;
6558 CLR_ALTACCT_PAGE(pai
, pve_p
);
6559 } else if (IS_INTERNAL_PAGE(pai
)) {
6560 if (IS_REUSABLE_PAGE(pai
)) {
6569 pvh_remove(pv_h
, pve_pp
, pve_p
);
6570 pv_free_entry(pve_p
);
6571 if (!pvh_test_type(pv_h
, PVH_TYPE_NULL
)) {
6572 pvh_set_flags(pv_h
, pvh_flags
);
6575 panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
6576 __func__
, *pv_h
, cpte
, pmap
, pv_h
, pai
);
6579 #ifdef PVH_FLAG_EXEC
6580 if ((pvh_flags
& PVH_FLAG_EXEC
) && pvh_test_type(pv_h
, PVH_TYPE_NULL
)) {
6581 pmap_set_ptov_ap(pai
, AP_RWNA
, FALSE
);
6587 pmap_remove_range_options(
6589 vm_map_address_t va
,
6593 bool *need_strong_sync __unused
,
6597 int num_removed
, num_unwired
;
6598 int num_pte_changed
;
6601 int num_external
, num_internal
, num_reusable
;
6602 int num_alt_internal
;
6603 uint64_t num_compressed
, num_alt_compressed
;
6605 pmap_assert_locked_w(pmap
);
6607 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
6608 uint64_t pmap_page_size
= pt_attr_page_size(pt_attr
);
6610 if (__improbable((uintptr_t)epte
> (((uintptr_t)bpte
+ pmap_page_size
) & ~(pmap_page_size
- 1)))) {
6611 panic("%s: PTE range [%p, %p) in pmap %p crosses page table boundary", __func__
, bpte
, epte
, pmap
);
6616 num_pte_changed
= 0;
6621 num_alt_internal
= 0;
6622 num_alt_compressed
= 0;
6624 for (cpte
= bpte
; cpte
< epte
;
6625 cpte
+= 1, va
+= pmap_page_size
) {
6627 boolean_t managed
= FALSE
;
6632 if (pgtrace_enabled
) {
6633 pmap_pgtrace_remove_clone(pmap
, pte_to_pa(spte
), va
);
6638 if (pmap
!= kernel_pmap
&&
6639 (options
& PMAP_OPTIONS_REMOVE
) &&
6640 (ARM_PTE_IS_COMPRESSED(spte
, cpte
))) {
6642 * "pmap" must be locked at this point,
6643 * so this should not race with another
6644 * pmap_remove_range() or pmap_enter().
6647 /* one less "compressed"... */
6649 if (spte
& ARM_PTE_COMPRESSED_ALT
) {
6650 /* ... but it used to be "ALTACCT" */
6651 num_alt_compressed
++;
6655 WRITE_PTE_FAST(cpte
, ARM_PTE_TYPE_FAULT
);
6657 * "refcnt" also accounts for
6658 * our "compressed" markers,
6659 * so let's update it here.
6661 if (OSAddAtomic16(-1, (SInt16
*) &(ptep_get_info(cpte
)->refcnt
)) <= 0) {
6662 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p", ptep_get_ptd(cpte
), cpte
);
6667 * It may be possible for the pte to transition from managed
6668 * to unmanaged in this timeframe; for now, elide the assert.
6669 * We should break out as a consequence of checking pa_valid.
6671 //assert(!ARM_PTE_IS_COMPRESSED(spte));
6672 pa
= pte_to_pa(spte
);
6673 if (!pa_valid(pa
)) {
6675 unsigned int cacheattr
= pmap_cache_attributes((ppnum_t
)atop(pa
));
6678 if (__improbable((cacheattr
& PP_ATTR_MONITOR
) &&
6679 (pte_to_xprr_perm(spte
) != XPRR_KERN_RO_PERM
) && !pmap_ppl_disable
)) {
6680 panic("%s: attempt to remove mapping of writable PPL-protected I/O address 0x%llx",
6681 __func__
, (uint64_t)pa
);
6686 pai
= (int)pa_index(pa
);
6689 pa
= pte_to_pa(spte
);
6690 if (pai
== (int)pa_index(pa
)) {
6692 break; // Leave pai locked as we will unlock it after we free the PV entry
6697 if (ARM_PTE_IS_COMPRESSED(*cpte
, cpte
)) {
6699 * There used to be a valid mapping here but it
6700 * has already been removed when the page was
6701 * sent to the VM compressor, so nothing left to
6707 /* remove the translation, do not flush the TLB */
6708 if (*cpte
!= ARM_PTE_TYPE_FAULT
) {
6709 assertf(!ARM_PTE_IS_COMPRESSED(*cpte
, cpte
), "unexpected compressed pte %p (=0x%llx)", cpte
, (uint64_t)*cpte
);
6710 assertf((*cpte
& ARM_PTE_TYPE_VALID
) == ARM_PTE_TYPE
, "invalid pte %p (=0x%llx)", cpte
, (uint64_t)*cpte
);
6712 if (managed
&& (pmap
!= kernel_pmap
) && (ptep_get_va(cpte
) != va
)) {
6713 panic("pmap_remove_range_options(): VA mismatch: cpte=%p ptd=%p pte=0x%llx va=0x%llx, cpte va=0x%llx",
6714 cpte
, ptep_get_ptd(cpte
), (uint64_t)*cpte
, (uint64_t)va
, (uint64_t)ptep_get_va(cpte
));
6717 WRITE_PTE_FAST(cpte
, ARM_PTE_TYPE_FAULT
);
6721 if ((spte
!= ARM_PTE_TYPE_FAULT
) &&
6722 (pmap
!= kernel_pmap
)) {
6723 assertf(!ARM_PTE_IS_COMPRESSED(spte
, cpte
), "unexpected compressed pte %p (=0x%llx)", cpte
, (uint64_t)spte
);
6724 assertf((spte
& ARM_PTE_TYPE_VALID
) == ARM_PTE_TYPE
, "invalid pte %p (=0x%llx)", cpte
, (uint64_t)spte
);
6725 if (OSAddAtomic16(-1, (SInt16
*) &(ptep_get_info(cpte
)->refcnt
)) <= 0) {
6726 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p", ptep_get_ptd(cpte
), cpte
);
6733 if (pte_is_wired(spte
)) {
6734 pte_set_wired(pmap
, cpte
, 0);
6738 * if not managed, we're done
6744 * find and remove the mapping from the chain for this
6748 pmap_remove_pv(pmap
, cpte
, pai
, &num_internal
, &num_alt_internal
, &num_reusable
, &num_external
);
6757 OSAddAtomic(-num_removed
, (SInt32
*) &pmap
->stats
.resident_count
);
6758 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, num_removed
* pmap_page_size
* PAGE_RATIO
);
6760 if (pmap
!= kernel_pmap
) {
6761 /* update pmap stats... */
6762 OSAddAtomic(-num_unwired
, (SInt32
*) &pmap
->stats
.wired_count
);
6764 __assert_only
int32_t orig_external
= OSAddAtomic(-num_external
, &pmap
->stats
.external
);
6765 PMAP_STATS_ASSERTF(orig_external
>= num_external
,
6767 "pmap=%p bpte=%p epte=%p num_external=%d stats.external=%d",
6768 pmap
, bpte
, epte
, num_external
, orig_external
);
6771 __assert_only
int32_t orig_internal
= OSAddAtomic(-num_internal
, &pmap
->stats
.internal
);
6772 PMAP_STATS_ASSERTF(orig_internal
>= num_internal
,
6774 "pmap=%p bpte=%p epte=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
6776 num_internal
, orig_internal
,
6777 num_reusable
, pmap
->stats
.reusable
);
6780 __assert_only
int32_t orig_reusable
= OSAddAtomic(-num_reusable
, &pmap
->stats
.reusable
);
6781 PMAP_STATS_ASSERTF(orig_reusable
>= num_reusable
,
6783 "pmap=%p bpte=%p epte=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
6785 num_internal
, pmap
->stats
.internal
,
6786 num_reusable
, orig_reusable
);
6788 if (num_compressed
) {
6789 __assert_only
uint64_t orig_compressed
= OSAddAtomic64(-num_compressed
, &pmap
->stats
.compressed
);
6790 PMAP_STATS_ASSERTF(orig_compressed
>= num_compressed
,
6792 "pmap=%p bpte=%p epte=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
6793 pmap
, bpte
, epte
, num_compressed
, num_alt_compressed
,
6796 /* ... and ledgers */
6797 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, (num_unwired
) * pmap_page_size
* PAGE_RATIO
);
6798 pmap_ledger_debit(pmap
, task_ledgers
.internal
, (num_internal
) * pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
6799 pmap_ledger_debit(pmap
, task_ledgers
.alternate_accounting
, (num_alt_internal
) * pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
6800 pmap_ledger_debit(pmap
, task_ledgers
.alternate_accounting_compressed
, (num_alt_compressed
) * pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
6801 pmap_ledger_debit(pmap
, task_ledgers
.internal_compressed
, (num_compressed
) * pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
6802 /* make needed adjustments to phys_footprint */
6803 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
,
6807 num_alt_compressed
)) * pmap_page_size
* PAGE_RATIO
);
6810 /* flush the ptable entries we have written */
6811 if (num_pte_changed
> 0) {
6812 FLUSH_PTE_RANGE_STRONG(bpte
, epte
);
6815 return num_pte_changed
;
6820 * Remove the given range of addresses
6821 * from the specified map.
6823 * It is assumed that the start and end are properly
6824 * rounded to the hardware page size.
6829 vm_map_address_t start
,
6830 vm_map_address_t end
)
6832 pmap_remove_options(pmap
, start
, end
, PMAP_OPTIONS_REMOVE
);
6835 MARK_AS_PMAP_TEXT
static int
6836 pmap_remove_options_internal(
6838 vm_map_address_t start
,
6839 vm_map_address_t end
,
6842 int remove_count
= 0;
6843 pt_entry_t
*bpte
, *epte
;
6846 uint32_t rmv_spte
= 0;
6847 bool need_strong_sync
= false;
6848 bool flush_tte
= false;
6850 if (__improbable(end
< start
)) {
6851 panic("%s: invalid address range %p, %p", __func__
, (void*)start
, (void*)end
);
6854 VALIDATE_PMAP(pmap
);
6856 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
6860 tte_p
= pmap_tte(pmap
, start
);
6862 if (tte_p
== (tt_entry_t
*) NULL
) {
6866 if ((*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
6867 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
6868 bpte
= &pte_p
[pte_index(pmap
, pt_attr
, start
)];
6869 epte
= bpte
+ ((end
- start
) >> pt_attr_leaf_shift(pt_attr
));
6871 remove_count
+= pmap_remove_range_options(pmap
, start
, bpte
, epte
,
6872 &rmv_spte
, &need_strong_sync
, options
);
6874 if (rmv_spte
&& (ptep_get_info(pte_p
)->refcnt
== 0) &&
6875 (pmap
!= kernel_pmap
) && (pmap
->nested
== FALSE
)) {
6876 pmap_tte_deallocate(pmap
, tte_p
, pt_attr_twig_level(pt_attr
));
6884 if (remove_count
> 0) {
6885 PMAP_UPDATE_TLBS(pmap
, start
, end
, need_strong_sync
);
6886 } else if (flush_tte
) {
6887 pmap_get_pt_ops(pmap
)->flush_tlb_tte_async(start
, pmap
);
6890 return remove_count
;
6894 pmap_remove_options(
6896 vm_map_address_t start
,
6897 vm_map_address_t end
,
6900 int remove_count
= 0;
6901 vm_map_address_t va
;
6903 if (pmap
== PMAP_NULL
) {
6907 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
6909 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE
) | DBG_FUNC_START
,
6910 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(start
),
6911 VM_KERNEL_ADDRHIDE(end
));
6914 if ((start
| end
) & pt_attr_leaf_offmask(pt_attr
)) {
6915 panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
6916 pmap
, (uint64_t)start
, (uint64_t)end
);
6918 if ((end
< start
) || (start
< pmap
->min
) || (end
> pmap
->max
)) {
6919 panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
6920 pmap
, (uint64_t)start
, (uint64_t)end
);
6925 * Invalidate the translation buffer first
6931 l
= ((va
+ pt_attr_twig_size(pt_attr
)) & ~pt_attr_twig_offmask(pt_attr
));
6937 remove_count
+= pmap_remove_options_ppl(pmap
, va
, l
, options
);
6939 pmap_ledger_check_balance(pmap
);
6941 remove_count
+= pmap_remove_options_internal(pmap
, va
, l
, options
);
6947 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE
) | DBG_FUNC_END
);
6952 * Remove phys addr if mapped in specified map
6955 pmap_remove_some_phys(
6956 __unused pmap_t map
,
6957 __unused ppnum_t pn
)
6959 /* Implement to support working set code */
6965 #if !__ARM_USER_PROTECT__
6971 #if __ARM_USER_PROTECT__
6972 thread
->machine
.uptw_ttb
= ((unsigned int) pmap
->ttep
) | TTBR_SETUP
;
6973 thread
->machine
.asid
= pmap
->hw_asid
;
6978 pmap_flush_core_tlb_asid_async(pmap_t pmap
)
6980 #if (__ARM_VMSA__ == 7)
6981 flush_core_tlb_asid_async(pmap
->hw_asid
);
6983 flush_core_tlb_asid_async(((uint64_t) pmap
->hw_asid
) << TLBI_ASID_SHIFT
);
6988 pmap_user_ttb_is_clear(void)
6990 #if (__ARM_VMSA__ > 7)
6991 return get_mmu_ttb() == (invalid_ttep
& TTBR_BADDR_MASK
);
6993 return get_mmu_ttb() == kernel_pmap
->ttep
;
6997 MARK_AS_PMAP_TEXT
static void
6998 pmap_switch_internal(
7001 VALIDATE_PMAP(pmap
);
7002 pmap_cpu_data_t
*cpu_data_ptr
= pmap_get_cpu_data();
7003 uint16_t asid_index
= pmap
->hw_asid
;
7004 bool do_asid_flush
= false;
7006 if (__improbable((asid_index
== 0) && (pmap
!= kernel_pmap
))) {
7007 panic("%s: attempt to activate pmap with invalid ASID %p", __func__
, pmap
);
7009 #if __ARM_KERNEL_PROTECT__
7013 #if (__ARM_VMSA__ > 7)
7014 pmap_t last_nested_pmap
= cpu_data_ptr
->cpu_nested_pmap
;
7015 __unused
const pt_attr_t
*last_nested_pmap_attr
= cpu_data_ptr
->cpu_nested_pmap_attr
;
7016 __unused vm_map_address_t last_nested_region_addr
= cpu_data_ptr
->cpu_nested_region_addr
;
7017 __unused vm_map_offset_t last_nested_region_size
= cpu_data_ptr
->cpu_nested_region_size
;
7018 bool do_shared_region_flush
= ((pmap
!= kernel_pmap
) && (last_nested_pmap
!= NULL
) && (pmap
->nested_pmap
!= last_nested_pmap
));
7019 bool break_before_make
= do_shared_region_flush
;
7021 bool do_shared_region_flush
= false;
7022 bool break_before_make
= false;
7025 if ((pmap_max_asids
> MAX_HW_ASIDS
) && (asid_index
> 0)) {
7027 pmap_update_plru(asid_index
);
7030 assert(asid_index
< (sizeof(cpu_data_ptr
->cpu_sw_asids
) / sizeof(*cpu_data_ptr
->cpu_sw_asids
)));
7032 /* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
7033 uint8_t new_sw_asid
= pmap
->sw_asid
;
7034 uint8_t last_sw_asid
= cpu_data_ptr
->cpu_sw_asids
[asid_index
];
7036 if (new_sw_asid
!= last_sw_asid
) {
7038 * If the virtual ASID of the new pmap does not match the virtual ASID
7039 * last seen on this CPU for the physical ASID (that was a mouthful),
7040 * then this switch runs the risk of aliasing. We need to flush the
7041 * TLB for this phyiscal ASID in this case.
7043 cpu_data_ptr
->cpu_sw_asids
[asid_index
] = new_sw_asid
;
7044 do_asid_flush
= true;
7045 break_before_make
= true;
7049 #if __ARM_MIXED_PAGE_SIZE__
7050 if (pmap_get_pt_attr(pmap
)->pta_tcr_value
!= get_tcr()) {
7051 break_before_make
= true;
7054 if (__improbable(break_before_make
&& !pmap_user_ttb_is_clear())) {
7055 PMAP_TRACE(1, PMAP_CODE(PMAP__CLEAR_USER_TTB
), VM_KERNEL_ADDRHIDE(pmap
), PMAP_VASID(pmap
), pmap
->hw_asid
);
7056 pmap_clear_user_ttb_internal();
7059 #if (__ARM_VMSA__ > 7)
7060 /* If we're switching to a different nested pmap (i.e. shared region), we'll need
7061 * to flush the userspace mappings for that region. Those mappings are global
7062 * and will not be protected by the ASID. It should also be cheaper to flush the
7063 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
7064 if (__improbable(do_shared_region_flush
)) {
7065 #if __ARM_RANGE_TLBI__
7066 uint64_t page_shift_prev
= pt_attr_leaf_shift(last_nested_pmap_attr
);
7067 vm_map_offset_t npages_prev
= last_nested_region_size
>> page_shift_prev
;
7069 /* NOTE: here we flush the global TLB entries for the previous nested region only.
7070 * There may still be non-global entries that overlap with the incoming pmap's
7071 * nested region. On Apple SoCs at least, this is acceptable. Those non-global entries
7072 * must necessarily belong to a different ASID than the incoming pmap, or they would
7073 * be flushed in the do_asid_flush case below. This will prevent them from conflicting
7074 * with the incoming pmap's nested region. However, the ARMv8 ARM is not crystal clear
7075 * on whether such a global/inactive-nonglobal overlap is acceptable, so we may need
7076 * to consider additional invalidation here in the future. */
7077 if (npages_prev
<= ARM64_TLB_RANGE_PAGES
) {
7078 flush_core_tlb_allrange_async(generate_rtlbi_param((ppnum_t
)npages_prev
, 0, last_nested_region_addr
, page_shift_prev
));
7080 do_asid_flush
= false;
7081 flush_core_tlb_async();
7084 do_asid_flush
= false;
7085 flush_core_tlb_async();
7086 #endif // __ARM_RANGE_TLBI__
7088 #endif // (__ARM_VMSA__ > 7)
7089 if (__improbable(do_asid_flush
)) {
7090 pmap_flush_core_tlb_asid_async(pmap
);
7091 #if DEVELOPMENT || DEBUG
7092 os_atomic_inc(&pmap_asid_flushes
, relaxed
);
7095 if (__improbable(do_asid_flush
|| do_shared_region_flush
)) {
7099 pmap_switch_user_ttb_internal(pmap
);
7106 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH
) | DBG_FUNC_START
, VM_KERNEL_ADDRHIDE(pmap
), PMAP_VASID(pmap
), pmap
->hw_asid
);
7108 pmap_switch_ppl(pmap
);
7110 pmap_switch_internal(pmap
);
7112 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH
) | DBG_FUNC_END
);
7116 pmap_require(pmap_t pmap
)
7119 VALIDATE_PMAP(pmap
);
7121 if (pmap
!= kernel_pmap
) {
7122 zone_id_require(ZONE_ID_PMAP
, sizeof(struct pmap
), pmap
);
7132 pmap_page_protect_options(ppnum
, prot
, 0, NULL
);
7136 * Routine: pmap_page_protect_options
7139 * Lower the permission for all mappings to a given
7142 MARK_AS_PMAP_TEXT
static void
7143 pmap_page_protect_options_with_flush_range(
7146 unsigned int options
,
7147 pmap_tlb_flush_range_t
*flush_range
)
7149 pmap_paddr_t phys
= ptoa(ppnum
);
7151 pv_entry_t
**pve_pp
;
7156 pv_entry_t
*new_pve_p
;
7157 pt_entry_t
*new_pte_p
;
7158 vm_offset_t pvh_flags
;
7162 boolean_t tlb_flush_needed
= FALSE
;
7163 unsigned int pvh_cnt
= 0;
7165 assert(ppnum
!= vm_page_fictitious_addr
);
7167 /* Only work with managed pages. */
7168 if (!pa_valid(phys
)) {
7173 * Determine the new protection.
7177 return; /* nothing to do */
7179 case VM_PROT_READ
| VM_PROT_EXECUTE
:
7187 pai
= (int)pa_index(phys
);
7189 pv_h
= pai_to_pvh(pai
);
7190 pvh_flags
= pvh_get_flags(pv_h
);
7193 if (__improbable(remove
&& (pvh_flags
& PVH_FLAG_LOCKDOWN
))) {
7194 panic("%d is locked down (%#llx), cannot remove", pai
, pvh_get_flags(pv_h
));
7198 pte_p
= PT_ENTRY_NULL
;
7199 pve_p
= PV_ENTRY_NULL
;
7201 pveh_p
= PV_ENTRY_NULL
;
7202 pvet_p
= PV_ENTRY_NULL
;
7203 new_pve_p
= PV_ENTRY_NULL
;
7204 new_pte_p
= PT_ENTRY_NULL
;
7205 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
7206 pte_p
= pvh_ptep(pv_h
);
7207 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
7208 pve_p
= pvh_list(pv_h
);
7212 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
7213 vm_map_address_t va
= 0;
7215 pt_entry_t tmplate
= ARM_PTE_TYPE_FAULT
;
7216 boolean_t update
= FALSE
;
7218 if (pve_p
!= PV_ENTRY_NULL
) {
7219 pte_p
= pve_get_ptep(pve_p
);
7222 #ifdef PVH_FLAG_IOMMU
7223 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
7225 if (__improbable(pvh_flags
& PVH_FLAG_LOCKDOWN
)) {
7226 panic("pmap_page_protect: ppnum 0x%x locked down, cannot be owned by iommu 0x%llx, pve_p=%p",
7227 ppnum
, (uint64_t)pte_p
& ~PVH_FLAG_IOMMU
, pve_p
);
7231 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
7232 panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
7233 ppnum
, (uint64_t)pte_p
& ~PVH_FLAG_IOMMU
, pve_p
);
7235 if (pve_p
!= PV_ENTRY_NULL
) {
7236 pv_entry_t
*temp_pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
7237 pvh_remove(pv_h
, pve_pp
, pve_p
);
7238 pveh_p
= pvh_list(pv_h
);
7239 pve_next(pve_p
) = new_pve_p
;
7248 goto protect_skip_pve
;
7251 pmap
= ptep_get_pmap(pte_p
);
7252 va
= ptep_get_va(pte_p
);
7254 if (pte_p
== PT_ENTRY_NULL
) {
7255 panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
7256 pmap
, prot
, options
, pv_h
, pveh_p
, pve_p
, (uint64_t)va
, ppnum
);
7257 } else if ((pmap
== NULL
) || (atop(pte_to_pa(*pte_p
)) != ppnum
)) {
7259 if (kern_feature_override(KF_PMAPV_OVRD
) == FALSE
) {
7260 pv_entry_t
*check_pve_p
= pveh_p
;
7261 while (check_pve_p
!= PV_ENTRY_NULL
) {
7262 if ((check_pve_p
!= pve_p
) && (pve_get_ptep(check_pve_p
) == pte_p
)) {
7263 panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
7264 pte_p
, pmap
, prot
, options
, pv_h
, pveh_p
, pve_p
, (uint64_t)*pte_p
, (uint64_t)va
, ppnum
);
7266 check_pve_p
= PVE_NEXT_PTR(pve_next(check_pve_p
));
7270 panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
7271 pte_p
, pmap
, prot
, options
, pv_h
, pveh_p
, pve_p
, (uint64_t)*pte_p
, (uint64_t)va
, ppnum
);
7274 #if DEVELOPMENT || DEBUG
7275 if ((prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !pmap
->nx_enabled
)
7277 if ((prot
& VM_PROT_EXECUTE
))
7285 /* Remove the mapping if new protection is NONE */
7287 boolean_t is_altacct
= FALSE
;
7288 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
7289 pt_entry_t spte
= *pte_p
;
7291 if (IS_ALTACCT_PAGE(pai
, pve_p
)) {
7297 if (pte_is_wired(spte
)) {
7298 pte_set_wired(pmap
, pte_p
, 0);
7300 if (pmap
!= kernel_pmap
) {
7301 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
7302 OSAddAtomic(-1, (SInt32
*) &pmap
->stats
.wired_count
);
7306 if (spte
!= ARM_PTE_TYPE_FAULT
&&
7307 pmap
!= kernel_pmap
&&
7308 (options
& PMAP_OPTIONS_COMPRESSOR
) &&
7309 IS_INTERNAL_PAGE(pai
)) {
7310 assert(!ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
));
7311 /* mark this PTE as having been "compressed" */
7312 tmplate
= ARM_PTE_COMPRESSED
;
7314 tmplate
|= ARM_PTE_COMPRESSED_ALT
;
7318 tmplate
= ARM_PTE_TYPE_FAULT
;
7322 * The entry must be written before the refcnt is decremented to
7323 * prevent use-after-free races with code paths that deallocate page
7324 * tables based on a zero refcnt.
7326 if (spte
!= tmplate
) {
7327 WRITE_PTE_STRONG(pte_p
, tmplate
);
7331 if ((spte
!= ARM_PTE_TYPE_FAULT
) &&
7332 (tmplate
== ARM_PTE_TYPE_FAULT
) &&
7333 (pmap
!= kernel_pmap
)) {
7334 if (OSAddAtomic16(-1, (SInt16
*) &(ptep_get_info(pte_p
)->refcnt
)) <= 0) {
7335 panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p
), pte_p
);
7340 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
7341 OSAddAtomic(-1, (SInt32
*) &pmap
->stats
.resident_count
);
7345 * We only ever compress internal pages.
7347 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
7348 assert(IS_INTERNAL_PAGE(pai
));
7352 if (pmap
!= kernel_pmap
) {
7353 if (IS_REUSABLE_PAGE(pai
) &&
7354 IS_INTERNAL_PAGE(pai
) &&
7356 __assert_only
int32_t orig_reusable
= OSAddAtomic(-1, &pmap
->stats
.reusable
);
7357 PMAP_STATS_ASSERTF(orig_reusable
> 0, pmap
, "stats.reusable %d", orig_reusable
);
7358 } else if (IS_INTERNAL_PAGE(pai
)) {
7359 __assert_only
int32_t orig_internal
= OSAddAtomic(-1, &pmap
->stats
.internal
);
7360 PMAP_STATS_ASSERTF(orig_internal
> 0, pmap
, "stats.internal %d", orig_internal
);
7362 __assert_only
int32_t orig_external
= OSAddAtomic(-1, &pmap
->stats
.external
);
7363 PMAP_STATS_ASSERTF(orig_external
> 0, pmap
, "stats.external %d", orig_external
);
7365 if ((options
& PMAP_OPTIONS_COMPRESSOR
) &&
7366 IS_INTERNAL_PAGE(pai
)) {
7367 /* adjust "compressed" stats */
7368 OSAddAtomic64(+1, &pmap
->stats
.compressed
);
7369 PMAP_STATS_PEAK(pmap
->stats
.compressed
);
7370 pmap
->stats
.compressed_lifetime
++;
7373 if (IS_ALTACCT_PAGE(pai
, pve_p
)) {
7374 assert(IS_INTERNAL_PAGE(pai
));
7375 pmap_ledger_debit(pmap
, task_ledgers
.internal
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
7376 pmap_ledger_debit(pmap
, task_ledgers
.alternate_accounting
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
7377 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
7378 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
7379 pmap_ledger_credit(pmap
, task_ledgers
.alternate_accounting_compressed
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
7383 * Cleanup our marker before
7384 * we free this pv_entry.
7386 CLR_ALTACCT_PAGE(pai
, pve_p
);
7387 } else if (IS_REUSABLE_PAGE(pai
)) {
7388 assert(IS_INTERNAL_PAGE(pai
));
7389 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
7390 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
7391 /* was not in footprint, but is now */
7392 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
7394 } else if (IS_INTERNAL_PAGE(pai
)) {
7395 pmap_ledger_debit(pmap
, task_ledgers
.internal
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
7398 * Update all stats related to physical footprint, which only
7399 * deals with internal pages.
7401 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
7403 * This removal is only being done so we can send this page to
7404 * the compressor; therefore it mustn't affect total task footprint.
7406 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
7409 * This internal page isn't going to the compressor, so adjust stats to keep
7410 * phys_footprint up to date.
7412 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
7415 /* external page: no impact on ledgers */
7419 if (pve_p
!= PV_ENTRY_NULL
) {
7420 assert(pve_next(pve_p
) == PVE_NEXT_PTR(pve_next(pve_p
)));
7424 const pt_attr_t
*const pt_attr
= pmap_get_pt_attr(pmap
);
7428 if (pmap
== kernel_pmap
) {
7429 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RONA
));
7431 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | pt_attr_leaf_ro(pt_attr
));
7434 pte_set_was_writeable(tmplate
, false);
7436 * While the naive implementation of this would serve to add execute
7437 * permission, this is not how the VM uses this interface, or how
7438 * x86_64 implements it. So ignore requests to add execute permissions.
7441 tmplate
|= pt_attr_leaf_xn(pt_attr
);
7444 #if __APRR_SUPPORTED__
7446 * Enforce the policy that PPL xPRR mappings can't have their permissions changed after the fact.
7448 * Certain userspace applications (e.g., CrashReporter and debuggers) have a need to remap JIT mappings to
7449 * RO/RX, so we explicitly allow that. This doesn't compromise the security of the PPL since this only
7450 * affects userspace mappings, so allow reducing permissions on JIT mappings to RO/RX. This is similar for
7451 * user execute-only mappings.
7453 if (__improbable(is_pte_xprr_protected(pmap
, spte
) && (pte_to_xprr_perm(spte
) != XPRR_USER_JIT_PERM
)
7454 && (pte_to_xprr_perm(spte
) != XPRR_USER_XO_PERM
))) {
7455 panic("%s: modifying an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx ppnum: 0x%x",
7456 __func__
, pte_p
, pmap
, prot
, options
, pv_h
, pveh_p
, pve_p
, (uint64_t)spte
, (uint64_t)tmplate
, (uint64_t)va
, ppnum
);
7460 * Enforce the policy that we can't create a new PPL protected mapping here except for user execute-only
7461 * mappings (which doesn't compromise the security of the PPL since it's userspace-specific).
7463 if (__improbable(is_pte_xprr_protected(pmap
, tmplate
) && (pte_to_xprr_perm(tmplate
) != XPRR_USER_XO_PERM
))) {
7464 panic("%s: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx ppnum: 0x%x",
7465 __func__
, pte_p
, pmap
, prot
, options
, pv_h
, pveh_p
, pve_p
, (uint64_t)spte
, (uint64_t)tmplate
, (uint64_t)va
, ppnum
);
7467 #endif /* __APRR_SUPPORTED__*/
7469 if (*pte_p
!= ARM_PTE_TYPE_FAULT
&&
7470 !ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
) &&
7471 *pte_p
!= tmplate
) {
7472 WRITE_PTE_STRONG(pte_p
, tmplate
);
7477 /* Invalidate TLBs for all CPUs using it */
7479 if (remove
|| !flush_range
||
7480 ((flush_range
->ptfr_pmap
!= pmap
) || va
>= flush_range
->ptfr_end
|| va
< flush_range
->ptfr_start
)) {
7481 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
,
7482 pt_attr_page_size(pmap_get_pt_attr(pmap
)) * PAGE_RATIO
, pmap
);
7484 tlb_flush_needed
= TRUE
;
7487 #ifdef PVH_FLAG_IOMMU
7490 pte_p
= PT_ENTRY_NULL
;
7492 if (pve_p
!= PV_ENTRY_NULL
) {
7494 assert(pve_next(pve_p
) == PVE_NEXT_PTR(pve_next(pve_p
)));
7496 pve_pp
= pve_link_field(pve_p
);
7497 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
7501 #ifdef PVH_FLAG_EXEC
7502 if (remove
&& (pvh_get_flags(pv_h
) & PVH_FLAG_EXEC
)) {
7503 pmap_set_ptov_ap(pai
, AP_RWNA
, tlb_flush_needed
);
7506 /* if we removed a bunch of entries, take care of them now */
7508 if (new_pve_p
!= PV_ENTRY_NULL
) {
7509 pvh_update_head(pv_h
, new_pve_p
, PVH_TYPE_PVEP
);
7510 pvh_set_flags(pv_h
, pvh_flags
);
7511 } else if (new_pte_p
!= PT_ENTRY_NULL
) {
7512 pvh_update_head(pv_h
, new_pte_p
, PVH_TYPE_PTEP
);
7513 pvh_set_flags(pv_h
, pvh_flags
);
7515 pvh_update_head(pv_h
, PV_ENTRY_NULL
, PVH_TYPE_NULL
);
7521 if (flush_range
&& tlb_flush_needed
) {
7523 flush_range
->ptfr_flush_needed
= true;
7524 tlb_flush_needed
= FALSE
;
7527 if (tlb_flush_needed
) {
7531 if (remove
&& (pvet_p
!= PV_ENTRY_NULL
)) {
7532 pv_list_free(pveh_p
, pvet_p
, pvh_cnt
, pv_kern_low_water_mark
);
7536 MARK_AS_PMAP_TEXT
static void
7537 pmap_page_protect_options_internal(
7540 unsigned int options
)
7542 pmap_page_protect_options_with_flush_range(ppnum
, prot
, options
, NULL
);
7546 pmap_page_protect_options(
7549 unsigned int options
,
7552 pmap_paddr_t phys
= ptoa(ppnum
);
7554 assert(ppnum
!= vm_page_fictitious_addr
);
7556 /* Only work with managed pages. */
7557 if (!pa_valid(phys
)) {
7562 * Determine the new protection.
7564 if (prot
== VM_PROT_ALL
) {
7565 return; /* nothing to do */
7568 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT
) | DBG_FUNC_START
, ppnum
, prot
);
7571 pmap_page_protect_options_ppl(ppnum
, prot
, options
);
7573 pmap_page_protect_options_internal(ppnum
, prot
, options
);
7576 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT
) | DBG_FUNC_END
);
7580 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
7581 MARK_AS_PMAP_TEXT
void
7582 pmap_disable_user_jop_internal(pmap_t pmap
)
7584 if (pmap
== kernel_pmap
) {
7585 panic("%s: called with kernel_pmap\n", __func__
);
7587 pmap
->disable_jop
= true;
7591 pmap_disable_user_jop(pmap_t pmap
)
7594 pmap_disable_user_jop_ppl(pmap
);
7596 pmap_disable_user_jop_internal(pmap
);
7599 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
7602 * Indicates if the pmap layer enforces some additional restrictions on the
7603 * given set of protections.
7606 pmap_has_prot_policy(__unused pmap_t pmap
, __unused
bool translated_allow_execute
, __unused vm_prot_t prot
)
7612 * Set the physical protection on the
7613 * specified range of this map as requested.
7614 * VERY IMPORTANT: Will not increase permissions.
7615 * VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
7624 pmap_protect_options(pmap
, b
, e
, prot
, 0, NULL
);
7627 MARK_AS_PMAP_TEXT
static void
7628 pmap_protect_options_internal(
7630 vm_map_address_t start
,
7631 vm_map_address_t end
,
7633 unsigned int options
,
7634 __unused
void *args
)
7636 const pt_attr_t
*const pt_attr
= pmap_get_pt_attr(pmap
);
7638 pt_entry_t
*bpte_p
, *epte_p
;
7640 boolean_t set_NX
= TRUE
;
7641 #if (__ARM_VMSA__ > 7)
7642 boolean_t set_XO
= FALSE
;
7644 boolean_t should_have_removed
= FALSE
;
7645 bool need_strong_sync
= false;
7647 if (__improbable((end
< start
) || (end
> ((start
+ pt_attr_twig_size(pt_attr
)) & ~pt_attr_twig_offmask(pt_attr
))))) {
7648 panic("%s: invalid address range %p, %p", __func__
, (void*)start
, (void*)end
);
7651 #if DEVELOPMENT || DEBUG
7652 if (options
& PMAP_OPTIONS_PROTECT_IMMEDIATE
) {
7653 if ((prot
& VM_PROT_ALL
) == VM_PROT_NONE
) {
7654 should_have_removed
= TRUE
;
7659 /* Determine the new protection. */
7661 #if (__ARM_VMSA__ > 7)
7662 case VM_PROT_EXECUTE
:
7667 case VM_PROT_READ
| VM_PROT_EXECUTE
:
7669 case VM_PROT_READ
| VM_PROT_WRITE
:
7671 return; /* nothing to do */
7673 should_have_removed
= TRUE
;
7677 if (should_have_removed
) {
7678 panic("%s: should have been a remove operation, "
7679 "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
7681 pmap
, (void *)start
, (void *)end
, prot
, options
, args
);
7684 #if DEVELOPMENT || DEBUG
7685 if ((prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !pmap
->nx_enabled
)
7687 if ((prot
& VM_PROT_EXECUTE
))
7695 VALIDATE_PMAP(pmap
);
7698 tte_p
= pmap_tte(pmap
, start
);
7700 if ((tte_p
!= (tt_entry_t
*) NULL
) && (*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
7701 bpte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
7702 bpte_p
= &bpte_p
[pte_index(pmap
, pt_attr
, start
)];
7703 epte_p
= bpte_p
+ ((end
- start
) >> pt_attr_leaf_shift(pt_attr
));
7706 for (pte_p
= bpte_p
;
7708 pte_p
+= PAGE_RATIO
) {
7710 #if DEVELOPMENT || DEBUG
7711 boolean_t force_write
= FALSE
;
7716 if ((spte
== ARM_PTE_TYPE_FAULT
) ||
7717 ARM_PTE_IS_COMPRESSED(spte
, pte_p
)) {
7723 boolean_t managed
= FALSE
;
7727 * It may be possible for the pte to transition from managed
7728 * to unmanaged in this timeframe; for now, elide the assert.
7729 * We should break out as a consequence of checking pa_valid.
7731 // assert(!ARM_PTE_IS_COMPRESSED(spte));
7732 pa
= pte_to_pa(spte
);
7733 if (!pa_valid(pa
)) {
7736 pai
= (int)pa_index(pa
);
7739 pa
= pte_to_pa(spte
);
7740 if (pai
== (int)pa_index(pa
)) {
7742 break; // Leave the PVH locked as we will unlock it after we free the PTE
7747 if ((spte
== ARM_PTE_TYPE_FAULT
) ||
7748 ARM_PTE_IS_COMPRESSED(spte
, pte_p
)) {
7754 if (pmap
== kernel_pmap
) {
7755 #if DEVELOPMENT || DEBUG
7756 if ((options
& PMAP_OPTIONS_PROTECT_IMMEDIATE
) && (prot
& VM_PROT_WRITE
)) {
7758 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RWNA
));
7762 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RONA
));
7765 #if DEVELOPMENT || DEBUG
7766 if ((options
& PMAP_OPTIONS_PROTECT_IMMEDIATE
) && (prot
& VM_PROT_WRITE
)) {
7768 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | pt_attr_leaf_rw(pt_attr
));
7772 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | pt_attr_leaf_ro(pt_attr
));
7777 * XXX Removing "NX" would
7778 * grant "execute" access
7779 * immediately, bypassing any
7780 * checks VM might want to do
7781 * in its soft fault path.
7782 * pmap_protect() and co. are
7783 * not allowed to increase
7784 * access permissions.
7787 tmplate
|= pt_attr_leaf_xn(pt_attr
);
7789 #if (__ARM_VMSA__ > 7)
7790 if (pmap
== kernel_pmap
) {
7791 /* do NOT clear "PNX"! */
7792 tmplate
|= ARM_PTE_NX
;
7794 /* do NOT clear "NX"! */
7795 tmplate
|= pt_attr_leaf_x(pt_attr
);
7797 tmplate
&= ~ARM_PTE_APMASK
;
7798 tmplate
|= pt_attr_leaf_rona(pt_attr
);
7804 #if DEVELOPMENT || DEBUG
7807 * TODO: Run CS/Monitor checks here.
7811 * We are marking the page as writable,
7812 * so we consider it to be modified and
7815 pa_set_bits(pa
, PP_ATTR_REFERENCED
| PP_ATTR_MODIFIED
);
7816 tmplate
|= ARM_PTE_AF
;
7818 if (IS_REFFAULT_PAGE(pai
)) {
7819 CLR_REFFAULT_PAGE(pai
);
7822 if (IS_MODFAULT_PAGE(pai
)) {
7823 CLR_MODFAULT_PAGE(pai
);
7826 } else if (options
& PMAP_OPTIONS_PROTECT_IMMEDIATE
) {
7828 * An immediate request for anything other than
7829 * write should still mark the page as
7830 * referenced if managed.
7833 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
7834 tmplate
|= ARM_PTE_AF
;
7836 if (IS_REFFAULT_PAGE(pai
)) {
7837 CLR_REFFAULT_PAGE(pai
);
7843 /* We do not expect to write fast fault the entry. */
7844 pte_set_was_writeable(tmplate
, false);
7846 #if __APRR_SUPPORTED__
7848 * Enforce the policy that PPL xPRR mappings can't have their permissions changed after the fact.
7850 * Certain userspace applications (e.g., CrashReporter and debuggers) have a need to remap JIT mappings to
7851 * RO/RX, so we explicitly allow that. This doesn't compromise the security of the PPL since this only
7852 * affects userspace mappings, so allow reducing permissions on JIT mappings to RO/RX/XO. This is similar
7853 * for user execute-only mappings.
7855 if (__improbable(is_pte_xprr_protected(pmap
, spte
) && (pte_to_xprr_perm(spte
) != XPRR_USER_JIT_PERM
)
7856 && (pte_to_xprr_perm(spte
) != XPRR_USER_XO_PERM
))) {
7857 panic("%s: modifying a PPL mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
7858 __func__
, pte_p
, pmap
, prot
, options
, (uint64_t)spte
, (uint64_t)tmplate
);
7862 * Enforce the policy that we can't create a new PPL protected mapping here except for user execute-only
7863 * mappings (which doesn't compromise the security of the PPL since it's userspace-specific).
7865 if (__improbable(is_pte_xprr_protected(pmap
, tmplate
) && (pte_to_xprr_perm(tmplate
) != XPRR_USER_XO_PERM
))) {
7866 panic("%s: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
7867 __func__
, pte_p
, pmap
, prot
, options
, (uint64_t)spte
, (uint64_t)tmplate
);
7869 #endif /* __APRR_SUPPORTED__*/
7870 WRITE_PTE_FAST(pte_p
, tmplate
);
7873 ASSERT_PVH_LOCKED(pai
);
7877 FLUSH_PTE_RANGE_STRONG(bpte_p
, epte_p
);
7878 PMAP_UPDATE_TLBS(pmap
, start
, end
, need_strong_sync
);
7885 pmap_protect_options(
7890 unsigned int options
,
7891 __unused
void *args
)
7893 vm_map_address_t l
, beg
;
7895 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
7897 if ((b
| e
) & pt_attr_leaf_offmask(pt_attr
)) {
7898 panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
7899 pmap
, (uint64_t)b
, (uint64_t)e
);
7902 #if DEVELOPMENT || DEBUG
7903 if (options
& PMAP_OPTIONS_PROTECT_IMMEDIATE
) {
7904 if ((prot
& VM_PROT_ALL
) == VM_PROT_NONE
) {
7905 pmap_remove_options(pmap
, b
, e
, options
);
7911 /* Determine the new protection. */
7913 case VM_PROT_EXECUTE
:
7915 case VM_PROT_READ
| VM_PROT_EXECUTE
:
7917 case VM_PROT_READ
| VM_PROT_WRITE
:
7919 return; /* nothing to do */
7921 pmap_remove_options(pmap
, b
, e
, options
);
7926 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT
) | DBG_FUNC_START
,
7927 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(b
),
7928 VM_KERNEL_ADDRHIDE(e
));
7933 l
= ((beg
+ pt_attr_twig_size(pt_attr
)) & ~pt_attr_twig_offmask(pt_attr
));
7940 pmap_protect_options_ppl(pmap
, beg
, l
, prot
, options
, args
);
7942 pmap_protect_options_internal(pmap
, beg
, l
, prot
, options
, args
);
7948 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT
) | DBG_FUNC_END
);
7951 /* Map a (possibly) autogenned block */
7960 __unused
unsigned int flags
)
7963 addr64_t original_va
= va
;
7966 for (page
= 0; page
< size
; page
++) {
7967 kr
= pmap_enter(pmap
, va
, pa
, prot
, VM_PROT_NONE
, attr
, TRUE
);
7969 if (kr
!= KERN_SUCCESS
) {
7971 * This will panic for now, as it is unclear that
7972 * removing the mappings is correct.
7974 panic("%s: failed pmap_enter, "
7975 "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
7977 pmap
, va
, pa
, size
, prot
, flags
);
7979 pmap_remove(pmap
, original_va
, va
- original_va
);
7987 return KERN_SUCCESS
;
7996 vm_prot_t fault_type
,
8000 return pmap_enter_options_addr(pmap
, v
, pa
, prot
, fault_type
, flags
, wired
, 0, NULL
);
8004 * Insert the given physical page (p) at
8005 * the specified virtual address (v) in the
8006 * target physical map with the protection requested.
8008 * If specified, the page will be wired down, meaning
8009 * that the related pte can not be reclaimed.
8011 * NB: This is the only routine which MAY NOT lazy-evaluate
8012 * or lose information. That is, this routine must actually
8013 * insert this page into the given map eventually (must make
8014 * forward progress eventually.
8022 vm_prot_t fault_type
,
8026 return pmap_enter_addr(pmap
, v
, ((pmap_paddr_t
)pn
) << PAGE_SHIFT
, prot
, fault_type
, flags
, wired
);
8030 pmap_enter_pte(pmap_t pmap
, pt_entry_t
*pte_p
, pt_entry_t pte
, vm_map_address_t v
)
8032 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
8034 if (pmap
!= kernel_pmap
&& ((pte
& ARM_PTE_WIRED
) != (*pte_p
& ARM_PTE_WIRED
))) {
8035 SInt16
*ptd_wiredcnt_ptr
= (SInt16
*)&(ptep_get_info(pte_p
)->wiredcnt
);
8036 if (pte
& ARM_PTE_WIRED
) {
8037 OSAddAtomic16(1, ptd_wiredcnt_ptr
);
8038 pmap_ledger_credit(pmap
, task_ledgers
.wired_mem
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
8039 OSAddAtomic(1, (SInt32
*) &pmap
->stats
.wired_count
);
8041 OSAddAtomic16(-1, ptd_wiredcnt_ptr
);
8042 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
8043 OSAddAtomic(-1, (SInt32
*) &pmap
->stats
.wired_count
);
8046 if (*pte_p
!= ARM_PTE_TYPE_FAULT
&&
8047 !ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
)) {
8048 WRITE_PTE_STRONG(pte_p
, pte
);
8049 PMAP_UPDATE_TLBS(pmap
, v
, v
+ (pt_attr_page_size(pt_attr
) * PAGE_RATIO
), false);
8051 WRITE_PTE(pte_p
, pte
);
8052 __builtin_arm_isb(ISB_SY
);
8055 PMAP_TRACE(4 + pt_attr_leaf_level(pt_attr
), PMAP_CODE(PMAP__TTE
), VM_KERNEL_ADDRHIDE(pmap
),
8056 VM_KERNEL_ADDRHIDE(v
), VM_KERNEL_ADDRHIDE(v
+ (pt_attr_page_size(pt_attr
) * PAGE_RATIO
)), pte
);
8059 MARK_AS_PMAP_TEXT
static pt_entry_t
8060 wimg_to_pte(unsigned int wimg
)
8064 switch (wimg
& (VM_WIMG_MASK
)) {
8067 pte
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE
);
8068 pte
|= ARM_PTE_NX
| ARM_PTE_PNX
;
8070 case VM_WIMG_POSTED
:
8071 pte
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED
);
8072 pte
|= ARM_PTE_NX
| ARM_PTE_PNX
;
8074 case VM_WIMG_POSTED_REORDERED
:
8075 pte
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED
);
8076 pte
|= ARM_PTE_NX
| ARM_PTE_PNX
;
8078 case VM_WIMG_POSTED_COMBINED_REORDERED
:
8079 pte
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED
);
8080 pte
|= ARM_PTE_NX
| ARM_PTE_PNX
;
8083 pte
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB
);
8084 pte
|= ARM_PTE_NX
| ARM_PTE_PNX
;
8087 pte
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU
);
8088 #if (__ARM_VMSA__ > 7)
8089 pte
|= ARM_PTE_SH(SH_OUTER_MEMORY
);
8094 case VM_WIMG_COPYBACK
:
8095 pte
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK
);
8096 #if (__ARM_VMSA__ > 7)
8097 pte
|= ARM_PTE_SH(SH_OUTER_MEMORY
);
8102 case VM_WIMG_INNERWBACK
:
8103 pte
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK
);
8104 #if (__ARM_VMSA__ > 7)
8105 pte
|= ARM_PTE_SH(SH_INNER_MEMORY
);
8111 pte
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT
);
8112 #if (__ARM_VMSA__ > 7)
8113 pte
|= ARM_PTE_SH(SH_OUTER_MEMORY
);
8122 static pv_alloc_return_t
8127 unsigned int options
,
8129 boolean_t
*is_altacct
)
8132 pv_h
= pai_to_pvh(pai
);
8133 boolean_t first_cpu_mapping
;
8135 ASSERT_NOT_HIBERNATING();
8136 ASSERT_PVH_LOCKED(pai
);
8138 vm_offset_t pvh_flags
= pvh_get_flags(pv_h
);
8141 if (__improbable(pvh_flags
& PVH_FLAG_LOCKDOWN
)) {
8142 panic("%d is locked down (%#lx), cannot enter", pai
, pvh_flags
);
8147 /* An IOMMU mapping may already be present for a page that hasn't yet
8148 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
8149 * if this is the first CPU mapping. We base internal/reusable
8150 * accounting on the options specified for the first CPU mapping.
8151 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
8152 * as there are *any* mappings of the page. The accounting for a
8153 * page should not need to change until the page is recycled by the
8154 * VM layer, and we assert that there are no mappings when a page
8155 * is recycled. An IOMMU mapping of a freed/recycled page is
8156 * considered a security violation & potential DMA corruption path.*/
8157 first_cpu_mapping
= ((pmap
!= NULL
) && !(pvh_flags
& PVH_FLAG_CPU
));
8158 if (first_cpu_mapping
) {
8159 pvh_flags
|= PVH_FLAG_CPU
;
8162 first_cpu_mapping
= pvh_test_type(pv_h
, PVH_TYPE_NULL
);
8165 if (first_cpu_mapping
) {
8166 if (options
& PMAP_OPTIONS_INTERNAL
) {
8167 SET_INTERNAL_PAGE(pai
);
8169 CLR_INTERNAL_PAGE(pai
);
8171 if ((options
& PMAP_OPTIONS_INTERNAL
) &&
8172 (options
& PMAP_OPTIONS_REUSABLE
)) {
8173 SET_REUSABLE_PAGE(pai
);
8175 CLR_REUSABLE_PAGE(pai
);
8178 if (pvh_test_type(pv_h
, PVH_TYPE_NULL
)) {
8179 pvh_update_head(pv_h
, pte_p
, PVH_TYPE_PTEP
);
8180 if (pmap
!= NULL
&& pmap
!= kernel_pmap
&&
8181 ((options
& PMAP_OPTIONS_ALT_ACCT
) ||
8182 PMAP_FOOTPRINT_SUSPENDED(pmap
)) &&
8183 IS_INTERNAL_PAGE(pai
)) {
8185 * Make a note to ourselves that this mapping is using alternative
8186 * accounting. We'll need this in order to know which ledger to
8187 * debit when the mapping is removed.
8189 * The altacct bit must be set while the pv head is locked. Defer
8190 * the ledger accounting until after we've dropped the lock.
8192 SET_ALTACCT_PAGE(pai
, PV_ENTRY_NULL
);
8195 CLR_ALTACCT_PAGE(pai
, PV_ENTRY_NULL
);
8198 pv_alloc_return_t ret
;
8199 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
8203 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
8205 pte1_p
= pvh_ptep(pv_h
);
8206 pvh_set_flags(pv_h
, pvh_flags
);
8207 if ((*pve_p
== PV_ENTRY_NULL
) && ((ret
= pv_alloc(pmap
, pai
, pve_p
)) != PV_ALLOC_SUCCESS
)) {
8211 pve_set_ptep(*pve_p
, pte1_p
);
8212 (*pve_p
)->pve_next
= PV_ENTRY_NULL
;
8214 if (IS_ALTACCT_PAGE(pai
, PV_ENTRY_NULL
)) {
8216 * transfer "altacct" from
8217 * pp_attr to this pve
8219 CLR_ALTACCT_PAGE(pai
, PV_ENTRY_NULL
);
8220 SET_ALTACCT_PAGE(pai
, *pve_p
);
8222 pvh_update_head(pv_h
, *pve_p
, PVH_TYPE_PVEP
);
8223 *pve_p
= PV_ENTRY_NULL
;
8224 } else if (!pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
8225 panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
8226 __func__
, *pv_h
, pte_p
, pmap
, pv_h
);
8229 * Set up pv_entry for this new mapping and then
8230 * add it to the list for this physical page.
8232 pvh_set_flags(pv_h
, pvh_flags
);
8233 if ((*pve_p
== PV_ENTRY_NULL
) && ((ret
= pv_alloc(pmap
, pai
, pve_p
)) != PV_ALLOC_SUCCESS
)) {
8237 pve_set_ptep(*pve_p
, pte_p
);
8238 (*pve_p
)->pve_next
= PV_ENTRY_NULL
;
8240 pvh_add(pv_h
, *pve_p
);
8242 if (pmap
!= NULL
&& pmap
!= kernel_pmap
&&
8243 ((options
& PMAP_OPTIONS_ALT_ACCT
) ||
8244 PMAP_FOOTPRINT_SUSPENDED(pmap
)) &&
8245 IS_INTERNAL_PAGE(pai
)) {
8247 * Make a note to ourselves that this
8248 * mapping is using alternative
8249 * accounting. We'll need this in order
8250 * to know which ledger to debit when
8251 * the mapping is removed.
8253 * The altacct bit must be set while
8254 * the pv head is locked. Defer the
8255 * ledger accounting until after we've
8258 SET_ALTACCT_PAGE(pai
, *pve_p
);
8262 *pve_p
= PV_ENTRY_NULL
;
8265 pvh_set_flags(pv_h
, pvh_flags
);
8267 return PV_ALLOC_SUCCESS
;
8270 MARK_AS_PMAP_TEXT
static kern_return_t
8271 pmap_enter_options_internal(
8276 vm_prot_t fault_type
,
8279 unsigned int options
)
8281 ppnum_t pn
= (ppnum_t
)atop(pa
);
8287 boolean_t set_XO
= FALSE
;
8288 boolean_t refcnt_updated
;
8289 boolean_t wiredcnt_updated
;
8290 unsigned int wimg_bits
;
8291 boolean_t was_compressed
, was_alt_compressed
;
8292 kern_return_t kr
= KERN_SUCCESS
;
8294 VALIDATE_PMAP(pmap
);
8296 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
8298 if ((v
) & pt_attr_leaf_offmask(pt_attr
)) {
8299 panic("pmap_enter_options() pmap %p v 0x%llx\n",
8303 if ((pa
) & pt_attr_leaf_offmask(pt_attr
)) {
8304 panic("pmap_enter_options() pmap %p pa 0x%llx\n",
8305 pmap
, (uint64_t)pa
);
8308 if ((prot
& VM_PROT_EXECUTE
) && (pmap
== kernel_pmap
)) {
8309 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
8310 extern vm_offset_t ctrr_test_page
;
8311 if (__probable(v
!= ctrr_test_page
))
8313 panic("pmap_enter_options(): attempt to add executable mapping to kernel_pmap");
8316 #if DEVELOPMENT || DEBUG
8317 if ((prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !pmap
->nx_enabled
)
8319 if ((prot
& VM_PROT_EXECUTE
))
8327 #if (__ARM_VMSA__ > 7)
8328 if (prot
== VM_PROT_EXECUTE
) {
8333 assert(pn
!= vm_page_fictitious_addr
);
8335 refcnt_updated
= FALSE
;
8336 wiredcnt_updated
= FALSE
;
8337 pve_p
= PV_ENTRY_NULL
;
8338 was_compressed
= FALSE
;
8339 was_alt_compressed
= FALSE
;
8344 * Expand pmap to include this pte. Assume that
8345 * pmap is always expanded to include enough hardware
8346 * pages to map one VM page.
8348 while ((pte_p
= pmap_pte(pmap
, v
)) == PT_ENTRY_NULL
) {
8349 /* Must unlock to expand the pmap. */
8352 kr
= pmap_expand(pmap
, v
, options
, pt_attr_leaf_level(pt_attr
));
8354 if (kr
!= KERN_SUCCESS
) {
8361 if (options
& PMAP_OPTIONS_NOENTER
) {
8363 return KERN_SUCCESS
;
8370 if (ARM_PTE_IS_COMPRESSED(spte
, pte_p
)) {
8372 * "pmap" should be locked at this point, so this should
8373 * not race with another pmap_enter() or pmap_remove_range().
8375 assert(pmap
!= kernel_pmap
);
8377 /* one less "compressed" */
8378 OSAddAtomic64(-1, &pmap
->stats
.compressed
);
8379 pmap_ledger_debit(pmap
, task_ledgers
.internal_compressed
,
8380 pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
8382 was_compressed
= TRUE
;
8383 if (spte
& ARM_PTE_COMPRESSED_ALT
) {
8384 was_alt_compressed
= TRUE
;
8385 pmap_ledger_debit(pmap
, task_ledgers
.alternate_accounting_compressed
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
8387 /* was part of the footprint */
8388 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
8391 /* clear "compressed" marker */
8392 /* XXX is it necessary since we're about to overwrite it ? */
8393 WRITE_PTE_FAST(pte_p
, ARM_PTE_TYPE_FAULT
);
8394 spte
= ARM_PTE_TYPE_FAULT
;
8397 * We're replacing a "compressed" marker with a valid PTE,
8398 * so no change for "refcnt".
8400 refcnt_updated
= TRUE
;
8403 if ((spte
!= ARM_PTE_TYPE_FAULT
) && (pte_to_pa(spte
) != pa
)) {
8404 pmap_remove_range(pmap
, v
, pte_p
, pte_p
+ PAGE_RATIO
, 0);
8407 pte
= pa_to_pte(pa
) | ARM_PTE_TYPE
;
8410 pte
|= ARM_PTE_WIRED
;
8414 pte
|= pt_attr_leaf_xn(pt_attr
);
8416 #if (__ARM_VMSA__ > 7)
8417 if (pmap
== kernel_pmap
) {
8420 pte
|= pt_attr_leaf_x(pt_attr
);
8425 if (pmap
== kernel_pmap
) {
8426 #if __ARM_KERNEL_PROTECT__
8428 #endif /* __ARM_KERNEL_PROTECT__ */
8429 if (prot
& VM_PROT_WRITE
) {
8430 pte
|= ARM_PTE_AP(AP_RWNA
);
8431 pa_set_bits(pa
, PP_ATTR_MODIFIED
| PP_ATTR_REFERENCED
);
8433 pte
|= ARM_PTE_AP(AP_RONA
);
8434 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
8436 #if (__ARM_VMSA__ == 7)
8437 if ((_COMM_PAGE_BASE_ADDRESS
<= v
) && (v
< _COMM_PAGE_BASE_ADDRESS
+ _COMM_PAGE_AREA_LENGTH
)) {
8438 pte
= (pte
& ~(ARM_PTE_APMASK
)) | ARM_PTE_AP(AP_RORO
);
8442 if (!pmap
->nested
) {
8444 } else if ((pmap
->nested_region_asid_bitmap
)
8445 && (v
>= pmap
->nested_region_addr
)
8446 && (v
< (pmap
->nested_region_addr
+ pmap
->nested_region_size
))) {
8447 unsigned int index
= (unsigned int)((v
- pmap
->nested_region_addr
) >> pt_attr_twig_shift(pt_attr
));
8449 if ((pmap
->nested_region_asid_bitmap
)
8450 && testbit(index
, (int *)pmap
->nested_region_asid_bitmap
)) {
8455 if (pmap
->nested_pmap
!= NULL
) {
8456 vm_map_address_t nest_vaddr
;
8457 pt_entry_t
*nest_pte_p
;
8459 nest_vaddr
= v
- pmap
->nested_region_addr
+ pmap
->nested_region_addr
;
8461 if ((nest_vaddr
>= pmap
->nested_region_addr
)
8462 && (nest_vaddr
< (pmap
->nested_region_addr
+ pmap
->nested_region_size
))
8463 && ((nest_pte_p
= pmap_pte(pmap
->nested_pmap
, nest_vaddr
)) != PT_ENTRY_NULL
)
8464 && (*nest_pte_p
!= ARM_PTE_TYPE_FAULT
)
8465 && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p
, nest_pte_p
))
8466 && (((*nest_pte_p
) & ARM_PTE_NG
) != ARM_PTE_NG
)) {
8467 unsigned int index
= (unsigned int)((v
- pmap
->nested_region_addr
) >> pt_attr_twig_shift(pt_attr
));
8469 if ((pmap
->nested_pmap
->nested_region_asid_bitmap
)
8470 && !testbit(index
, (int *)pmap
->nested_pmap
->nested_region_asid_bitmap
)) {
8471 panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
8472 nest_pte_p
, pmap
, (uint64_t)v
, (uint64_t)*nest_pte_p
);
8477 if (prot
& VM_PROT_WRITE
) {
8478 if (pa_valid(pa
) && (!pa_test_bits(pa
, PP_ATTR_MODIFIED
))) {
8479 if (fault_type
& VM_PROT_WRITE
) {
8481 pte
|= pt_attr_leaf_rwna(pt_attr
);
8483 pte
|= pt_attr_leaf_rw(pt_attr
);
8485 pa_set_bits(pa
, PP_ATTR_REFERENCED
| PP_ATTR_MODIFIED
);
8488 pte
|= pt_attr_leaf_rona(pt_attr
);
8490 pte
|= pt_attr_leaf_ro(pt_attr
);
8492 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
8493 pte_set_was_writeable(pte
, true);
8497 pte
|= pt_attr_leaf_rwna(pt_attr
);
8499 pte
|= pt_attr_leaf_rw(pt_attr
);
8501 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
8505 pte
|= pt_attr_leaf_rona(pt_attr
);
8507 pte
|= pt_attr_leaf_ro(pt_attr
);;
8509 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
8515 volatile uint16_t *refcnt
= NULL
;
8516 volatile uint16_t *wiredcnt
= NULL
;
8517 if (pmap
!= kernel_pmap
) {
8518 ptd_info_t
*ptd_info
= ptep_get_info(pte_p
);
8519 refcnt
= &ptd_info
->refcnt
;
8520 wiredcnt
= &ptd_info
->wiredcnt
;
8521 /* Bump the wired count to keep the PTE page from being reclaimed. We need this because
8522 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
8523 * a new PV entry. */
8524 if (!wiredcnt_updated
) {
8525 OSAddAtomic16(1, (volatile int16_t*)wiredcnt
);
8526 wiredcnt_updated
= TRUE
;
8528 if (!refcnt_updated
) {
8529 OSAddAtomic16(1, (volatile int16_t*)refcnt
);
8530 refcnt_updated
= TRUE
;
8536 boolean_t is_altacct
, is_internal
;
8538 is_internal
= FALSE
;
8541 pai
= (int)pa_index(pa
);
8546 if ((flags
& (VM_WIMG_MASK
| VM_WIMG_USE_DEFAULT
))) {
8547 wimg_bits
= (flags
& (VM_WIMG_MASK
| VM_WIMG_USE_DEFAULT
));
8549 wimg_bits
= pmap_cache_attributes(pn
);
8552 /* We may be retrying this operation after dropping the PVH lock.
8553 * Cache attributes for the physical page may have changed while the lock
8554 * was dropped, so clear any cache attributes we may have previously set
8555 * in the PTE template. */
8556 pte
&= ~(ARM_PTE_ATTRINDXMASK
| ARM_PTE_SHMASK
);
8557 pte
|= pmap_get_pt_ops(pmap
)->wimg_to_pte(wimg_bits
);
8560 /* The regular old kernel is not allowed to remap PPL pages. */
8561 if (__improbable(pa_test_monitor(pa
))) {
8562 panic("%s: page belongs to PPL, "
8563 "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
8565 pmap
, v
, (void*)pa
, prot
, fault_type
, flags
, wired
, options
);
8568 if (__improbable(pvh_get_flags(pai_to_pvh(pai
)) & PVH_FLAG_LOCKDOWN
)) {
8569 panic("%s: page locked down, "
8570 "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
8572 pmap
, v
, (void *)pa
, prot
, fault_type
, flags
, wired
, options
);
8577 if (pte
== *pte_p
) {
8579 * This pmap_enter operation has been completed by another thread
8580 * undo refcnt on pt and return
8583 goto Pmap_enter_cleanup
;
8584 } else if (pte_to_pa(*pte_p
) == pa
) {
8585 pmap_enter_pte(pmap
, pte_p
, pte
, v
);
8587 goto Pmap_enter_cleanup
;
8588 } else if (*pte_p
!= ARM_PTE_TYPE_FAULT
) {
8590 * pte has been modified by another thread
8591 * hold refcnt on pt and retry pmap_enter operation
8594 goto Pmap_enter_retry
;
8596 pv_alloc_return_t pv_status
= pmap_enter_pv(pmap
, pte_p
, pai
, options
, &pve_p
, &is_altacct
);
8597 if (pv_status
== PV_ALLOC_RETRY
) {
8598 goto Pmap_enter_loop
;
8599 } else if (pv_status
== PV_ALLOC_FAIL
) {
8601 kr
= KERN_RESOURCE_SHORTAGE
;
8602 goto Pmap_enter_cleanup
;
8605 pmap_enter_pte(pmap
, pte_p
, pte
, v
);
8607 if (pmap
!= kernel_pmap
) {
8608 if (IS_REUSABLE_PAGE(pai
) &&
8610 assert(IS_INTERNAL_PAGE(pai
));
8611 OSAddAtomic(+1, &pmap
->stats
.reusable
);
8612 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
8613 } else if (IS_INTERNAL_PAGE(pai
)) {
8614 OSAddAtomic(+1, &pmap
->stats
.internal
);
8615 PMAP_STATS_PEAK(pmap
->stats
.internal
);
8618 OSAddAtomic(+1, &pmap
->stats
.external
);
8619 PMAP_STATS_PEAK(pmap
->stats
.external
);
	if (pmap != kernel_pmap) {
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);

		if (is_internal) {
			/*
			 * Make corresponding adjustments to
			 * phys_footprint statistics.
			 */
			pmap_ledger_credit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			if (is_altacct) {
				/*
				 * If this page is internal and
				 * in an IOKit region, credit
				 * the task's total count of
				 * dirty, internal IOKit pages.
				 * It should *not* count towards
				 * the task's total physical
				 * memory footprint, because
				 * this entire region was
				 * already billed to the task
				 * at the time the mapping was
				 * created.
				 *
				 * Put another way, this is
				 * internal++ and
				 * alternate_accounting++, so
				 * net effect on phys_footprint
				 * is 0. That means: don't
				 * touch phys_footprint here.
				 */
				pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			} else {
				pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			}
		}
	}

	OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
	if (pmap->stats.resident_count > pmap->stats.resident_max) {
		pmap->stats.resident_max = pmap->stats.resident_count;
	}
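	/*
	 * Accounting note (illustrative): for an internal page, phys_mem and internal are
	 * each credited one page here.  If the page is "alternate accounted" (e.g. an
	 * IOKit-billed region), alternate_accounting is credited instead of phys_footprint,
	 * so the net phys_footprint change is 0; otherwise phys_footprint grows by exactly
	 * one page (pt_attr_page_size(pt_attr) * PAGE_RATIO bytes).
	 */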
		if (prot & VM_PROT_EXECUTE) {
			kr = KERN_FAILURE;
			goto Pmap_enter_cleanup;
		}

		wimg_bits = pmap_cache_attributes(pn);
		if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
			wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
		}

		pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);

		if ((wimg_bits & PP_ATTR_MONITOR) && !pmap_ppl_disable) {
			uint64_t xprr_perm = pte_to_xprr_perm(pte);
			switch (xprr_perm) {
			case XPRR_KERN_RO_PERM:
				break;
			case XPRR_KERN_RW_PERM:
				pte &= ~ARM_PTE_XPRR_MASK;
				pte |= xprr_perm_to_pte(XPRR_PPL_RW_PERM);
				break;
			default:
				panic("Unsupported xPRR perm %llu for pte 0x%llx", xprr_perm, (uint64_t)pte);
			}
		}

		pmap_enter_pte(pmap, pte_p, pte, v);

	goto Pmap_enter_return;
Pmap_enter_cleanup:

	if (refcnt != NULL) {
		assert(refcnt_updated);
		if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
			panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
		}
	}

Pmap_enter_return:

#if CONFIG_PGTRACE
	if (pgtrace_enabled) {
		// Clone and invalidate original mapping if eligible
		pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES, 0, 0);
	}
#endif /* CONFIG_PGTRACE */

	if (pve_p != PV_ENTRY_NULL) {
		pv_free_entry(pve_p);
	}

	if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
		panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
	}
8731 pmap_enter_options_addr(
8736 vm_prot_t fault_type
,
8739 unsigned int options
,
8742 kern_return_t kr
= KERN_FAILURE
;
8745 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_START
,
8746 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(v
), pa
, prot
);
8751 * If NOWAIT was not requested, loop until the enter does not
8752 * fail due to lack of resources.
8754 while ((kr
= pmap_enter_options_ppl(pmap
, v
, pa
, prot
, fault_type
, flags
, wired
, options
| PMAP_OPTIONS_NOWAIT
)) == KERN_RESOURCE_SHORTAGE
) {
8755 pmap_alloc_page_for_ppl((options
& PMAP_OPTIONS_NOWAIT
) ? PMAP_PAGES_ALLOCATE_NOWAIT
: 0);
8756 if (options
& PMAP_OPTIONS_NOWAIT
) {
8761 pmap_ledger_check_balance(pmap
);
8763 kr
= pmap_enter_options_internal(pmap
, v
, pa
, prot
, fault_type
, flags
, wired
, options
);
8766 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_END
, kr
);
8777 vm_prot_t fault_type
,
8780 unsigned int options
,
8783 return pmap_enter_options_addr(pmap
, v
, ((pmap_paddr_t
)pn
) << PAGE_SHIFT
, prot
, fault_type
, flags
, wired
, options
, arg
);
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *		The mapping must already exist in the pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_change_wiring_internal(
	pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired)
{
	pt_entry_t *pte_p;
	pmap_paddr_t pa;

	VALIDATE_PMAP(pmap);

	const pt_attr_t * pt_attr = pmap_get_pt_attr(pmap);

	pte_p = pmap_pte(pmap, v);
	assert(pte_p != PT_ENTRY_NULL);
	pa = pte_to_pa(*pte_p);

	while (pa_valid(pa)) {
		pmap_paddr_t new_pa;

		LOCK_PVH((int)pa_index(pa));
		new_pa = pte_to_pa(*pte_p);

		if (pa == new_pa) {
			break;
		}

		UNLOCK_PVH((int)pa_index(pa));
		pa = new_pa;
	}

	if (wired != pte_is_wired(*pte_p)) {
		pte_set_wired(pmap, pte_p, wired);
		if (pmap != kernel_pmap) {
			if (wired) {
				OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
				pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			} else if (!wired) {
				__assert_only int32_t orig_wired = OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
				PMAP_STATS_ASSERTF(orig_wired > 0, pmap, "stats.wired_count %d", orig_wired);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			}
		}
	}

	if (pa_valid(pa)) {
		UNLOCK_PVH((int)pa_index(pa));
	}
}

void
pmap_change_wiring(
	pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired)
{
#if XNU_MONITOR
	pmap_change_wiring_ppl(pmap, v, wired);

	pmap_ledger_check_balance(pmap);
#else
	pmap_change_wiring_internal(pmap, v, wired);
#endif
}
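/*
 * Usage sketch (illustrative, not from the original source): the VM layer typically
 * calls this when wiring or unwiring an existing mapping, e.g. roughly:
 *
 *     pmap_change_wiring(map->pmap, vaddr, TRUE);   // wire: bumps stats.wired_count / wired_mem
 *     pmap_change_wiring(map->pmap, vaddr, FALSE);  // unwire: debits the same counters
 *
 * The mapping must already exist; the exact call sites and locking are handled by vm_map.
 */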
8862 MARK_AS_PMAP_TEXT
static pmap_paddr_t
8863 pmap_find_pa_internal(
8867 pmap_paddr_t pa
= 0;
8869 VALIDATE_PMAP(pmap
);
8871 if (pmap
!= kernel_pmap
) {
8875 pa
= pmap_vtophys(pmap
, va
);
8877 if (pmap
!= kernel_pmap
) {
8878 pmap_unlock_ro(pmap
);
8885 pmap_find_pa_nofault(pmap_t pmap
, addr64_t va
)
8887 pmap_paddr_t pa
= 0;
8889 if (pmap
== kernel_pmap
) {
8891 } else if ((current_thread()->map
) && (pmap
== vm_map_pmap(current_thread()->map
))) {
8893 * Note that this doesn't account for PAN: mmu_uvtop() may return a valid
8894 * translation even if PAN would prevent kernel access through the translation.
8895 * It's therefore assumed the UVA will be accessed in a PAN-disabled context.
8907 pmap_paddr_t pa
= pmap_find_pa_nofault(pmap
, va
);
8915 return pmap_find_pa_ppl(pmap
, va
);
8917 return pmap_find_pa_internal(pmap
, va
);
8920 return pmap_vtophys(pmap
, va
);
8925 pmap_find_phys_nofault(
8930 ppn
= atop(pmap_find_pa_nofault(pmap
, va
));
8940 ppn
= atop(pmap_find_pa(pmap
, va
));
8955 pa
= ((pmap_paddr_t
)pmap_vtophys(kernel_pmap
, va
)) << PAGE_SHIFT
;
8957 pa
|= (va
& PAGE_MASK
);
8960 return (pmap_paddr_t
)pa
;
8968 if ((va
< pmap
->min
) || (va
>= pmap
->max
)) {
8972 #if (__ARM_VMSA__ == 7)
8973 tt_entry_t
*tte_p
, tte
;
8977 tte_p
= pmap_tte(pmap
, va
);
8978 if (tte_p
== (tt_entry_t
*) NULL
) {
8979 return (pmap_paddr_t
) 0;
8983 if ((tte
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
8984 pte_p
= (pt_entry_t
*) ttetokv(tte
) + pte_index(pmap
, pt_attr
, va
);
8985 pa
= pte_to_pa(*pte_p
) | (va
& ARM_PGMASK
);
8986 //LIONEL ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
8987 #if DEVELOPMENT || DEBUG
8988 if (atop(pa
) != 0 &&
8989 ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
)) {
8990 panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
8991 pmap
, va
, pte_p
, (uint64_t) (*pte_p
), atop(pa
));
8993 #endif /* DEVELOPMENT || DEBUG */
8994 } else if ((tte
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_BLOCK
) {
8995 if ((tte
& ARM_TTE_BLOCK_SUPER
) == ARM_TTE_BLOCK_SUPER
) {
8996 pa
= suptte_to_pa(tte
) | (va
& ARM_TT_L1_SUPER_OFFMASK
);
8998 pa
= sectte_to_pa(tte
) | (va
& ARM_TT_L1_BLOCK_OFFMASK
);
9004 tt_entry_t
* ttp
= NULL
;
9005 tt_entry_t
* ttep
= NULL
;
9006 tt_entry_t tte
= ARM_TTE_EMPTY
;
9007 pmap_paddr_t pa
= 0;
9008 unsigned int cur_level
;
9010 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
9014 for (cur_level
= pt_attr_root_level(pt_attr
); cur_level
<= pt_attr_leaf_level(pt_attr
); cur_level
++) {
9015 ttep
= &ttp
[ttn_index(pmap
, pt_attr
, va
, cur_level
)];
9019 const uint64_t valid_mask
= pt_attr
->pta_level_info
[cur_level
].valid_mask
;
9020 const uint64_t type_mask
= pt_attr
->pta_level_info
[cur_level
].type_mask
;
9021 const uint64_t type_block
= pt_attr
->pta_level_info
[cur_level
].type_block
;
9022 const uint64_t offmask
= pt_attr
->pta_level_info
[cur_level
].offmask
;
9024 if ((tte
& valid_mask
) != valid_mask
) {
9025 return (pmap_paddr_t
) 0;
9028 /* This detects both leaf entries and intermediate block mappings. */
9029 if ((tte
& type_mask
) == type_block
) {
9030 pa
= ((tte
& ARM_TTE_PA_MASK
& ~offmask
) | (va
& offmask
));
9034 ttp
= (tt_entry_t
*)phystokv(tte
& ARM_TTE_TABLE_MASK
);
/*
 * pmap_init_pte_page - Initialize a page table page.
 */
void
pmap_init_pte_page(
	pmap_t pmap,
	pt_entry_t *pte_p,
	vm_offset_t va,
	unsigned int ttlevel,
	boolean_t alloc_ptd)
{
	pt_desc_t   *ptdp = NULL;
	vm_offset_t *pvh;

	pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));

	if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
		if (alloc_ptd) {
			/*
			 * This path should only be invoked from arm_vm_init.  If we are emulating 16KB pages
			 * on 4KB hardware, we may already have allocated a page table descriptor for a
			 * bootstrap request, so we check for an existing PTD here.
			 */
			ptdp = ptd_alloc(pmap);
			if (ptdp == NULL) {
				panic("%s: unable to allocate PTD", __func__);
			}
			pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
		} else {
			panic("pmap_init_pte_page(): pte_p %p", pte_p);
		}
	} else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
		ptdp = (pt_desc_t *)(pvh_list(pvh));
	} else {
		panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
	}

	// below barrier ensures previous updates to the page are visible to PTW before
	// it is linked to the PTE of previous level
	__builtin_arm_dmb(DMB_ISHST);
	ptd_init(ptdp, pmap, va, ttlevel, pte_p);
}
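/*
 * Ordering note (illustrative): the DMB above pairs with the subsequent store that links
 * this page into the parent table (e.g. "*tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID"
 * in pmap_expand below), so the page table walker can never observe the link before the
 * freshly initialized page contents are visible.
 */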
9085 * Routine: pmap_expand
9087 * Expands a pmap to be able to map the specified virtual address.
9089 * Allocates new memory for the default (COARSE) translation table
9090 * entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
9091 * also allocates space for the corresponding pv entries.
9093 * Nothing should be locked.
9095 static kern_return_t
9099 unsigned int options
,
9102 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
9104 #if (__ARM_VMSA__ == 7)
9110 #if DEVELOPMENT || DEBUG
9112 * We no longer support root level expansion; panic in case something
9113 * still attempts to trigger it.
9115 i
= tte_index(pmap
, pt_attr
, v
);
9117 if (i
>= pmap
->tte_index_max
) {
9118 panic("%s: index out of range, index=%u, max=%u, "
9119 "pmap=%p, addr=%p, options=%u, level=%u",
9120 __func__
, i
, pmap
->tte_index_max
,
9121 pmap
, (void *)v
, options
, level
);
9123 #endif /* DEVELOPMENT || DEBUG */
9126 return KERN_SUCCESS
;
9130 tt_entry_t
*tte_next_p
;
9134 if (pmap_pte(pmap
, v
) != PT_ENTRY_NULL
) {
9136 return KERN_SUCCESS
;
9138 tte_p
= &pmap
->tte
[ttenum(v
& ~ARM_TT_L1_PT_OFFMASK
)];
9139 for (i
= 0, tte_next_p
= tte_p
; i
< 4; i
++) {
9140 if (tte_to_pa(*tte_next_p
)) {
9141 pa
= tte_to_pa(*tte_next_p
);
9146 pa
= pa
& ~PAGE_MASK
;
9148 tte_p
= &pmap
->tte
[ttenum(v
)];
9149 *tte_p
= pa_to_tte(pa
) | (((v
>> ARM_TT_L1_SHIFT
) & 0x3) << 10) | ARM_TTE_TYPE_TABLE
;
9151 PMAP_TRACE(5, PMAP_CODE(PMAP__TTE
), VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(v
& ~ARM_TT_L1_OFFMASK
),
9152 VM_KERNEL_ADDRHIDE((v
& ~ARM_TT_L1_OFFMASK
) + ARM_TT_L1_SIZE
), *tte_p
);
9154 return KERN_SUCCESS
;
9158 v
= v
& ~ARM_TT_L1_PT_OFFMASK
;
9161 while (pmap_pte(pmap
, v
) == PT_ENTRY_NULL
) {
9163 * Allocate a VM page for the level 2 page table entries.
9165 while (pmap_tt_allocate(pmap
, &tt_p
, PMAP_TT_L2_LEVEL
, ((options
& PMAP_TT_ALLOCATE_NOWAIT
)? PMAP_PAGES_ALLOCATE_NOWAIT
: 0)) != KERN_SUCCESS
) {
9166 if (options
& PMAP_OPTIONS_NOWAIT
) {
9167 return KERN_RESOURCE_SHORTAGE
;
9174 * See if someone else expanded us first
9176 if (pmap_pte(pmap
, v
) == PT_ENTRY_NULL
) {
9177 tt_entry_t
*tte_next_p
;
9179 pmap_init_pte_page(pmap
, (pt_entry_t
*) tt_p
, v
, PMAP_TT_L2_LEVEL
, FALSE
);
9180 pa
= kvtophys((vm_offset_t
)tt_p
);
9181 tte_p
= &pmap
->tte
[ttenum(v
)];
9182 for (i
= 0, tte_next_p
= tte_p
; i
< 4; i
++) {
9183 *tte_next_p
= pa_to_tte(pa
) | ARM_TTE_TYPE_TABLE
;
9184 PMAP_TRACE(5, PMAP_CODE(PMAP__TTE
), VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE((v
& ~ARM_TT_L1_PT_OFFMASK
) + (i
* ARM_TT_L1_SIZE
)),
9185 VM_KERNEL_ADDRHIDE((v
& ~ARM_TT_L1_PT_OFFMASK
) + ((i
+ 1) * ARM_TT_L1_SIZE
)), *tte_p
);
9189 FLUSH_PTE_RANGE(tte_p
, tte_p
+ 4);
9192 tt_p
= (tt_entry_t
*)NULL
;
9195 if (tt_p
!= (tt_entry_t
*)NULL
) {
9196 pmap_tt_deallocate(pmap
, tt_p
, PMAP_TT_L2_LEVEL
);
9197 tt_p
= (tt_entry_t
*)NULL
;
9200 return KERN_SUCCESS
;
9203 unsigned int ttlevel
= pt_attr_root_level(pt_attr
);
9208 tt_p
= (tt_entry_t
*)NULL
;
9210 for (; ttlevel
< level
; ttlevel
++) {
9213 if (pmap_ttne(pmap
, ttlevel
+ 1, v
) == PT_ENTRY_NULL
) {
9214 pmap_unlock_ro(pmap
);
9215 while (pmap_tt_allocate(pmap
, &tt_p
, ttlevel
+ 1, ((options
& PMAP_TT_ALLOCATE_NOWAIT
)? PMAP_PAGES_ALLOCATE_NOWAIT
: 0)) != KERN_SUCCESS
) {
9216 if (options
& PMAP_OPTIONS_NOWAIT
) {
9217 return KERN_RESOURCE_SHORTAGE
;
9220 panic("%s: failed to allocate tt, "
9221 "pmap=%p, v=%p, options=0x%x, level=%u",
9223 pmap
, (void *)v
, options
, level
);
9229 if ((pmap_ttne(pmap
, ttlevel
+ 1, v
) == PT_ENTRY_NULL
)) {
9230 pmap_init_pte_page(pmap
, (pt_entry_t
*) tt_p
, v
, ttlevel
+ 1, FALSE
);
9231 pa
= kvtophys((vm_offset_t
)tt_p
);
9232 tte_p
= pmap_ttne(pmap
, ttlevel
, v
);
9233 *tte_p
= (pa
& ARM_TTE_TABLE_MASK
) | ARM_TTE_TYPE_TABLE
| ARM_TTE_VALID
;
9234 PMAP_TRACE(4 + ttlevel
, PMAP_CODE(PMAP__TTE
), VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(v
& ~pt_attr_ln_offmask(pt_attr
, ttlevel
)),
9235 VM_KERNEL_ADDRHIDE((v
& ~pt_attr_ln_offmask(pt_attr
, ttlevel
)) + pt_attr_ln_size(pt_attr
, ttlevel
)), *tte_p
);
9237 tt_p
= (tt_entry_t
*)NULL
;
9241 pmap_unlock_ro(pmap
);
9244 if (tt_p
!= (tt_entry_t
*)NULL
) {
9245 pmap_tt_deallocate(pmap
, tt_p
, ttlevel
+ 1);
9246 tt_p
= (tt_entry_t
*)NULL
;
9250 return KERN_SUCCESS
;
9255 * Routine: pmap_collect
9257 * Garbage collects the physical map system for
9258 * pages which are no longer used.
9259 * Success need not be guaranteed -- that is, there
9260 * may well be pages which are not referenced, but
9261 * others may be collected.
9264 pmap_collect(pmap_t pmap
)
9266 if (pmap
== PMAP_NULL
) {
9272 if ((pmap
->nested
== FALSE
) && (pmap
!= kernel_pmap
)) {
9273 /* TODO: Scan for vm page assigned to top level page tables with no reference */
9284 * Pmap garbage collection
9285 * Called by the pageout daemon when pages are scarce.
9294 * We cannot invoke the scheduler from the PPL, so for now we elide the
9295 * GC logic if the PPL is enabled.
9299 pmap_t pmap
, pmap_next
;
9302 if (pmap_gc_allowed
&&
9303 (pmap_gc_allowed_by_time_throttle
||
9305 pmap_gc_forced
= FALSE
;
9306 pmap_gc_allowed_by_time_throttle
= FALSE
;
9307 pmap_simple_lock(&pmaps_lock
);
9308 pmap
= CAST_DOWN_EXPLICIT(pmap_t
, queue_first(&map_pmap_list
));
9309 while (!queue_end(&map_pmap_list
, (queue_entry_t
)pmap
)) {
9310 if (!(pmap
->gc_status
& PMAP_GC_INFLIGHT
)) {
9311 pmap
->gc_status
|= PMAP_GC_INFLIGHT
;
9313 pmap_simple_unlock(&pmaps_lock
);
9317 pmap_simple_lock(&pmaps_lock
);
9318 gc_wait
= (pmap
->gc_status
& PMAP_GC_WAIT
);
9319 pmap
->gc_status
&= ~(PMAP_GC_INFLIGHT
| PMAP_GC_WAIT
);
9320 pmap_next
= CAST_DOWN_EXPLICIT(pmap_t
, queue_next(&pmap
->pmaps
));
9322 if (!queue_end(&map_pmap_list
, (queue_entry_t
)pmap_next
)) {
9323 pmap_next
->gc_status
|= PMAP_GC_INFLIGHT
;
9325 pmap_simple_unlock(&pmaps_lock
);
9326 thread_wakeup((event_t
) &pmap
->gc_status
);
9327 pmap_simple_lock(&pmaps_lock
);
9331 pmap_simple_unlock(&pmaps_lock
);
9337 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
9340 pmap_release_pages_fast(void)
9343 return pmap_release_ppl_pages_to_kernel();
9344 #else /* XNU_MONITOR */
 * By default, don't attempt pmap GC more frequently
 * than once per minute.
9355 compute_pmap_gc_throttle(
9358 pmap_gc_allowed_by_time_throttle
= TRUE
;
9362 * pmap_attribute_cache_sync(vm_offset_t pa)
9364 * Invalidates all of the instruction cache on a physical page and
9365 * pushes any dirty data from the data cache for the same physical page
9369 pmap_attribute_cache_sync(
9372 __unused vm_machine_attribute_t attribute
,
9373 __unused vm_machine_attribute_val_t
* value
)
9375 if (size
> PAGE_SIZE
) {
9376 panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size
);
9378 cache_sync_page(pp
);
9381 return KERN_SUCCESS
;
9385 * pmap_sync_page_data_phys(ppnum_t pp)
9387 * Invalidates all of the instruction cache on a physical page and
9388 * pushes any dirty data from the data cache for the same physical page
9391 pmap_sync_page_data_phys(
9394 cache_sync_page(pp
);
9398 * pmap_sync_page_attributes_phys(ppnum_t pp)
9400 * Write back and invalidate all cachelines on a physical page.
9403 pmap_sync_page_attributes_phys(
9406 flush_dcache((vm_offset_t
) (pp
<< PAGE_SHIFT
), PAGE_SIZE
, TRUE
);
9410 /* temporary workaround */
9414 mach_vm_offset_t va
)
9419 pte_p
= pmap_pte(map
->pmap
, va
);
9424 return (spte
& ARM_PTE_ATTRINDXMASK
) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT
);
9436 addr
= (unsigned int *) phystokv(ptoa(pn
));
9437 count
= PAGE_SIZE
/ sizeof(unsigned int);
9443 extern void mapping_set_mod(ppnum_t pn
);
9449 pmap_set_modify(pn
);
9452 extern void mapping_set_ref(ppnum_t pn
);
9458 pmap_set_reference(pn
);
/*
 *	Clear specified attribute bits.
 *
 *	Try to force an arm_fast_fault() for all mappings of
 *	the page - to force attributes to be set again at fault time.
 *	If the forcing succeeds, clear the cached bits at the head.
 *	Otherwise, something must have been wired, so leave the cached
 *	attributes alone.
 */
MARK_AS_PMAP_TEXT static void
phys_attribute_clear_with_flush_range(
	ppnum_t pn,
	unsigned int bits,
	unsigned int options,
	void *arg,
	pmap_tlb_flush_range_t *flush_range)
{
	pmap_paddr_t    pa = ptoa(pn);
	vm_prot_t       allow_mode = VM_PROT_ALL;

	if (bits & PP_ATTR_PPL_OWNED_BITS) {
		panic("%s: illegal request, "
		    "pn=%u, bits=%#x, options=%#x, arg=%p, flush_range=%p",
		    __FUNCTION__,
		    pn, bits, options, arg, flush_range);
	}

	if ((bits & PP_ATTR_MODIFIED) &&
	    (options & PMAP_OPTIONS_NOFLUSH) &&
	    (arg == NULL) &&
	    (flush_range == NULL)) {
		panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p,%p): "
		    "should not clear 'modified' without flushing TLBs\n",
		    pn, bits, options, arg, flush_range);
	}

	assert(pn != vm_page_fictitious_addr);

	if (options & PMAP_OPTIONS_CLEAR_WRITE) {
		assert(bits == PP_ATTR_MODIFIED);

		pmap_page_protect_options_with_flush_range(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0, flush_range);
		/*
		 * We short circuit this case; it should not need to
		 * invoke arm_force_fast_fault, so just clear the modified bit.
		 * pmap_page_protect has taken care of resetting
		 * the state so that we'll see the next write as a fault to
		 * the VM (i.e. we don't want a fast fault).
		 */
		pa_clear_bits(pa, bits);
		return;
	}
	if (bits & PP_ATTR_REFERENCED) {
		allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
	}
	if (bits & PP_ATTR_MODIFIED) {
		allow_mode &= ~VM_PROT_WRITE;
	}

	if (bits == PP_ATTR_NOENCRYPT) {
		/*
		 * We short circuit this case; it should not need to
		 * invoke arm_force_fast_fault, so just clear and
		 * return.  On ARM, this bit is just a debugging aid.
		 */
		pa_clear_bits(pa, bits);
		return;
	}

	if (arm_force_fast_fault_with_flush_range(pn, allow_mode, options, flush_range)) {
		pa_clear_bits(pa, bits);
	}
}
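/*
 * Illustrative flow (not part of the original source): clearing PP_ATTR_REFERENCED or
 * PP_ATTR_MODIFIED here does not rewrite the software attribute directly on every path;
 * instead arm_force_fast_fault_with_flush_range() downgrades the page's mappings so the
 * next access traps, and arm_fast_fault() re-derives the attribute bits at fault time.
 * The cached bits in pp_attr_table are only cleared when that downgrade succeeds.
 */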
9537 MARK_AS_PMAP_TEXT
static void
9538 phys_attribute_clear_internal(
9544 phys_attribute_clear_with_flush_range(pn
, bits
, options
, arg
, NULL
);
9547 #if __ARM_RANGE_TLBI__
9548 MARK_AS_PMAP_TEXT
static void
9549 phys_attribute_clear_twig_internal(
9551 vm_map_address_t start
,
9552 vm_map_address_t end
,
9554 unsigned int options
,
9555 pmap_tlb_flush_range_t
*flush_range
)
9557 pmap_assert_locked_r(pmap
);
9558 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
9559 assert(end
>= start
);
9560 assert((end
- start
) <= pt_attr_twig_size(pt_attr
));
9561 pt_entry_t
*pte_p
, *start_pte_p
, *end_pte_p
, *curr_pte_p
;
9563 tte_p
= pmap_tte(pmap
, start
);
9565 if (tte_p
== (tt_entry_t
*) NULL
) {
9569 if ((*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
9570 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
9572 start_pte_p
= &pte_p
[pte_index(pmap
, pt_attr
, start
)];
9573 end_pte_p
= start_pte_p
+ ((end
- start
) >> pt_attr_leaf_shift(pt_attr
));
9574 assert(end_pte_p
>= start_pte_p
);
9575 for (curr_pte_p
= start_pte_p
; curr_pte_p
< end_pte_p
; curr_pte_p
++) {
9576 pmap_paddr_t pa
= pte_to_pa(*curr_pte_p
);
9578 ppnum_t pn
= (ppnum_t
) atop(pa
);
9579 phys_attribute_clear_with_flush_range(pn
, bits
, options
, NULL
, flush_range
);
9585 MARK_AS_PMAP_TEXT
static void
9586 phys_attribute_clear_range_internal(
9588 vm_map_address_t start
,
9589 vm_map_address_t end
,
9591 unsigned int options
)
9593 if (__improbable(end
< start
)) {
9594 panic("%s: invalid address range %p, %p", __func__
, (void*)start
, (void*)end
);
9596 VALIDATE_PMAP(pmap
);
9598 vm_map_address_t va
= start
;
9599 pmap_tlb_flush_range_t flush_range
= {
9601 .ptfr_start
= start
,
9603 .ptfr_flush_needed
= false
9607 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
9610 vm_map_address_t curr_end
;
9612 curr_end
= ((va
+ pt_attr_twig_size(pt_attr
)) & ~pt_attr_twig_offmask(pt_attr
));
9613 if (curr_end
> end
) {
9617 phys_attribute_clear_twig_internal(pmap
, va
, curr_end
, bits
, options
, &flush_range
);
9620 pmap_unlock_ro(pmap
);
9621 if (flush_range
.ptfr_flush_needed
) {
9622 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(
9623 flush_range
.ptfr_start
,
9624 flush_range
.ptfr_end
- flush_range
.ptfr_start
,
9625 flush_range
.ptfr_pmap
);
9631 phys_attribute_clear_range(
9633 vm_map_address_t start
,
9634 vm_map_address_t end
,
9636 unsigned int options
)
9638 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE
) | DBG_FUNC_START
, bits
);
9641 phys_attribute_clear_range_ppl(pmap
, start
, end
, bits
, options
);
9643 phys_attribute_clear_range_internal(pmap
, start
, end
, bits
, options
);
9646 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE
) | DBG_FUNC_END
);
9648 #endif /* __ARM_RANGE_TLBI__ */
9651 phys_attribute_clear(
9658 * Do we really want this tracepoint? It will be extremely chatty.
9659 * Also, should we have a corresponding trace point for the set path?
9661 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_START
, pn
, bits
);
9664 phys_attribute_clear_ppl(pn
, bits
, options
, arg
);
9666 phys_attribute_clear_internal(pn
, bits
, options
, arg
);
9669 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_END
);
9673 * Set specified attribute bits.
9675 * Set cached value in the pv head because we have
9676 * no per-mapping hardware support for referenced and
9679 MARK_AS_PMAP_TEXT
static void
9680 phys_attribute_set_internal(
9684 pmap_paddr_t pa
= ptoa(pn
);
9685 assert(pn
!= vm_page_fictitious_addr
);
9688 if (bits
& PP_ATTR_PPL_OWNED_BITS
) {
9689 panic("%s: illegal request, "
9696 pa_set_bits(pa
, (uint16_t)bits
);
9707 phys_attribute_set_ppl(pn
, bits
);
9709 phys_attribute_set_internal(pn
, bits
);
9715 * Check specified attribute bits.
9717 * use the software cached bits (since no hw support).
9720 phys_attribute_test(
9724 pmap_paddr_t pa
= ptoa(pn
);
9725 assert(pn
!= vm_page_fictitious_addr
);
9726 return pa_test_bits(pa
, bits
);
9731 * Set the modify/reference bits on the specified physical page.
9734 pmap_set_modify(ppnum_t pn
)
9736 phys_attribute_set(pn
, PP_ATTR_MODIFIED
);
9741 * Clear the modify bits on the specified physical page.
9747 phys_attribute_clear(pn
, PP_ATTR_MODIFIED
, 0, NULL
);
9754 * Return whether or not the specified physical page is modified
9755 * by any physical maps.
9761 return phys_attribute_test(pn
, PP_ATTR_MODIFIED
);
9766 * Set the reference bit on the specified physical page.
9772 phys_attribute_set(pn
, PP_ATTR_REFERENCED
);
9776 * Clear the reference bits on the specified physical page.
9779 pmap_clear_reference(
9782 phys_attribute_clear(pn
, PP_ATTR_REFERENCED
, 0, NULL
);
9787 * pmap_is_referenced:
9789 * Return whether or not the specified physical page is referenced
9790 * by any physical maps.
9796 return phys_attribute_test(pn
, PP_ATTR_REFERENCED
);
9800 * pmap_get_refmod(phys)
9801 * returns the referenced and modified bits of the specified
9808 return ((phys_attribute_test(pn
, PP_ATTR_MODIFIED
)) ? VM_MEM_MODIFIED
: 0)
9809 | ((phys_attribute_test(pn
, PP_ATTR_REFERENCED
)) ? VM_MEM_REFERENCED
: 0);
static inline unsigned int
pmap_clear_refmod_mask_to_modified_bits(const unsigned int mask)
{
	return ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
	       ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
}
9820 * pmap_clear_refmod(phys, mask)
9821 * clears the referenced and modified bits as specified by the mask
9822 * of the specified physical page.
9825 pmap_clear_refmod_options(
9828 unsigned int options
,
9833 bits
= pmap_clear_refmod_mask_to_modified_bits(mask
);
9834 phys_attribute_clear(pn
, bits
, options
, arg
);
/*
 * Perform pmap_clear_refmod_options on a virtual address range.
 * The operation will be performed in bulk & tlb flushes will be coalesced
 * per pmap.
 *
 * Returns true if the operation is supported on this platform.
 * If this function returns false, the operation is not supported and
 * nothing has been modified in the pmap.
 */
bool
pmap_clear_refmod_range_options(
	pmap_t pmap __unused,
	vm_map_address_t start __unused,
	vm_map_address_t end __unused,
	unsigned int mask __unused,
	unsigned int options __unused)
{
#if __ARM_RANGE_TLBI__
	unsigned int    bits;
	bits = pmap_clear_refmod_mask_to_modified_bits(mask);
	phys_attribute_clear_range(pmap, start, end, bits, options);
	return true;
#else /* __ARM_RANGE_TLBI__ */
#pragma unused(pmap, start, end, mask, options)
	/*
	 * This operation allows the VM to bulk modify refmod bits on a virtually
	 * contiguous range of addresses.  This is a large performance improvement on
	 * platforms that support ranged tlbi instructions, but on older platforms
	 * we can only flush per-page or the entire asid.  So we currently
	 * only support this operation on platforms that support ranged tlbi
	 * instructions; on other platforms, we require that
	 * the VM modify the bits on a per-page basis.
	 */
	return false;
#endif /* __ARM_RANGE_TLBI__ */
}
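/*
 * Usage sketch (illustrative): on __ARM_RANGE_TLBI__ hardware a caller can clear both
 * refmod bits for a whole VA range with coalesced TLB maintenance, e.g. roughly:
 *
 *     if (!pmap_clear_refmod_range_options(pmap, start, end,
 *             VM_MEM_MODIFIED | VM_MEM_REFERENCED, 0)) {
 *         // not supported here: fall back to per-page pmap_clear_refmod_options() calls
 *     }
 *
 * The mask and options shown are assumptions for illustration; the VM chooses the real values.
 */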
9879 pmap_clear_refmod_options(pn
, mask
, 0, NULL
);
9883 pmap_disconnect_options(
9885 unsigned int options
,
9888 if ((options
& PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED
)) {
9890 * On ARM, the "modified" bit is managed by software, so
9891 * we know up-front if the physical page is "modified",
9892 * without having to scan all the PTEs pointing to it.
9893 * The caller should have made the VM page "busy" so noone
9894 * should be able to establish any new mapping and "modify"
9895 * the page behind us.
9897 if (pmap_is_modified(pn
)) {
9899 * The page has been modified and will be sent to
9900 * the VM compressor.
9902 options
|= PMAP_OPTIONS_COMPRESSOR
;
9905 * The page hasn't been modified and will be freed
9906 * instead of compressed.
9911 /* disconnect the page */
9912 pmap_page_protect_options(pn
, 0, options
, arg
);
9914 /* return ref/chg status */
9915 return pmap_get_refmod(pn
);
9923 * Disconnect all mappings for this page and return reference and change status
9924 * in generic format.
9931 pmap_page_protect(pn
, 0); /* disconnect the page */
9932 return pmap_get_refmod(pn
); /* return ref/chg status */
9936 pmap_has_managed_page(ppnum_t first
, ppnum_t last
)
9938 if (ptoa(first
) >= vm_last_phys
) {
9941 if (ptoa(last
) < vm_first_phys
) {
9949 * The state maintained by the noencrypt functions is used as a
9950 * debugging aid on ARM. This incurs some overhead on the part
9951 * of the caller. A special case check in phys_attribute_clear
9952 * (the most expensive path) currently minimizes this overhead,
9953 * but stubbing these functions out on RELEASE kernels yields
9960 #if DEVELOPMENT || DEBUG
9961 boolean_t result
= FALSE
;
9963 if (!pa_valid(ptoa(pn
))) {
9967 result
= (phys_attribute_test(pn
, PP_ATTR_NOENCRYPT
));
9980 #if DEVELOPMENT || DEBUG
9981 if (!pa_valid(ptoa(pn
))) {
9985 phys_attribute_set(pn
, PP_ATTR_NOENCRYPT
);
9992 pmap_clear_noencrypt(
9995 #if DEVELOPMENT || DEBUG
9996 if (!pa_valid(ptoa(pn
))) {
10000 phys_attribute_clear(pn
, PP_ATTR_NOENCRYPT
, 0, NULL
);
10008 pmap_is_monitor(ppnum_t pn
)
10010 assert(pa_valid(ptoa(pn
)));
10011 return phys_attribute_test(pn
, PP_ATTR_MONITOR
);
10016 pmap_lock_phys_page(ppnum_t pn
)
10020 pmap_paddr_t phys
= ptoa(pn
);
10022 if (pa_valid(phys
)) {
10023 pai
= (int)pa_index(phys
);
10029 { simple_lock(&phys_backup_lock
, LCK_GRP_NULL
);}
10034 pmap_unlock_phys_page(ppnum_t pn
)
10038 pmap_paddr_t phys
= ptoa(pn
);
10040 if (pa_valid(phys
)) {
10041 pai
= (int)pa_index(phys
);
10047 { simple_unlock(&phys_backup_lock
);}
10050 MARK_AS_PMAP_TEXT
static void
10051 pmap_switch_user_ttb_internal(
10054 VALIDATE_PMAP(pmap
);
10055 pmap_cpu_data_t
*cpu_data_ptr
;
10056 cpu_data_ptr
= pmap_get_cpu_data();
10058 #if (__ARM_VMSA__ == 7)
10059 cpu_data_ptr
->cpu_user_pmap
= pmap
;
10060 cpu_data_ptr
->cpu_user_pmap_stamp
= pmap
->stamp
;
10062 #if MACH_ASSERT && __ARM_USER_PROTECT__
10064 unsigned int ttbr0_val
, ttbr1_val
;
10065 __asm__
volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val
));
10066 __asm__
volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val
));
10067 if (ttbr0_val
!= ttbr1_val
) {
10068 panic("Misaligned ttbr0 %08X\n", ttbr0_val
);
10070 if (pmap
->ttep
& 0x1000) {
10071 panic("Misaligned ttbr0 %08X\n", pmap
->ttep
);
10075 #if !__ARM_USER_PROTECT__
10076 set_mmu_ttb(pmap
->ttep
);
10077 set_context_id(pmap
->hw_asid
);
10080 #else /* (__ARM_VMSA__ == 7) */
10082 if (pmap
!= kernel_pmap
) {
10083 cpu_data_ptr
->cpu_nested_pmap
= pmap
->nested_pmap
;
10084 cpu_data_ptr
->cpu_nested_pmap_attr
= (cpu_data_ptr
->cpu_nested_pmap
== NULL
) ?
10085 NULL
: pmap_get_pt_attr(cpu_data_ptr
->cpu_nested_pmap
);
10086 cpu_data_ptr
->cpu_nested_region_addr
= pmap
->nested_region_addr
;
10087 cpu_data_ptr
->cpu_nested_region_size
= pmap
->nested_region_size
;
10091 #if __ARM_MIXED_PAGE_SIZE__
10092 if ((pmap
!= kernel_pmap
) && (pmap_get_pt_attr(pmap
)->pta_tcr_value
!= get_tcr())) {
10093 set_tcr(pmap_get_pt_attr(pmap
)->pta_tcr_value
);
10095 #endif /* __ARM_MIXED_PAGE_SIZE__ */
10097 if (pmap
!= kernel_pmap
) {
10098 set_mmu_ttb((pmap
->ttep
& TTBR_BADDR_MASK
) | (((uint64_t)pmap
->hw_asid
) << TTBR_ASID_SHIFT
));
10099 } else if (!pmap_user_ttb_is_clear()) {
10100 pmap_clear_user_ttb_internal();
10103 #if defined(HAS_APPLE_PAC) && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__)
10104 if (!arm_user_jop_disabled()) {
10105 uint64_t sctlr
= __builtin_arm_rsr64("SCTLR_EL1");
10106 bool jop_enabled
= sctlr
& SCTLR_JOP_KEYS_ENABLED
;
10107 if (!jop_enabled
&& !pmap
->disable_jop
) {
10109 sctlr
|= SCTLR_JOP_KEYS_ENABLED
;
10110 __builtin_arm_wsr64("SCTLR_EL1", sctlr
);
10111 arm_context_switch_requires_sync();
10112 } else if (jop_enabled
&& pmap
->disable_jop
) {
10114 sctlr
&= ~SCTLR_JOP_KEYS_ENABLED
;
10115 __builtin_arm_wsr64("SCTLR_EL1", sctlr
);
10116 arm_context_switch_requires_sync();
10119 #endif /* HAS_APPLE_PAC && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__) */
10120 #endif /* (__ARM_VMSA__ == 7) */
10124 pmap_switch_user_ttb(
10127 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB
) | DBG_FUNC_START
, VM_KERNEL_ADDRHIDE(pmap
), PMAP_VASID(pmap
), pmap
->hw_asid
);
10129 pmap_switch_user_ttb_ppl(pmap
);
10131 pmap_switch_user_ttb_internal(pmap
);
10133 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB
) | DBG_FUNC_END
);
MARK_AS_PMAP_TEXT static void
pmap_clear_user_ttb_internal(void)
{
#if (__ARM_VMSA__ > 7)
	set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
#else
	set_mmu_ttb(kernel_pmap->ttep);
#endif
}

void
pmap_clear_user_ttb(void)
{
	PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB) | DBG_FUNC_START, NULL, 0, 0);
#if XNU_MONITOR
	pmap_clear_user_ttb_ppl();
#else
	pmap_clear_user_ttb_internal();
#endif
	PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB) | DBG_FUNC_END);
}
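/*
 * Note (illustrative): pointing TTBR0 at invalid_ttep (or at the kernel table on
 * __ARM_VMSA__ == 7) effectively detaches the current user address space from this CPU;
 * it is typically paired with a later pmap_switch_user_ttb() when a user thread is
 * scheduled back onto the core.
 */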
10158 MARK_AS_PMAP_TEXT
static boolean_t
10159 arm_force_fast_fault_with_flush_range(
10161 vm_prot_t allow_mode
,
10163 pmap_tlb_flush_range_t
*flush_range
)
10165 pmap_paddr_t phys
= ptoa(ppnum
);
10171 boolean_t is_reusable
, is_internal
;
10172 boolean_t tlb_flush_needed
= FALSE
;
10173 boolean_t ref_fault
;
10174 boolean_t mod_fault
;
10175 boolean_t clear_write_fault
= FALSE
;
10176 boolean_t ref_aliases_mod
= FALSE
;
10177 bool mustsynch
= ((options
& PMAP_OPTIONS_FF_LOCKED
) == 0);
10179 assert(ppnum
!= vm_page_fictitious_addr
);
10181 if (!pa_valid(phys
)) {
10182 return FALSE
; /* Not a managed page. */
10188 pai
= (int)pa_index(phys
);
10189 if (__probable(mustsynch
)) {
10192 pv_h
= pai_to_pvh(pai
);
10194 pte_p
= PT_ENTRY_NULL
;
10195 pve_p
= PV_ENTRY_NULL
;
10196 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
10197 pte_p
= pvh_ptep(pv_h
);
10198 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
10199 pve_p
= pvh_list(pv_h
);
10202 is_reusable
= IS_REUSABLE_PAGE(pai
);
10203 is_internal
= IS_INTERNAL_PAGE(pai
);
10205 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
10206 vm_map_address_t va
;
10208 pt_entry_t tmplate
;
10210 boolean_t update_pte
;
10212 if (pve_p
!= PV_ENTRY_NULL
) {
10213 pte_p
= pve_get_ptep(pve_p
);
10216 if (pte_p
== PT_ENTRY_NULL
) {
10217 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p
, ppnum
);
10219 #ifdef PVH_FLAG_IOMMU
10220 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
10224 if (*pte_p
== ARM_PTE_EMPTY
) {
10225 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
10227 if (ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
)) {
10228 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
10231 pmap
= ptep_get_pmap(pte_p
);
10232 const pt_attr_t
* pt_attr
= pmap_get_pt_attr(pmap
);
10233 va
= ptep_get_va(pte_p
);
10235 assert(va
>= pmap
->min
&& va
< pmap
->max
);
10237 /* update pmap stats and ledgers */
10238 if (IS_ALTACCT_PAGE(pai
, pve_p
)) {
10240 * We do not track "reusable" status for
10241 * "alternate accounting" mappings.
10243 } else if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
10246 pmap
!= kernel_pmap
) {
10247 /* one less "reusable" */
10248 __assert_only
int32_t orig_reusable
= OSAddAtomic(-1, &pmap
->stats
.reusable
);
10249 PMAP_STATS_ASSERTF(orig_reusable
> 0, pmap
, "stats.reusable %d", orig_reusable
);
10250 /* one more "internal" */
10251 __assert_only
int32_t orig_internal
= OSAddAtomic(+1, &pmap
->stats
.internal
);
10252 PMAP_STATS_PEAK(pmap
->stats
.internal
);
10253 PMAP_STATS_ASSERTF(orig_internal
>= 0, pmap
, "stats.internal %d", orig_internal
);
10254 pmap_ledger_credit(pmap
, task_ledgers
.internal
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
10255 assert(!IS_ALTACCT_PAGE(pai
, pve_p
));
10256 assert(IS_INTERNAL_PAGE(pai
));
10257 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
10260 * Since the page is being marked non-reusable, we assume that it will be
10261 * modified soon. Avoid the cost of another trap to handle the fast
10262 * fault when we next write to this page.
10264 clear_write_fault
= TRUE
;
10265 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
10268 pmap
!= kernel_pmap
) {
10269 /* one more "reusable" */
10270 __assert_only
int32_t orig_reusable
= OSAddAtomic(+1, &pmap
->stats
.reusable
);
10271 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
10272 PMAP_STATS_ASSERTF(orig_reusable
>= 0, pmap
, "stats.reusable %d", orig_reusable
);
10273 /* one less "internal" */
10274 __assert_only
int32_t orig_internal
= OSAddAtomic(-1, &pmap
->stats
.internal
);
10275 PMAP_STATS_ASSERTF(orig_internal
> 0, pmap
, "stats.internal %d", orig_internal
);
10276 pmap_ledger_debit(pmap
, task_ledgers
.internal
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
10277 assert(!IS_ALTACCT_PAGE(pai
, pve_p
));
10278 assert(IS_INTERNAL_PAGE(pai
));
10279 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
10282 bool wiredskip
= pte_is_wired(*pte_p
) &&
10283 ((options
& PMAP_OPTIONS_FF_WIRED
) == 0);
10292 update_pte
= FALSE
;
10294 if ((allow_mode
& VM_PROT_READ
) != VM_PROT_READ
) {
10295 /* read protection sets the pte to fault */
10296 tmplate
= tmplate
& ~ARM_PTE_AF
;
10300 if ((allow_mode
& VM_PROT_WRITE
) != VM_PROT_WRITE
) {
10301 /* take away write permission if set */
10302 if (pmap
== kernel_pmap
) {
10303 if ((tmplate
& ARM_PTE_APMASK
) == ARM_PTE_AP(AP_RWNA
)) {
10304 tmplate
= ((tmplate
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RONA
));
10305 pte_set_was_writeable(tmplate
, true);
10310 if ((tmplate
& ARM_PTE_APMASK
) == pt_attr_leaf_rw(pt_attr
)) {
10311 tmplate
= ((tmplate
& ~ARM_PTE_APMASK
) | pt_attr_leaf_ro(pt_attr
));
10312 pte_set_was_writeable(tmplate
, true);
10319 #if MACH_ASSERT && XNU_MONITOR
10320 if (is_pte_xprr_protected(pmap
, spte
)) {
10321 if (pte_to_xprr_perm(spte
) != pte_to_xprr_perm(tmplate
)) {
10322 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
10323 "ppnum=0x%x, options=0x%x, allow_mode=0x%x",
10324 __FUNCTION__
, pte_p
, pmap
, pv_h
, pve_p
, (unsigned long long)spte
, (unsigned long long)tmplate
, (unsigned long long)va
,
10325 ppnum
, options
, allow_mode
);
10328 #endif /* MACH_ASSERT && XNU_MONITOR */
10330 if (result
&& update_pte
) {
10331 if (*pte_p
!= ARM_PTE_TYPE_FAULT
&&
10332 !ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
)) {
10333 WRITE_PTE_STRONG(pte_p
, tmplate
);
10334 if (!flush_range
||
10335 ((flush_range
->ptfr_pmap
!= pmap
) || va
>= flush_range
->ptfr_end
|| va
< flush_range
->ptfr_start
)) {
10336 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
,
10337 pt_attr_page_size(pt_attr
) * PAGE_RATIO
, pmap
);
10339 tlb_flush_needed
= TRUE
;
10341 WRITE_PTE(pte_p
, tmplate
);
10342 __builtin_arm_isb(ISB_SY
);
10347 pte_p
= PT_ENTRY_NULL
;
10348 if (pve_p
!= PV_ENTRY_NULL
) {
10349 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
10354 * If we are using the same approach for ref and mod
10355 * faults on this PTE, do not clear the write fault;
10356 * this would cause both ref and mod to be set on the
10357 * page again, and prevent us from taking ANY read/write
10358 * fault on the mapping.
10360 if (clear_write_fault
&& !ref_aliases_mod
) {
10361 arm_clear_fast_fault(ppnum
, VM_PROT_WRITE
);
10363 if (tlb_flush_needed
) {
10365 /* Delayed flush. Signal to the caller that the flush is needed. */
10366 flush_range
->ptfr_flush_needed
= true;
10372 /* update global "reusable" status for this page */
10374 if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
10376 CLR_REUSABLE_PAGE(pai
);
10377 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
10379 SET_REUSABLE_PAGE(pai
);
10384 SET_MODFAULT_PAGE(pai
);
10387 SET_REFFAULT_PAGE(pai
);
10389 if (__probable(mustsynch
)) {
10395 MARK_AS_PMAP_TEXT
static boolean_t
10396 arm_force_fast_fault_internal(
10398 vm_prot_t allow_mode
,
10401 if (__improbable((options
& PMAP_OPTIONS_FF_LOCKED
) != 0)) {
10402 panic("arm_force_fast_fault(0x%x, 0x%x, 0x%x): invalid options", ppnum
, allow_mode
, options
);
10404 return arm_force_fast_fault_with_flush_range(ppnum
, allow_mode
, options
, NULL
);
/*
 *	Routine:	arm_force_fast_fault
 *
 *	Function:
 *		Force all mappings for this page to fault according
 *		to the access modes allowed, so we can gather ref/modify
 *		bits again.
 */
boolean_t
arm_force_fast_fault(
	ppnum_t ppnum,
	vm_prot_t allow_mode,
	int options,
	__unused void *arg)
{
	pmap_paddr_t phys = ptoa(ppnum);

	assert(ppnum != vm_page_fictitious_addr);

	if (!pa_valid(phys)) {
		return FALSE;   /* Not a managed page. */
	}

#if XNU_MONITOR
	return arm_force_fast_fault_ppl(ppnum, allow_mode, options);
#else
	return arm_force_fast_fault_internal(ppnum, allow_mode, options);
#endif
}
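/*
 * Illustrative example (not from the original source): to start gathering "referenced"
 * information for a managed page, a caller could disallow read/execute so the next
 * access faults, e.g. roughly:
 *
 *     arm_force_fast_fault(pn, VM_PROT_ALL & ~(VM_PROT_READ | VM_PROT_EXECUTE), 0, NULL);
 *
 * The subsequent arm_fast_fault() restores access and sets PP_ATTR_REFERENCED.
 */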
10439 * Routine: arm_clear_fast_fault
10442 * Clear pending force fault for all mappings for this page based on
10443 * the observed fault type, update ref/modify bits.
10445 MARK_AS_PMAP_TEXT
static boolean_t
10446 arm_clear_fast_fault(
10448 vm_prot_t fault_type
)
10450 pmap_paddr_t pa
= ptoa(ppnum
);
10455 boolean_t tlb_flush_needed
= FALSE
;
10458 assert(ppnum
!= vm_page_fictitious_addr
);
10460 if (!pa_valid(pa
)) {
10461 return FALSE
; /* Not a managed page. */
10465 pai
= (int)pa_index(pa
);
10466 ASSERT_PVH_LOCKED(pai
);
10467 pv_h
= pai_to_pvh(pai
);
10469 pte_p
= PT_ENTRY_NULL
;
10470 pve_p
= PV_ENTRY_NULL
;
10471 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
10472 pte_p
= pvh_ptep(pv_h
);
10473 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
10474 pve_p
= pvh_list(pv_h
);
10477 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
10478 vm_map_address_t va
;
10480 pt_entry_t tmplate
;
10483 if (pve_p
!= PV_ENTRY_NULL
) {
10484 pte_p
= pve_get_ptep(pve_p
);
10487 if (pte_p
== PT_ENTRY_NULL
) {
10488 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p
, ppnum
);
10490 #ifdef PVH_FLAG_IOMMU
10491 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
10495 if (*pte_p
== ARM_PTE_EMPTY
) {
10496 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
10499 pmap
= ptep_get_pmap(pte_p
);
10500 va
= ptep_get_va(pte_p
);
10502 assert(va
>= pmap
->min
&& va
< pmap
->max
);
10507 if ((fault_type
& VM_PROT_WRITE
) && (pte_was_writeable(spte
))) {
10509 if (pmap
== kernel_pmap
) {
10510 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RWNA
));
10512 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap
)));
10516 tmplate
|= ARM_PTE_AF
;
10518 pte_set_was_writeable(tmplate
, false);
10519 pa_set_bits(pa
, PP_ATTR_REFERENCED
| PP_ATTR_MODIFIED
);
10520 } else if ((fault_type
& VM_PROT_READ
) && ((spte
& ARM_PTE_AF
) != ARM_PTE_AF
)) {
10521 tmplate
= spte
| ARM_PTE_AF
;
10524 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
10528 #if MACH_ASSERT && XNU_MONITOR
10529 if (is_pte_xprr_protected(pmap
, spte
)) {
10530 if (pte_to_xprr_perm(spte
) != pte_to_xprr_perm(tmplate
)) {
10531 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
10532 "ppnum=0x%x, fault_type=0x%x",
10533 __FUNCTION__
, pte_p
, pmap
, pv_h
, pve_p
, (unsigned long long)spte
, (unsigned long long)tmplate
, (unsigned long long)va
,
10534 ppnum
, fault_type
);
10537 #endif /* MACH_ASSERT && XNU_MONITOR */
10539 if (spte
!= tmplate
) {
10540 if (spte
!= ARM_PTE_TYPE_FAULT
) {
10541 WRITE_PTE_STRONG(pte_p
, tmplate
);
10542 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
,
10543 pt_attr_page_size(pmap_get_pt_attr(pmap
)) * PAGE_RATIO
, pmap
);
10544 tlb_flush_needed
= TRUE
;
10546 WRITE_PTE(pte_p
, tmplate
);
10547 __builtin_arm_isb(ISB_SY
);
10552 #ifdef PVH_FLAG_IOMMU
10555 pte_p
= PT_ENTRY_NULL
;
10556 if (pve_p
!= PV_ENTRY_NULL
) {
10557 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
10560 if (tlb_flush_needed
) {
10567 * Determine if the fault was induced by software tracking of
10568 * modify/reference bits. If so, re-enable the mapping (and set
10569 * the appropriate bits).
10571 * Returns KERN_SUCCESS if the fault was induced and was
10572 * successfully handled.
10574 * Returns KERN_FAILURE if the fault was not induced and
10575 * the function was unable to deal with it.
10577 * Returns KERN_PROTECTION_FAILURE if the pmap layer explictly
10578 * disallows this type of access.
10580 MARK_AS_PMAP_TEXT
static kern_return_t
10581 arm_fast_fault_internal(
10583 vm_map_address_t va
,
10584 vm_prot_t fault_type
,
10585 __unused
bool was_af_fault
,
10586 __unused
bool from_user
)
10588 kern_return_t result
= KERN_FAILURE
;
10590 pt_entry_t spte
= ARM_PTE_TYPE_FAULT
;
10593 VALIDATE_PMAP(pmap
);
10598 * If the entry doesn't exist, is completely invalid, or is already
10599 * valid, we can't fix it here.
10602 ptep
= pmap_pte(pmap
, va
);
10603 if (ptep
!= PT_ENTRY_NULL
) {
10607 pa
= pte_to_pa(spte
);
10609 if ((spte
== ARM_PTE_TYPE_FAULT
) ||
10610 ARM_PTE_IS_COMPRESSED(spte
, ptep
)) {
10615 if (!pa_valid(pa
)) {
10618 if (pmap_cache_attributes((ppnum_t
)atop(pa
)) & PP_ATTR_MONITOR
) {
10619 return KERN_PROTECTION_FAILURE
;
10624 pai
= (int)pa_index(pa
);
10626 #if __APRR_SUPPORTED__
10627 if (*ptep
== spte
) {
10629 * Double-check the spte value, as we care
10630 * about the AF bit.
10635 #else /* !(__APRR_SUPPORTED__*/
10637 #endif /* !(__APRR_SUPPORTED__*/
10644 #if __APRR_SUPPORTED__
10645 /* Check to see if this mapping had APRR restrictions. */
10646 if (is_pte_xprr_protected(pmap
, spte
)) {
10648 * We have faulted on an XPRR managed mapping; decide if the access should be
10649 * reattempted or if it should cause an exception. Now that all JIT entitled
10650 * task threads always have MPRR enabled we're only here because of
10651 * an AF fault or an actual permission fault. AF faults will have result
10652 * changed to KERN_SUCCESS below upon arm_clear_fast_fault return.
10654 if (was_af_fault
&& (spte
& ARM_PTE_AF
)) {
10655 result
= KERN_SUCCESS
;
10658 result
= KERN_PROTECTION_FAILURE
;
10661 #endif /* __APRR_SUPPORTED__*/
10663 if ((IS_REFFAULT_PAGE(pai
)) ||
10664 ((fault_type
& VM_PROT_WRITE
) && IS_MODFAULT_PAGE(pai
))) {
10666 * An attempted access will always clear ref/mod fault state, as
10667 * appropriate for the fault type. arm_clear_fast_fault will
10668 * update the associated PTEs for the page as appropriate; if
10669 * any PTEs are updated, we redrive the access. If the mapping
10670 * does not actually allow for the attempted access, the
10671 * following fault will (hopefully) fail to update any PTEs, and
10672 * thus cause arm_fast_fault to decide that it failed to handle
10675 if (IS_REFFAULT_PAGE(pai
)) {
10676 CLR_REFFAULT_PAGE(pai
);
10678 if ((fault_type
& VM_PROT_WRITE
) && IS_MODFAULT_PAGE(pai
)) {
10679 CLR_MODFAULT_PAGE(pai
);
10682 if (arm_clear_fast_fault((ppnum_t
)atop(pa
), fault_type
)) {
10684 * Should this preserve KERN_PROTECTION_FAILURE? The
10685 * cost of not doing so is a another fault in a case
10686 * that should already result in an exception.
10688 result
= KERN_SUCCESS
;
10692 #if __APRR_SUPPORTED__
10694 #endif /* __APRR_SUPPORTED__*/
10703 vm_map_address_t va
,
10704 vm_prot_t fault_type
,
10706 __unused
bool from_user
)
10708 kern_return_t result
= KERN_FAILURE
;
10710 if (va
< pmap
->min
|| va
>= pmap
->max
) {
10714 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT
) | DBG_FUNC_START
,
10715 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(va
), fault_type
,
10718 #if (__ARM_VMSA__ == 7)
10719 if (pmap
!= kernel_pmap
) {
10720 pmap_cpu_data_t
*cpu_data_ptr
= pmap_get_cpu_data();
10722 pmap_t cur_user_pmap
;
10724 cur_pmap
= current_pmap();
10725 cur_user_pmap
= cpu_data_ptr
->cpu_user_pmap
;
10727 if ((cur_user_pmap
== cur_pmap
) && (cur_pmap
== pmap
)) {
10728 if (cpu_data_ptr
->cpu_user_pmap_stamp
!= pmap
->stamp
) {
10729 pmap_set_pmap(pmap
, current_thread());
10730 result
= KERN_SUCCESS
;
10738 result
= arm_fast_fault_ppl(pmap
, va
, fault_type
, was_af_fault
, from_user
);
10740 result
= arm_fast_fault_internal(pmap
, va
, fault_type
, was_af_fault
, from_user
);
10743 #if (__ARM_VMSA__ == 7)
10747 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT
) | DBG_FUNC_END
, result
);
10757 bcopy_phys((addr64_t
) (ptoa(psrc
)),
10758 (addr64_t
) (ptoa(pdst
)),
10764 * pmap_copy_page copies the specified (machine independent) pages.
10767 pmap_copy_part_page(
10769 vm_offset_t src_offset
,
10771 vm_offset_t dst_offset
,
10774 bcopy_phys((addr64_t
) (ptoa(psrc
) + src_offset
),
10775 (addr64_t
) (ptoa(pdst
) + dst_offset
),
10781 * pmap_zero_page zeros the specified (machine independent) page.
10787 assert(pn
!= vm_page_fictitious_addr
);
10788 bzero_phys((addr64_t
) ptoa(pn
), PAGE_SIZE
);
10792 * pmap_zero_part_page
10793 * zeros the specified (machine independent) part of a page.
10796 pmap_zero_part_page(
10798 vm_offset_t offset
,
10801 assert(pn
!= vm_page_fictitious_addr
);
10802 assert(offset
+ len
<= PAGE_SIZE
);
10803 bzero_phys((addr64_t
) (ptoa(pn
) + offset
), len
);
10810 pt_entry_t
*ptep
, pte
;
10812 ptep
= pmap_pte(kernel_pmap
, LOWGLOBAL_ALIAS
);
10813 assert(ptep
!= PT_ENTRY_NULL
);
10814 assert(*ptep
== ARM_PTE_EMPTY
);
10816 pte
= pa_to_pte(ml_static_vtop((vm_offset_t
)&lowGlo
)) | AP_RONA
| ARM_PTE_NX
| ARM_PTE_PNX
| ARM_PTE_AF
| ARM_PTE_TYPE
;
10817 #if __ARM_KERNEL_PROTECT__
10819 #endif /* __ARM_KERNEL_PROTECT__ */
10820 pte
|= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK
);
10821 #if (__ARM_VMSA__ > 7)
10822 pte
|= ARM_PTE_SH(SH_OUTER_MEMORY
);
10827 FLUSH_PTE_RANGE(ptep
, (ptep
+ 1));
10828 PMAP_UPDATE_TLBS(kernel_pmap
, LOWGLOBAL_ALIAS
, LOWGLOBAL_ALIAS
+ PAGE_SIZE
, false);
vm_offset_t
pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
{
	if (__improbable(index >= CPUWINDOWS_MAX)) {
		panic("%s: invalid index %u", __func__, index);
	}
	return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
}
10840 MARK_AS_PMAP_TEXT
static unsigned int
10841 pmap_map_cpu_windows_copy_internal(
10844 unsigned int wimg_bits
)
10846 pt_entry_t
*ptep
= NULL
, pte
;
10847 pmap_cpu_data_t
*pmap_cpu_data
= pmap_get_cpu_data();
10848 unsigned int cpu_num
;
10850 vm_offset_t cpu_copywindow_vaddr
= 0;
10851 bool need_strong_sync
= false;
10854 unsigned int cacheattr
= (!pa_valid(ptoa(pn
)) ? pmap_cache_attributes(pn
) : 0);
10855 need_strong_sync
= ((cacheattr
& PMAP_IO_RANGE_STRONG_SYNC
) != 0);
10859 #ifdef __ARM_COHERENT_IO__
10860 if (__improbable(pa_valid(ptoa(pn
)) && !pmap_ppl_disable
)) {
10861 panic("%s: attempted to map a managed page, "
10862 "pn=%u, prot=0x%x, wimg_bits=0x%x",
10864 pn
, prot
, wimg_bits
);
10866 if (__improbable((cacheattr
& PP_ATTR_MONITOR
) && (prot
!= VM_PROT_READ
) && !pmap_ppl_disable
)) {
10867 panic("%s: attempt to map PPL-protected I/O address 0x%llx as writable", __func__
, (uint64_t)ptoa(pn
));
10870 #else /* __ARM_COHERENT_IO__ */
10871 #error CPU copy windows are not properly supported with both the PPL and incoherent IO
10872 #endif /* __ARM_COHERENT_IO__ */
10873 #endif /* XNU_MONITOR */
10874 cpu_num
= pmap_cpu_data
->cpu_number
;
10876 for (i
= 0; i
< CPUWINDOWS_MAX
; i
++) {
10877 cpu_copywindow_vaddr
= pmap_cpu_windows_copy_addr(cpu_num
, i
);
10878 ptep
= pmap_pte(kernel_pmap
, cpu_copywindow_vaddr
);
10879 assert(!ARM_PTE_IS_COMPRESSED(*ptep
, ptep
));
10880 if (*ptep
== ARM_PTE_TYPE_FAULT
) {
10884 if (i
== CPUWINDOWS_MAX
) {
10885 panic("pmap_map_cpu_windows_copy: out of window\n");
10888 pte
= pa_to_pte(ptoa(pn
)) | ARM_PTE_TYPE
| ARM_PTE_AF
| ARM_PTE_NX
| ARM_PTE_PNX
;
10889 #if __ARM_KERNEL_PROTECT__
10891 #endif /* __ARM_KERNEL_PROTECT__ */
10893 pte
|= wimg_to_pte(wimg_bits
);
10895 if (prot
& VM_PROT_WRITE
) {
10896 pte
|= ARM_PTE_AP(AP_RWNA
);
10898 pte
|= ARM_PTE_AP(AP_RONA
);
10901 WRITE_PTE_FAST(ptep
, pte
);
10903 * Invalidate tlb. Cover nested cpu_copywindow_vaddr usage with the interrupted context
10904 * in pmap_unmap_cpu_windows_copy() after clearing the pte and before tlb invalidate.
10906 FLUSH_PTE_STRONG(ptep
);
10907 PMAP_UPDATE_TLBS(kernel_pmap
, cpu_copywindow_vaddr
, cpu_copywindow_vaddr
+ PAGE_SIZE
, pmap_cpu_data
->copywindow_strong_sync
[i
]);
10908 pmap_cpu_data
->copywindow_strong_sync
[i
] = need_strong_sync
;
10914 pmap_map_cpu_windows_copy(
10917 unsigned int wimg_bits
)
10920 return pmap_map_cpu_windows_copy_ppl(pn
, prot
, wimg_bits
);
10922 return pmap_map_cpu_windows_copy_internal(pn
, prot
, wimg_bits
);
MARK_AS_PMAP_TEXT static void
pmap_unmap_cpu_windows_copy_internal(
	unsigned int index)
{
	pt_entry_t *ptep;
	unsigned int cpu_num;
	vm_offset_t cpu_copywindow_vaddr = 0;
	pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();

	cpu_num = pmap_cpu_data->cpu_number;

	cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
	/* Issue full-system DSB to ensure prior operations on the per-CPU window
	 * (which are likely to have been on I/O memory) are complete before
	 * tearing down the mapping. */
	__builtin_arm_dsb(DSB_SY);
	ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
	WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
	PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index]);
}

void
pmap_unmap_cpu_windows_copy(
	unsigned int index)
{
#if XNU_MONITOR
	return pmap_unmap_cpu_windows_copy_ppl(index);
#else
	return pmap_unmap_cpu_windows_copy_internal(index);
#endif
}
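/*
 * Usage sketch (illustrative): the copy-window pair is used to touch physical pages that
 * have no permanent kernel mapping, e.g. roughly:
 *
 *     unsigned int i = pmap_map_cpu_windows_copy(pn, VM_PROT_READ, wimg_bits);
 *     const void *p = (const void *)pmap_cpu_windows_copy_addr(pmap_get_cpu_data()->cpu_number, i);
 *     // ... read from p ...
 *     pmap_unmap_cpu_windows_copy(i);
 *
 * Preemption must stay disabled while the window is in use so the per-CPU mapping
 * remains valid for the calling CPU.
 */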
10961 * The HMAC SHA driver needs to be able to operate on physical pages in
10962 * place without copying them out. This function provides an interface
10963 * to run a callback on a given page, making use of a CPU copy window
10966 * This should only be used during the hibernation process since every DRAM page
10967 * will be mapped as VM_WIMG_DEFAULT. This can cause coherency issues if the pages
10968 * were originally mapped as VM_WIMG_IO/RT. In the hibernation case, by the time
10969 * we start copying memory all other agents shouldn't be writing to memory so we
10970 * can ignore these coherency issues. Regardless of this code, if other agents
10971 * were modifying memory during the image creation process, there would be
10974 MARK_AS_PMAP_TEXT
void
10975 pmap_invoke_with_page(
10976 ppnum_t page_number
,
10978 void (*callback
)(void *ctx
, ppnum_t page_number
, const void *page
))
10981 /* This function should only be used from within a hibernation context. */
10982 assert((gIOHibernateState
== kIOHibernateStateHibernating
) ||
10983 (gIOHibernateState
== kIOHibernateStateWakingFromHibernate
));
10985 /* from bcopy_phys_internal */
10986 vm_offset_t src
= ptoa_64(page_number
);
10987 vm_offset_t tmp_src
;
10988 bool use_copy_window_src
= !pmap_valid_address(src
);
10989 unsigned int src_index
;
10990 if (use_copy_window_src
) {
10991 unsigned int wimg_bits_src
= pmap_cache_attributes(page_number
);
10994 * Always map DRAM as VM_WIMG_DEFAULT (regardless of whether it's
10995 * kernel-managed) to denote that it's safe to use memcpy on it.
10997 if (is_dram_addr(src
)) {
10998 wimg_bits_src
= VM_WIMG_DEFAULT
;
11001 src_index
= pmap_map_cpu_windows_copy_internal(page_number
, VM_PROT_READ
, wimg_bits_src
);
11002 tmp_src
= pmap_cpu_windows_copy_addr(pmap_get_cpu_data()->cpu_number
, src_index
);
11004 vm_size_t count
= PAGE_SIZE
;
11005 tmp_src
= phystokv_range((pmap_paddr_t
)src
, &count
);
11008 callback(ctx
, page_number
, (const void *)tmp_src
);
11010 if (use_copy_window_src
) {
11011 pmap_unmap_cpu_windows_copy_internal(src_index
);
11014 #pragma unused(page_number, ctx, callback)
11015 #endif /* HIBERNATION */
11019 * Loop over every pmap_io_range (I/O ranges marked as owned by
11020 * the PPL in the device tree) and conditionally call callback() on each range
11021 * that needs to be included in the hibernation image.
11023 * @param ctx Will be passed as-is into the callback method. Use NULL if no
11024 * context is needed in the callback.
11025 * @param callback Callback function invoked on each range (gated by flag).
11027 MARK_AS_PMAP_TEXT
void
11028 pmap_hibernate_invoke(void *ctx
, void (*callback
)(void *ctx
, uint64_t addr
, uint64_t len
))
11030 for (unsigned int i
= 0; i
< num_io_rgns
; ++i
) {
11031 if (io_attr_table
[i
].wimg
& PMAP_IO_RANGE_NEEDS_HIBERNATING
) {
11032 callback(ctx
, io_attr_table
[i
].addr
, io_attr_table
[i
].len
);
11038 * Set the HASHED pv_head_table flag for the passed in physical page if it's a
11039 * PPL-owned page. Otherwise, do nothing.
11041 * @param addr Physical address of the page to set the HASHED flag on.
11043 MARK_AS_PMAP_TEXT
void
11044 pmap_set_ppl_hashed_flag(const pmap_paddr_t addr
)
11046 /* Ignore non-managed kernel memory. */
11047 if (!pa_valid(addr
)) {
11051 const int pai
= (int)pa_index(addr
);
11052 if (pp_attr_table
[pai
] & PP_ATTR_MONITOR
) {
11053 pv_entry_t
**pv_h
= pai_to_pvh(pai
);
11055 /* Mark that the PPL-owned page has been hashed into the hibernation image. */
11057 pvh_set_flags(pv_h
, pvh_get_flags(pv_h
) | PVH_FLAG_HASHED
);
11063 * Loop through every physical page in the system and clear out the HASHED flag
11064 * on every PPL-owned page. That flag is used to keep track of which pages have
11065 * been hashed into the hibernation image during the hibernation entry process.
11067 * The HASHED flag needs to be cleared out between hibernation cycles because the
11068 * pv_head_table and pp_attr_table's might have been copied into the hibernation
11069 * image with the HASHED flag set on certain pages. It's important to clear the
11070 * HASHED flag to ensure that the enforcement of all PPL-owned memory being hashed
11071 * into the hibernation image can't be compromised across hibernation cycles.
11073 MARK_AS_PMAP_TEXT
void
11074 pmap_clear_ppl_hashed_flag_all(void)
11076 const int last_index
= (int)pa_index(vm_last_phys
);
11077 pv_entry_t
**pv_h
= NULL
;
11079 for (int pai
= 0; pai
< last_index
; ++pai
) {
11080 pv_h
= pai_to_pvh(pai
);
11082 /* Test for PPL-owned pages that have the HASHED flag set in its pv_head_table entry. */
11083 if ((pvh_get_flags(pv_h
) & PVH_FLAG_HASHED
) &&
11084 (pp_attr_table
[pai
] & PP_ATTR_MONITOR
)) {
11086 pvh_set_flags(pv_h
, pvh_get_flags(pv_h
) & ~PVH_FLAG_HASHED
);
/**
 * Enforce that all PPL-owned pages were hashed into the hibernation image. The
 * ppl_hib driver will call this after all wired pages have been copied into the
 * hibernation image.
 */
MARK_AS_PMAP_TEXT void
pmap_check_ppl_hashed_flag_all(void)
{
	const int last_index = (int)pa_index(vm_last_phys);
	pv_entry_t **pv_h = NULL;

	for (int pai = 0; pai < last_index; ++pai) {
		pv_h = pai_to_pvh(pai);

		/*
		 * The PMAP stacks are explicitly not saved into the image so skip checking
		 * the pages that contain the PMAP stacks.
		 */
		const bool is_pmap_stack = (pai >= (int)pa_index(pmap_stacks_start_pa)) &&
		    (pai < (int)pa_index(pmap_stacks_end_pa));

		if (!is_pmap_stack &&
		    (pp_attr_table[pai] & PP_ATTR_MONITOR) &&
		    !(pvh_get_flags(pv_h) & PVH_FLAG_HASHED)) {
			panic("Found PPL-owned page that was not hashed into the hibernation image: pai %d", pai);
		}
	}
}

#endif /* XNU_MONITOR */
/*
 * Indicate that a pmap is intended to be used as a nested pmap
 * within one or more larger address spaces.  This must be set
 * before pmap_nest() is called with this pmap as the 'subordinate'.
 */
MARK_AS_PMAP_TEXT static void
pmap_set_nested_internal(
	pmap_t pmap)
{
	VALIDATE_PMAP(pmap);
	pmap->nested = TRUE;
	pmap_get_pt_ops(pmap)->free_id(pmap);
}

void
pmap_set_nested(
	pmap_t pmap)
{
#if XNU_MONITOR
	pmap_set_nested_ppl(pmap);
#else
	pmap_set_nested_internal(pmap);
#endif
}
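
/*
 * Usage sketch (assumption, not from the original source): the VM layer is
 * expected to mark a pmap as nestable before nesting it, roughly:
 *
 *     pmap_t shared = pmap_create_options(NULL, 0, flags);
 *     pmap_set_nested(shared);                 // must precede pmap_nest()
 *     kr = pmap_nest(task_pmap, shared, base, size);
 *
 * 'flags', 'base' and 'size' are placeholders; the real call sites live in
 * the shared-region code in the VM layer.
 */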
/*
 *	pmap_trim_range(pmap, start, end)
 *
 *	pmap  = pmap to operate on
 *	start = start of the range
 *	end   = end of the range
 *
 *	Attempts to deallocate TTEs for the given range in the nested range.
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_range(
	pmap_t pmap,
	addr64_t start,
	addr64_t end)
{
	addr64_t cur;
	addr64_t nested_region_start;
	addr64_t nested_region_end;
	addr64_t adjusted_start;
	addr64_t adjusted_end;
	addr64_t adjust_offmask;
	tt_entry_t * tte_p;
	pt_entry_t * pte_p;
	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	if (__improbable(end < start)) {
		panic("%s: invalid address range, "
		    "pmap=%p, start=%p, end=%p",
		    __func__,
		    pmap, (void*)start, (void*)end);
	}

	nested_region_start = pmap->nested_region_addr;
	nested_region_end = nested_region_start + pmap->nested_region_size;

	if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
		panic("%s: range outside nested region %p-%p, "
		    "pmap=%p, start=%p, end=%p",
		    __func__, (void *)nested_region_start, (void *)nested_region_end,
		    pmap, (void*)start, (void*)end);
	}

	/* Contract the range to TT page boundaries. */
	adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
	adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
	adjusted_end = end & ~adjust_offmask;
	bool modified = false;

	/* Iterate over the range, trying to remove TTEs. */
	for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
		pmap_lock(pmap);

		tte_p = pmap_tte(pmap, cur);

		if (tte_p == (tt_entry_t *) NULL) {
			goto done;
		}

		if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
			pte_p = (pt_entry_t *) ttetokv(*tte_p);

			if ((ptep_get_info(pte_p)->refcnt == 0) &&
			    (pmap != kernel_pmap)) {
				if (pmap->nested == TRUE) {
					/* Deallocate for the nested map. */
					pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
				} else {
					/* Just remove for the parent map. */
					pmap_tte_remove(pmap, tte_p, pt_attr_twig_level(pt_attr));
				}

				pmap_get_pt_ops(pmap)->flush_tlb_tte_async(cur, pmap);
				modified = true;
			}
		}

done:
		pmap_unlock(pmap);
	}

	if (modified) {
		sync_tlb_flush();
	}

#if (__ARM_VMSA__ > 7)
	/* Remove empty L2 TTs. */
	adjusted_start = ((start + pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL)) & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL));
	adjusted_end = end & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL);

	for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_ln_size(pt_attr, PMAP_TT_L1_LEVEL)) {
		/* For each L1 entry in our range... */
		pmap_lock(pmap);

		bool remove_tt1e = true;
		tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
		tt_entry_t * tt2e_start;
		tt_entry_t * tt2e_end;
		tt_entry_t * tt2e_p;
		tt_entry_t tt1e;

		if (tt1e_p == NULL) {
			pmap_unlock(pmap);
			continue;
		}

		tt1e = *tt1e_p;

		if (tt1e == ARM_TTE_TYPE_FAULT) {
			pmap_unlock(pmap);
			continue;
		}

		tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
		tt2e_end = &tt2e_start[pt_attr_page_size(pt_attr) / sizeof(*tt2e_start)];

		for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
			if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
				/*
				 * If any TTEs are populated, don't remove the
				 * L1 TT.
				 */
				remove_tt1e = false;
			}
		}

		if (remove_tt1e) {
			pmap_tte_deallocate(pmap, tt1e_p, PMAP_TT_L1_LEVEL);
			PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE, false);
		}

		pmap_unlock(pmap);
	}
#endif /* (__ARM_VMSA__ > 7) */
}
/*
 *	pmap_trim_internal(grand, subord, vstart, size)
 *
 *	grand  = pmap subord is nested in
 *	subord = nested pmap
 *	vstart = start of the used range in grand
 *	size   = size of the used range
 *
 *	Attempts to trim the shared region page tables down to only cover the given
 *	range in subord and grand.
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_internal(
	pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	uint64_t size)
{
	addr64_t vend;
	addr64_t adjust_offmask;

	if (__improbable(os_add_overflow(vstart, size, &vend))) {
		panic("%s: grand addr wraps around, "
		    "grand=%p, subord=%p, vstart=%p, size=%#llx",
		    __func__, grand, subord, (void*)vstart, size);
	}

	VALIDATE_PMAP(grand);
	VALIDATE_PMAP(subord);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);

	pmap_lock(subord);

	if (__improbable(!subord->nested)) {
		panic("%s: subord is not nestable, "
		    "grand=%p, subord=%p, vstart=%p, size=%#llx",
		    __func__, grand, subord, (void*)vstart, size);
	}

	if (__improbable(grand->nested)) {
		panic("%s: grand is nestable, "
		    "grand=%p, subord=%p, vstart=%p, size=%#llx",
		    __func__, grand, subord, (void*)vstart, size);
	}

	if (__improbable(grand->nested_pmap != subord)) {
		panic("%s: grand->nested != subord, "
		    "grand=%p, subord=%p, vstart=%p, size=%#llx",
		    __func__, grand, subord, (void*)vstart, size);
	}

	if (__improbable((size != 0) &&
	    ((vstart < grand->nested_region_addr) || (vend > (grand->nested_region_addr + grand->nested_region_size))))) {
		panic("%s: grand range not in nested region, "
		    "grand=%p, subord=%p, vstart=%p, size=%#llx",
		    __func__, grand, subord, (void*)vstart, size);
	}

	if (!grand->nested_has_no_bounds_ref) {
		assert(subord->nested_bounds_set);

		if (!grand->nested_bounds_set) {
			/* Inherit the bounds from subord. */
			grand->nested_region_true_start = subord->nested_region_true_start;
			grand->nested_region_true_end = subord->nested_region_true_end;
			grand->nested_bounds_set = true;
		}

		pmap_unlock(subord);
		return;
	}

	if ((!subord->nested_bounds_set) && size) {
		adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);

		subord->nested_region_true_start = vstart;
		subord->nested_region_true_end = vend;
		subord->nested_region_true_start &= ~adjust_offmask;

		if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
			panic("%s: padded true end wraps around, "
			    "grand=%p, subord=%p, vstart=%p, size=%#llx",
			    __func__, grand, subord, (void*)vstart, size);
		}

		subord->nested_region_true_end &= ~adjust_offmask;
		subord->nested_bounds_set = true;
	}

	if (subord->nested_bounds_set) {
		/* Inherit the bounds from subord. */
		grand->nested_region_true_start = subord->nested_region_true_start;
		grand->nested_region_true_end = subord->nested_region_true_end;
		grand->nested_bounds_set = true;

		/* If we know the bounds, we can trim the pmap. */
		grand->nested_has_no_bounds_ref = false;
		pmap_unlock(subord);
	} else {
		/* Don't trim if we don't know the bounds. */
		pmap_unlock(subord);
		return;
	}

	/* Trim grand to only cover the given range. */
	pmap_trim_range(grand, grand->nested_region_addr, grand->nested_region_true_start);
	pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_addr + grand->nested_region_size));

	/* Try to trim subord. */
	pmap_trim_subord(subord);
}
MARK_AS_PMAP_TEXT static void
pmap_trim_self(pmap_t pmap)
{
	if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
		/* If we have a no bounds ref, we need to drop it. */
		pmap_lock_ro(pmap->nested_pmap);
		pmap->nested_has_no_bounds_ref = false;
		boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
		vm_map_offset_t nested_region_true_start = pmap->nested_pmap->nested_region_true_start;
		vm_map_offset_t nested_region_true_end = pmap->nested_pmap->nested_region_true_end;
		pmap_unlock_ro(pmap->nested_pmap);

		if (nested_bounds_set) {
			pmap_trim_range(pmap, pmap->nested_region_addr, nested_region_true_start);
			pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_addr + pmap->nested_region_size));
		}
		/*
		 * Try trimming the nested pmap, in case we had the
		 * last reference.
		 */
		pmap_trim_subord(pmap->nested_pmap);
	}
}
/*
 *	pmap_trim_subord(subord)
 *
 *	subord = nested pmap that we are attempting to trim
 *
 *	Trims subord if possible.
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_subord(pmap_t subord)
{
	bool contract_subord = false;

	pmap_lock(subord);

	subord->nested_no_bounds_refcnt--;

	if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
		/* If this was the last no bounds reference, trim subord. */
		contract_subord = true;
	}

	pmap_unlock(subord);

	if (contract_subord) {
		pmap_trim_range(subord, subord->nested_region_addr, subord->nested_region_true_start);
		pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_addr + subord->nested_region_size);
	}
}

void
pmap_trim(
	pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	uint64_t size)
{
#if XNU_MONITOR
	pmap_trim_ppl(grand, subord, vstart, size);

	pmap_ledger_check_balance(grand);
	pmap_ledger_check_balance(subord);
#else
	pmap_trim_internal(grand, subord, vstart, size);
#endif
}
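
/*
 * Note (editorial, hedged): pmap_trim() only releases page tables once the
 * true bounds of the nested region are known.  A caller that nested with
 * unknown bounds typically calls, once the bounds are discovered:
 *
 *     pmap_trim(task_pmap, shared_pmap, used_start, used_size);
 *
 * where used_start/used_size describe the portion of the nested region that
 * is actually in use (placeholder names for the caller's values).
 */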
#if HAS_APPLE_PAC && XNU_MONITOR
MARK_AS_PMAP_TEXT static void *
pmap_sign_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
{
	void *res = NULL;
	boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);

	uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
	switch (key) {
	case ptrauth_key_asia:
		res = ptrauth_sign_unauthenticated(value, ptrauth_key_asia, discriminator);
		break;
	case ptrauth_key_asda:
		res = ptrauth_sign_unauthenticated(value, ptrauth_key_asda, discriminator);
		break;
	default:
		panic("attempt to sign user pointer without process independent key");
	}
	ml_disable_user_jop_key(jop_key, saved_jop_state);

	ml_set_interrupts_enabled(current_intr_state);

	return res;
}

void *
pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
{
	return pmap_sign_user_ptr_internal(value, key, discriminator, jop_key);
}

MARK_AS_PMAP_TEXT static void *
pmap_auth_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
{
	if ((key != ptrauth_key_asia) && (key != ptrauth_key_asda)) {
		panic("attempt to auth user pointer without process independent key");
	}

	void *res = NULL;
	boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);

	uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
	res = ml_auth_ptr_unchecked(value, key, discriminator);
	ml_disable_user_jop_key(jop_key, saved_jop_state);

	ml_set_interrupts_enabled(current_intr_state);

	return res;
}

void *
pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
{
	return pmap_auth_user_ptr_internal(value, key, discriminator, jop_key);
}
#endif /* HAS_APPLE_PAC && XNU_MONITOR */
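
/*
 * Illustrative round trip (sketch, not from the original source): signing and
 * then authenticating a user pointer with the same key, discriminator and JOP
 * key should recover the original pointer:
 *
 *     void *signed_ptr = pmap_sign_user_ptr(raw_ptr, ptrauth_key_asda, disc, jop_key);
 *     void *checked    = pmap_auth_user_ptr(signed_ptr, ptrauth_key_asda, disc, jop_key);
 *     // checked == raw_ptr when the signature is valid
 *
 * raw_ptr, disc and jop_key are placeholders supplied by the caller.
 */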
/*
 *	kern_return_t pmap_nest(grand, subord, vstart, size)
 *
 *	grand  = the pmap that we will nest subord into
 *	subord = the pmap that goes into the grand
 *	vstart = start of range in pmap to be inserted
 *	size   = Size of nest area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *
 */
MARK_AS_PMAP_TEXT static kern_return_t
pmap_nest_internal(
	pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	uint64_t size)
{
	kern_return_t kr = KERN_FAILURE;
	vm_map_offset_t vaddr;
	tt_entry_t     *stte_p;
	tt_entry_t     *gtte_p;
	unsigned int    i;
	unsigned int    num_tte;
	unsigned int    nested_region_asid_bitmap_size;
	unsigned int*   nested_region_asid_bitmap;
	int             expand_options = 0;
	bool            deref_subord = true;
	pmap_t __ptrauth_only subord_addr;

	addr64_t vend;
	if (__improbable(os_add_overflow(vstart, size, &vend))) {
		panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
	}

	VALIDATE_PMAP(grand);
	pmap_reference_internal(subord); // This call will also validate subord

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
	assert(pmap_get_pt_attr(subord) == pt_attr);

#if XNU_MONITOR
	expand_options |= PMAP_TT_ALLOCATE_NOWAIT;
#endif

	if (__improbable(((size | vstart) & (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL)) {
		panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx\n", grand, vstart, size);
	}

	if (__improbable(!subord->nested)) {
		panic("%s: subordinate pmap %p is not nestable", __func__, subord);
	}

	if (subord->nested_region_asid_bitmap == NULL) {
		nested_region_asid_bitmap_size = (unsigned int)(size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY);

#if XNU_MONITOR
		pmap_paddr_t pa = 0;

		if (__improbable((nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE)) {
			panic("%s: nested_region_asid_bitmap_size=%u will not fit in a page, "
			    "grand=%p, subord=%p, vstart=0x%llx, size=%llx",
			    __FUNCTION__, nested_region_asid_bitmap_size,
			    grand, subord, vstart, size);
		}

		kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);

		if (kr != KERN_SUCCESS) {
			goto nest_cleanup;
		}

		nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
#else
		nested_region_asid_bitmap = kheap_alloc(KHEAP_DATA_BUFFERS,
		    nested_region_asid_bitmap_size * sizeof(unsigned int),
		    Z_WAITOK | Z_ZERO);
#endif

		pmap_lock(subord);
		if (subord->nested_region_asid_bitmap == NULL) {
			subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
			subord->nested_region_addr = vstart;
			subord->nested_region_size = (mach_vm_offset_t) size;

			/*
			 * Ensure that the rest of the subord->nested_region_* fields are
			 * initialized and visible before setting the nested_region_asid_bitmap
			 * field (which is used as the flag to say that the rest are initialized).
			 */
			__builtin_arm_dmb(DMB_ISHST);
			subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
			nested_region_asid_bitmap = NULL;
		}
		pmap_unlock(subord);
		if (nested_region_asid_bitmap != NULL) {
#if XNU_MONITOR
			pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
#else
			kheap_free(KHEAP_DATA_BUFFERS, nested_region_asid_bitmap,
			    nested_region_asid_bitmap_size * sizeof(unsigned int));
#endif
		}
	}

	/*
	 * Ensure subsequent reads of the subord->nested_region_* fields don't get
	 * speculated before their initialization.
	 */
	__builtin_arm_dmb(DMB_ISHLD);

	if ((subord->nested_region_addr + subord->nested_region_size) < vend) {
		uint64_t new_size;
		unsigned int new_nested_region_asid_bitmap_size;
		unsigned int* new_nested_region_asid_bitmap;

		nested_region_asid_bitmap = NULL;
		nested_region_asid_bitmap_size = 0;
		new_size = vend - subord->nested_region_addr;

		/* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
		new_nested_region_asid_bitmap_size = (unsigned int)((new_size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY)) + 1;

#if XNU_MONITOR
		pmap_paddr_t pa = 0;

		if (__improbable((new_nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE)) {
			panic("%s: new_nested_region_asid_bitmap_size=%u will not fit in a page, "
			    "grand=%p, subord=%p, vstart=0x%llx, new_size=%llx",
			    __FUNCTION__, new_nested_region_asid_bitmap_size,
			    grand, subord, vstart, new_size);
		}

		kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);

		if (kr != KERN_SUCCESS) {
			goto nest_cleanup;
		}

		new_nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
#else
		new_nested_region_asid_bitmap = kheap_alloc(KHEAP_DATA_BUFFERS,
		    new_nested_region_asid_bitmap_size * sizeof(unsigned int),
		    Z_WAITOK | Z_ZERO);
#endif
		pmap_lock(subord);
		if (subord->nested_region_size < new_size) {
			bcopy(subord->nested_region_asid_bitmap,
			    new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
			nested_region_asid_bitmap_size = subord->nested_region_asid_bitmap_size;
			nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
			subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
			subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
			subord->nested_region_size = new_size;
			new_nested_region_asid_bitmap = NULL;
		}
		pmap_unlock(subord);
		if (nested_region_asid_bitmap != NULL) {
#if XNU_MONITOR
			pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
#else
			kheap_free(KHEAP_DATA_BUFFERS, nested_region_asid_bitmap,
			    nested_region_asid_bitmap_size * sizeof(unsigned int));
#endif
		}
		if (new_nested_region_asid_bitmap != NULL) {
#if XNU_MONITOR
			pmap_pages_free(kvtophys((vm_offset_t)new_nested_region_asid_bitmap), PAGE_SIZE);
#else
			kheap_free(KHEAP_DATA_BUFFERS, new_nested_region_asid_bitmap,
			    new_nested_region_asid_bitmap_size * sizeof(unsigned int));
#endif
		}
	}

	pmap_lock(subord);

#if __has_feature(ptrauth_calls)
	subord_addr = ptrauth_sign_unauthenticated(subord,
	    ptrauth_key_process_independent_data,
	    ptrauth_blend_discriminator(&grand->nested_pmap, ptrauth_string_discriminator("pmap.nested_pmap")));
#else
	subord_addr = subord;
#endif // __has_feature(ptrauth_calls)

	if (os_atomic_cmpxchg(&grand->nested_pmap, PMAP_NULL, subord_addr, relaxed)) {
		/*
		 * If this is grand's first nesting operation, keep the reference on subord.
		 * It will be released by pmap_destroy_internal() when grand is destroyed.
		 */
		deref_subord = false;

		if (!subord->nested_bounds_set) {
			/*
			 * We are nesting without the shared regions bounds
			 * being known.  We'll have to trim the pmap later.
			 */
			grand->nested_has_no_bounds_ref = true;
			subord->nested_no_bounds_refcnt++;
		}

		grand->nested_region_addr = vstart;
		grand->nested_region_size = (mach_vm_offset_t) size;
	} else {
		if (__improbable(grand->nested_pmap != subord)) {
			panic("pmap_nest() pmap %p has a nested pmap\n", grand);
		} else if (__improbable(grand->nested_region_addr > vstart)) {
			panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand);
		} else if ((grand->nested_region_addr + grand->nested_region_size) < vend) {
			grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_addr + size);
		}
	}

#if (__ARM_VMSA__ == 7)
	vaddr = (vm_map_offset_t) vstart;
	num_tte = size >> ARM_TT_L1_SHIFT;

	for (i = 0; i < num_tte; i++) {
		if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
			goto expand_next;
		}

		stte_p = pmap_tte(subord, vaddr);
		if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
			pmap_unlock(subord);
			kr = pmap_expand(subord, vaddr, expand_options, PMAP_TT_L2_LEVEL);

			if (kr != KERN_SUCCESS) {
				pmap_lock(grand);
				goto done;
			}

			pmap_lock(subord);
		}
		pmap_unlock(subord);
		pmap_lock(grand);
		stte_p = pmap_tte(grand, vaddr);
		if (stte_p == (tt_entry_t *)NULL) {
			pmap_unlock(grand);
			kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);

			if (kr != KERN_SUCCESS) {
				pmap_lock(grand);
				goto done;
			}
		} else {
			pmap_unlock(grand);
			kr = KERN_SUCCESS;
		}
		pmap_lock(subord);

expand_next:
		vaddr += ARM_TT_L1_SIZE;
	}

#else
	vaddr = (vm_map_offset_t) vstart;
	num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));

	for (i = 0; i < num_tte; i++) {
		if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
			goto expand_next;
		}

		stte_p = pmap_tte(subord, vaddr);
		if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
			pmap_unlock(subord);
			kr = pmap_expand(subord, vaddr, expand_options, pt_attr_leaf_level(pt_attr));

			if (kr != KERN_SUCCESS) {
				pmap_lock(grand);
				goto done;
			}

			pmap_lock(subord);
		}

expand_next:
		vaddr += pt_attr_twig_size(pt_attr);
	}
#endif
	pmap_unlock(subord);

	/*
	 * copy tte's from subord pmap into grand pmap
	 */
	pmap_lock(grand);
	vaddr = (vm_map_offset_t) vstart;

#if (__ARM_VMSA__ == 7)
	for (i = 0; i < num_tte; i++) {
		if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
			goto nest_next;
		}

		stte_p = pmap_tte(subord, vaddr);
		gtte_p = pmap_tte(grand, vaddr);
		*gtte_p = *stte_p;

nest_next:
		vaddr += ARM_TT_L1_SIZE;
	}
#else
	for (i = 0; i < num_tte; i++) {
		if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
			goto nest_next;
		}

		stte_p = pmap_tte(subord, vaddr);
		gtte_p = pmap_tte(grand, vaddr);
		if (gtte_p == PT_ENTRY_NULL) {
			pmap_unlock(grand);
			kr = pmap_expand(grand, vaddr, expand_options, pt_attr_twig_level(pt_attr));
			pmap_lock(grand);

			if (kr != KERN_SUCCESS) {
				goto done;
			}

			gtte_p = pmap_tt2e(grand, vaddr);
		}
		*gtte_p = *stte_p;

nest_next:
		vaddr += pt_attr_twig_size(pt_attr);
	}
#endif

	kr = KERN_SUCCESS;
done:

	stte_p = pmap_tte(grand, vstart);
	FLUSH_PTE_RANGE_STRONG(stte_p, stte_p + num_tte);
	PMAP_UPDATE_TLBS(grand, vstart, vend, false);

	pmap_unlock(grand);
#if XNU_MONITOR
nest_cleanup:
#endif
	if (deref_subord) {
		pmap_destroy_internal(subord);
	}

	return kr;
}
kern_return_t
pmap_nest(
	pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	uint64_t size)
{
	kern_return_t kr = KERN_FAILURE;

	PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
	    VM_KERNEL_ADDRHIDE(vstart));

#if XNU_MONITOR
	while ((kr = pmap_nest_ppl(grand, subord, vstart, size)) == KERN_RESOURCE_SHORTAGE) {
		pmap_alloc_page_for_ppl(0);
	}

	pmap_ledger_check_balance(grand);
	pmap_ledger_check_balance(subord);
#else
	kr = pmap_nest_internal(grand, subord, vstart, size);
#endif

	PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);

	return kr;
}
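
/*
 * Usage sketch (assumption, not from the original source): vstart and size
 * must both be aligned to the span covered by one leaf table
 * (pmap_nest_internal() panics otherwise), so a caller nesting the shared
 * region looks roughly like:
 *
 *     kr = pmap_nest(task_pmap, shared_region_pmap,
 *                    SHARED_REGION_BASE_ARM64, SHARED_REGION_SIZE_ARM64);
 *
 * The wrapper retries pmap_nest_ppl() on KERN_RESOURCE_SHORTAGE after feeding
 * the PPL another page, so callers only see the final return code.
 */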
/*
 *	kern_return_t pmap_unnest(grand, vaddr, size)
 *
 *	grand  = the pmap that will have the virtual range unnested
 *	vaddr  = start of range in pmap to be unnested
 *	size   = size of range in pmap to be unnested
 *
 */

kern_return_t
pmap_unnest(
	pmap_t grand,
	addr64_t vaddr,
	uint64_t size)
{
	return pmap_unnest_options(grand, vaddr, size, 0);
}

MARK_AS_PMAP_TEXT static kern_return_t
pmap_unnest_options_internal(
	pmap_t grand,
	addr64_t vaddr,
	uint64_t size,
	unsigned int option)
{
	vm_map_offset_t start;
	vm_map_offset_t addr;
	tt_entry_t     *tte_p;
	unsigned int    current_index;
	unsigned int    start_index;
	unsigned int    max_index;
	unsigned int    num_tte;
	unsigned int    i;
	addr64_t        vend;

	if (__improbable(os_add_overflow(vaddr, size, &vend))) {
		panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__, grand, vaddr, size);
	}

	VALIDATE_PMAP(grand);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);

	if (((size | vaddr) & pt_attr_twig_offmask(pt_attr)) != 0x0ULL) {
		panic("pmap_unnest(): unaligned request");
	}

	if ((option & PMAP_UNNEST_CLEAN) == 0) {
		if (grand->nested_pmap == NULL) {
			panic("%s: %p has no nested pmap", __func__, grand);
		}

		if ((vaddr < grand->nested_region_addr) || (vend > (grand->nested_region_addr + grand->nested_region_size))) {
			panic("%s: %p: unnest request to not-fully-nested region [%p, %p)", __func__, grand, (void*)vaddr, (void*)vend);
		}

		pmap_lock(grand->nested_pmap);

		start = vaddr;
		start_index = (unsigned int)((vaddr - grand->nested_region_addr) >> pt_attr_twig_shift(pt_attr));
		max_index = (unsigned int)(start_index + (size >> pt_attr_twig_shift(pt_attr)));
		num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));

		for (current_index = start_index, addr = start; current_index < max_index; current_index++, addr += pt_attr_twig_size(pt_attr)) {
			pt_entry_t  *bpte, *epte, *cpte;

			if (addr < grand->nested_pmap->nested_region_true_start) {
				/* We haven't reached the interesting range. */
				continue;
			}

			if (addr >= grand->nested_pmap->nested_region_true_end) {
				/* We're done with the interesting range. */
				break;
			}

			bpte = pmap_pte(grand->nested_pmap, addr);
			epte = bpte + (pt_attr_leaf_index_mask(pt_attr) >> pt_attr_leaf_shift(pt_attr));

			if (!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap)) {
				setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);

				for (cpte = bpte; cpte <= epte; cpte++) {
					pmap_paddr_t    pa;
					int             pai = 0;
					boolean_t       managed = FALSE;
					pt_entry_t      spte;

					if ((*cpte != ARM_PTE_TYPE_FAULT)
					    && (!ARM_PTE_IS_COMPRESSED(*cpte, cpte))) {
						spte = *cpte;
						while (!managed) {
							pa = pte_to_pa(spte);
							if (!pa_valid(pa)) {
								break;
							}
							pai = (int)pa_index(pa);
							LOCK_PVH(pai);
							spte = *cpte;
							pa = pte_to_pa(spte);
							if (pai == (int)pa_index(pa)) {
								managed = TRUE;
								break; // Leave the PVH locked as we'll unlock it after we update the PTE
							}
							UNLOCK_PVH(pai);
						}

						if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
							WRITE_PTE_FAST(cpte, (spte | ARM_PTE_NG));
						}

						if (managed) {
							ASSERT_PVH_LOCKED(pai);
							UNLOCK_PVH(pai);
						}
					}
				}
			}

			FLUSH_PTE_RANGE_STRONG(bpte, epte);
		}

		flush_mmu_tlb_region_asid_async(vaddr, (unsigned)size, grand->nested_pmap);
		sync_tlb_flush();

		pmap_unlock(grand->nested_pmap);
	}

	pmap_lock(grand);

	/*
	 * invalidate all pdes for segment at vaddr in pmap grand
	 */
	start = vaddr;
	addr = vaddr;

	num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));

	for (i = 0; i < num_tte; i++, addr += pt_attr_twig_size(pt_attr)) {
		if (addr < grand->nested_pmap->nested_region_true_start) {
			/* We haven't reached the interesting range. */
			continue;
		}

		if (addr >= grand->nested_pmap->nested_region_true_end) {
			/* We're done with the interesting range. */
			break;
		}

		tte_p = pmap_tte(grand, addr);
		*tte_p = ARM_TTE_TYPE_FAULT;
	}

	tte_p = pmap_tte(grand, start);
	FLUSH_PTE_RANGE_STRONG(tte_p, tte_p + num_tte);
	PMAP_UPDATE_TLBS(grand, start, vend, false);

	pmap_unlock(grand);

	return KERN_SUCCESS;
}
kern_return_t
pmap_unnest_options(
	pmap_t grand,
	addr64_t vaddr,
	uint64_t size,
	unsigned int option)
{
	kern_return_t kr = KERN_FAILURE;

	PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));

#if XNU_MONITOR
	kr = pmap_unnest_options_ppl(grand, vaddr, size, option);
#else
	kr = pmap_unnest_options_internal(grand, vaddr, size, option);
#endif

	PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);

	return kr;
}
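
/*
 * Usage sketch (assumption, not from the original source): unnesting the same
 * range that was nested earlier, with no special options:
 *
 *     kr = pmap_unnest(task_pmap, SHARED_REGION_BASE_ARM64,
 *                      SHARED_REGION_SIZE_ARM64);
 *
 * which is equivalent to pmap_unnest_options(..., 0).  The range must be
 * twig-aligned or pmap_unnest_options_internal() panics.
 */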
boolean_t
pmap_adjust_unnest_parameters(
	__unused pmap_t p,
	__unused vm_map_offset_t *s,
	__unused vm_map_offset_t *e)
{
	return TRUE; /* to get to log_unnest_badness()... */
}

/*
 * disable no-execute capability on
 * the specified pmap
 */
#if DEVELOPMENT || DEBUG
void
pmap_disable_NX(
	pmap_t pmap)
{
	pmap->nx_enabled = FALSE;
}
#else
void
pmap_disable_NX(
	__unused pmap_t pmap)
{
}
#endif
/*
 * flush a range of hardware TLB entries.
 * NOTE: assumes the smallest TLB entry in use will be for
 * an ARM small page (4K).
 */

#define ARM_FULL_TLB_FLUSH_THRESHOLD 64

#if __ARM_RANGE_TLBI__
#define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
#define ARM64_FULL_TLB_FLUSH_THRESHOLD  ARM64_TLB_RANGE_PAGES
#else
#define ARM64_FULL_TLB_FLUSH_THRESHOLD  256
#endif // __ARM_RANGE_TLBI__

MARK_AS_PMAP_TEXT static void
flush_mmu_tlb_region_asid_async(
	vm_offset_t va,
	size_t length,
	pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	vm_offset_t     end = va + length;
	uint32_t        asid;

	asid = pmap->hw_asid;

	if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
		boolean_t       flush_all = FALSE;

		if ((asid == 0) || (pmap->nested == TRUE)) {
			flush_all = TRUE;
		}
		if (flush_all) {
			flush_mmu_tlb_async();
		} else {
			flush_mmu_tlb_asid_async(asid);
		}

		return;
	}
	if (pmap->nested == TRUE) {
#if !__ARM_MP_EXT__
		flush_mmu_tlb_async();
#else
		va = arm_trunc_page(va);
		while (va < end) {
			flush_mmu_tlb_mva_entries_async(va);
			va += ARM_SMALL_PAGE_SIZE;
		}
#endif
		return;
	}
	va = arm_trunc_page(va) | (asid & 0xff);
	flush_mmu_tlb_entries_async(va, end);

#else
	unsigned long pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
	const uint64_t pmap_page_size = 1ULL << pmap_page_shift;
	ppnum_t npages = (ppnum_t)(length >> pmap_page_shift);
	uint32_t        asid;

	asid = pmap->hw_asid;

	if (npages > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
		boolean_t       flush_all = FALSE;

		if ((asid == 0) || (pmap->nested == TRUE)) {
			flush_all = TRUE;
		}
		if (flush_all) {
			flush_mmu_tlb_async();
		} else {
			flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
		}
		return;
	}
#if __ARM_RANGE_TLBI__
	if (npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
		va = generate_rtlbi_param(npages, asid, va, pmap_page_shift);
		if (pmap->nested == TRUE) {
			flush_mmu_tlb_allrange_async(va);
		} else {
			flush_mmu_tlb_range_async(va);
		}
		return;
	}
#endif
	vm_offset_t end = tlbi_asid(asid) | tlbi_addr(va + length);
	va = tlbi_asid(asid) | tlbi_addr(va);

	if (pmap->nested == TRUE) {
		flush_mmu_tlb_allentries_async(va, end, pmap_page_size);
	} else {
		flush_mmu_tlb_entries_async(va, end, pmap_page_size);
	}

#endif
}

MARK_AS_PMAP_TEXT static void
flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
	flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
	flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
	flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
#else
	flush_mmu_tlb_entry_async(tlbi_addr(va & ~pt_attr_twig_offmask(pmap_get_pt_attr(pmap))) | tlbi_asid(pmap->hw_asid));
#endif
}

MARK_AS_PMAP_TEXT static void
flush_mmu_tlb_full_asid_async(pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	flush_mmu_tlb_asid_async(pmap->hw_asid);
#else /* (__ARM_VMSA__ == 7) */
	flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
#endif /* (__ARM_VMSA__ == 7) */
}
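
/*
 * Worked example (editorial): with 16K pages and no range-TLBI support,
 * ARM64_FULL_TLB_FLUSH_THRESHOLD is 256 pages, so any flush covering more
 * than 256 * 16KB = 4MB falls back to a full ASID (or full TLB) invalidate
 * instead of per-entry invalidates.  With __ARM_RANGE_TLBI__, any range
 * larger than one page is issued as a single ranged TLBI, up to
 * ARM64_TLB_RANGE_PAGES pages.
 */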
void
flush_mmu_tlb_region(
	vm_offset_t va,
	unsigned length)
{
	flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
	sync_tlb_flush();
}

static pmap_io_range_t*
pmap_find_io_attr(pmap_paddr_t paddr)
{
	pmap_io_range_t find_range = {.addr = paddr & ~PAGE_MASK, .len = PAGE_SIZE};
	unsigned int begin = 0, end = num_io_rgns - 1;
	if ((num_io_rgns == 0) || (paddr < io_attr_table[begin].addr) ||
	    (paddr >= (io_attr_table[end].addr + io_attr_table[end].len))) {
		return NULL;
	}

	for (;;) {
		unsigned int middle = (begin + end) / 2;
		int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
		if (cmp == 0) {
			return &io_attr_table[middle];
		} else if (begin == end) {
			break;
		} else if (cmp > 0) {
			begin = middle + 1;
		} else {
			end = middle;
		}
	}

	return NULL;
}
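
/*
 * Note (editorial): io_attr_table is sorted by address, so pmap_find_io_attr()
 * is a plain binary search; a miss returns NULL and the caller treats the page
 * as generic I/O.  For example, pmap_cache_attributes() below does:
 *
 *     pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
 *     return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
 */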
unsigned int
pmap_cache_attributes(
	ppnum_t pn)
{
	pmap_paddr_t    paddr;
	int             pai;
	unsigned int    result;
	pp_attr_t       pp_attr_current;

	paddr = ptoa(pn);

	assert(vm_last_phys > vm_first_phys); // Check that pmap has been bootstrapped

	if (!pa_valid(paddr)) {
		pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
		return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
	}

	result = VM_WIMG_DEFAULT;

	pai = (int)pa_index(paddr);

	pp_attr_current = pp_attr_table[pai];
	if (pp_attr_current & PP_ATTR_WIMG_MASK) {
		result = pp_attr_current & PP_ATTR_WIMG_MASK;
	}
	return result;
}
MARK_AS_PMAP_TEXT static void
pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
{
	if ((wimg_bits_prev != wimg_bits_new)
	    && ((wimg_bits_prev == VM_WIMG_COPYBACK)
	    || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
	    && (wimg_bits_new != VM_WIMG_COPYBACK))
	    || ((wimg_bits_prev == VM_WIMG_WTHRU)
	    && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
		pmap_sync_page_attributes_phys(pn);
	}

	if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
		pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
	}
}
MARK_AS_PMAP_TEXT static __unused void
pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
{
	pmap_paddr_t paddr = ptoa(pn);
	int pai = (int)pa_index(paddr);

	if (__improbable(!pa_valid(paddr))) {
		panic("%s called on non-managed page 0x%08x", __func__, pn);
	}

	LOCK_PVH(pai);

#if XNU_MONITOR
	if (__improbable(pa_test_monitor(paddr))) {
		panic("%s invoked on PPL page 0x%08x", __func__, pn);
	}
#endif

	pmap_update_cache_attributes_locked(pn, new_cacheattr);

	UNLOCK_PVH(pai);

	pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
}

void *
pmap_map_compressor_page(ppnum_t pn)
{
#if __ARM_PTE_PHYSMAP__
	unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
	if (cacheattr != VM_WIMG_DEFAULT) {
#if XNU_MONITOR
		pmap_update_compressor_page_ppl(pn, cacheattr, VM_WIMG_DEFAULT);
#else
		pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
#endif
	}
#endif
	return (void*)phystokv(ptoa(pn));
}

void
pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
{
#if __ARM_PTE_PHYSMAP__
	unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
	if (cacheattr != VM_WIMG_DEFAULT) {
#if XNU_MONITOR
		pmap_update_compressor_page_ppl(pn, VM_WIMG_DEFAULT, cacheattr);
#else
		pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
#endif
	}
#endif
}
static boolean_t
12380 pmap_batch_set_cache_attributes_internal(
12382 unsigned int cacheattr
,
12383 unsigned int page_cnt
,
12384 unsigned int page_index
,
12388 pmap_paddr_t paddr
;
12390 pp_attr_t pp_attr_current
;
12391 pp_attr_t pp_attr_template
;
12392 unsigned int wimg_bits_prev
, wimg_bits_new
;
12394 if (cacheattr
& VM_WIMG_USE_DEFAULT
) {
12395 cacheattr
= VM_WIMG_DEFAULT
;
12398 if ((doit
== FALSE
) && (*res
== 0)) {
12399 pmap_pin_kernel_pages((vm_offset_t
)res
, sizeof(*res
));
12401 pmap_unpin_kernel_pages((vm_offset_t
)res
, sizeof(*res
));
12402 if (platform_cache_batch_wimg(cacheattr
& (VM_WIMG_MASK
), page_cnt
<< PAGE_SHIFT
) == FALSE
) {
12409 if (!pa_valid(paddr
)) {
12410 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn
);
12413 pai
= (int)pa_index(paddr
);
12418 if (pa_test_monitor(paddr
)) {
12419 panic("%s invoked on PPL page 0x%llx", __func__
, (uint64_t)paddr
);
12425 pp_attr_current
= pp_attr_table
[pai
];
12426 wimg_bits_prev
= VM_WIMG_DEFAULT
;
12427 if (pp_attr_current
& PP_ATTR_WIMG_MASK
) {
12428 wimg_bits_prev
= pp_attr_current
& PP_ATTR_WIMG_MASK
;
12431 pp_attr_template
= (pp_attr_current
& ~PP_ATTR_WIMG_MASK
) | PP_ATTR_WIMG(cacheattr
& (VM_WIMG_MASK
));
12437 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
12438 * to avoid losing simultaneous updates to other bits like refmod. */
12439 } while (!OSCompareAndSwap16(pp_attr_current
, pp_attr_template
, &pp_attr_table
[pai
]));
12441 wimg_bits_new
= VM_WIMG_DEFAULT
;
12442 if (pp_attr_template
& PP_ATTR_WIMG_MASK
) {
12443 wimg_bits_new
= pp_attr_template
& PP_ATTR_WIMG_MASK
;
12447 if (wimg_bits_new
!= wimg_bits_prev
) {
12448 pmap_update_cache_attributes_locked(pn
, cacheattr
);
12451 if ((wimg_bits_new
== VM_WIMG_RT
) && (wimg_bits_prev
!= VM_WIMG_RT
)) {
12452 pmap_force_dcache_clean(phystokv(paddr
), PAGE_SIZE
);
12455 if (wimg_bits_new
== VM_WIMG_COPYBACK
) {
12458 if (wimg_bits_prev
== wimg_bits_new
) {
12459 pmap_pin_kernel_pages((vm_offset_t
)res
, sizeof(*res
));
12461 pmap_unpin_kernel_pages((vm_offset_t
)res
, sizeof(*res
));
12462 if (!platform_cache_batch_wimg(wimg_bits_new
, (*res
) << PAGE_SHIFT
)) {
12469 if (page_cnt
== (page_index
+ 1)) {
12470 wimg_bits_prev
= VM_WIMG_COPYBACK
;
12471 if (((wimg_bits_prev
!= wimg_bits_new
))
12472 && ((wimg_bits_prev
== VM_WIMG_COPYBACK
)
12473 || ((wimg_bits_prev
== VM_WIMG_INNERWBACK
)
12474 && (wimg_bits_new
!= VM_WIMG_COPYBACK
))
12475 || ((wimg_bits_prev
== VM_WIMG_WTHRU
)
12476 && ((wimg_bits_new
!= VM_WIMG_COPYBACK
) || (wimg_bits_new
!= VM_WIMG_INNERWBACK
))))) {
12477 platform_cache_flush_wimg(wimg_bits_new
);
12485 pmap_batch_set_cache_attributes(
12487 unsigned int cacheattr
,
12488 unsigned int page_cnt
,
12489 unsigned int page_index
,
12494 return pmap_batch_set_cache_attributes_ppl(pn
, cacheattr
, page_cnt
, page_index
, doit
, res
);
12496 return pmap_batch_set_cache_attributes_internal(pn
, cacheattr
, page_cnt
, page_index
, doit
, res
);
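
/*
 * Usage sketch (assumption, not from the original source): the batch interface
 * is intended to be driven in two passes over the same set of pages, first
 * with doit == FALSE to size the work and decide whether a ranged cache
 * operation is worthwhile, then with doit == TRUE to apply the new WIMG bits:
 *
 *     unsigned int res = 0;
 *     for (i = 0; i < page_cnt; i++) {
 *         pmap_batch_set_cache_attributes(pn[i], attr, page_cnt, i, FALSE, &res);
 *     }
 *     for (i = 0; i < page_cnt; i++) {
 *         pmap_batch_set_cache_attributes(pn[i], attr, page_cnt, i, TRUE, &res);
 *     }
 *
 * pn[] and attr are placeholders for the caller's page list and target WIMG.
 */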
MARK_AS_PMAP_TEXT static void
pmap_set_cache_attributes_priv(
	ppnum_t pn,
	unsigned int cacheattr,
	boolean_t external __unused)
{
	pmap_paddr_t    paddr;
	int             pai;
	pp_attr_t       pp_attr_current;
	pp_attr_t       pp_attr_template;
	unsigned int    wimg_bits_prev, wimg_bits_new;

	paddr = ptoa(pn);

	if (!pa_valid(paddr)) {
		return;                         /* Not a managed page. */
	}

	if (cacheattr & VM_WIMG_USE_DEFAULT) {
		cacheattr = VM_WIMG_DEFAULT;
	}

	pai = (int)pa_index(paddr);

	LOCK_PVH(pai);

#if XNU_MONITOR
	if (external && pa_test_monitor(paddr)) {
		panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
	} else if (!external && !pa_test_monitor(paddr)) {
		panic("%s invoked on non-PPL page 0x%llx", __func__, (uint64_t)paddr);
	}
#endif

	do {
		pp_attr_current = pp_attr_table[pai];
		wimg_bits_prev = VM_WIMG_DEFAULT;
		if (pp_attr_current & PP_ATTR_WIMG_MASK) {
			wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
		}

		pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));

		/* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
		 * to avoid losing simultaneous updates to other bits like refmod. */
	} while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));

	wimg_bits_new = VM_WIMG_DEFAULT;
	if (pp_attr_template & PP_ATTR_WIMG_MASK) {
		wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
	}

	if (wimg_bits_new != wimg_bits_prev) {
		pmap_update_cache_attributes_locked(pn, cacheattr);
	}

	UNLOCK_PVH(pai);

	pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
}

MARK_AS_PMAP_TEXT static void
pmap_set_cache_attributes_internal(
	ppnum_t pn,
	unsigned int cacheattr)
{
	pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
}

void
pmap_set_cache_attributes(
	ppnum_t pn,
	unsigned int cacheattr)
{
#if XNU_MONITOR
	pmap_set_cache_attributes_ppl(pn, cacheattr);
#else
	pmap_set_cache_attributes_internal(pn, cacheattr);
#endif
}
MARK_AS_PMAP_TEXT void
pmap_update_cache_attributes_locked(
	ppnum_t ppnum,
	unsigned attributes)
{
	pmap_paddr_t    phys = ptoa(ppnum);
	pv_entry_t      *pve_p;
	pt_entry_t      *pte_p;
	pv_entry_t      **pv_h;
	pt_entry_t      tmplate;
	unsigned int    pai;
	boolean_t       tlb_flush_needed = FALSE;

	PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);

	if (pmap_panic_dev_wimg_on_managed) {
		switch (attributes & VM_WIMG_MASK) {
		case VM_WIMG_IO:                        // nGnRnE
		case VM_WIMG_POSTED:                    // nGnRE
			/* supported on DRAM, but slow, so we disallow */
		case VM_WIMG_POSTED_REORDERED:          // nGRE
		case VM_WIMG_POSTED_COMBINED_REORDERED: // GRE
			/* unsupported on DRAM */
			panic("%s: trying to use unsupported VM_WIMG type for managed page, VM_WIMG=%x, ppnum=%#x",
			    __FUNCTION__, attributes & VM_WIMG_MASK, ppnum);
			break;
		default:
			/* not device type memory, all good */
			break;
		}
	}

#if __ARM_PTE_PHYSMAP__
	vm_offset_t kva = phystokv(phys);
	pte_p = pmap_pte(kernel_pmap, kva);

	tmplate = *pte_p;
	tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
#if XNU_MONITOR
	tmplate |= (wimg_to_pte(attributes) & ~ARM_PTE_XPRR_MASK);
#else
	tmplate |= wimg_to_pte(attributes);
#endif
#if (__ARM_VMSA__ > 7)
	if (tmplate & ARM_PTE_HINT_MASK) {
		panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
		    __FUNCTION__, pte_p, (void *)kva, tmplate);
	}
#endif
	WRITE_PTE_STRONG(pte_p, tmplate);
	flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
	tlb_flush_needed = TRUE;
#endif

	pai = (unsigned int)pa_index(phys);

	pv_h = pai_to_pvh(pai);

	pte_p = PT_ENTRY_NULL;
	pve_p = PV_ENTRY_NULL;
	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		pte_p = pvh_ptep(pv_h);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_p = pvh_list(pv_h);
		pte_p = PT_ENTRY_NULL;
	}

	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		vm_map_address_t va;
		pmap_t          pmap;

		if (pve_p != PV_ENTRY_NULL) {
			pte_p = pve_get_ptep(pve_p);
		}
#ifdef PVH_FLAG_IOMMU
		if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
			goto cache_skip_pve;
		}
#endif
		pmap = ptep_get_pmap(pte_p);
		va = ptep_get_va(pte_p);

		tmplate = *pte_p;
		tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
		tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes);

		WRITE_PTE_STRONG(pte_p, tmplate);
		pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
		    pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap);
		tlb_flush_needed = TRUE;

#ifdef PVH_FLAG_IOMMU
cache_skip_pve:
#endif
		pte_p = PT_ENTRY_NULL;
		if (pve_p != PV_ENTRY_NULL) {
			pve_p = PVE_NEXT_PTR(pve_next(pve_p));
		}
	}

	if (tlb_flush_needed) {
		/* For targets that distinguish between mild and strong DSB, mild DSB
		 * will not drain the prefetcher.  This can lead to prefetch-driven
		 * cache fills that defeat the uncacheable requirement of the RT memory type.
		 * In those cases, strong DSB must instead be employed to drain the prefetcher. */
		pmap_sync_tlb((attributes & VM_WIMG_MASK) == VM_WIMG_RT);
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);
}
#if (__ARM_VMSA__ == 7)
void
pmap_create_sharedpages(vm_map_address_t *kernel_data_addr, vm_map_address_t *kernel_text_addr,
    vm_map_address_t *user_commpage_addr)
{
	pmap_paddr_t    pa;
	kern_return_t   kr;

	assert(kernel_data_addr != NULL);
	assert(kernel_text_addr != NULL);
	assert(user_commpage_addr != NULL);

	(void) pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, 0);

	kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);

	*kernel_data_addr = phystokv(pa);
	// We don't have PFZ for 32 bit arm, always NULL
	*kernel_text_addr = 0;
	*user_commpage_addr = 0;
}

#else /* __ARM_VMSA__ == 7 */
static void
pmap_update_tt3e(
	pmap_t pmap,
	vm_address_t address,
	tt_entry_t template)
{
	tt_entry_t *ptep, pte;

	ptep = pmap_tt3e(pmap, address);
	if (ptep == NULL) {
		panic("%s: no ptep?\n", __FUNCTION__);
	}

	pte = *ptep;
	pte = tte_to_pa(pte) | template;
	WRITE_PTE_STRONG(ptep, pte);
}

/* Note absence of non-global bit */
#define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
	        | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
	        | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
	        | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)

/* Note absence of non-global bit and no-execute bit. */
#define PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
	        | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
	        | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_PNX \
	        | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
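
/*
 * Note (editorial): these templates are applied on top of an existing,
 * already-entered mapping via pmap_update_tt3e(), which keeps the PTE's output
 * address and replaces everything else, e.g.:
 *
 *     pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS,
 *                      PMAP_COMM_PAGE_PTE_TEMPLATE);
 *
 * so the data commpage stays global, read-only and non-executable, while the
 * text commpage template additionally leaves user execute permission intact.
 */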
void
pmap_create_sharedpages(vm_map_address_t *kernel_data_addr, vm_map_address_t *kernel_text_addr,
    vm_map_address_t *user_text_addr)
{
	kern_return_t kr;
	pmap_paddr_t data_pa = 0; // data address
	pmap_paddr_t text_pa = 0; // text address

	*kernel_data_addr = 0;
	*kernel_text_addr = 0;
	*user_text_addr = 0;

#if XNU_MONITOR
	data_pa = pmap_alloc_page_for_kern(0);
	assert(data_pa);
	memset((char *) phystokv(data_pa), 0, PAGE_SIZE);
#if CONFIG_ARM_PFZ
	text_pa = pmap_alloc_page_for_kern(0);
	assert(text_pa);
	memset((char *) phystokv(text_pa), 0, PAGE_SIZE);
#endif

#else /* XNU_MONITOR */
	(void) pmap_pages_alloc_zeroed(&data_pa, PAGE_SIZE, 0);
#if CONFIG_ARM_PFZ
	(void) pmap_pages_alloc_zeroed(&text_pa, PAGE_SIZE, 0);
#endif

#endif /* XNU_MONITOR */

#ifdef CONFIG_XNUPOST
	/*
	 * The kernel pmap maintains a user accessible mapping of the commpage
	 * to test PAN.
	 */
	kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(data_pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);

	/*
	 * This mapping should not be global (as we only expect to reference it
	 * during testing).
	 */
	pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);

#if KASAN
	kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
#endif
#endif /* CONFIG_XNUPOST */

	/*
	 * In order to avoid burning extra pages on mapping the shared page, we
	 * create a dedicated pmap for the shared page.  We forcibly nest the
	 * translation tables from this pmap into other pmaps.  The level we
	 * will nest at depends on the MMU configuration (page size, TTBR range,
	 * etc).  Typically, this is at L1 for 4K tasks and L2 for 16K tasks.
	 *
	 * Note that this is NOT "the nested pmap" (which is used to nest the
	 * shared cache).
	 *
	 * Note that we update parameters of the entry for our unique needs (NG
	 * entry, etc.).
	 */
	sharedpage_pmap_default = pmap_create_options(NULL, 0x0, 0);
	assert(sharedpage_pmap_default != NULL);

	/* The user 64-bit mapping... */
	kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

#if CONFIG_ARM_PFZ
	/* User mapping of comm page text section for 64 bit mapping only
	 *
	 * We don't insert it into the 32 bit mapping because we don't want 32 bit
	 * user processes to get this page mapped in, they should never call into
	 * this page.
	 *
	 * The data comm page is in a pre-reserved L3 VA range and the text commpage
	 * is slid in the same L3 as the data commpage.  It is either outside the
	 * max of user VA or is pre-reserved in the vm_map_exec(). This means that
	 * it is reserved and unavailable to mach VM for future mappings.
	 */
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(sharedpage_pmap_default);
	int num_ptes = pt_attr_leaf_size(pt_attr) >> PTE_SHIFT;

	vm_map_address_t commpage_text_va = 0;

	do {
		int text_leaf_index = random() % num_ptes;

		// Generate a VA for the commpage text with the same root and twig index as data
		// comm page, but with new leaf index we've just generated.
		commpage_text_va = (_COMM_PAGE64_BASE_ADDRESS & ~pt_attr_leaf_index_mask(pt_attr));
		commpage_text_va |= (text_leaf_index << pt_attr_leaf_shift(pt_attr));
	} while (commpage_text_va == _COMM_PAGE64_BASE_ADDRESS); // Try again if we collide (should be unlikely)

	// Assert that this is empty
	__assert_only pt_entry_t *ptep = pmap_pte(sharedpage_pmap_default, commpage_text_va);
	assert(ptep != PT_ENTRY_NULL);
	assert(*ptep == ARM_TTE_EMPTY);

	// At this point, we've found the address we want to insert our comm page at
	kr = pmap_enter_addr(sharedpage_pmap_default, commpage_text_va, text_pa, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	// Mark it as global page R/X so that it doesn't get thrown out on tlb flush
	pmap_update_tt3e(sharedpage_pmap_default, commpage_text_va, PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE);

	*user_text_addr = commpage_text_va;
#endif

	/* ...and the user 32-bit mapping. */
	kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE32_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

#if __ARM_MIXED_PAGE_SIZE__
	/*
	 * To handle 4K tasks a new view/pmap of the shared page is needed. These are a
	 * new set of page tables that point to the exact same 16K shared page as
	 * before. Only the first 4K of the 16K shared page is mapped since that's
	 * the only part that contains relevant data.
	 */
	sharedpage_pmap_4k = pmap_create_options(NULL, 0x0, PMAP_CREATE_FORCE_4K_PAGES);
	assert(sharedpage_pmap_4k != NULL);

	/* The user 64-bit mapping... */
	kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE64_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

	/* ...and the user 32-bit mapping. */
	kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE32_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

#endif

	/* For manipulation in kernel, go straight to physical page */
	*kernel_data_addr = phystokv(data_pa);
	*kernel_text_addr = (text_pa) ? phystokv(text_pa) : 0;
}
/*
 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
 * with user controlled TTEs for regions that aren't explicitly reserved by the
 * VM (e.g., _COMM_PAGE64_NESTING_START/_COMM_PAGE64_BASE_ADDRESS).
 */
#if (ARM_PGSHIFT == 14)
static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
#elif (ARM_PGSHIFT == 12)
static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
#else
#error Nested shared page mapping is unsupported on this config
#endif
MARK_AS_PMAP_TEXT static kern_return_t
pmap_insert_sharedpage_internal(
	pmap_t pmap)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_offset_t sharedpage_vaddr;
	pt_entry_t *ttep, *src_ttep;
	int options = 0;
	pmap_t sharedpage_pmap = sharedpage_pmap_default;

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	const unsigned int sharedpage_level = pt_attr_sharedpage_level(pt_attr);

#if __ARM_MIXED_PAGE_SIZE__
#if !__ARM_16K_PG__
	/* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
	#error "pmap_insert_sharedpage_internal requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
#endif /* !__ARM_16K_PG__ */

	/* Choose the correct shared page pmap to use. */
	const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
	if (pmap_page_size == 16384) {
		sharedpage_pmap = sharedpage_pmap_default;
	} else if (pmap_page_size == 4096) {
		sharedpage_pmap = sharedpage_pmap_4k;
	} else {
		panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size);
	}
#endif /* __ARM_MIXED_PAGE_SIZE__ */

	VALIDATE_PMAP(pmap);
#if XNU_MONITOR
	options |= PMAP_OPTIONS_NOWAIT;
#endif /* XNU_MONITOR */

#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
#endif

	if (pmap_is_64bit(pmap)) {
		sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
	} else {
		sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
	}

	pmap_lock(pmap);

	/*
	 * For 4KB pages, we either "nest" at the level one page table (1GB) or level
	 * two (2MB) depending on the address space layout. For 16KB pages, each level
	 * one entry is 64GB, so we must go to the second level entry (32MB) in order
	 * to "nest".
	 *
	 * Note: This is not "nesting" in the shared cache sense. This definition of
	 * nesting just means inserting pointers to pre-allocated tables inside of
	 * the passed in pmap to allow us to share page tables (which map the shared
	 * page) for every task. This saves at least one page of memory per process
	 * compared to creating new page tables in every process for mapping the
	 * shared page.
	 */

	/*
	 * Allocate the twig page tables if needed, and slam a pointer to the shared
	 * page's tables into place.
	 */
	while ((ttep = pmap_ttne(pmap, sharedpage_level, sharedpage_vaddr)) == TT_ENTRY_NULL) {
		pmap_unlock(pmap);

		kr = pmap_expand(pmap, sharedpage_vaddr, options, sharedpage_level);

		if (kr != KERN_SUCCESS) {
#if XNU_MONITOR
			if (kr == KERN_RESOURCE_SHORTAGE) {
				return kr;
			} else
#endif
			{
				panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
			}
		}

		pmap_lock(pmap);
	}

	if (*ttep != ARM_PTE_EMPTY) {
		panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
	}

	src_ttep = pmap_ttne(sharedpage_pmap, sharedpage_level, sharedpage_vaddr);

	*ttep = *src_ttep;
	FLUSH_PTE_STRONG(ttep);

	flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, pmap);
	sync_tlb_flush();

	pmap_unlock(pmap);

	return kr;
}
static void
pmap_unmap_sharedpage(
	pmap_t pmap)
{
	pt_entry_t *ttep;
	vm_offset_t sharedpage_vaddr;
	pmap_t sharedpage_pmap = sharedpage_pmap_default;

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	const unsigned int sharedpage_level = pt_attr_sharedpage_level(pt_attr);

#if __ARM_MIXED_PAGE_SIZE__
#if !__ARM_16K_PG__
	/* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
	#error "pmap_unmap_sharedpage requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
#endif /* !__ARM_16K_PG__ */

	/* Choose the correct shared page pmap to use. */
	const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
	if (pmap_page_size == 16384) {
		sharedpage_pmap = sharedpage_pmap_default;
	} else if (pmap_page_size == 4096) {
		sharedpage_pmap = sharedpage_pmap_4k;
	} else {
		panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size);
	}
#endif /* __ARM_MIXED_PAGE_SIZE__ */

#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
#endif

	if (pmap_is_64bit(pmap)) {
		sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
	} else {
		sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
	}

	ttep = pmap_ttne(pmap, sharedpage_level, sharedpage_vaddr);

	if (ttep == NULL) {
		return;
	}

	/* It had better be mapped to the shared page. */
	if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_ttne(sharedpage_pmap, sharedpage_level, sharedpage_vaddr)) {
		panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
	}

	*ttep = ARM_TTE_EMPTY;
	FLUSH_PTE_STRONG(ttep);

	flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, pmap);
	sync_tlb_flush();
}
void
pmap_insert_sharedpage(
	pmap_t pmap)
{
#if XNU_MONITOR
	kern_return_t kr = KERN_FAILURE;

	while ((kr = pmap_insert_sharedpage_ppl(pmap)) == KERN_RESOURCE_SHORTAGE) {
		pmap_alloc_page_for_ppl(0);
	}

	pmap_ledger_check_balance(pmap);

	if (kr != KERN_SUCCESS) {
		panic("%s: failed to insert the shared page, kr=%d, "
		    "pmap=%p",
		    __FUNCTION__, kr,
		    pmap);
	}
#else
	pmap_insert_sharedpage_internal(pmap);
#endif
}
->is_64bit
;
13096 pmap_t pmap __unused
)
13103 /* ARMTODO -- an implementation that accounts for
13104 * holes in the physical map, if any.
13110 return pa_valid(ptoa(pn
));
13114 pmap_bootloader_page(
13117 pmap_paddr_t paddr
= ptoa(pn
);
13119 if (pa_valid(paddr
)) {
13122 pmap_io_range_t
*io_rgn
= pmap_find_io_attr(paddr
);
13123 return (io_rgn
!= NULL
) && (io_rgn
->wimg
& PMAP_IO_RANGE_CARVEOUT
);
MARK_AS_PMAP_TEXT static boolean_t
pmap_is_empty_internal(
	pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end)
{
	vm_map_offset_t block_start, block_end;
	tt_entry_t *tte_p;

	if (pmap == NULL) {
		return TRUE;
	}

	VALIDATE_PMAP(pmap);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	unsigned int initial_not_in_kdp = not_in_kdp;

	if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
		pmap_lock_ro(pmap);
	}

#if (__ARM_VMSA__ == 7)
	if (tte_index(pmap, pt_attr, va_end) >= pmap->tte_index_max) {
		if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
			pmap_unlock_ro(pmap);
		}
		return TRUE;
	}
#endif

	/* TODO: This will be faster if we increment ttep at each level. */
	block_start = va_start;

	while (block_start < va_end) {
		pt_entry_t     *bpte_p, *epte_p;
		pt_entry_t     *pte_p;

		block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
		if (block_end > va_end) {
			block_end = va_end;
		}

		tte_p = pmap_tte(pmap, block_start);
		if ((tte_p != PT_ENTRY_NULL)
		    && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
			pte_p = (pt_entry_t *) ttetokv(*tte_p);
			bpte_p = &pte_p[pte_index(pmap, pt_attr, block_start)];
			epte_p = &pte_p[pte_index(pmap, pt_attr, block_end)];

			for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
				if (*pte_p != ARM_PTE_EMPTY) {
					if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
						pmap_unlock_ro(pmap);
					}
					return FALSE;
				}
			}
		}
		block_start = block_end;
	}

	if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
		pmap_unlock_ro(pmap);
	}

	return TRUE;
}

boolean_t
pmap_is_empty(
	pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end)
{
#if XNU_MONITOR
	return pmap_is_empty_ppl(pmap, va_start, va_end);
#else
	return pmap_is_empty_internal(pmap, va_start, va_end);
#endif
}
13211 unsigned int option
)
13213 return (is64
) ? pmap_max_64bit_offset(option
) : pmap_max_32bit_offset(option
);
vm_map_offset_t
pmap_max_64bit_offset(
    __unused unsigned int option)
{
    vm_map_offset_t max_offset_ret = 0;

#if defined(__arm64__)
#define ARM64_MIN_MAX_ADDRESS (SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000) // end of shared region + 512MB for various purposes
    _Static_assert((ARM64_MIN_MAX_ADDRESS > SHARED_REGION_BASE_ARM64) && (ARM64_MIN_MAX_ADDRESS <= MACH_VM_MAX_ADDRESS),
        "Minimum address space size outside allowable range");
    const vm_map_offset_t min_max_offset = ARM64_MIN_MAX_ADDRESS; // end of shared region + 512MB for various purposes
    if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
        max_offset_ret = arm64_pmap_max_offset_default;
    } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
        max_offset_ret = min_max_offset;
    } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
        max_offset_ret = MACH_VM_MAX_ADDRESS;
    } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
        if (arm64_pmap_max_offset_default) {
            max_offset_ret = arm64_pmap_max_offset_default;
        } else if (max_mem > 0xC0000000) {
            max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
        } else if (max_mem > 0x40000000) {
            max_offset_ret = min_max_offset + 0x38000000; // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
        } else {
            max_offset_ret = min_max_offset;
        }
    } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
        if (arm64_pmap_max_offset_default) {
            // Allow the boot-arg to override jumbo size
            max_offset_ret = arm64_pmap_max_offset_default;
        } else {
            max_offset_ret = MACH_VM_MAX_ADDRESS; // Max offset is 64GB for pmaps with special "jumbo" blessing
        }
    } else {
        panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
    }

    assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
    assert(max_offset_ret >= min_max_offset);
#else
    panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
#endif

    return max_offset_ret;
}
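/*
 * Illustrative sketch (not part of the original file): how a caller might pick
 * an address-space ceiling from these helpers. ARM_PMAP_MAX_OFFSET_DEVICE and
 * ARM_PMAP_MAX_OFFSET_JUMBO select the device-tuned and "jumbo" ceilings
 * handled above; the real policy decisions live in the VM map setup code.
 *
 *      // 64-bit task, default device policy:
 *      vm_map_offset_t ceiling = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_DEVICE);
 *      // 64-bit task blessed with the larger "jumbo" address space:
 *      vm_map_offset_t jumbo   = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_JUMBO);
 */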
vm_map_offset_t
pmap_max_32bit_offset(
    unsigned int option)
{
    vm_map_offset_t max_offset_ret = 0;

    if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
        max_offset_ret = arm_pmap_max_offset_default;
    } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
        max_offset_ret = 0x80000000;
    } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
        max_offset_ret = VM_MAX_ADDRESS;
    } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
        if (arm_pmap_max_offset_default) {
            max_offset_ret = arm_pmap_max_offset_default;
        } else if (max_mem > 0x20000000) {
            max_offset_ret = 0x80000000;
        } else {
            max_offset_ret = 0x80000000;
        }
    } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
        max_offset_ret = 0x80000000;
    } else {
        panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
    }

    assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
    return max_offset_ret;
}
#if CONFIG_DTRACE
/*
 * Constrain DTrace copyin/copyout actions
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

kern_return_t
dtrace_copyio_preflight(
    __unused addr64_t va)
{
    if (current_map() == kernel_map) {
        return KERN_FAILURE;
    } else {
        return KERN_SUCCESS;
    }
}

kern_return_t
dtrace_copyio_postflight(
    __unused addr64_t va)
{
    return KERN_SUCCESS;
}
#endif /* CONFIG_DTRACE */
void
pmap_flush_context_init(__unused pmap_flush_context *pfc)
{
}

void
pmap_flush(
    __unused pmap_flush_context *cpus_to_flush)
{
    /* not implemented yet */
    return;
}
/*
 * Enforce that the address range described by kva and nbytes is not currently
 * PPL-owned, and won't become PPL-owned while pinned. This is to prevent
 * unintentionally writing to PPL-owned memory.
 */
static void
pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes)
{
    vm_offset_t end;
    if (os_add_overflow(kva, nbytes, &end)) {
        panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
    }
    for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
        pmap_paddr_t pa = kvtophys(ckva);
        if (!pa_valid(pa)) {
            panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
        }
        pp_attr_t attr;
        unsigned int pai = (unsigned int)pa_index(pa);
        if (ckva == phystokv(pa)) {
            panic("%s(%p): attempt to pin static mapping for page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
        }
        do {
            attr = pp_attr_table[pai] & ~PP_ATTR_NO_MONITOR;
            if (attr & PP_ATTR_MONITOR) {
                panic("%s(%p): physical page 0x%llx belongs to PPL", __func__, (void*)kva, (uint64_t)pa);
            }
        } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_NO_MONITOR, &pp_attr_table[pai]));
    }
}

static void
pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes)
{
    vm_offset_t end;
    if (os_add_overflow(kva, nbytes, &end)) {
        panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
    }
    for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
        pmap_paddr_t pa = kvtophys(ckva);
        if (!pa_valid(pa)) {
            panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
        }
        if (!(pp_attr_table[pa_index(pa)] & PP_ATTR_NO_MONITOR)) {
            panic("%s(%p): physical page 0x%llx not pinned", __func__, (void*)kva, (uint64_t)pa);
        }
        assert(!(pp_attr_table[pa_index(pa)] & PP_ATTR_MONITOR));
        pa_clear_no_monitor(pa);
    }
}
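/*
 * Illustrative sketch (not part of the original file): the pin/unpin pair is
 * used to bracket writes to a kernel out-buffer, as the query routines later
 * in this file do with their out-parameters. "result_p" is a hypothetical
 * caller-supplied pointer.
 *
 *      pmap_pin_kernel_pages((vm_offset_t)result_p, sizeof(*result_p));
 *      *result_p = value;      // safe: the page cannot become PPL-owned here
 *      pmap_unpin_kernel_pages((vm_offset_t)result_p, sizeof(*result_p));
 */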
/*
 * Lock down a page, making all mappings read-only, and preventing
 * further mappings or removal of this particular kva's mapping.
 * Effectively, it makes the page at kva immutable.
 */
MARK_AS_PMAP_TEXT static void
pmap_ppl_lockdown_page(vm_address_t kva)
{
    pmap_paddr_t pa = kvtophys(kva);
    unsigned int pai = (unsigned int)pa_index(pa);
    pv_entry_t **pv_h = pai_to_pvh(pai);

    if (__improbable(pa_test_monitor(pa))) {
        panic("%#lx: page %llx belongs to PPL", kva, pa);
    }

    if (__improbable(pvh_get_flags(pv_h) & (PVH_FLAG_LOCKDOWN | PVH_FLAG_EXEC))) {
        panic("%#lx: already locked down/executable (%#llx)", kva, pvh_get_flags(pv_h));
    }

    pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);

    if (pte_p == PT_ENTRY_NULL) {
        panic("%#lx: NULL pte", kva);
    }

    pt_entry_t tmplate = *pte_p;
    if (__improbable((tmplate & ARM_PTE_APMASK) != ARM_PTE_AP(AP_RWNA))) {
        panic("%#lx: not a kernel r/w page (%#llx)", kva, tmplate & ARM_PTE_APMASK);
    }

    pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_LOCKDOWN);

    pmap_set_ptov_ap(pai, AP_RONA, FALSE);

    pmap_page_protect_options_internal((ppnum_t)atop(pa), VM_PROT_READ, 0);
}

/*
 * Release a page from being locked down to the PPL, making it writable
 * to the kernel once again.
 */
MARK_AS_PMAP_TEXT static void
pmap_ppl_unlockdown_page(vm_address_t kva)
{
    pmap_paddr_t pa = kvtophys(kva);
    unsigned int pai = (unsigned int)pa_index(pa);
    pv_entry_t **pv_h = pai_to_pvh(pai);

    vm_offset_t pvh_flags = pvh_get_flags(pv_h);

    if (__improbable(!(pvh_flags & PVH_FLAG_LOCKDOWN))) {
        panic("unlockdown attempt on not locked down virtual %#lx/pai %d", kva, pai);
    }

    pvh_set_flags(pv_h, pvh_flags & ~PVH_FLAG_LOCKDOWN);
    pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
}
#else /* XNU_MONITOR */

static void __unused
pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}

static void __unused
pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}

#endif /* !XNU_MONITOR */
#define PMAP_RESIDENT_INVALID   ((mach_vm_size_t)-1)

MARK_AS_PMAP_TEXT static mach_vm_size_t
pmap_query_resident_internal(
    pmap_t pmap,
    vm_map_address_t start,
    vm_map_address_t end,
    mach_vm_size_t *compressed_bytes_p)
{
    mach_vm_size_t resident_bytes = 0;
    mach_vm_size_t compressed_bytes = 0;

    pt_entry_t *bpte, *epte;
    pt_entry_t *pte_p;
    tt_entry_t *tte_p;

    if (pmap == NULL) {
        return PMAP_RESIDENT_INVALID;
    }

    VALIDATE_PMAP(pmap);

    const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    /* Ensure that this request is valid, and addresses exactly one TTE. */
    if (__improbable((start % pt_attr_page_size(pt_attr)) ||
        (end % pt_attr_page_size(pt_attr)))) {
        panic("%s: address range %p, %p not page-aligned to 0x%llx", __func__, (void*)start, (void*)end, pt_attr_page_size(pt_attr));
    }

    if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
        panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
    }

    pmap_lock_ro(pmap);
    tte_p = pmap_tte(pmap, start);
    if (tte_p == (tt_entry_t *) NULL) {
        pmap_unlock_ro(pmap);
        return PMAP_RESIDENT_INVALID;
    }
    if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
        pte_p = (pt_entry_t *) ttetokv(*tte_p);
        bpte = &pte_p[pte_index(pmap, pt_attr, start)];
        epte = &pte_p[pte_index(pmap, pt_attr, end)];

        for (; bpte < epte; bpte++) {
            if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
                compressed_bytes += pt_attr_page_size(pt_attr);
            } else if (pa_valid(pte_to_pa(*bpte))) {
                resident_bytes += pt_attr_page_size(pt_attr);
            }
        }
    }
    pmap_unlock_ro(pmap);

    if (compressed_bytes_p) {
        pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
        *compressed_bytes_p += compressed_bytes;
        pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
    }

    return resident_bytes;
}
mach_vm_size_t
pmap_query_resident(
    pmap_t pmap,
    vm_map_address_t start,
    vm_map_address_t end,
    mach_vm_size_t *compressed_bytes_p)
{
    mach_vm_size_t total_resident_bytes;
    mach_vm_size_t compressed_bytes;
    vm_map_address_t va;

    if (pmap == PMAP_NULL) {
        if (compressed_bytes_p) {
            *compressed_bytes_p = 0;
        }
        return 0;
    }

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    total_resident_bytes = 0;
    compressed_bytes = 0;

    PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
        VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
        VM_KERNEL_ADDRHIDE(end));

    va = start;
    while (va < end) {
        vm_map_address_t l;
        mach_vm_size_t resident_bytes;

        l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));

#if XNU_MONITOR
        resident_bytes = pmap_query_resident_ppl(pmap, va, l, compressed_bytes_p);
#else
        resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
#endif
        if (resident_bytes == PMAP_RESIDENT_INVALID) {
            break;
        }

        total_resident_bytes += resident_bytes;

        va = l;
    }

    if (compressed_bytes_p) {
        *compressed_bytes_p = compressed_bytes;
    }

    PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
        total_resident_bytes);

    return total_resident_bytes;
}
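/*
 * Illustrative sketch (not part of the original file): querying how much of a
 * range is resident vs. compressed. "target_pmap", "range_start" and
 * "range_end" are hypothetical caller-supplied values.
 *
 *      mach_vm_size_t compressed = 0;
 *      mach_vm_size_t resident = pmap_query_resident(target_pmap,
 *          range_start, range_end, &compressed);
 *      // resident and compressed are byte counts, accumulated one twig
 *      // (one L2 entry's worth of address space) at a time.
 */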
void
pmap_check_ledgers(
    pmap_t pmap)
{
    int pid;
    char *procname;

    if (pmap->pmap_pid == 0) {
        /*
         * This pmap was not or is no longer fully associated
         * with a task (e.g. the old pmap after a fork()/exec() or
         * spawn()). Its "ledger" still points at a task that is
         * now using a different (and active) address space, so
         * we can't check that all the pmap ledgers are balanced here.
         *
         * If the "pid" is set, that means that we went through
         * pmap_set_process() in task_terminate_internal(), so
         * this task's ledger should not have been re-used and
         * all the pmap ledgers should be back to 0.
         */
        return;
    }

    pid = pmap->pmap_pid;
    procname = pmap->pmap_procname;

    vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);

    PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
    PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
    PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
    PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
    PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
    PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
    PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
}
#endif /* MACH_ASSERT */
void
pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
{
}
#define PROF_START  uint64_t t, nanot;\
	t = mach_absolute_time();

#define PROF_END    absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
	kprintf("%s: took %llu ns\n", __func__, nanot);

#define PMAP_PGTRACE_LOCK(p)                                \
    do {                                                    \
	*(p) = ml_set_interrupts_enabled(false);            \
	if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
	ml_set_interrupts_enabled(*(p));                    \
    } while (true)

#define PMAP_PGTRACE_UNLOCK(p)                              \
    do {                                                    \
	simple_unlock(&(pmap_pgtrace.lock));                \
	ml_set_interrupts_enabled(*(p));                    \
    } while (0)

#define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
    do {                                    \
	*(pte_p) = (pte_entry);             \
	FLUSH_PTE(pte_p);                   \
    } while (0)

#define PGTRACE_MAX_MAP 16      // maximum supported va to same pa
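/*
 * Illustrative sketch (not part of the original file): the lock macro above
 * spins with interrupts disabled only while the try-lock attempt runs,
 * re-enabling interrupts between attempts. A typical critical section in the
 * pgtrace code below looks like:
 *
 *      bool ints;                      // saved interrupt state
 *      PMAP_PGTRACE_LOCK(&ints);
 *      // ... operate on pmap_pgtrace.pages ...
 *      PMAP_PGTRACE_UNLOCK(&ints);
 */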
typedef enum {
    UNDEFINED,
    PA_UNDEFINED,
    VA_UNDEFINED,
    DEFINED
} pmap_pgtrace_page_state_t;

typedef struct {
    queue_chain_t chain;

    /*
     *   maps - list of va maps to upper pa
     *   map_pool - map pool
     *   map_waste - waste can
     */
    pmap_paddr_t pa;
    queue_head_t maps;
    queue_head_t map_pool;
    queue_head_t map_waste;
    pmap_pgtrace_page_state_t state;
} pmap_pgtrace_page_t;

typedef struct {
    queue_chain_t chain;
    pmap_t pmap;
    vm_map_offset_t va;
} pmap_va_t;

static ZONE_VIEW_DEFINE(ZV_PMAP_VA, "pmap va",
    KHEAP_ID_DEFAULT, sizeof(pmap_va_t));

static ZONE_VIEW_DEFINE(ZV_PMAP_PGTRACE, "pmap pgtrace",
    KHEAP_ID_DEFAULT, sizeof(pmap_pgtrace_page_t));

static struct {
    /*
     *   pages - list of tracing page info
     */
    queue_head_t pages;
    decl_simple_lock_data(, lock);
} pmap_pgtrace = {};
void
pmap_pgtrace_init(void)
{
    queue_init(&(pmap_pgtrace.pages));
    simple_lock_init(&(pmap_pgtrace.lock), 0);

    boolean_t enabled;

    if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
        pgtrace_enabled = enabled;
    }
}
// find a page with given pa - pmap_pgtrace should be locked
inline static pmap_pgtrace_page_t *
pmap_pgtrace_find_page(pmap_paddr_t pa)
{
    queue_head_t *q = &(pmap_pgtrace.pages);
    pmap_pgtrace_page_t *p;

    queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
        if (p->state == UNDEFINED) {
            continue;
        }
        if (p->state == PA_UNDEFINED) {
            continue;
        }
        if (p->pa == pa) {
            return p;
        }
    }

    return NULL;
}
// enter clone of given pmap, va page and range - pmap should be locked
static bool
pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
{
    bool ints;
    queue_head_t *q = &(pmap_pgtrace.pages);
    pmap_paddr_t pa_page;
    pt_entry_t *ptep, *cptep;
    pmap_pgtrace_page_t *p;
    bool found = false;

    pmap_assert_locked_w(pmap);
    assert(va_page == arm_trunc_page(va_page));

    PMAP_PGTRACE_LOCK(&ints);

    ptep = pmap_pte(pmap, va_page);

    // target pte should exist
    if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return false;
    }

    queue_head_t *mapq;
    queue_head_t *mappool;
    pmap_pgtrace_map_t *map = NULL;

    pa_page = pte_to_pa(*ptep);

    // find if we have a page info defined for this
    queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
        mapq = &(p->maps);
        mappool = &(p->map_pool);

        switch (p->state) {
        case PA_UNDEFINED:
            queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
                if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
                    p->pa = pa_page;
                    map->range.start = start;
                    map->range.end = end;
                    found = true;
                    break;
                }
            }
            break;

        case VA_UNDEFINED:
            if (p->pa != pa_page) {
                break;
            }
            queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
                if (map->cloned == false) {
                    map->pmap = pmap;
                    map->ova = va_page;
                    map->range.start = start;
                    map->range.end = end;
                    found = true;
                    break;
                }
            }
            break;

        case DEFINED:
            if (p->pa != pa_page) {
                break;
            }
            queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
                if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
                    kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
                    break;
                } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
                    kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
                    break;
                } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
                    // range should be already defined as well
                    found = true;
                    break;
                }
            }
            break;

        default:
            panic("invalid state p->state=%x\n", p->state);
        }

        if (found == true) {
            break;
        }
    }

    // do not clone if no page info found
    if (found == false) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return false;
    }

    // copy pre, target and post ptes to clone ptes
    for (int i = 0; i < 3; i++) {
        ptep = pmap_pte(pmap, va_page + (i - 1) * ARM_PGBYTES);
        cptep = pmap_pte(kernel_pmap, map->cva[i]);
        assert(cptep != NULL);
        if (ptep == NULL) {
            PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
        } else {
            PGTRACE_WRITE_PTE(cptep, *ptep);
        }
        PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
    }

    // get ptes for original and clone
    ptep = pmap_pte(pmap, va_page);
    cptep = pmap_pte(kernel_pmap, map->cva[1]);

    // invalidate original pte and mark it as a pgtrace page
    PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
    PMAP_UPDATE_TLBS(pmap, map->ova, map->ova + ARM_PGBYTES, false);

    map->cloned = true;
    p->state = DEFINED;

    kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);

    PMAP_PGTRACE_UNLOCK(&ints);

    return true;
}
// This function removes trace bit and validate pte if applicable. Pmap must be locked.
static void
pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
{
    bool ints, found = false;
    pmap_pgtrace_page_t *p;
    pt_entry_t *ptep;

    PMAP_PGTRACE_LOCK(&ints);

    // we must have this page info
    p = pmap_pgtrace_find_page(pa);
    if (p == NULL) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return;
    }

    // find matching map
    queue_head_t *mapq = &(p->maps);
    queue_head_t *mappool = &(p->map_pool);
    pmap_pgtrace_map_t *map;

    queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
        if (map->pmap == pmap && map->ova == va) {
            found = true;
            break;
        }
    }

    if (!found) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return;
    }

    if (map->cloned == true) {
        // Restore back the pte to original state
        ptep = pmap_pte(pmap, map->ova);
        PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
        PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);

        // revert clone pages
        for (int i = 0; i < 3; i++) {
            ptep = pmap_pte(kernel_pmap, map->cva[i]);
            assert(ptep != NULL);
            PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
            PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
        }
    }

    queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
    map->ova = (vm_map_offset_t)NULL;
    map->cloned = false;
    queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);

    kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);

    PMAP_PGTRACE_UNLOCK(&ints);
}
// remove all clones of given pa - pmap must be locked
static void
pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
{
    bool ints;
    pmap_pgtrace_page_t *p;
    pt_entry_t *ptep;

    PMAP_PGTRACE_LOCK(&ints);

    // we must have this page info
    p = pmap_pgtrace_find_page(pa);
    if (p == NULL) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return;
    }

    queue_head_t *mapq = &(p->maps);
    queue_head_t *mappool = &(p->map_pool);
    queue_head_t *mapwaste = &(p->map_waste);
    pmap_pgtrace_map_t *map;

    // move maps to waste
    while (!queue_empty(mapq)) {
        queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
        queue_enter_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
    }

    PMAP_PGTRACE_UNLOCK(&ints);

    // sanitize maps in waste
    queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
        if (map->cloned == true) {
            pmap_lock(map->pmap);

            // restore back original pte
            ptep = pmap_pte(map->pmap, map->ova);
            PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
            PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova + ARM_PGBYTES, false);

            // revert clone ptes
            for (int i = 0; i < 3; i++) {
                ptep = pmap_pte(kernel_pmap, map->cva[i]);
                assert(ptep != NULL);
                PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
                PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
            }

            pmap_unlock(map->pmap);
        }

        map->ova = (vm_map_offset_t)NULL;
        map->cloned = false;
    }

    PMAP_PGTRACE_LOCK(&ints);

    // recycle maps back to map_pool
    while (!queue_empty(mapwaste)) {
        queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
        queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
    }

    PMAP_PGTRACE_UNLOCK(&ints);
}
static void
pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
{
    uint64_t tsz;
    vm_map_offset_t end;

    if (pmap == kernel_pmap) {
        tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
        *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64 - tsz)) << (64 - tsz));
        *endp = VM_MAX_KERNEL_ADDRESS;
    } else {
        tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
        end = ((uint64_t)1 << (64 - tsz)) - 1;

        *startp = 0;
        *endp = end;
    }

    assert(*endp > *startp);
}
// has pa mapped in given pmap? then clone it
static uint64_t
pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
{
    uint64_t ret = 0;
    vm_map_offset_t min, max;
    vm_map_offset_t cur_page, end_page;
    pt_entry_t *ptep;
    tt_entry_t *ttep;
    tt_entry_t tte;
    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    pmap_pgtrace_get_search_space(pmap, &min, &max);

    cur_page = arm_trunc_page(min);
    end_page = arm_trunc_page(max);
    while (cur_page <= end_page) {
        vm_map_offset_t add = 0;

        pmap_lock(pmap);

        // skip uninterested space
        if (pmap == kernel_pmap &&
            ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
            (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
            add = ARM_PGBYTES;
            goto unlock_continue;
        }

        // check whether we can skip l1
        ttep = pmap_tt1e(pmap, cur_page);
        tte = *ttep;
        if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
            add = ARM_TT_L1_SIZE;
            goto unlock_continue;
        }

        tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, cur_page)];

        if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
            add = ARM_TT_L2_SIZE;
            goto unlock_continue;
        }

        ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, cur_page)]);
        if (ptep == PT_ENTRY_NULL) {
            add = ARM_TT_L3_SIZE;
            goto unlock_continue;
        }

        if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
            if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
                ret++;
            }
        }
        add = ARM_PGBYTES;

unlock_continue:
        pmap_unlock(pmap);

        // overflow
        if (cur_page + add < cur_page) {
            break;
        }

        cur_page += add;
    }

    return ret;
}
// search pv table and clone vas of given pa
static uint64_t
pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
{
    uint64_t ret = 0;
    unsigned int pai;
    pv_entry_t **pvh;
    pt_entry_t *ptep;
    pmap_t pmap;
    pmap_va_t *pmapva;

    queue_head_t pmapvaq;
    queue_init(&pmapvaq);

    pai = pa_index(pa);
    pvh = pai_to_pvh(pai);

    // collect pmap/va pair from pvh
    if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
        ptep = pvh_ptep(pvh);
        pmap = ptep_get_pmap(ptep);

        pmapva = (pmap_va_t *)zalloc(ZV_PMAP_VA);
        pmapva->pmap = pmap;
        pmapva->va = ptep_get_va(ptep);

        queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
    } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
        pv_entry_t *pvep;

        pvep = pvh_list(pvh);
        while (pvep != PV_ENTRY_NULL) {
            ptep = pve_get_ptep(pvep);
            pmap = ptep_get_pmap(ptep);

            pmapva = (pmap_va_t *)zalloc(ZV_PMAP_VA);
            pmapva->pmap = pmap;
            pmapva->va = ptep_get_va(ptep);

            queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);

            pvep = PVE_NEXT_PTR(pve_next(pvep));
        }
    }

    // clone them while making sure mapping still exists
    queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
        pmap_lock(pmapva->pmap);
        ptep = pmap_pte(pmapva->pmap, pmapva->va);
        if (pte_to_pa(*ptep) == pa) {
            if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
                ret++;
            }
        }
        pmap_unlock(pmapva->pmap);

        zfree(ZV_PMAP_VA, pmapva);
    }

    return ret;
}
// allocate a page info
static pmap_pgtrace_page_t *
pmap_pgtrace_alloc_page(void)
{
    pmap_pgtrace_page_t *p;
    queue_head_t *mapq;
    queue_head_t *mappool;
    queue_head_t *mapwaste;
    pmap_pgtrace_map_t *map;

    p = zalloc(ZV_PMAP_PGTRACE);

    p->state = UNDEFINED;

    mapq = &(p->maps);
    mappool = &(p->map_pool);
    mapwaste = &(p->map_waste);
    queue_init(mapq);
    queue_init(mappool);
    queue_init(mapwaste);

    for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
        vm_map_offset_t newcva;
        pt_entry_t *cptep;
        kern_return_t kr;
        vm_map_entry_t entry;

        vm_object_reference(kernel_object);
        kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3 * ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
        if (kr != KERN_SUCCESS) {
            panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
        }
        VME_OBJECT_SET(entry, kernel_object);
        VME_OFFSET_SET(entry, newcva);
        vm_map_unlock(kernel_map);

        // fill default clone page info and add to pool
        map = zalloc(ZV_PMAP_PGTRACE);
        for (int j = 0; j < 3; j++) {
            vm_map_offset_t addr = newcva + j * ARM_PGBYTES;

            // pre-expand pmap while preemption enabled
            kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_L3_LEVEL);
            if (kr != KERN_SUCCESS) {
                panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
            }

            cptep = pmap_pte(kernel_pmap, addr);
            assert(cptep != NULL);

            map->cva[j] = addr;
            map->cva_spte[j] = *cptep;
        }
        map->range.start = map->range.end = 0;
        map->cloned = false;
        queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
    }

    return p;
}
// free a page info
static void
pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
{
    queue_head_t *mapq;
    queue_head_t *mappool;
    queue_head_t *mapwaste;
    pmap_pgtrace_map_t *map;

    mapq = &(p->maps);
    mappool = &(p->map_pool);
    mapwaste = &(p->map_waste);

    while (!queue_empty(mapq)) {
        queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
        zfree(ZV_PMAP_PGTRACE, map);
    }

    while (!queue_empty(mappool)) {
        queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
        zfree(ZV_PMAP_PGTRACE, map);
    }

    while (!queue_empty(mapwaste)) {
        queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
        zfree(ZV_PMAP_PGTRACE, map);
    }

    zfree(ZV_PMAP_PGTRACE, p);
}
// construct page infos with the given address range
int
pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
    int ret = 0;
    pt_entry_t *ptep;
    queue_head_t *q = &(pmap_pgtrace.pages);
    bool ints;
    vm_map_offset_t cur_page, end_page;

    if (start > end) {
        kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
        return -1;
    }

    // add each page in given range
    cur_page = arm_trunc_page(start);
    end_page = arm_trunc_page(end);
    while (cur_page <= end_page) {
        pmap_paddr_t pa_page = 0;
        uint64_t num_cloned = 0;
        pmap_pgtrace_page_t *p = NULL, *newp;
        bool free_newp = true;
        pmap_pgtrace_page_state_t state;

        // do all allocations outside of spinlocks
        newp = pmap_pgtrace_alloc_page();

        // keep lock orders in pmap, kernel_pmap and pgtrace lock
        if (pmap != NULL) {
            pmap_lock_ro(pmap);
        }
        if (pmap != kernel_pmap) {
            pmap_lock_ro(kernel_pmap);
        }

        // addresses are physical if pmap is null
        if (pmap == NULL) {
            ptep = NULL;
            pa_page = cur_page;
            state = VA_UNDEFINED;
        } else {
            ptep = pmap_pte(pmap, cur_page);
            if (ptep != NULL) {
                pa_page = pte_to_pa(*ptep);
                state = DEFINED;
            } else {
                state = PA_UNDEFINED;
            }
        }

        // search if we have a page info already
        PMAP_PGTRACE_LOCK(&ints);
        if (state != PA_UNDEFINED) {
            p = pmap_pgtrace_find_page(pa_page);
        }

        // add pre-allocated page info if nothing found
        if (p == NULL) {
            queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
            p = newp;
            free_newp = false;
        }

        // now p points what we want
        p->state = state;

        queue_head_t *mapq = &(p->maps);
        queue_head_t *mappool = &(p->map_pool);
        pmap_pgtrace_map_t *map;
        vm_map_offset_t start_offset, end_offset;

        // calculate trace offsets in the page
        if (cur_page > start) {
            start_offset = 0;
        } else {
            start_offset = start - cur_page;
        }
        if (cur_page == end_page) {
            end_offset = end - end_page;
        } else {
            end_offset = ARM_PGBYTES - 1;
        }

        kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);

        // fill map info
        assert(!queue_empty(mappool));
        queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
        if (p->state == PA_UNDEFINED) {
            map->pmap = pmap;
            map->ova = cur_page;
            map->range.start = start_offset;
            map->range.end = end_offset;
        } else if (p->state == VA_UNDEFINED) {
            p->pa = pa_page;
            map->range.start = start_offset;
            map->range.end = end_offset;
        } else if (p->state == DEFINED) {
            p->pa = pa_page;
            map->pmap = pmap;
            map->ova = cur_page;
            map->range.start = start_offset;
            map->range.end = end_offset;
        } else {
            panic("invalid p->state=%d\n", p->state);
        }

        // not cloned yet
        map->cloned = false;
        queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);

        // unlock locks
        PMAP_PGTRACE_UNLOCK(&ints);
        if (pmap != kernel_pmap) {
            pmap_unlock_ro(kernel_pmap);
        }
        if (pmap != NULL) {
            pmap_unlock_ro(pmap);
        }

        // now clone it
        if (pa_valid(pa_page)) {
            num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
        }
        if (pmap == NULL) {
            num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
        } else {
            num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
        }

        // free pre-allocations if we didn't add it to the q
        if (free_newp) {
            pmap_pgtrace_free_page(newp);
        }

        if (num_cloned == 0) {
            kprintf("%s: no mapping found for pa_page=%llx but will be added when a page entered\n", __func__, pa_page);
        }

        ret++;

        // overflow
        if (cur_page + ARM_PGBYTES < cur_page) {
            break;
        } else {
            cur_page += ARM_PGBYTES;
        }
    }

    return ret;
}
// delete page infos for given address range
int
pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
    int ret = 0;
    bool ints;
    queue_head_t *q = &(pmap_pgtrace.pages);
    pmap_pgtrace_page_t *p;
    vm_map_offset_t cur_page, end_page;

    kprintf("%s start=%llx end=%llx\n", __func__, start, end);

    pt_entry_t *ptep;
    pmap_paddr_t pa_page;

    // remove page info from start to end
    cur_page = arm_trunc_page(start);
    end_page = arm_trunc_page(end);
    while (cur_page <= end_page) {
        if (pmap == NULL) {
            pa_page = cur_page;
        } else {
            ptep = pmap_pte(pmap, cur_page);
            if (ptep == NULL) {
                goto cont;
            }
            pa_page = pte_to_pa(*ptep);
        }

        // remove all clones and validate
        pmap_pgtrace_remove_all_clone(pa_page);

        // find page info and delete
        PMAP_PGTRACE_LOCK(&ints);
        p = pmap_pgtrace_find_page(pa_page);
        if (p != NULL) {
            queue_remove(q, p, pmap_pgtrace_page_t *, chain);
            ret++;
        }
        PMAP_PGTRACE_UNLOCK(&ints);

        // free outside of locks
        if (p != NULL) {
            pmap_pgtrace_free_page(p);
        }

cont:
        // overflow
        if (cur_page + ARM_PGBYTES < cur_page) {
            break;
        } else {
            cur_page += ARM_PGBYTES;
        }
    }

    return ret;
}
kern_return_t
pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
{
    pt_entry_t *ptep;
    pgtrace_run_result_t res;
    pmap_pgtrace_page_t *p;
    bool ints, found = false;
    pmap_paddr_t pa;

    // Quick check if we are interested
    ptep = pmap_pte(pmap, va);
    if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
        return KERN_FAILURE;
    }

    PMAP_PGTRACE_LOCK(&ints);

    // Check again since access is serialized
    ptep = pmap_pte(pmap, va);
    if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return KERN_FAILURE;
    } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
        // Somehow this cpu's tlb has not updated
        kprintf("%s Somehow this cpu's tlb has not updated?\n", __func__);
        PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);

        PMAP_PGTRACE_UNLOCK(&ints);
        return KERN_SUCCESS;
    }

    // Find if this pa is what we are tracing
    pa = pte_to_pa(*ptep);

    p = pmap_pgtrace_find_page(arm_trunc_page(pa));
    if (p == NULL) {
        panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
    }

    // find if pmap and va are also matching
    queue_head_t *mapq = &(p->maps);
    queue_head_t *mapwaste = &(p->map_waste);
    pmap_pgtrace_map_t *map;

    queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
        if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
            found = true;
            break;
        }
    }

    // if not found, search map waste as they are still valid
    if (!found) {
        queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
            if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
                found = true;
                break;
            }
        }
    }

    if (!found) {
        panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
    }

    // Decode and run it on the clone map
    bzero(&res, sizeof(res));
    pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss),  // instruction
        va, map->cva,                                            // fault va and clone page vas
        ss, &res);

    // write a log if in range
    vm_map_offset_t offset = va - map->ova;
    if (map->range.start <= offset && offset <= map->range.end) {
        pgtrace_write_log(res);
    }

    PMAP_PGTRACE_UNLOCK(&ints);

    // Return to next instruction
    add_saved_state_pc(ss, sizeof(uint32_t));

    return KERN_SUCCESS;
}
/**
 * The minimum shared region nesting size is used by the VM to determine when to
 * break up large mappings to nested regions. The smallest size that these
 * mappings can be broken into is determined by what page table level those
 * regions are being nested in at and the size of the page tables.
 *
 * For instance, if a nested region is nesting at L2 for a process utilizing
 * 16KB page tables, then the minimum nesting size would be 32MB (size of an L2
 * block).
 *
 * @param pmap The target pmap to determine the block size based on whether it's
 *             using 16KB or 4KB page tables.
 */
uint64_t
pmap_shared_region_size_min(__unused pmap_t pmap)
{
#if (__ARM_VMSA__ > 7)
    const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    /*
     * We always nest the shared region at L2 (32MB for 16KB pages, 2MB for
     * 4KB pages). This means that a target pmap will contain L2 entries that
     * point to shared L3 page tables in the shared region pmap.
     */
    return pt_attr_twig_size(pt_attr);
#else
    return ARM_NESTING_SIZE_MIN;
#endif
}
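/*
 * Illustrative arithmetic (not part of the original file), assuming the usual
 * ARM64 translation configurations: with a 16KB granule an L2 entry covers
 * 2048 L3 entries * 16KB = 32MB, so pt_attr_twig_size() yields 32MB; with a
 * 4KB granule an L2 entry covers 512 * 4KB = 2MB.
 */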
/*
 * The concept of a nesting size maximum was made to accommodate restrictions in
 * place for nesting regions on PowerPC. There are no restrictions to max nesting
 * sizes on x86/armv7/armv8 and this should get removed.
 *
 * TODO: <rdar://problem/65247502> Completely remove pmap_nesting_size_max()
 */
uint64_t
pmap_nesting_size_max(__unused pmap_t pmap)
{
    return ARM_NESTING_SIZE_MAX;
}
boolean_t
pmap_enforces_execute_only(
#if (__ARM_VMSA__ == 7)
    __unused
#endif
    pmap_t pmap)
{
#if (__ARM_VMSA__ > 7)
    return pmap != kernel_pmap;
#else
    return FALSE;
#endif
}
MARK_AS_PMAP_TEXT void
pmap_set_vm_map_cs_enforced_internal(
    pmap_t pmap,
    bool new_value)
{
    VALIDATE_PMAP(pmap);
    pmap->pmap_vm_map_cs_enforced = new_value;
}

void
pmap_set_vm_map_cs_enforced(
    pmap_t pmap,
    bool new_value)
{
#if XNU_MONITOR
    pmap_set_vm_map_cs_enforced_ppl(pmap, new_value);
#else
    pmap_set_vm_map_cs_enforced_internal(pmap, new_value);
#endif
}
extern int cs_process_enforcement_enable;
bool
pmap_get_vm_map_cs_enforced(
    pmap_t pmap)
{
    if (cs_process_enforcement_enable) {
        return true;
    }
    return pmap->pmap_vm_map_cs_enforced;
}
MARK_AS_PMAP_TEXT void
pmap_set_jit_entitled_internal(
    __unused pmap_t pmap)
{
    return;
}

void
pmap_set_jit_entitled(
    pmap_t pmap)
{
#if XNU_MONITOR
    pmap_set_jit_entitled_ppl(pmap);
#else
    pmap_set_jit_entitled_internal(pmap);
#endif
}

bool
pmap_get_jit_entitled(
    __unused pmap_t pmap)
{
    return false;
}
MARK_AS_PMAP_TEXT static kern_return_t
pmap_query_page_info_internal(
    pmap_t          pmap,
    vm_map_offset_t va,
    int             *disp_p)
{
    pmap_paddr_t    pa;
    int             disp;
    int             pai;
    pt_entry_t      *pte;
    pv_entry_t      **pv_h, *pve_p;

    if (pmap == PMAP_NULL || pmap == kernel_pmap) {
        pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
        *disp_p = 0;
        pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
        return KERN_INVALID_ARGUMENT;
    }

    disp = 0;

    VALIDATE_PMAP(pmap);
    pmap_lock_ro(pmap);

    pte = pmap_pte(pmap, va);
    if (pte == PT_ENTRY_NULL) {
        goto done;
    }

    pa = pte_to_pa(*pte);
    if (pa == 0) {
        if (ARM_PTE_IS_COMPRESSED(*pte, pte)) {
            disp |= PMAP_QUERY_PAGE_COMPRESSED;
            if (*pte & ARM_PTE_COMPRESSED_ALT) {
                disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
            }
        }
    } else {
        disp |= PMAP_QUERY_PAGE_PRESENT;
        pai = (int) pa_index(pa);
        if (!pa_valid(pa)) {
            goto done;
        }
        pv_h = pai_to_pvh(pai);
        pve_p = PV_ENTRY_NULL;
        if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
            pve_p = pvh_list(pv_h);
            while (pve_p != PV_ENTRY_NULL &&
                pve_get_ptep(pve_p) != pte) {
                pve_p = PVE_NEXT_PTR(pve_next(pve_p));
            }
        }
        if (IS_ALTACCT_PAGE(pai, pve_p)) {
            disp |= PMAP_QUERY_PAGE_ALTACCT;
        } else if (IS_REUSABLE_PAGE(pai)) {
            disp |= PMAP_QUERY_PAGE_REUSABLE;
        } else if (IS_INTERNAL_PAGE(pai)) {
            disp |= PMAP_QUERY_PAGE_INTERNAL;
        }
    }

done:
    pmap_unlock_ro(pmap);
    pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
    *disp_p = disp;
    pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
    return KERN_SUCCESS;
}
kern_return_t
pmap_query_page_info(
    pmap_t          pmap,
    vm_map_offset_t va,
    int             *disp_p)
{
#if XNU_MONITOR
    return pmap_query_page_info_ppl(pmap, va, disp_p);
#else
    return pmap_query_page_info_internal(pmap, va, disp_p);
#endif
}
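/*
 * Illustrative sketch (not part of the original file): interpreting the
 * disposition bits reported by pmap_query_page_info(). "target_pmap" and
 * "addr" are hypothetical caller-supplied values.
 *
 *      int disp = 0;
 *      if (pmap_query_page_info(target_pmap, addr, &disp) == KERN_SUCCESS) {
 *          bool present    = disp & PMAP_QUERY_PAGE_PRESENT;
 *          bool compressed = disp & PMAP_QUERY_PAGE_COMPRESSED;
 *          bool internal   = disp & PMAP_QUERY_PAGE_INTERNAL;
 *          // The ALTACCT/REUSABLE bits further refine how the page is accounted.
 *      }
 */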
MARK_AS_PMAP_TEXT kern_return_t
pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
{
    return KERN_SUCCESS;
}

kern_return_t
pmap_return(boolean_t do_panic, boolean_t do_recurse)
{
#if XNU_MONITOR
    return pmap_return_ppl(do_panic, do_recurse);
#else
    return pmap_return_internal(do_panic, do_recurse);
#endif
}
MARK_AS_PMAP_TEXT static void
pmap_footprint_suspend_internal(
    vm_map_t map,
    boolean_t suspend)
{
#if DEVELOPMENT || DEBUG
    if (suspend) {
        current_thread()->pmap_footprint_suspended = TRUE;
        map->pmap->footprint_was_suspended = TRUE;
    } else {
        current_thread()->pmap_footprint_suspended = FALSE;
    }
#else /* DEVELOPMENT || DEBUG */
    (void) map;
    (void) suspend;
#endif /* DEVELOPMENT || DEBUG */
}

void
pmap_footprint_suspend(
    vm_map_t map,
    boolean_t suspend)
{
#if XNU_MONITOR
    pmap_footprint_suspend_ppl(map, suspend);
#else
    pmap_footprint_suspend_internal(map, suspend);
#endif
}
#if defined(__arm64__) && (DEVELOPMENT || DEBUG)

struct page_table_dump_header {
    uint64_t pa;
    uint64_t num_entries;
    uint64_t start_va;
    uint64_t end_va;
};

static kern_return_t
pmap_dump_page_tables_recurse(pmap_t pmap,
    const tt_entry_t *ttp,
    unsigned int cur_level,
    unsigned int level_mask,
    uint64_t start_va,
    void *buf_start,
    void *buf_end,
    size_t *bytes_copied)
{
    const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
    uint64_t num_entries = pt_attr_page_size(pt_attr) / sizeof(*ttp);

    uint64_t size = pt_attr->pta_level_info[cur_level].size;
    uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
    uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
    uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;

    void *bufp = (uint8_t*)buf_start + *bytes_copied;

    if (cur_level == pt_attr_root_level(pt_attr)) {
        num_entries = pmap_root_alloc_size(pmap) / sizeof(tt_entry_t);
    }

    uint64_t tt_size = num_entries * sizeof(tt_entry_t);
    const tt_entry_t *tt_end = &ttp[num_entries];

    if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
        return KERN_INSUFFICIENT_BUFFER_SIZE;
    }

    if (level_mask & (1U << cur_level)) {
        struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
        header->pa = ml_static_vtop((vm_offset_t)ttp);
        header->num_entries = num_entries;
        header->start_va = start_va;
        header->end_va = start_va + (num_entries * size);

        bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
        *bytes_copied = *bytes_copied + sizeof(*header) + tt_size;
    }
    uint64_t current_va = start_va;

    for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
        tt_entry_t tte = *ttep;

        if (!(tte & valid_mask)) {
            continue;
        }

        if ((tte & type_mask) == type_block) {
            continue;
        } else {
            if (cur_level >= pt_attr_leaf_level(pt_attr)) {
                panic("%s: corrupt entry %#llx at %p, "
                    "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
                    __FUNCTION__, tte, ttep,
                    ttp, cur_level, bufp, buf_end);
            }

            const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);

            kern_return_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1,
                level_mask, current_va, buf_start, buf_end, bytes_copied);

            if (recurse_result != KERN_SUCCESS) {
                return recurse_result;
            }
        }
    }

    return KERN_SUCCESS;
}

kern_return_t
pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end, unsigned int level_mask, size_t *bytes_copied)
{
    if (not_in_kdp) {
        panic("pmap_dump_page_tables must only be called from kernel debugger context");
    }
    return pmap_dump_page_tables_recurse(pmap, pmap->tte, pt_attr_root_level(pmap_get_pt_attr(pmap)),
               level_mask, pmap->min, bufp, buf_end, bytes_copied);
}

#else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */

kern_return_t
pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused,
    unsigned int level_mask __unused, size_t *bytes_copied __unused)
{
    return KERN_NOT_SUPPORTED;
}
#endif /* !defined(__arm64__) */
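/*
 * Illustrative sketch (not part of the original file): walking a buffer filled
 * by pmap_dump_page_tables(). Each level selected by level_mask contributes a
 * page_table_dump_header followed by a verbatim copy of that table. "buf" and
 * "bytes_copied" are the hypothetical values the caller passed in/got back.
 *
 *      uint8_t *cur = (uint8_t *)buf;
 *      while (cur < (uint8_t *)buf + bytes_copied) {
 *          struct page_table_dump_header *h = (struct page_table_dump_header *)cur;
 *          // h->pa, h->start_va and h->end_va describe the copied table;
 *          // h->num_entries tt_entry_t values follow the header.
 *          cur += sizeof(*h) + h->num_entries * sizeof(tt_entry_t);
 *      }
 */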
#ifdef CONFIG_XNUPOST
static volatile bool pmap_test_took_fault = false;

static bool
pmap_test_fault_handler(arm_saved_state_t * state)
{
    bool retval = false;
    uint32_t esr = get_saved_state_esr(state);
    esr_exception_class_t class = ESR_EC(esr);
    fault_status_t fsc = ISS_IA_FSC(ESR_ISS(esr));

    if ((class == ESR_EC_DABORT_EL1) &&
        ((fsc == FSC_PERMISSION_FAULT_L3) || (fsc == FSC_ACCESS_FLAG_FAULT_L3))) {
        pmap_test_took_fault = true;
        /* return to the instruction immediately after the call to NX page */
        set_saved_state_pc(state, get_saved_state_pc(state) + 4);
        retval = true;
    }

    return retval;
}

static bool
pmap_test_access(pmap_t pmap, vm_map_address_t va, bool should_fault, bool is_write)
{
    /*
     * We're switching pmaps without using the normal thread mechanism;
     * disable interrupts and preemption to avoid any unexpected memory
     * accesses.
     */
    boolean_t old_int_state = ml_set_interrupts_enabled(false);
    pmap_t old_pmap = current_pmap();
    mp_disable_preemption();
    pmap_switch(pmap);

    pmap_test_took_fault = false;

    /* Disable PAN; pmap shouldn't be the kernel pmap. */
#if __ARM_PAN_AVAILABLE__
    __builtin_arm_wsr("pan", 0);
#endif /* __ARM_PAN_AVAILABLE__ */
    ml_expect_fault_begin(pmap_test_fault_handler, va);

    if (is_write) {
        *((volatile uint64_t*)(va)) = 0xdec0de;
    } else {
        volatile uint64_t tmp = *((volatile uint64_t*)(va));
        (void)tmp;
    }

    /* Save the fault bool, and undo the gross stuff we did. */
    bool took_fault = pmap_test_took_fault;
    ml_expect_fault_end();
#if __ARM_PAN_AVAILABLE__
    __builtin_arm_wsr("pan", 1);
#endif /* __ARM_PAN_AVAILABLE__ */

    pmap_switch(old_pmap);
    mp_enable_preemption();
    ml_set_interrupts_enabled(old_int_state);
    bool retval = (took_fault == should_fault);
    return retval;
}
static bool
pmap_test_read(pmap_t pmap, vm_map_address_t va, bool should_fault)
{
    bool retval = pmap_test_access(pmap, va, should_fault, false);

    if (!retval) {
        T_FAIL("%s: %s, "
            "pmap=%p, va=%p, should_fault=%u",
            __func__, should_fault ? "did not fault" : "faulted",
            pmap, (void*)va, (unsigned)should_fault);
    }

    return retval;
}

static bool
pmap_test_write(pmap_t pmap, vm_map_address_t va, bool should_fault)
{
    bool retval = pmap_test_access(pmap, va, should_fault, true);

    if (!retval) {
        T_FAIL("%s: %s, "
            "pmap=%p, va=%p, should_fault=%u",
            __func__, should_fault ? "did not fault" : "faulted",
            pmap, (void*)va, (unsigned)should_fault);
    }

    return retval;
}
static bool
pmap_test_check_refmod(pmap_paddr_t pa, unsigned int should_be_set)
{
    unsigned int should_be_clear = (~should_be_set) & (VM_MEM_REFERENCED | VM_MEM_MODIFIED);
    unsigned int bits = pmap_get_refmod((ppnum_t)atop(pa));

    bool retval = (((bits & should_be_set) == should_be_set) && ((bits & should_be_clear) == 0));

    if (!retval) {
        T_FAIL("%s: bits=%u, "
            "pa=%p, should_be_set=%u",
            __func__, bits,
            (void*)pa, should_be_set);
    }

    return retval;
}

static __attribute__((noinline)) bool
pmap_test_read_write(pmap_t pmap, vm_map_address_t va, bool allow_read, bool allow_write)
{
    bool retval = (pmap_test_read(pmap, va, !allow_read) | pmap_test_write(pmap, va, !allow_write));
    return retval;
}
static void
pmap_test_test_config(unsigned int flags)
{
    T_LOG("running pmap_test_test_config flags=0x%X", flags);
    unsigned int map_count = 0;
    unsigned long page_ratio = 0;
    pmap_t pmap = pmap_create_options(NULL, 0, flags);

    if (pmap == NULL) {
        panic("Failed to allocate pmap");
    }

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
    uintptr_t native_page_size = pt_attr_page_size(native_pt_attr);
    uintptr_t pmap_page_size = pt_attr_page_size(pt_attr);
    uintptr_t pmap_twig_size = pt_attr_twig_size(pt_attr);

    if (pmap_page_size <= native_page_size) {
        page_ratio = native_page_size / pmap_page_size;
    } else {
        /*
         * We claim to support a page_ratio of less than 1, which is
         * not currently supported by the pmap layer; panic.
         */
        panic("%s: page_ratio < 1, native_page_size=%lu, pmap_page_size=%lu"
            "flags=%u",
            __func__, native_page_size, pmap_page_size,
            flags);
    }

    if (PAGE_RATIO > 1) {
        /*
         * The kernel is deliberately pretending to have 16KB pages.
         * The pmap layer has code that supports this, so pretend the
         * page size is larger than it is.
         */
        pmap_page_size = PAGE_SIZE;
        native_page_size = PAGE_SIZE;
    }

    /*
     * Get two pages from the VM; one to be mapped wired, and one to be
     * mapped nonwired.
     */
    vm_page_t unwired_vm_page = vm_page_grab();
    vm_page_t wired_vm_page = vm_page_grab();

    if ((unwired_vm_page == VM_PAGE_NULL) || (wired_vm_page == VM_PAGE_NULL)) {
        panic("Failed to grab VM pages");
    }

    ppnum_t pn = VM_PAGE_GET_PHYS_PAGE(unwired_vm_page);
    ppnum_t wired_pn = VM_PAGE_GET_PHYS_PAGE(wired_vm_page);

    pmap_paddr_t pa = ptoa(pn);
    pmap_paddr_t wired_pa = ptoa(wired_pn);

    /*
     * We'll start mappings at the second twig TT. This keeps us from only
     * using the first entry in each TT, which would trivially be address
     * 0; one of the things we will need to test is retrieving the VA for
     * a given PTE.
     */
    vm_map_address_t va_base = pmap_twig_size;
    vm_map_address_t wired_va_base = ((2 * pmap_twig_size) - pmap_page_size);

    if (wired_va_base < (va_base + (page_ratio * pmap_page_size))) {
        /*
         * Not exactly a functional failure, but this test relies on
         * there being a spare PTE slot we can use to pin the TT.
         */
        panic("Cannot pin translation table");
    }

    /*
     * Create the wired mapping; this will prevent the pmap layer from
     * reclaiming our test TTs, which would interfere with this test
     * ("interfere" -> "make it panic").
     */
    pmap_enter_addr(pmap, wired_va_base, wired_pa, VM_PROT_READ, VM_PROT_READ, 0, true);

    /*
     * Create read-only mappings of the nonwired page; if the pmap does
     * not use the same page size as the kernel, create multiple mappings
     * so that the kernel page is fully mapped.
     */
    for (map_count = 0; map_count < page_ratio; map_count++) {
        pmap_enter_addr(pmap, va_base + (pmap_page_size * map_count), pa + (pmap_page_size * (map_count)), VM_PROT_READ, VM_PROT_READ, 0, false);
    }

    /* Validate that all the PTEs have the expected PA and VA. */
    for (map_count = 0; map_count < page_ratio; map_count++) {
        pt_entry_t * ptep = pmap_pte(pmap, va_base + (pmap_page_size * map_count));

        if (pte_to_pa(*ptep) != (pa + (pmap_page_size * map_count))) {
            T_FAIL("Unexpected pa=%p, expected %p, map_count=%u",
                (void*)pte_to_pa(*ptep), (void*)(pa + (pmap_page_size * map_count)), map_count);
        }

        if (ptep_get_va(ptep) != (va_base + (pmap_page_size * map_count))) {
            T_FAIL("Unexpected va=%p, expected %p, map_count=%u",
                (void*)ptep_get_va(ptep), (void*)(va_base + (pmap_page_size * map_count)), map_count);
        }
    }

    T_LOG("Validate that reads to our mapping do not fault.");
    pmap_test_read(pmap, va_base, false);

    T_LOG("Validate that writes to our mapping fault.");
    pmap_test_write(pmap, va_base, true);

    T_LOG("Make the first mapping writable.");
    pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);

    T_LOG("Validate that writes to our mapping do not fault.");
    pmap_test_write(pmap, va_base, false);

    bool pmap_cs_enforced = pmap->pmap_cs_enforced;

    T_LOG("Disable PMAP CS enforcement");
    pmap_cs_configure_enforcement(pmap, false);

    T_LOG("Make the first mapping XO.");
    pmap_enter_addr(pmap, va_base, pa, VM_PROT_EXECUTE, VM_PROT_EXECUTE, 0, false);

#if __APRR_SUPPORTED__
    T_LOG("Validate that reads to our mapping fault.");
    pmap_test_read(pmap, va_base, true);
#else
    T_LOG("Validate that reads to our mapping do not fault.");
    pmap_test_read(pmap, va_base, false);
#endif

    T_LOG("Validate that writes to our mapping fault.");
    pmap_test_write(pmap, va_base, true);

    T_LOG("Set PMAP CS enforcement configuration to previous value.");
    pmap_cs_configure_enforcement(pmap, pmap_cs_enforced);

    /*
     * For page ratios of greater than 1: validate that writes to the other
     * mappings still fault. Remove the mappings afterwards (we're done
     * with page ratio testing).
     */
    for (map_count = 1; map_count < page_ratio; map_count++) {
        pmap_test_write(pmap, va_base + (pmap_page_size * map_count), true);
        pmap_remove(pmap, va_base + (pmap_page_size * map_count), va_base + (pmap_page_size * map_count) + pmap_page_size);
    }

    T_LOG("Mark the page unreferenced and unmodified.");
    pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
    pmap_test_check_refmod(pa, 0);

    /*
     * Begin testing the ref/mod state machine. Re-enter the mapping with
     * different protection/fault_type settings, and confirm that the
     * ref/mod state matches our expectations at each step.
     */
    T_LOG("!ref/!mod: read, no fault. Expect ref/!mod");
    pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_NONE, 0, false);
    pmap_test_check_refmod(pa, VM_MEM_REFERENCED);

    T_LOG("!ref/!mod: read, read fault. Expect ref/!mod");
    pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
    pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
    pmap_test_check_refmod(pa, VM_MEM_REFERENCED);

    T_LOG("!ref/!mod: rw, read fault. Expect ref/!mod");
    pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
    pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, false);
    pmap_test_check_refmod(pa, VM_MEM_REFERENCED);

    T_LOG("ref/!mod: rw, read fault. Expect ref/!mod");
    pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ, 0, false);
    pmap_test_check_refmod(pa, VM_MEM_REFERENCED);

    T_LOG("!ref/!mod: rw, rw fault. Expect ref/mod");
    pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
    pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
    pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);

    /*
     * Shared memory testing; we'll have two mappings; one read-only,
     * one read-write.
     */
    vm_map_address_t rw_base = va_base;
    vm_map_address_t ro_base = va_base + pmap_page_size;

    pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
    pmap_enter_addr(pmap, ro_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);

    /*
     * Test that we take faults as expected for unreferenced/unmodified
     * pages. Also test the arm_fast_fault interface, to ensure that
     * mapping permissions change as expected.
     */
    T_LOG("!ref/!mod: expect no access");
    pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
    pmap_test_read_write(pmap, ro_base, false, false);
    pmap_test_read_write(pmap, rw_base, false, false);

    T_LOG("Read fault; expect !ref/!mod -> ref/!mod, read access");
    arm_fast_fault(pmap, rw_base, VM_PROT_READ, false, false);
    pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
    pmap_test_read_write(pmap, ro_base, true, false);
    pmap_test_read_write(pmap, rw_base, true, false);

    T_LOG("Write fault; expect ref/!mod -> ref/mod, read and write access");
    arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
    pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
    pmap_test_read_write(pmap, ro_base, true, false);
    pmap_test_read_write(pmap, rw_base, true, true);

    T_LOG("Write fault; expect !ref/!mod -> ref/mod, read and write access");
    pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
    arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
    pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
    pmap_test_read_write(pmap, ro_base, true, false);
    pmap_test_read_write(pmap, rw_base, true, true);

    T_LOG("RW protect both mappings; should not change protections.");
    pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
    pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
    pmap_test_read_write(pmap, ro_base, true, false);
    pmap_test_read_write(pmap, rw_base, true, true);

    T_LOG("Read protect both mappings; RW mapping should become RO.");
    pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ);
    pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ);
    pmap_test_read_write(pmap, ro_base, true, false);
    pmap_test_read_write(pmap, rw_base, true, false);

    T_LOG("RW protect the page; mappings should not change protections.");
    pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
    pmap_page_protect(pn, VM_PROT_ALL);
    pmap_test_read_write(pmap, ro_base, true, false);
    pmap_test_read_write(pmap, rw_base, true, true);

    T_LOG("Read protect the page; RW mapping should become RO.");
    pmap_page_protect(pn, VM_PROT_READ);
    pmap_test_read_write(pmap, ro_base, true, false);
    pmap_test_read_write(pmap, rw_base, true, false);

    T_LOG("Validate that disconnect removes all known mappings of the page.");
    pmap_disconnect(pn);
    if (!pmap_verify_free(pn)) {
        T_FAIL("Page still has mappings");
    }

    T_LOG("Remove the wired mapping, so we can tear down the test map.");
    pmap_remove(pmap, wired_va_base, wired_va_base + pmap_page_size);
    pmap_destroy(pmap);

    T_LOG("Release the pages back to the VM.");
    vm_page_lock_queues();
    vm_page_free(unwired_vm_page);
    vm_page_free(wired_vm_page);
    vm_page_unlock_queues();

    T_LOG("Testing successful!");
}
#endif /* __arm64__ */
15322 T_LOG("Starting pmap_tests");
15325 flags
|= PMAP_CREATE_64BIT
;
15327 #if __ARM_MIXED_PAGE_SIZE__
15328 T_LOG("Testing VM_PAGE_SIZE_4KB");
15329 pmap_test_test_config(flags
| PMAP_CREATE_FORCE_4K_PAGES
);
15330 T_LOG("Testing VM_PAGE_SIZE_16KB");
15331 pmap_test_test_config(flags
);
15332 #else /* __ARM_MIXED_PAGE_SIZE__ */
15333 pmap_test_test_config(flags
);
15334 #endif /* __ARM_MIXED_PAGE_SIZE__ */
15336 #endif /* __arm64__ */
15337 T_PASS("completed pmap_test successfully");
15338 return KERN_SUCCESS
;
15340 #endif /* CONFIG_XNUPOST */