/*
 * Copyright (c) 2011-2020 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_assert.h>
#include <mach_ldebug.h>

#include <mach/shared_region.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/vm_map.h>
#include <mach/machine/vm_param.h>
#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/bits.h>
#include <kern/thread.h>
#include <kern/sched.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>
#include <kern/trustcache.h>

#include <os/overflow.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#include <libkern/img4/interface.h>
#include <libkern/section_keywords.h>
#include <sys/errno.h>

#include <machine/atomic.h>
#include <machine/thread.h>
#include <machine/lowglobals.h>

#include <arm/caches_internal.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/cpu_capabilities.h>
#include <arm/cpu_number.h>
#include <arm/machine_cpu.h>
#include <arm/misc_protos.h>

#if (__ARM_VMSA__ > 7)
#include <arm64/proc_reg.h>
#include <pexpert/arm64/boot.h>
#endif

#if CONFIG_PGTRACE
#include <arm64/pgtrace.h>
#if CONFIG_PGTRACE_NONKEXT
#include <arm64/pgtrace_decoder.h>
#endif // CONFIG_PGTRACE_NONKEXT
#endif // CONFIG_PGTRACE

#if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
#include <arm64/amcc_rorgn.h>
#endif // defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)

#include <pexpert/device_tree.h>

#include <san/kasan.h>
#include <sys/cdefs.h>

#if defined(HAS_APPLE_PAC)
#include <ptrauth.h>
#endif /* defined(HAS_APPLE_PAC) */

#ifdef CONFIG_XNUPOST
#include <tests/xnupost.h>
#endif /* CONFIG_XNUPOST */

#if HIBERNATION
#include <IOKit/IOHibernatePrivate.h>
#endif /* HIBERNATION */

#define PMAP_TT_L0_LEVEL        0x0
#define PMAP_TT_L1_LEVEL        0x1
#define PMAP_TT_L2_LEVEL        0x2
#define PMAP_TT_L3_LEVEL        0x3

#ifdef __ARM64_PMAP_SUBPAGE_L1__
#if (__ARM_VMSA__ <= 7)
#error This is not supported for old-style page tables
#endif
#define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
#else
#if (__ARM_VMSA__ <= 7)
#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
#else
#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
#endif
#endif

extern u_int32_t random(void);  /* from <libkern/libkern.h> */

static bool alloc_asid(pmap_t pmap);
static void free_asid(pmap_t pmap);
static void flush_mmu_tlb_region_asid_async(vm_offset_t va, size_t length, pmap_t pmap);
static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
static pt_entry_t wimg_to_pte(unsigned int wimg);
struct page_table_ops {
	bool (*alloc_id)(pmap_t pmap);
	void (*free_id)(pmap_t pmap);
	void (*flush_tlb_region_async)(vm_offset_t va, size_t length, pmap_t pmap);
	void (*flush_tlb_async)(pmap_t pmap);
	pt_entry_t (*wimg_to_pte)(unsigned int wimg);
};

static const struct page_table_ops native_pt_ops =
{
	.alloc_id = alloc_asid,
	.free_id = free_asid,
	.flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
	.flush_tlb_async = flush_mmu_tlb_full_asid_async,
	.wimg_to_pte = wimg_to_pte,
};
#if (__ARM_VMSA__ > 7)
const struct page_table_level_info pmap_table_level_info_16k[] =
{
	{
		.size       = ARM_16K_TT_L0_SIZE,
		.offmask    = ARM_16K_TT_L0_OFFMASK,
		.shift      = ARM_16K_TT_L0_SHIFT,
		.index_mask = ARM_16K_TT_L0_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	{
		.size       = ARM_16K_TT_L1_SIZE,
		.offmask    = ARM_16K_TT_L1_OFFMASK,
		.shift      = ARM_16K_TT_L1_SHIFT,
		.index_mask = ARM_16K_TT_L1_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	{
		.size       = ARM_16K_TT_L2_SIZE,
		.offmask    = ARM_16K_TT_L2_OFFMASK,
		.shift      = ARM_16K_TT_L2_SHIFT,
		.index_mask = ARM_16K_TT_L2_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	{
		.size       = ARM_16K_TT_L3_SIZE,
		.offmask    = ARM_16K_TT_L3_OFFMASK,
		.shift      = ARM_16K_TT_L3_SHIFT,
		.index_mask = ARM_16K_TT_L3_INDEX_MASK,
		.valid_mask = ARM_PTE_TYPE_VALID,
		.type_mask  = ARM_PTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_L3BLOCK
	}
};

const struct page_table_level_info pmap_table_level_info_4k[] =
{
	{
		.size       = ARM_4K_TT_L0_SIZE,
		.offmask    = ARM_4K_TT_L0_OFFMASK,
		.shift      = ARM_4K_TT_L0_SHIFT,
		.index_mask = ARM_4K_TT_L0_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	{
		.size       = ARM_4K_TT_L1_SIZE,
		.offmask    = ARM_4K_TT_L1_OFFMASK,
		.shift      = ARM_4K_TT_L1_SHIFT,
		.index_mask = ARM_4K_TT_L1_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	{
		.size       = ARM_4K_TT_L2_SIZE,
		.offmask    = ARM_4K_TT_L2_OFFMASK,
		.shift      = ARM_4K_TT_L2_SHIFT,
		.index_mask = ARM_4K_TT_L2_INDEX_MASK,
		.valid_mask = ARM_TTE_VALID,
		.type_mask  = ARM_TTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_BLOCK
	},
	{
		.size       = ARM_4K_TT_L3_SIZE,
		.offmask    = ARM_4K_TT_L3_OFFMASK,
		.shift      = ARM_4K_TT_L3_SHIFT,
		.index_mask = ARM_4K_TT_L3_INDEX_MASK,
		.valid_mask = ARM_PTE_TYPE_VALID,
		.type_mask  = ARM_PTE_TYPE_MASK,
		.type_block = ARM_TTE_TYPE_L3BLOCK
	}
};
struct page_table_attr {
	const struct page_table_level_info * const pta_level_info;
	const struct page_table_ops * const pta_ops;
	const uintptr_t ap_ro;
	const uintptr_t ap_rw;
	const uintptr_t ap_rona;
	const uintptr_t ap_rwna;
	const uintptr_t ap_xn;
	const uintptr_t ap_x;
	const unsigned int pta_root_level;
	const unsigned int pta_sharedpage_level;
	const unsigned int pta_max_level;
#if __ARM_MIXED_PAGE_SIZE__
	const uint64_t pta_tcr_value;
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	const uint64_t pta_page_size;
	const uint64_t pta_page_shift;
};
const struct page_table_attr pmap_pt_attr_4k = {
	.pta_level_info = pmap_table_level_info_4k,
	.pta_root_level = (T0SZ_BOOT - 16) / 9,
#if __ARM_MIXED_PAGE_SIZE__
	.pta_sharedpage_level = PMAP_TT_L2_LEVEL,
#else /* __ARM_MIXED_PAGE_SIZE__ */
#if __ARM_16K_PG__
	.pta_sharedpage_level = PMAP_TT_L2_LEVEL,
#else /* __ARM_16K_PG__ */
	.pta_sharedpage_level = PMAP_TT_L1_LEVEL,
#endif /* __ARM_16K_PG__ */
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	.pta_max_level  = PMAP_TT_L3_LEVEL,
	.pta_ops = &native_pt_ops,
	.ap_ro = ARM_PTE_AP(AP_RORO),
	.ap_rw = ARM_PTE_AP(AP_RWRW),
	.ap_rona = ARM_PTE_AP(AP_RONA),
	.ap_rwna = ARM_PTE_AP(AP_RWNA),
	.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
	.ap_x = ARM_PTE_PNX,
#if __ARM_MIXED_PAGE_SIZE__
	.pta_tcr_value  = TCR_EL1_4KB,
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	.pta_page_size  = 4096,
	.pta_page_shift = 12,
};

const struct page_table_attr pmap_pt_attr_16k = {
	.pta_level_info = pmap_table_level_info_16k,
	.pta_root_level = PMAP_TT_L1_LEVEL,
	.pta_sharedpage_level = PMAP_TT_L2_LEVEL,
	.pta_max_level  = PMAP_TT_L3_LEVEL,
	.pta_ops = &native_pt_ops,
	.ap_ro = ARM_PTE_AP(AP_RORO),
	.ap_rw = ARM_PTE_AP(AP_RWRW),
	.ap_rona = ARM_PTE_AP(AP_RONA),
	.ap_rwna = ARM_PTE_AP(AP_RWNA),
	.ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
	.ap_x = ARM_PTE_PNX,
#if __ARM_MIXED_PAGE_SIZE__
	.pta_tcr_value  = TCR_EL1_16KB,
#endif /* __ARM_MIXED_PAGE_SIZE__ */
	.pta_page_size  = 16384,
	.pta_page_shift = 14,
};

#if __ARM_16K_PG__
const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
#else /* !__ARM_16K_PG__ */
const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
#endif /* !__ARM_16K_PG__ */

#else /* (__ARM_VMSA__ > 7) */
/*
 * We don't support pmap parameterization for VMSA7, so use an opaque
 * page_table_attr structure.
 */
const struct page_table_attr * const native_pt_attr = NULL;
#endif /* (__ARM_VMSA__ > 7) */

typedef struct page_table_attr pt_attr_t;

/* Macro for getting pmap attributes; not a function for const propagation. */
#if ARM_PARAMETERIZED_PMAP
/* The page table attributes are linked to the pmap */
#define pmap_get_pt_attr(pmap) ((pmap)->pmap_pt_attr)
#define pmap_get_pt_ops(pmap) ((pmap)->pmap_pt_attr->pta_ops)
#else /* !ARM_PARAMETERIZED_PMAP */
/* The page table attributes are fixed (to allow for const propagation) */
#define pmap_get_pt_attr(pmap) (native_pt_attr)
#define pmap_get_pt_ops(pmap) (&native_pt_ops)
#endif /* !ARM_PARAMETERIZED_PMAP */
#if (__ARM_VMSA__ > 7)
static inline uint64_t
pt_attr_page_size(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_page_size;
}

__unused static inline uint64_t
pt_attr_ln_size(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr->pta_level_info[level].size;
}

__unused static inline uint64_t
pt_attr_ln_shift(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr->pta_level_info[level].shift;
}

static inline uint64_t
pt_attr_ln_offmask(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr->pta_level_info[level].offmask;
}

__unused static inline uint64_t
pt_attr_ln_pt_offmask(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr_ln_offmask(pt_attr, level);
}

__unused static inline uint64_t
pt_attr_ln_index_mask(const pt_attr_t * const pt_attr, unsigned int level)
{
	return pt_attr->pta_level_info[level].index_mask;
}

static inline unsigned int
pt_attr_twig_level(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_max_level - 1;
}

static inline unsigned int
pt_attr_root_level(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_root_level;
}

/*
 * This is the level at which to copy a pt_entry from the sharedpage_pmap into
 * the user pmap. Typically L1 for 4K pages, and L2 for 16K pages. In this way,
 * the sharedpage's L2/L3 page tables are reused in every 4k task, whereas only
 * the L3 page table is reused in 16K tasks.
 */
static inline unsigned int
pt_attr_sharedpage_level(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_sharedpage_level;
}

static __unused inline uint64_t
pt_attr_leaf_size(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level].size;
}

static __unused inline uint64_t
pt_attr_leaf_offmask(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level].offmask;
}

static inline uint64_t
pt_attr_leaf_shift(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level].shift;
}

static __unused inline uint64_t
pt_attr_leaf_index_mask(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level].index_mask;
}

static inline uint64_t
pt_attr_twig_size(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].size;
}

static inline uint64_t
pt_attr_twig_offmask(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].offmask;
}

static inline uint64_t
pt_attr_twig_shift(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].shift;
}

static __unused inline uint64_t
pt_attr_twig_index_mask(const pt_attr_t * const pt_attr)
{
	return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].index_mask;
}

static inline uint64_t
pt_attr_leaf_table_size(const pt_attr_t * const pt_attr)
{
	return pt_attr_twig_size(pt_attr);
}

static inline uint64_t
pt_attr_leaf_table_offmask(const pt_attr_t * const pt_attr)
{
	return pt_attr_twig_offmask(pt_attr);
}

static inline uintptr_t
pt_attr_leaf_rw(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_rw;
}

static inline uintptr_t
pt_attr_leaf_ro(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_ro;
}

static inline uintptr_t
pt_attr_leaf_rona(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_rona;
}

static inline uintptr_t
pt_attr_leaf_rwna(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_rwna;
}

static inline uintptr_t
pt_attr_leaf_xn(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_xn;
}

static inline uintptr_t
pt_attr_leaf_x(const pt_attr_t * const pt_attr)
{
	return pt_attr->ap_x;
}
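
/*
 * Illustrative sketch, not part of the original pmap sources: the accessors
 * above fully describe the page-table geometry, so a level's table index can
 * be derived from a VA without hard-coding shifts.  The helper below is a
 * hypothetical, unused example of how the per-level index_mask/shift compose.
 */
__unused static inline uint64_t
pt_attr_example_index_for_level(const pt_attr_t * const pt_attr, vm_map_address_t va, unsigned int level)
{
	/* The mask selects this level's index bits; the shift aligns them to bit 0. */
	return (va & pt_attr_ln_index_mask(pt_attr, level)) >> pt_attr_ln_shift(pt_attr, level);
}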
#else /* (__ARM_VMSA__ > 7) */
static inline uint64_t
pt_attr_page_size(__unused const pt_attr_t * const pt_attr)
{
	return PAGE_SIZE;
}

__unused static inline unsigned int
pt_attr_root_level(__unused const pt_attr_t * const pt_attr)
{
	return PMAP_TT_L1_LEVEL;
}

__unused static inline unsigned int
pt_attr_sharedpage_level(__unused const pt_attr_t * const pt_attr)
{
	return PMAP_TT_L1_LEVEL;
}

static inline unsigned int
pt_attr_twig_level(__unused const pt_attr_t * const pt_attr)
{
	return PMAP_TT_L1_LEVEL;
}

static inline uint64_t
pt_attr_twig_size(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_TWIG_SIZE;
}

static inline uint64_t
pt_attr_twig_offmask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_TWIG_OFFMASK;
}

static inline uint64_t
pt_attr_twig_shift(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_TWIG_SHIFT;
}

static __unused inline uint64_t
pt_attr_twig_index_mask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_TWIG_INDEX_MASK;
}

__unused static inline uint64_t
pt_attr_leaf_size(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_LEAF_SIZE;
}

__unused static inline uint64_t
pt_attr_leaf_offmask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_LEAF_OFFMASK;
}

static inline uint64_t
pt_attr_leaf_shift(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_LEAF_SHIFT;
}

static __unused inline uint64_t
pt_attr_leaf_index_mask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_LEAF_INDEX_MASK;
}

static inline uint64_t
pt_attr_leaf_table_size(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_L1_PT_SIZE;
}

static inline uint64_t
pt_attr_leaf_table_offmask(__unused const pt_attr_t * const pt_attr)
{
	return ARM_TT_L1_PT_OFFMASK;
}

static inline uintptr_t
pt_attr_leaf_rw(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_AP(AP_RWRW);
}

static inline uintptr_t
pt_attr_leaf_ro(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_AP(AP_RORO);
}

static inline uintptr_t
pt_attr_leaf_rona(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_AP(AP_RONA);
}

static inline uintptr_t
pt_attr_leaf_rwna(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_AP(AP_RWNA);
}

static inline uintptr_t
pt_attr_leaf_xn(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_NX;
}

static inline uintptr_t
pt_attr_leaf_x(__unused const pt_attr_t * const pt_attr)
{
	return ARM_PTE_PNX;
}

__unused static inline uintptr_t
pt_attr_ln_offmask(__unused const pt_attr_t * const pt_attr, unsigned int level)
{
	if (level == PMAP_TT_L1_LEVEL) {
		return ARM_TT_L1_OFFMASK;
	} else if (level == PMAP_TT_L2_LEVEL) {
		return ARM_TT_L2_OFFMASK;
	}

	return 0;
}

static inline uintptr_t
pt_attr_ln_pt_offmask(__unused const pt_attr_t * const pt_attr, unsigned int level)
{
	if (level == PMAP_TT_L1_LEVEL) {
		return ARM_TT_L1_PT_OFFMASK;
	} else if (level == PMAP_TT_L2_LEVEL) {
		return ARM_TT_L2_OFFMASK;
	}

	return 0;
}

#endif /* (__ARM_VMSA__ > 7) */

static inline unsigned int
pt_attr_leaf_level(const pt_attr_t * const pt_attr)
{
	return pt_attr_twig_level(pt_attr) + 1;
}
static inline void
pmap_sync_tlb(bool strong __unused)
{
	sync_tlb_flush();
}

#if MACH_ASSERT
int vm_footprint_suspend_allowed = 1;

extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

int pmap_stats_assert = 1;
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...) \
	MACRO_BEGIN \
	if (pmap_stats_assert && (pmap)->pmap_stats_assert) \
	        assertf(cond, fmt, ##__VA_ARGS__); \
	MACRO_END
#else /* MACH_ASSERT */
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
#endif /* MACH_ASSERT */

#if DEVELOPMENT || DEBUG
#define PMAP_FOOTPRINT_SUSPENDED(pmap) \
	(current_thread()->pmap_footprint_suspended)
#else /* DEVELOPMENT || DEBUG */
#define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
#endif /* DEVELOPMENT || DEBUG */

#ifdef PLATFORM_BridgeOS
static struct pmap_legacy_trust_cache *pmap_legacy_trust_caches MARK_AS_PMAP_DATA = NULL;
#endif
static struct pmap_image4_trust_cache *pmap_image4_trust_caches MARK_AS_PMAP_DATA = NULL;

MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_loaded_trust_caches_lock, 0);

/*
 * Represents a tlb range that will be flushed before exiting
 * the PPL.
 * Used by phys_attribute_clear_range to defer flushing pages in
 * this range until the end of the operation.
 */
typedef struct pmap_tlb_flush_range {
	pmap_t ptfr_pmap;
	vm_map_address_t ptfr_start;
	vm_map_address_t ptfr_end;
	bool ptfr_flush_needed;
} pmap_tlb_flush_range_t;
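
/*
 * Illustrative sketch, not part of the original pmap sources: a hypothetical,
 * unused example of seeding a deferred-flush descriptor before a batched
 * operation.  The flush itself would be issued once at the end, and only if
 * some PTE in the range actually changed (ptfr_flush_needed).
 */
__unused static inline pmap_tlb_flush_range_t
pmap_example_init_flush_range(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
	pmap_tlb_flush_range_t flush_range = {
		.ptfr_pmap = pmap,
		.ptfr_start = start,
		.ptfr_end = end,
		.ptfr_flush_needed = false,     /* set once a mapping is modified */
	};
	return flush_range;
}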
#if XNU_MONITOR
/*
 * PPL External References.
 */
extern vm_offset_t   segPPLDATAB;
extern unsigned long segSizePPLDATA;
extern vm_offset_t   segPPLTEXTB;
extern unsigned long segSizePPLTEXT;
extern vm_offset_t   segPPLDATACONSTB;
extern unsigned long segSizePPLDATACONST;

/*
 * PPL Global Variables
 */

#if (DEVELOPMENT || DEBUG) || CONFIG_CSR_FROM_DT
/* Indicates if the PPL will enforce mapping policies; set by -unsafe_kernel_text */
SECURITY_READ_ONLY_LATE(boolean_t) pmap_ppl_disable = FALSE;
#else
const boolean_t pmap_ppl_disable = FALSE;
#endif

/* Indicates if the PPL has started applying APRR. */
boolean_t pmap_ppl_locked_down MARK_AS_PMAP_DATA = FALSE;

/*
 * The PPL cannot invoke the kernel in order to allocate memory, so we must
 * maintain a list of free pages that the PPL owns.  The kernel can give the PPL
 * additional pages.
 */
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_ppl_free_page_lock, 0);
void ** pmap_ppl_free_page_list MARK_AS_PMAP_DATA = NULL;
uint64_t pmap_ppl_free_page_count MARK_AS_PMAP_DATA = 0;
uint64_t pmap_ppl_pages_returned_to_kernel_count_total = 0;

struct pmap_cpu_data_array_entry pmap_cpu_data_array[MAX_CPUS] MARK_AS_PMAP_DATA = {0};

extern void *pmap_stacks_start;
extern void *pmap_stacks_end;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_start_pa = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_end_pa = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_start = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_end = 0;

/* Allocation data/locks for pmap structures. */

MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_free_list_lock, 0);

SECURITY_READ_ONLY_LATE(unsigned long) pmap_array_count = 0;
SECURITY_READ_ONLY_LATE(void *) pmap_array_begin = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_array_end = NULL;
SECURITY_READ_ONLY_LATE(pmap_t) pmap_array = NULL;
pmap_t pmap_free_list MARK_AS_PMAP_DATA = NULL;

/* Allocation data/locks/structs for task ledger structures. */
#define PMAP_LEDGER_DATA_BYTES \
	(((sizeof(task_ledgers) / sizeof(int)) * sizeof(struct ledger_entry)) + sizeof(struct ledger))

/*
 * Maximum number of ledgers allowed are maximum number of tasks
 * allowed on system plus some more i.e. ~10% of total tasks = 200.
 */
#define MAX_PMAP_LEDGERS (pmap_max_asids + 200)
#define PMAP_ARRAY_SIZE (pmap_max_asids)

typedef struct pmap_ledger_data {
	char pld_data[PMAP_LEDGER_DATA_BYTES];
} pmap_ledger_data_t;

typedef struct pmap_ledger {
	union {
		struct pmap_ledger_data ple_data;
		struct pmap_ledger * next;
	};

	struct pmap_ledger ** back_ptr;
} pmap_ledger_t;

SECURITY_READ_ONLY_LATE(bool) pmap_ledger_alloc_initialized = false;
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_ledger_lock, 0);
SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_begin = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_end = NULL;
SECURITY_READ_ONLY_LATE(os_refcnt_t *) pmap_ledger_refcnt = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_begin = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_end = NULL;
SECURITY_READ_ONLY_LATE(pmap_ledger_t * *) pmap_ledger_ptr_array = NULL;
uint64_t pmap_ledger_ptr_array_free_index MARK_AS_PMAP_DATA = 0;
pmap_ledger_t * pmap_ledger_free_list MARK_AS_PMAP_DATA = NULL;

#define pmap_ledger_debit(p, e, a) ledger_debit_nocheck((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit_nocheck((p)->ledger, e, a)

static inline void
pmap_check_ledger_fields(ledger_t ledger)
{
	if (ledger == NULL) {
		return;
	}

	thread_t cur_thread = current_thread();
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting_compressed);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal_compressed);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.page_table);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_footprint);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_mem);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.tkm_private);
	ledger_check_new_balance(cur_thread, ledger, task_ledgers.wired_mem);
}

#define pmap_ledger_check_balance(p) pmap_check_ledger_fields((p)->ledger)

#else /* XNU_MONITOR */

#define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)

#endif /* !XNU_MONITOR */
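
/*
 * Illustrative sketch, not part of the original pmap sources: the wrappers
 * above route ledger updates through the "nocheck" variants inside the PPL
 * (balance checks are deferred to pmap_ledger_check_balance) and the checking
 * variants otherwise.  Hypothetical, unused example charging one page of
 * page-table memory to a pmap's ledger.
 */
__unused static inline void
pmap_example_charge_page_table_page(pmap_t pmap)
{
	pmap_ledger_credit(pmap, task_ledgers.page_table, PAGE_SIZE);
}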
/* Virtual memory region for early allocation */
#if (__ARM_VMSA__ == 7)
#define VREGION1_HIGH_WINDOW    (0)
#else
#define VREGION1_HIGH_WINDOW    (PE_EARLY_BOOT_VA)
#endif
#define VREGION1_START          ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
#define VREGION1_SIZE           (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))

extern uint8_t bootstrap_pagetables[];

extern unsigned int not_in_kdp;

extern vm_offset_t first_avail;

extern pmap_paddr_t avail_start;
extern pmap_paddr_t avail_end;

extern vm_offset_t virtual_space_start;         /* Next available kernel VA */
extern vm_offset_t virtual_space_end;           /* End of kernel address space */
extern vm_offset_t static_memory_end;

extern const vm_map_address_t physmap_base;
extern const vm_map_address_t physmap_end;

extern int maxproc, hard_maxproc;

vm_address_t MARK_AS_PMAP_DATA image4_slab = 0;

#if (__ARM_VMSA__ > 7)
/* The number of address bits one TTBR can cover. */
#define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)

/*
 * The bounds on our TTBRs.  These are for sanity checking that
 * an address is accessible by a TTBR before we attempt to map it.
 */
#define ARM64_TTBR0_MIN_ADDR (0ULL)
#define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
#define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
#define ARM64_TTBR1_MAX_ADDR (~0ULL)

/* The level of the root of a page table. */
const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));

/* The number of entries in the root TT of a page table. */
const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
#else
const uint64_t arm64_root_pgtable_level = 0;
const uint64_t arm64_root_pgtable_num_ttes = 0;
#endif
struct pmap kernel_pmap_store MARK_AS_PMAP_DATA;
SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;

struct vm_object pmap_object_store VM_PAGE_PACKED_ALIGNED;      /* store pt pages */
SECURITY_READ_ONLY_LATE(vm_object_t) pmap_object = &pmap_object_store;

static SECURITY_READ_ONLY_LATE(zone_t) pmap_zone;               /* zone of pmap structures */

MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmaps_lock, 0);
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(tt1_lock, 0);
unsigned int pmap_stamp MARK_AS_PMAP_DATA;
queue_head_t map_pmap_list MARK_AS_PMAP_DATA;

MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pt_pages_lock, 0);
queue_head_t pt_page_list MARK_AS_PMAP_DATA;                    /* pt page ptd entries list */

MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_pages_lock, 0);

typedef struct page_free_entry {
	struct page_free_entry  *next;
} page_free_entry_t;

#define PAGE_FREE_ENTRY_NULL    ((page_free_entry_t *) 0)

page_free_entry_t       *pmap_pages_reclaim_list MARK_AS_PMAP_DATA;     /* Reclaimed pt page list */
unsigned int            pmap_pages_request_count MARK_AS_PMAP_DATA;     /* Pending requests to reclaim pt page */
unsigned long long      pmap_pages_request_acum MARK_AS_PMAP_DATA;

typedef struct tt_free_entry {
	struct tt_free_entry    *next;
} tt_free_entry_t;

#define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)

tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_PAGE_SIZE_TT_MAX   4
tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_two_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_two_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_TWO_PAGE_SIZE_TT_MAX       4
tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_tt_max MARK_AS_PMAP_DATA;

#define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)

boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
boolean_t pmap_gc_allowed_by_time_throttle = TRUE;

unsigned int    inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0;        /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0;        /* leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0;         /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int    inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0;      /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0;      /* leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0;       /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int    inuse_pmap_pages_count = 0;                             /* debugging */

SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte  = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;

SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte  = 0;     /* set by arm_vm_init() - keep out of bss */
SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0;     /* set by arm_vm_init() - phys tte addr */

#if DEVELOPMENT || DEBUG
int nx_enabled = 1;                                     /* enable no-execute protection */
int allow_data_exec  = 0;                               /* No apps may execute data */
int allow_stack_exec = 0;                               /* No apps may execute from the stack */
unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
unsigned long pmap_asid_hits MARK_AS_PMAP_DATA = 0;
unsigned long pmap_asid_misses MARK_AS_PMAP_DATA = 0;
#else /* DEVELOPMENT || DEBUG */
const int nx_enabled = 1;                               /* enable no-execute protection */
const int allow_data_exec  = 0;                         /* No apps may execute data */
const int allow_stack_exec = 0;                         /* No apps may execute from the stack */
#endif /* DEVELOPMENT || DEBUG */

/*
 * This variable is set true during hibernation entry to protect pmap data structures
 * during image copying, and reset false on hibernation exit.
 */
bool hib_entry_pmap_lockdown MARK_AS_PMAP_DATA = false;

/* Macro used to ensure that pmap data structures aren't modified during hibernation image copying. */
#if HIBERNATION
#define ASSERT_NOT_HIBERNATING() (assertf(!hib_entry_pmap_lockdown, \
	"Attempted to modify PMAP data structures after hibernation image copying has begun."))
#else
#define ASSERT_NOT_HIBERNATING()
#endif /* HIBERNATION */
#define PV_ENTRY_NULL   ((pv_entry_t *) 0)

/*
 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
 * as a marker for pages mapped through an "alternate accounting" mapping.
 * These macros set, clear and test for this marker and extract the actual
 * value of the "pve_next" pointer.
 */
#define PVE_NEXT_ALTACCT        ((uintptr_t) 0x1)
#define PVE_NEXT_SET_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
	    PVE_NEXT_ALTACCT)
#define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
	    ~PVE_NEXT_ALTACCT)
#define PVE_NEXT_IS_ALTACCT(pve_next) \
	((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
#define PVE_NEXT_PTR(pve_next) \
	((struct pv_entry *)(((uintptr_t) (pve_next)) & \
	    ~PVE_NEXT_ALTACCT))

#if MACH_ASSERT
static void pmap_check_ledgers(pmap_t pmap);
#else
static inline void
pmap_check_ledgers(__unused pmap_t pmap)
{
}
#endif /* MACH_ASSERT */
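
/*
 * Illustrative sketch, not part of the original pmap sources: the ALTACCT
 * marker rides in bit 0 of a pve_next pointer, so tagging, testing and
 * stripping it are pure pointer arithmetic.  `link` is a hypothetical local
 * used only to demonstrate the macros; the function is unused.
 */
__unused static inline void
pve_next_altacct_example(void)
{
	pv_entry_t *link = PV_ENTRY_NULL;

	PVE_NEXT_SET_ALTACCT(&link);                    /* low bit set in the link */
	assert(PVE_NEXT_IS_ALTACCT(link));              /* marker is observable */
	assert(PVE_NEXT_PTR(link) == PV_ENTRY_NULL);    /* payload pointer unchanged */
	PVE_NEXT_CLR_ALTACCT(&link);                    /* marker cleared again */
	assert(!PVE_NEXT_IS_ALTACCT(link));
}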
SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table;          /* array of pv entry pointers */

pv_free_list_t pv_free MARK_AS_PMAP_DATA = {0};
pv_free_list_t pv_kern_free MARK_AS_PMAP_DATA = {0};
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pv_free_list_lock, 0);
MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pv_kern_free_list_lock, 0);

SIMPLE_LOCK_DECLARE(phys_backup_lock, 0);
/*
 *	pt_desc - structure to keep info on page assigned to page tables
 */
#if (__ARM_VMSA__ == 7)
#define PT_INDEX_MAX 1
#else /* (__ARM_VMSA__ != 7) */

#if __ARM_MIXED_PAGE_SIZE__
#define PT_INDEX_MAX (ARM_PGBYTES / 4096)
#elif (ARM_PGSHIFT == 14)
#define PT_INDEX_MAX 1
#elif (ARM_PGSHIFT == 12)
#define PT_INDEX_MAX 4
#else
#error Unsupported ARM_PGSHIFT
#endif /* (ARM_PGSHIFT != 14) */

#endif /* (__ARM_VMSA__ != 7) */

#define PT_DESC_REFCOUNT                0x4000U
#define PT_DESC_IOMMU_REFCOUNT          0x8000U

typedef struct {
	/*
	 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
	 * For leaf pagetables, should reflect the number of non-empty PTEs
	 * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
	 */
	unsigned short          refcnt;
	/*
	 * For non-leaf pagetables, should be 0
	 * For leaf pagetables, should reflect the number of wired entries
	 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
	 */
	unsigned short          wiredcnt;
	vm_offset_t             va;
} ptd_info_t;

typedef struct pt_desc {
	queue_chain_t           pt_page;
	union {
		struct pmap     *pmap;
	};
	ptd_info_t ptd_info[PT_INDEX_MAX];
} pt_desc_t;

#define PTD_ENTRY_NULL  ((pt_desc_t *) 0)

SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;

pt_desc_t               *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
unsigned int            ptd_free_count MARK_AS_PMAP_DATA = 0;
decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA);
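
/*
 * Illustrative sketch, not part of the original pmap sources: a hypothetical,
 * unused helper restating the refcnt/wiredcnt conventions documented above.
 * It only demonstrates how the sentinel values distinguish page-table types.
 */
__unused static inline bool
ptd_info_example_is_reclaimable_leaf(const ptd_info_t *info)
{
	/* IOMMU-owned pages and non-leaf tables are never reclaimed this way. */
	if ((info->refcnt == PT_DESC_IOMMU_REFCOUNT) || (info->refcnt == PT_DESC_REFCOUNT)) {
		return false;
	}
	/* A leaf table with no live PTEs and no wired entries could be freed. */
	return (info->refcnt == 0) && (info->wiredcnt == 0);
}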
/*
 * physical page attribute
 */
typedef u_int16_t pp_attr_t;

#define PP_ATTR_WIMG_MASK               0x003F
#define PP_ATTR_WIMG(x)                 ((x) & PP_ATTR_WIMG_MASK)

#define PP_ATTR_REFERENCED              0x0040
#define PP_ATTR_MODIFIED                0x0080

#define PP_ATTR_INTERNAL                0x0100
#define PP_ATTR_REUSABLE                0x0200
#define PP_ATTR_ALTACCT                 0x0400
#define PP_ATTR_NOENCRYPT               0x0800

#define PP_ATTR_REFFAULT                0x1000
#define PP_ATTR_MODFAULT                0x2000

#if XNU_MONITOR
/*
 * Denotes that a page is owned by the PPL.  This is modified/checked with the
 * PVH lock held, to avoid ownership related races.  This does not need to be a
 * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
 * put the bit.
 */
#define PP_ATTR_MONITOR                 0x4000

/*
 * Denotes that a page *cannot* be owned by the PPL.  This is required in order
 * to temporarily 'pin' kernel pages that are used to store PPL output parameters.
 * Otherwise a malicious or buggy caller could pass PPL-owned memory for these
 * parameters and in so doing stage a write gadget against the PPL.
 */
#define PP_ATTR_NO_MONITOR              0x8000

/*
 * All of the bits owned by the PPL; kernel requests to set or clear these bits
 * are illegal.
 */
#define PP_ATTR_PPL_OWNED_BITS          (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
#endif

SECURITY_READ_ONLY_LATE(volatile pp_attr_t*) pp_attr_table;
/*
 * The layout of this structure needs to map 1-to-1 with the pmap-io-range device
 * tree nodes.  Astris (through the LowGlobals) also depends on the consistency
 * of this structure.
 */
typedef struct pmap_io_range {
	uint64_t addr;
	uint64_t len;
	#define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31) // Strong DSB required for pages in this range
	#define PMAP_IO_RANGE_CARVEOUT (1UL << 30) // Corresponds to memory carved out by bootloader
	#define PMAP_IO_RANGE_NEEDS_HIBERNATING (1UL << 29) // Pages in this range need to be included in the hibernation image
	uint32_t wimg; // lower 16 bits treated as pp_attr_t, upper 16 bits contain additional mapping flags
	uint32_t signature; // 4CC
} __attribute__((packed)) pmap_io_range_t;

SECURITY_READ_ONLY_LATE(pmap_io_range_t*) io_attr_table = (pmap_io_range_t*)0;

SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_first_phys = (pmap_paddr_t) 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_last_phys = (pmap_paddr_t) 0;

SECURITY_READ_ONLY_LATE(unsigned int) num_io_rgns = 0;
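
/*
 * Illustrative sketch, not part of the original pmap sources: the wimg field
 * of a pmap-io-range node packs a pp_attr_t in its low 16 bits and the
 * PMAP_IO_RANGE_* flags in the upper bits, so both can be recovered from the
 * single 32-bit device-tree value.  Hypothetical, unused helper.
 */
__unused static inline bool
pmap_io_range_example_needs_strong_sync(const pmap_io_range_t *rgn)
{
	return (rgn->wimg & PMAP_IO_RANGE_STRONG_SYNC) != 0;
}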
SECURITY_READ_ONLY_LATE(boolean_t) pmap_initialized = FALSE;    /* Has pmap_init completed? */

SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default  = 0x0;
#if defined(__arm64__)
#  ifdef XNU_TARGET_OS_OSX
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = MACH_VM_MAX_ADDRESS;
#  else
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
#  endif
#endif /* __arm64__ */

#if PMAP_PANIC_DEV_WIMG_ON_MANAGED && (DEVELOPMENT || DEBUG)
SECURITY_READ_ONLY_LATE(boolean_t) pmap_panic_dev_wimg_on_managed = TRUE;
#else
SECURITY_READ_ONLY_LATE(boolean_t) pmap_panic_dev_wimg_on_managed = FALSE;
#endif

MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(asid_lock, 0);
SECURITY_READ_ONLY_LATE(static uint32_t) pmap_max_asids = 0;
SECURITY_READ_ONLY_LATE(int) pmap_asid_plru = 1;
SECURITY_READ_ONLY_LATE(uint16_t) asid_chunk_size = 0;
SECURITY_READ_ONLY_LATE(static bitmap_t*) asid_bitmap;
static bitmap_t asid_plru_bitmap[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA;
static uint64_t asid_plru_generation[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA = {0};
static uint64_t asid_plru_gencount MARK_AS_PMAP_DATA = 0;
#if (__ARM_VMSA__ > 7)
#if __ARM_MIXED_PAGE_SIZE__
SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_4k;
#endif
SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_default;
#endif

#if XNU_MONITOR
/*
 * We define our target as 8 pages; enough for 2 page table pages, a PTD page,
 * and a PV page; in essence, twice as many pages as may be necessary to satisfy
 * a single pmap_enter request.
 */
#define PMAP_MIN_FREE_PPL_PAGES 8
#endif

#define pa_index(pa) \
	(atop((pa) - vm_first_phys))

#define pai_to_pvh(pai) \
	(&pv_head_table[pai])

#define pa_valid(x) \
	((x) >= vm_first_phys && (x) < vm_last_phys)
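
/*
 * Illustrative sketch, not part of the original pmap sources: pa_valid() and
 * pa_index() bound-check and convert a physical address into the index shared
 * by pv_head_table and pp_attr_table.  Hypothetical, unused helper.
 */
__unused static inline pv_entry_t **
pa_example_pvh_lookup(pmap_paddr_t pa)
{
	if (!pa_valid(pa)) {
		return NULL;            /* not a kernel-managed (VM) page */
	}
	return pai_to_pvh(pa_index(pa));
}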
/* PTE Define Macros */

#define pte_is_wired(pte) \
	(((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)

#define pte_was_writeable(pte) \
	(((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)

#define pte_set_was_writeable(pte, was_writeable) \
	do { \
		if ((was_writeable)) { \
			(pte) |= ARM_PTE_WRITEABLE; \
		} else { \
			(pte) &= ~ARM_PTE_WRITEABLE; \
		} \
	} while (0)

/* PVE Define Macros */

#define pve_next(pve) \
	((pve)->pve_next)

#define pve_link_field(pve) \
	(&pve_next(pve))

#define pve_link(pp, e) \
	((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))

#define pve_unlink(pp, e) \
	(pve_next(pp) = pve_next(e))

/* bits held in the ptep pointer field */

#define pve_get_ptep(pve) \
	((pve)->pve_ptep)

#define pve_set_ptep(pve, ptep_new) \
	do { \
		(pve)->pve_ptep = (ptep_new); \
	} while (0)

/* PTEP Define Macros */

/* mask for page descriptor index */
#define ARM_TT_PT_INDEX_MASK            ARM_PGMASK

#if (__ARM_VMSA__ == 7)

/*
 * Shift value used for reconstructing the virtual address for a PTE.
 */
#define ARM_TT_PT_ADDR_SHIFT            (10U)

#define ptep_get_pmap(ptep) \
	((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))

#else

#if (ARM_PGSHIFT == 12)
/*
 * Shift value used for reconstructing the virtual address for a PTE.
 */
#define ARM_TT_PT_ADDR_SHIFT            (9ULL)
#else
/*
 * Shift value used for reconstructing the virtual address for a PTE.
 */
#define ARM_TT_PT_ADDR_SHIFT            (11ULL)
#endif

#define ptep_get_pmap(ptep) \
	((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))

#endif

#define ptep_get_ptd(ptep) \
	((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
/* PVH Define Macros */

#define PVH_TYPE_NULL        0x0UL
#define PVH_TYPE_PVEP        0x1UL
#define PVH_TYPE_PTEP        0x2UL
#define PVH_TYPE_PTDP        0x3UL

#define PVH_TYPE_MASK        (0x3UL)

#ifdef __arm64__

/* All flags listed below are stored in the PV head pointer unless otherwise noted */
#define PVH_FLAG_IOMMU       0x4UL          /* Stored in each PTE, or in PV head for single-PTE PV heads */
#define PVH_FLAG_IOMMU_TABLE (1ULL << 63)   /* Stored in each PTE, or in PV head for single-PTE PV heads */
#define PVH_FLAG_CPU         (1ULL << 62)
#define PVH_LOCK_BIT         61
#define PVH_FLAG_LOCK        (1ULL << PVH_LOCK_BIT)
#define PVH_FLAG_EXEC        (1ULL << 60)
#define PVH_FLAG_LOCKDOWN    (1ULL << 59)
#define PVH_FLAG_HASHED      (1ULL << 58)   /* Used to mark that a page has been hashed into the hibernation image. */
#define PVH_HIGH_FLAGS       (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN | PVH_FLAG_HASHED)

#else /* !__arm64__ */

#define PVH_LOCK_BIT         31
#define PVH_FLAG_LOCK        (1UL << PVH_LOCK_BIT)
#define PVH_HIGH_FLAGS       PVH_FLAG_LOCK

#endif

#define PVH_LIST_MASK   (~PVH_TYPE_MASK)

#define pvh_test_type(h, b) \
	((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))

#define pvh_ptep(h) \
	((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))

#define pvh_list(h) \
	((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))

#define pvh_get_flags(h) \
	(*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)

#define pvh_set_flags(h, f) \
	do { \
		os_atomic_store((vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
		    relaxed); \
	} while (0)

#define pvh_update_head(h, e, t) \
	do { \
		assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK); \
		os_atomic_store((vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
		    relaxed); \
	} while (0)

#define pvh_update_head_unlocked(h, e, t) \
	do { \
		assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK)); \
		*(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK; \
	} while (0)

#define pvh_add(h, e) \
	do { \
		assert(!pvh_test_type((h), PVH_TYPE_PTEP)); \
		pve_next(e) = pvh_list(h); \
		pvh_update_head((h), (e), PVH_TYPE_PVEP); \
	} while (0)

#define pvh_remove(h, p, e) \
	do { \
		assert(!PVE_NEXT_IS_ALTACCT(pve_next((e)))); \
		if ((p) == (h)) { \
			if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
				pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
			} else { \
				pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
			} \
		} else { \
			/* \
			 * preserve the "alternate accounting" bit \
			 * when updating "p" (the previous entry's \
			 * "pve_next"). \
			 */ \
			boolean_t __is_altacct; \
			__is_altacct = PVE_NEXT_IS_ALTACCT(*(p)); \
			*(p) = PVE_NEXT_PTR(pve_next((e))); \
			if (__is_altacct) { \
				PVE_NEXT_SET_ALTACCT((p)); \
			} else { \
				PVE_NEXT_CLR_ALTACCT((p)); \
			} \
		} \
	} while (0)
/* PPATTR Define Macros */

#define ppattr_set_bits(h, b)   os_atomic_or((h), (pp_attr_t)(b), acq_rel)
#define ppattr_clear_bits(h, b) os_atomic_andnot((h), (pp_attr_t)(b), acq_rel)

#define ppattr_test_bits(h, b) \
	((*(h) & (pp_attr_t)(b)) == (pp_attr_t)(b))

#define pa_set_bits(x, b) \
	do { \
		if (pa_valid(x)) { \
			ppattr_set_bits(&pp_attr_table[pa_index(x)], \
			    (b)); \
		} \
	} while (0)

#define pa_test_bits(x, b) \
	(pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
	    (b)) : FALSE)

#define pa_clear_bits(x, b) \
	do { \
		if (pa_valid(x)) { \
			ppattr_clear_bits(&pp_attr_table[pa_index(x)], \
			    (b)); \
		} \
	} while (0)

#define pa_set_modify(x) \
	pa_set_bits(x, PP_ATTR_MODIFIED)

#define pa_clear_modify(x) \
	pa_clear_bits(x, PP_ATTR_MODIFIED)

#define pa_set_reference(x) \
	pa_set_bits(x, PP_ATTR_REFERENCED)

#define pa_clear_reference(x) \
	pa_clear_bits(x, PP_ATTR_REFERENCED)

#if XNU_MONITOR
#define pa_set_monitor(x) \
	pa_set_bits((x), PP_ATTR_MONITOR)

#define pa_clear_monitor(x) \
	pa_clear_bits((x), PP_ATTR_MONITOR)

#define pa_test_monitor(x) \
	pa_test_bits((x), PP_ATTR_MONITOR)

#define pa_set_no_monitor(x) \
	pa_set_bits((x), PP_ATTR_NO_MONITOR)

#define pa_clear_no_monitor(x) \
	pa_clear_bits((x), PP_ATTR_NO_MONITOR)

#define pa_test_no_monitor(x) \
	pa_test_bits((x), PP_ATTR_NO_MONITOR)
#endif

#define IS_INTERNAL_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
#define SET_INTERNAL_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
#define CLR_INTERNAL_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)

#define IS_REUSABLE_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
#define SET_REUSABLE_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
#define CLR_REUSABLE_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)

#define IS_ALTACCT_PAGE(pai, pve_p) \
	(((pve_p) == NULL) \
	? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
	: PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
#define SET_ALTACCT_PAGE(pai, pve_p) \
	if ((pve_p) == NULL) { \
		ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
	} else { \
		PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
	}
#define CLR_ALTACCT_PAGE(pai, pve_p) \
	if ((pve_p) == NULL) { \
		ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
	} else { \
		PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
	}

#define IS_REFFAULT_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
#define SET_REFFAULT_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
#define CLR_REFFAULT_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)

#define IS_MODFAULT_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
#define SET_MODFAULT_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
#define CLR_MODFAULT_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)

#define tte_get_ptd(tte) \
	((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))
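
/*
 * Illustrative sketch, not part of the original pmap sources: the pa_*
 * wrappers above fold the pa_valid() check into each attribute update, so a
 * caller can pass any physical address.  Hypothetical, unused example of a
 * test-and-clear of the modified bit.
 */
__unused static inline boolean_t
pa_example_test_and_clear_modify(pmap_paddr_t pa)
{
	boolean_t was_modified = pa_test_bits(pa, PP_ATTR_MODIFIED);
	if (was_modified) {
		pa_clear_modify(pa);
	}
	return was_modified;
}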
#if (__ARM_VMSA__ == 7)

#define tte_index(pmap, pt_attr, addr) \
	ttenum((addr))

#define pte_index(pmap, pt_attr, addr) \
	ptenum((addr))

#else

#define ttn_index(pmap, pt_attr, addr, pt_level) \
	(((addr) & (pt_attr)->pta_level_info[(pt_level)].index_mask) >> (pt_attr)->pta_level_info[(pt_level)].shift)

#define tt0_index(pmap, pt_attr, addr) \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L0_LEVEL)

#define tt1_index(pmap, pt_attr, addr) \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L1_LEVEL)

#define tt2_index(pmap, pt_attr, addr) \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L2_LEVEL)

#define tt3_index(pmap, pt_attr, addr) \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L3_LEVEL)

#define tte_index(pmap, pt_attr, addr) \
	tt2_index((pmap), (pt_attr), (addr))

#define pte_index(pmap, pt_attr, addr) \
	tt3_index((pmap), (pt_attr), (addr))

#endif
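
/*
 * Illustrative sketch, not part of the original pmap sources: tte_index() and
 * pte_index() split a VA into its twig- and leaf-table slots for the pmap's
 * page-table geometry.  Hypothetical, unused helper showing the decomposition.
 */
__unused static inline void
pmap_example_decompose_va(__unused pmap_t pmap, vm_map_address_t va,
    unsigned int *twig_slot, unsigned int *leaf_slot)
{
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	*twig_slot = (unsigned int)tte_index(pmap, pt_attr, va);
	*leaf_slot = (unsigned int)pte_index(pmap, pt_attr, va);
}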
static inline ptd_info_t *
ptd_get_info(pt_desc_t *ptd, const tt_entry_t *ttep)
{
	assert(ptd->ptd_info[0].refcnt != PT_DESC_IOMMU_REFCOUNT);
#if PT_INDEX_MAX == 1
	#pragma unused(ttep)
	return &ptd->ptd_info[0];
#else
	uint64_t pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(ptd->pmap));
	vm_offset_t ttep_page = (vm_offset_t)ttep >> pmap_page_shift;
	unsigned int ttep_index = ttep_page & ((1U << (PAGE_SHIFT - pmap_page_shift)) - 1);
	assert(ttep_index < PT_INDEX_MAX);
	return &ptd->ptd_info[ttep_index];
#endif
}

static inline ptd_info_t *
ptep_get_info(const pt_entry_t *ptep)
{
	return ptd_get_info(ptep_get_ptd(ptep), ptep);
}

static inline vm_map_address_t
ptep_get_va(const pt_entry_t *ptep)
{
	pv_entry_t **pv_h;
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(ptep_get_pmap(ptep));
	pv_h = pai_to_pvh(pa_index(ml_static_vtop(((vm_offset_t)ptep))));

	assert(pvh_test_type(pv_h, PVH_TYPE_PTDP));
	pt_desc_t *ptdp = (pt_desc_t *)(pvh_list(pv_h));

	vm_map_address_t va = ptd_get_info(ptdp, ptep)->va;
	vm_offset_t ptep_index = ((vm_offset_t)ptep & pt_attr_leaf_offmask(pt_attr)) / sizeof(*ptep);

	va += (ptep_index << pt_attr_leaf_shift(pt_attr));

	return va;
}
static inline void
pte_set_wired(pmap_t pmap, pt_entry_t *ptep, boolean_t wired)
{
	if (wired) {
		*ptep |= ARM_PTE_WIRED;
	} else {
		*ptep &= ~ARM_PTE_WIRED;
	}
	/*
	 * Do not track wired page count for kernel pagetable pages.  Kernel mappings are
	 * not guaranteed to have PTDs in the first place, and kernel pagetable pages are
	 * never reclaimed.
	 */
	if (pmap == kernel_pmap) {
		return;
	}
	unsigned short *ptd_wiredcnt_ptr;
	ptd_wiredcnt_ptr = &(ptep_get_info(ptep)->wiredcnt);
	if (wired) {
		os_atomic_add(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
	} else {
		unsigned short prev_wired = os_atomic_sub_orig(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
		if (__improbable(prev_wired == 0)) {
			panic("pmap %p (pte %p): wired count underflow", pmap, ptep);
		}
	}
}
/*
 * Lock on pmap system
 */

lck_grp_t pmap_lck_grp MARK_AS_PMAP_DATA;

static inline void
pmap_lock_init(pmap_t pmap)
{
	lck_rw_init(&pmap->rwlock, &pmap_lck_grp, 0);
	pmap->rwlock.lck_rw_can_sleep = FALSE;
}

static inline void
pmap_lock_destroy(pmap_t pmap)
{
	lck_rw_destroy(&pmap->rwlock, &pmap_lck_grp);
}

static inline void
pmap_lock(pmap_t pmap)
{
#if !XNU_MONITOR
	mp_disable_preemption();
#endif
	lck_rw_lock_exclusive(&pmap->rwlock);
}

static inline void
pmap_lock_ro(pmap_t pmap)
{
#if !XNU_MONITOR
	mp_disable_preemption();
#endif
	lck_rw_lock_shared(&pmap->rwlock);
}

static inline void
pmap_unlock(pmap_t pmap)
{
	lck_rw_unlock_exclusive(&pmap->rwlock);
#if !XNU_MONITOR
	mp_enable_preemption();
#endif
}

static inline void
pmap_unlock_ro(pmap_t pmap)
{
	lck_rw_unlock_shared(&pmap->rwlock);
#if !XNU_MONITOR
	mp_enable_preemption();
#endif
}

static inline bool
pmap_try_lock(pmap_t pmap)
{
	bool ret;

#if !XNU_MONITOR
	mp_disable_preemption();
#endif
	ret = lck_rw_try_lock_exclusive(&pmap->rwlock);
	if (!ret) {
#if !XNU_MONITOR
		mp_enable_preemption();
#endif
	}

	return ret;
}
//assert that ONLY READ lock is held
__unused static inline void
pmap_assert_locked_r(__unused pmap_t pmap)
{
#if MACH_ASSERT
	lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_SHARED);
#else
	(void)pmap;
#endif
}

//assert that ONLY WRITE lock is held
__unused static inline void
pmap_assert_locked_w(__unused pmap_t pmap)
{
#if MACH_ASSERT
	lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_EXCLUSIVE);
#else
	(void)pmap;
#endif
}

//assert that either READ or WRITE lock is held
__unused static inline void
pmap_assert_locked_any(__unused pmap_t pmap)
{
#if MACH_ASSERT
	lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_HELD);
#endif
}
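
/*
 * Illustrative sketch, not part of the original pmap sources: typical use of
 * the lock wrappers above.  Preemption remains disabled for the duration of
 * the critical section when running outside the PPL.  Hypothetical, unused.
 */
__unused static inline bool
pmap_example_try_exclusive(pmap_t pmap)
{
	if (!pmap_try_lock(pmap)) {
		return false;           /* contended; caller may retry or block */
	}
	pmap_assert_locked_w(pmap);
	/* ... critical section would go here ... */
	pmap_unlock(pmap);
	return true;
}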
#if defined(__arm64__)
#define PVH_LOCK_WORD 1 /* Assumes little-endian */
#else
#define PVH_LOCK_WORD 0
#endif

#define ASSERT_PVH_LOCKED(index) \
	do { \
		assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK); \
	} while (0)

#define LOCK_PVH(index) \
	do { \
		pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
	} while (0)

#define UNLOCK_PVH(index) \
	do { \
		ASSERT_PVH_LOCKED(index); \
		pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
	} while (0)

#define PMAP_UPDATE_TLBS(pmap, s, e, strong) { \
	pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (size_t)((e) - (s)), pmap); \
	pmap_sync_tlb(strong); \
}

#define FLUSH_PTE_RANGE(spte, epte) \
	__builtin_arm_dmb(DMB_ISH);

#define FLUSH_PTE(pte_p) \
	__builtin_arm_dmb(DMB_ISH);

#define FLUSH_PTE_STRONG(pte_p) \
	__builtin_arm_dsb(DSB_ISH);

#define FLUSH_PTE_RANGE_STRONG(spte, epte) \
	__builtin_arm_dsb(DSB_ISH);

#define WRITE_PTE_FAST(pte_p, pte_entry) \
	__unreachable_ok_push \
	if (TEST_PAGE_RATIO_4) { \
		if (((unsigned)(pte_p)) & 0x1f) { \
			panic("%s: WRITE_PTE_FAST is unaligned, " \
			    "pte_p=%p, pte_entry=%p", \
			    __func__, \
			    pte_p, (void*)pte_entry); \
		} \
		if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
			*(pte_p) = (pte_entry); \
			*((pte_p)+1) = (pte_entry); \
			*((pte_p)+2) = (pte_entry); \
			*((pte_p)+3) = (pte_entry); \
		} else { \
			*(pte_p) = (pte_entry); \
			*((pte_p)+1) = (pte_entry) | 0x1000; \
			*((pte_p)+2) = (pte_entry) | 0x2000; \
			*((pte_p)+3) = (pte_entry) | 0x3000; \
		} \
	} else { \
		*(pte_p) = (pte_entry); \
	} \
	__unreachable_ok_pop

#define WRITE_PTE(pte_p, pte_entry) \
	WRITE_PTE_FAST(pte_p, pte_entry); \
	FLUSH_PTE(pte_p);

#define WRITE_PTE_STRONG(pte_p, pte_entry) \
	WRITE_PTE_FAST(pte_p, pte_entry); \
	FLUSH_PTE_STRONG(pte_p);
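
/*
 * Illustrative sketch, not part of the original pmap sources: the intended
 * ordering of the helpers above when a leaf PTE changes is to publish the new
 * entry with a barrier and then invalidate stale TLB entries for the VA range.
 * The single-page range and `new_pte` value are hypothetical; unused.
 */
__unused static inline void
pmap_example_update_leaf_pte(pmap_t pmap, pt_entry_t *ptep, pt_entry_t new_pte,
    vm_map_address_t va)
{
	WRITE_PTE_STRONG(ptep, new_pte);        /* store the entry, then DSB */
	PMAP_UPDATE_TLBS(pmap, va, va + pt_attr_page_size(pmap_get_pt_attr(pmap)), false);
}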
/*
 * Other useful macros.
 */
#define current_pmap() \
	(vm_map_pmap(current_thread()->map))

#if XNU_MONITOR
/*
 * PPL-related macros.
 */
#define ARRAY_ELEM_PTR_IS_VALID(_ptr_, _elem_size_, _array_begin_, _array_end_) \
	(((_ptr_) >= (typeof(_ptr_))_array_begin_) && \
	((_ptr_) < (typeof(_ptr_))_array_end_) && \
	!((((void *)(_ptr_)) - ((void *)_array_begin_)) % (_elem_size_)))

#define PMAP_PTR_IS_VALID(x) ARRAY_ELEM_PTR_IS_VALID(x, sizeof(struct pmap), pmap_array_begin, pmap_array_end)

#define USER_PMAP_IS_VALID(x) (PMAP_PTR_IS_VALID(x) && (os_atomic_load(&(x)->ref_count, relaxed) > 0))

#define VALIDATE_PMAP(x) \
	if (__improbable(((x) != kernel_pmap) && !USER_PMAP_IS_VALID(x))) \
	        panic("%s: invalid pmap %p", __func__, (x));

#define VALIDATE_LEDGER_PTR(x) \
	if (__improbable(!ARRAY_ELEM_PTR_IS_VALID(x, sizeof(void *), pmap_ledger_ptr_array_begin, pmap_ledger_ptr_array_end))) \
	        panic("%s: invalid ledger ptr %p", __func__, (x));

#define ARRAY_ELEM_INDEX(x, _elem_size_, _array_begin_) ((uint64_t)((((void *)(x)) - (_array_begin_)) / (_elem_size_)))

static void
pmap_ledger_validate(void * ledger)
{
	uint64_t array_index;
	pmap_ledger_t ** ledger_ptr_array_ptr = ((pmap_ledger_t*)ledger)->back_ptr;
	VALIDATE_LEDGER_PTR(ledger_ptr_array_ptr);
	array_index = ARRAY_ELEM_INDEX(ledger_ptr_array_ptr, sizeof(pmap_ledger_t *), pmap_ledger_ptr_array_begin);

	if (array_index >= MAX_PMAP_LEDGERS) {
		panic("%s: ledger %p array index invalid, index was %#llx", __func__, ledger, array_index);
	}

	pmap_ledger_t *ledger_ptr = *ledger_ptr_array_ptr;

	if (__improbable(ledger_ptr != ledger)) {
		panic("%s: ledger pointer mismatch, %p != %p", __func__, ledger, ledger_ptr);
	}
}

#else /* XNU_MONITOR */

#define VALIDATE_PMAP(x) assert((x) != NULL);

#endif /* XNU_MONITOR */
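
/*
 * Illustrative sketch, not part of the original pmap sources: VALIDATE_PMAP
 * rejects NULL pmaps outside the PPL and, inside the PPL, any pointer that
 * does not reference a live entry of the PPL-owned pmap array.  Hypothetical,
 * unused example of validating a caller-provided pmap before touching it.
 */
__unused static inline void
pmap_example_validate_before_use(pmap_t pmap)
{
	VALIDATE_PMAP(pmap);
	/* ... it is now safe to dereference `pmap` ... */
}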
#if DEVELOPMENT || DEBUG

/*
 * Trace levels are controlled by a bitmask in which each
 * level can be enabled/disabled by the (1<<level) position
 * in the boot-arg:
 * Level 0: PPL extension functionality
 * Level 1: pmap lifecycle (create/destroy/switch)
 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
 * Level 3: internal state management (attributes/fast-fault)
 * Level 4-7: TTE traces for paging levels 0-3.  TTBs are traced at level 4.
 */

SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;

#define PMAP_TRACE(level, ...) \
	if (__improbable((1 << (level)) & pmap_trace_mask)) { \
		KDBG_RELEASE(__VA_ARGS__); \
	}
#else /* DEVELOPMENT || DEBUG */

#define PMAP_TRACE(level, ...)

#endif /* DEVELOPMENT || DEBUG */
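
/*
 * Illustrative sketch, not part of the original pmap sources: emitting a
 * level-1 (pmap lifecycle) trace point.  The kdebug event code 0 below is a
 * placeholder; real call sites pass PMAP_CODE()-style codes.  Unused.
 */
__unused static inline void
pmap_example_trace_lifecycle(pmap_t pmap)
{
	PMAP_TRACE(1, 0 /* hypothetical kdebug event code */, VM_KERNEL_ADDRHIDE(pmap));
}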
1818 * Internal function prototypes (forward declarations).
1825 } pv_alloc_return_t
;
1827 static pv_alloc_return_t
pv_alloc(
1830 pv_entry_t
**pvepp
);
1832 static void ptd_bootstrap(
1833 pt_desc_t
*ptdp
, unsigned int ptd_cnt
);
1835 static inline pt_desc_t
*ptd_alloc_unlinked(void);
1837 static pt_desc_t
*ptd_alloc(pmap_t pmap
);
1839 static void ptd_deallocate(pt_desc_t
*ptdp
);
1841 static void ptd_init(
1842 pt_desc_t
*ptdp
, pmap_t pmap
, vm_map_address_t va
, unsigned int ttlevel
, pt_entry_t
* pte_p
);
1844 static void pmap_set_reference(
1847 pmap_paddr_t
pmap_vtophys(
1848 pmap_t pmap
, addr64_t va
);
1850 void pmap_switch_user_ttb(
1853 static kern_return_t
pmap_expand(
1854 pmap_t
, vm_map_address_t
, unsigned int options
, unsigned int level
);
1856 static int pmap_remove_range(
1857 pmap_t
, vm_map_address_t
, pt_entry_t
*, pt_entry_t
*);
1859 static int pmap_remove_range_options(
1860 pmap_t
, vm_map_address_t
, pt_entry_t
*, pt_entry_t
*, vm_map_address_t
*, bool *, int);
1862 static tt_entry_t
*pmap_tt1_allocate(
1863 pmap_t
, vm_size_t
, unsigned int);
1865 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1867 static void pmap_tt1_deallocate(
1868 pmap_t
, tt_entry_t
*, vm_size_t
, unsigned int);
1870 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1872 static kern_return_t
pmap_tt_allocate(
1873 pmap_t
, tt_entry_t
**, unsigned int, unsigned int);
1875 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1877 static void pmap_tte_deallocate(
1878 pmap_t
, vm_offset_t
, vm_offset_t
, bool, tt_entry_t
*, unsigned int);
1880 const unsigned int arm_hardware_page_size
= ARM_PGBYTES
;
1881 const unsigned int arm_pt_desc_size
= sizeof(pt_desc_t
);
1882 const unsigned int arm_pt_root_size
= PMAP_ROOT_ALLOC_SIZE
;
1884 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1886 #if (__ARM_VMSA__ > 7)
1888 static inline tt_entry_t
*pmap_tt1e(
1889 pmap_t
, vm_map_address_t
);
1891 static inline tt_entry_t
*pmap_tt2e(
1892 pmap_t
, vm_map_address_t
);
1894 static inline pt_entry_t
*pmap_tt3e(
1895 pmap_t
, vm_map_address_t
);
1897 static inline pt_entry_t
*pmap_ttne(
1898 pmap_t
, unsigned int, vm_map_address_t
);
1900 static void pmap_unmap_sharedpage(
1904 pmap_is_64bit(pmap_t
);
1907 #endif /* (__ARM_VMSA__ > 7) */
1909 static inline tt_entry_t
*pmap_tte(
1910 pmap_t
, vm_map_address_t
);
1912 static inline pt_entry_t
*pmap_pte(
1913 pmap_t
, vm_map_address_t
);
1915 static void pmap_update_cache_attributes_locked(
1918 static boolean_t
arm_clear_fast_fault(
1920 vm_prot_t fault_type
);
1922 static pmap_paddr_t
pmap_pages_reclaim(
1925 static kern_return_t
pmap_pages_alloc_zeroed(
1930 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1931 #define PMAP_PAGES_RECLAIM_NOWAIT 0x2
1933 static void pmap_pages_free(
1937 static void pmap_pin_kernel_pages(vm_offset_t kva
, size_t nbytes
);
1939 static void pmap_unpin_kernel_pages(vm_offset_t kva
, size_t nbytes
);
1941 static void pmap_trim_self(pmap_t pmap
);
1942 static void pmap_trim_subord(pmap_t subord
);
1946 * Temporary prototypes, while we wait for pmap_enter to move to taking an
1947 * address instead of a page number.
1949 static kern_return_t
1955 vm_prot_t fault_type
,
1960 pmap_enter_options_addr(
1965 vm_prot_t fault_type
,
1968 unsigned int options
,
1969 __unused
void *arg
);
1971 #ifdef CONFIG_XNUPOST
1972 kern_return_t
pmap_test(void);
1973 #endif /* CONFIG_XNUPOST */
1976 static pmap_paddr_t
pmap_alloc_page_for_kern(unsigned int options
);
1977 static void pmap_alloc_page_for_ppl(unsigned int options
);
/*
 * This macro generates prototypes for the *_internal functions, which
 * represent the PPL interface.  When the PPL is enabled, this will also
 * generate prototypes for the PPL entrypoints (*_ppl), as well as generating
 * the entrypoints.
 */
#if XNU_MONITOR
#define GEN_ASM_NAME(__function_name) _##__function_name##_ppl

#define PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
	static __return_type __function_name##_internal __function_args; \
	extern __return_type __function_name##_ppl __function_args; \
	__asm__ (".text \n" \
	    ".globl " #__assembly_function_name "\n" \
	    #__assembly_function_name ":\n" \
	    "mov x15, " #__function_index "\n" \
	    "b _aprr_ppl_enter\n")

#define PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
	PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name)

#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
	PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, GEN_ASM_NAME(__function_name))
#else /* XNU_MONITOR */
#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
	static __return_type __function_name##_internal __function_args
#endif /* XNU_MONITOR */
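/*
 * Illustrative sketch only (pmap_example / PMAP_EXAMPLE_INDEX are hypothetical
 * names, not part of this file): with XNU_MONITOR enabled,
 *
 *     PMAP_SUPPORT_PROTOTYPES(
 *             kern_return_t,
 *             pmap_example, (pmap_t pmap), PMAP_EXAMPLE_INDEX);
 *
 * expands to roughly
 *
 *     static kern_return_t pmap_example_internal(pmap_t pmap);
 *     extern kern_return_t pmap_example_ppl(pmap_t pmap);
 *     __asm__(".text \n"
 *             ".globl _pmap_example_ppl\n"
 *             "_pmap_example_ppl:\n"
 *             "mov x15, <numeric index>\n"
 *             "b _aprr_ppl_enter\n");
 *
 * The extra PMAP_SUPPORT_PROTOTYPES_WITH_ASM indirection exists so that the
 * index argument is macro-expanded to its numeric value before being
 * stringized into the asm stub.  Without XNU_MONITOR, only the *_internal
 * prototype is emitted.
 */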
PMAP_SUPPORT_PROTOTYPES(
	arm_fast_fault, (pmap_t pmap,
	vm_map_address_t va,
	vm_prot_t fault_type,
	bool from_user), ARM_FAST_FAULT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	arm_force_fast_fault, (ppnum_t ppnum,
	vm_prot_t allow_mode,
	int options), ARM_FORCE_FAST_FAULT_INDEX);

MARK_AS_PMAP_TEXT static boolean_t
arm_force_fast_fault_with_flush_range(
	vm_prot_t allow_mode,
	pmap_tlb_flush_range_t *flush_range);

PMAP_SUPPORT_PROTOTYPES(
	mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_batch_set_cache_attributes, (ppnum_t pn,
	unsigned int cacheattr,
	unsigned int page_cnt,
	unsigned int page_index,
	unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_change_wiring, (pmap_t pmap,
	boolean_t wired), PMAP_CHANGE_WIRING_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_create_options, (ledger_t ledger,
	kern_return_t *kr), PMAP_CREATE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_enter_options, (pmap_t pmap,
	vm_prot_t fault_type,
	unsigned int options), PMAP_ENTER_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_find_pa, (pmap_t pmap,
	addr64_t va), PMAP_FIND_PA_INDEX);
#if (__ARM_VMSA__ > 7)
PMAP_SUPPORT_PROTOTYPES(
	pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_is_empty, (pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_map_cpu_windows_copy, (ppnum_t pn,
	unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_nest, (pmap_t grand,
	uint64_t size), PMAP_NEST_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_page_protect_options, (ppnum_t ppnum,
	unsigned int options,
	void *arg), PMAP_PAGE_PROTECT_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_protect_options, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	unsigned int options,
	void *args), PMAP_PROTECT_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_query_page_info, (pmap_t pmap,
	int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_query_resident, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	mach_vm_size_t *compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_remove_options, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options), PMAP_REMOVE_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_return, (boolean_t do_panic,
	boolean_t do_recurse), PMAP_RETURN_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_set_cache_attributes, (ppnum_t pn,
	unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_update_compressor_page, (ppnum_t pn,
	unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
#if MACH_ASSERT || XNU_MONITOR
PMAP_SUPPORT_PROTOTYPES(
	pmap_set_process, (pmap_t pmap,
	char *procname), PMAP_SET_PROCESS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_unnest_options, (pmap_t grand,
	unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_cpu_data_init, (unsigned int cpu_number), PMAP_CPU_DATA_INIT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	phys_attribute_set, (ppnum_t pn,
	unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_mark_page_as_ppl_page, (pmap_paddr_t pa, bool initially_free), PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	phys_attribute_clear, (ppnum_t pn,
	void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);

#if __ARM_RANGE_TLBI__
PMAP_SUPPORT_PROTOTYPES(
	phys_attribute_clear_range, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	unsigned int options), PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX);
#endif /* __ARM_RANGE_TLBI__ */

PMAP_SUPPORT_PROTOTYPES(
	pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_release_ppl_pages_to_kernel, (void), PMAP_RELEASE_PAGES_TO_KERNEL_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_set_vm_map_cs_enforced, (pmap_t pmap, bool new_value), PMAP_SET_VM_MAP_CS_ENFORCED_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);
#if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
PMAP_SUPPORT_PROTOTYPES(
	pmap_disable_user_jop, (pmap_t pmap), PMAP_DISABLE_USER_JOP_INDEX);
#endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
PMAP_SUPPORT_PROTOTYPES(
	pmap_trim, (pmap_t grand,
	uint64_t size), PMAP_TRIM_INDEX);

#if HAS_APPLE_PAC
PMAP_SUPPORT_PROTOTYPES(
	pmap_sign_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key), PMAP_SIGN_USER_PTR);
PMAP_SUPPORT_PROTOTYPES(
	pmap_auth_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key), PMAP_AUTH_USER_PTR);
#endif /* HAS_APPLE_PAC */
PMAP_SUPPORT_PROTOTYPES(
	pmap_is_trust_cache_loaded, (const uuid_t uuid), PMAP_IS_TRUST_CACHE_LOADED_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_lookup_in_static_trust_cache, (const uint8_t cdhash[CS_CDHASH_LEN]), PMAP_LOOKUP_IN_STATIC_TRUST_CACHE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_lookup_in_loaded_trust_caches, (const uint8_t cdhash[CS_CDHASH_LEN]), PMAP_LOOKUP_IN_LOADED_TRUST_CACHES_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_set_compilation_service_cdhash, (const uint8_t cdhash[CS_CDHASH_LEN]),
	PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_match_compilation_service_cdhash, (const uint8_t cdhash[CS_CDHASH_LEN]),
	PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX);
static void pmap_mark_page_as_ppl_page(pmap_paddr_t pa);

void pmap_footprint_suspend(vm_map_t map,
    boolean_t suspend);

PMAP_SUPPORT_PROTOTYPES(
	pmap_footprint_suspend, (vm_map_t map,
	boolean_t suspend),
	PMAP_FOOTPRINT_SUSPEND_INDEX);
PMAP_SUPPORT_PROTOTYPES(
	pmap_ledger_alloc_init, (size_t),
	PMAP_LEDGER_ALLOC_INIT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_ledger_alloc, (void),
	PMAP_LEDGER_ALLOC_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_ledger_free, (ledger_t),
	PMAP_LEDGER_FREE_INDEX);
boolean_t pgtrace_enabled = 0;

typedef struct {
	queue_chain_t   chain;

	/*
	 * pmap        - pmap for below addresses
	 * ova         - original va page address
	 * cva         - clone va addresses for pre, target and post pages
	 * cva_spte    - clone saved ptes
	 * range       - trace range in this map
	 * cloned      - has been cloned or not
	 */
	vm_map_offset_t ova;
	vm_map_offset_t cva[3];
	pt_entry_t      cva_spte[3];
} pmap_pgtrace_map_t;

static void pmap_pgtrace_init(void);
static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
#if DEVELOPMENT || DEBUG
PMAP_SUPPORT_PROTOTYPES(
	pmap_test_text_corruption, (pmap_paddr_t),
	PMAP_TEST_TEXT_CORRUPTION_INDEX);
#endif /* DEVELOPMENT || DEBUG */
#if (__ARM_VMSA__ > 7)
/*
 * The low global vector page is mapped at a fixed alias.
 * Since the page size is 16k for H8 and newer we map the globals to a 16k
 * aligned address. Readers of the globals (e.g. lldb, panic server) need
 * to check both addresses anyway for backward compatibility. So for now
 * we leave H6 and H7 where they were.
 */
#if (ARM_PGSHIFT == 14)
#define LOWGLOBAL_ALIAS         (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
#else
#define LOWGLOBAL_ALIAS         (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
#endif
#else
#define LOWGLOBAL_ALIAS         (0xFFFF1000)
#endif
long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;
#if __has_feature(ptrauth_calls)
#define __ptrauth_ppl_handler __ptrauth(ptrauth_key_function_pointer, true, 0)
#else
#define __ptrauth_ppl_handler
#endif

/*
 * Table of function pointers used for PPL dispatch.
 */
const void * __ptrauth_ppl_handler const ppl_handler_table[PMAP_COUNT] = {
	[ARM_FAST_FAULT_INDEX] = arm_fast_fault_internal,
	[ARM_FORCE_FAST_FAULT_INDEX] = arm_force_fast_fault_internal,
	[MAPPING_FREE_PRIME_INDEX] = mapping_free_prime_internal,
	[PHYS_ATTRIBUTE_CLEAR_INDEX] = phys_attribute_clear_internal,
	[PHYS_ATTRIBUTE_SET_INDEX] = phys_attribute_set_internal,
	[PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX] = pmap_batch_set_cache_attributes_internal,
	[PMAP_CHANGE_WIRING_INDEX] = pmap_change_wiring_internal,
	[PMAP_CREATE_INDEX] = pmap_create_options_internal,
	[PMAP_DESTROY_INDEX] = pmap_destroy_internal,
	[PMAP_ENTER_OPTIONS_INDEX] = pmap_enter_options_internal,
	[PMAP_FIND_PA_INDEX] = pmap_find_pa_internal,
	[PMAP_INSERT_SHAREDPAGE_INDEX] = pmap_insert_sharedpage_internal,
	[PMAP_IS_EMPTY_INDEX] = pmap_is_empty_internal,
	[PMAP_MAP_CPU_WINDOWS_COPY_INDEX] = pmap_map_cpu_windows_copy_internal,
	[PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX] = pmap_mark_page_as_ppl_page_internal,
	[PMAP_NEST_INDEX] = pmap_nest_internal,
	[PMAP_PAGE_PROTECT_OPTIONS_INDEX] = pmap_page_protect_options_internal,
	[PMAP_PROTECT_OPTIONS_INDEX] = pmap_protect_options_internal,
	[PMAP_QUERY_PAGE_INFO_INDEX] = pmap_query_page_info_internal,
	[PMAP_QUERY_RESIDENT_INDEX] = pmap_query_resident_internal,
	[PMAP_REFERENCE_INDEX] = pmap_reference_internal,
	[PMAP_REMOVE_OPTIONS_INDEX] = pmap_remove_options_internal,
	[PMAP_RETURN_INDEX] = pmap_return_internal,
	[PMAP_SET_CACHE_ATTRIBUTES_INDEX] = pmap_set_cache_attributes_internal,
	[PMAP_UPDATE_COMPRESSOR_PAGE_INDEX] = pmap_update_compressor_page_internal,
	[PMAP_SET_NESTED_INDEX] = pmap_set_nested_internal,
	[PMAP_SET_PROCESS_INDEX] = pmap_set_process_internal,
	[PMAP_SWITCH_INDEX] = pmap_switch_internal,
	[PMAP_SWITCH_USER_TTB_INDEX] = pmap_switch_user_ttb_internal,
	[PMAP_CLEAR_USER_TTB_INDEX] = pmap_clear_user_ttb_internal,
	[PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX] = pmap_unmap_cpu_windows_copy_internal,
	[PMAP_UNNEST_OPTIONS_INDEX] = pmap_unnest_options_internal,
	[PMAP_FOOTPRINT_SUSPEND_INDEX] = pmap_footprint_suspend_internal,
	[PMAP_CPU_DATA_INIT_INDEX] = pmap_cpu_data_init_internal,
	[PMAP_RELEASE_PAGES_TO_KERNEL_INDEX] = pmap_release_ppl_pages_to_kernel_internal,
	[PMAP_SET_VM_MAP_CS_ENFORCED_INDEX] = pmap_set_vm_map_cs_enforced_internal,
	[PMAP_SET_JIT_ENTITLED_INDEX] = pmap_set_jit_entitled_internal,
	[PMAP_IS_TRUST_CACHE_LOADED_INDEX] = pmap_is_trust_cache_loaded_internal,
	[PMAP_LOOKUP_IN_STATIC_TRUST_CACHE_INDEX] = pmap_lookup_in_static_trust_cache_internal,
	[PMAP_LOOKUP_IN_LOADED_TRUST_CACHES_INDEX] = pmap_lookup_in_loaded_trust_caches_internal,
	[PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX] = pmap_set_compilation_service_cdhash_internal,
	[PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX] = pmap_match_compilation_service_cdhash_internal,
	[PMAP_TRIM_INDEX] = pmap_trim_internal,
	[PMAP_LEDGER_ALLOC_INIT_INDEX] = pmap_ledger_alloc_init_internal,
	[PMAP_LEDGER_ALLOC_INDEX] = pmap_ledger_alloc_internal,
	[PMAP_LEDGER_FREE_INDEX] = pmap_ledger_free_internal,
#if HAS_APPLE_PAC
	[PMAP_SIGN_USER_PTR] = pmap_sign_user_ptr_internal,
	[PMAP_AUTH_USER_PTR] = pmap_auth_user_ptr_internal,
#endif /* HAS_APPLE_PAC */
#if __ARM_RANGE_TLBI__
	[PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX] = phys_attribute_clear_range_internal,
#endif /* __ARM_RANGE_TLBI__ */
#if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
	[PMAP_DISABLE_USER_JOP_INDEX] = pmap_disable_user_jop_internal,
#endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */

#if DEVELOPMENT || DEBUG
	[PMAP_TEST_TEXT_CORRUPTION_INDEX] = pmap_test_text_corruption_internal,
#endif /* DEVELOPMENT || DEBUG */
};
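/*
 * Dispatch sketch (illustrative, using pmap_destroy as an example): a kernel
 * caller invokes pmap_destroy_ppl(pmap); the generated stub loads
 * PMAP_DESTROY_INDEX into x15 and branches to _aprr_ppl_enter, which switches
 * to the PPL stack and permissions and then calls through
 * ppl_handler_table[PMAP_DESTROY_INDEX], i.e. pmap_destroy_internal(pmap).
 * On ptrauth hardware the table entries are signed function pointers
 * (__ptrauth_ppl_handler), so a corrupted entry fails authentication rather
 * than redirecting the PPL.
 */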
/*
 * Allocates and initializes a per-CPU data structure for the pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_cpu_data_init_internal(unsigned int cpu_number)
{
	pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();

	/* Verify cacheline-aligned */
	assert(((vm_offset_t)pmap_cpu_data & ((1 << MAX_L2_CLINE) - 1)) == 0);
	if (pmap_cpu_data->cpu_number != PMAP_INVALID_CPU_NUM) {
		panic("%s: pmap_cpu_data->cpu_number=%u, "
		    "cpu_number=%u",
		    __FUNCTION__, pmap_cpu_data->cpu_number,
		    cpu_number);
	}
	pmap_cpu_data->cpu_number = cpu_number;
}

void
pmap_cpu_data_init(void)
{
#if XNU_MONITOR
	pmap_cpu_data_init_ppl(cpu_number());
#else
	pmap_cpu_data_init_internal(cpu_number());
#endif
}
void
pmap_cpu_data_array_init(void)
{
	unsigned int i = 0;
	pmap_paddr_t ppl_cpu_save_area_cur = 0;
	pt_entry_t template, *pte_p;
	vm_offset_t stack_va = (vm_offset_t)pmap_stacks_start + ARM_PGBYTES;

	assert((pmap_stacks_start != NULL) && (pmap_stacks_end != NULL));
	pmap_stacks_start_pa = avail_start;

	for (i = 0; i < MAX_CPUS; i++) {
		for (vm_offset_t cur_va = stack_va; cur_va < (stack_va + PPL_STACK_SIZE); cur_va += ARM_PGBYTES) {
			assert(cur_va < (vm_offset_t)pmap_stacks_end);
			pte_p = pmap_pte(kernel_pmap, cur_va);
			assert(*pte_p == ARM_PTE_EMPTY);
			template = pa_to_pte(avail_start) | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE |
			    ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) | xprr_perm_to_pte(XPRR_PPL_RW_PERM);
#if __ARM_KERNEL_PROTECT__
			template |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
			WRITE_PTE(pte_p, template);
			__builtin_arm_isb(ISB_SY);
			avail_start += ARM_PGBYTES;
		}
		kasan_map_shadow(stack_va, PPL_STACK_SIZE, false);
		pmap_cpu_data_array[i].cpu_data.cpu_number = PMAP_INVALID_CPU_NUM;
		pmap_cpu_data_array[i].cpu_data.ppl_state = PPL_STATE_KERNEL;
		pmap_cpu_data_array[i].cpu_data.ppl_stack = (void*)(stack_va + PPL_STACK_SIZE);
		stack_va += (PPL_STACK_SIZE + ARM_PGBYTES);
	}
	pmap_stacks_end_pa = avail_start;

	ppl_cpu_save_area_start = avail_start;
	ppl_cpu_save_area_end = ppl_cpu_save_area_start;
	ppl_cpu_save_area_cur = ppl_cpu_save_area_start;

	for (i = 0; i < MAX_CPUS; i++) {
		while ((ppl_cpu_save_area_end - ppl_cpu_save_area_cur) < sizeof(arm_context_t)) {
			avail_start += PAGE_SIZE;
			ppl_cpu_save_area_end = avail_start;
		}
		pmap_cpu_data_array[i].cpu_data.save_area = (arm_context_t *)phystokv(ppl_cpu_save_area_cur);
		ppl_cpu_save_area_cur += sizeof(arm_context_t);
	}

	pmap_cpu_data_init();
}
pmap_cpu_data_t *
pmap_get_cpu_data(void)
{
	pmap_cpu_data_t * pmap_cpu_data = NULL;

#if XNU_MONITOR
	extern pmap_cpu_data_t * ml_get_ppl_cpu_data(void);
	pmap_cpu_data = ml_get_ppl_cpu_data();
#else
	pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
#endif

	return pmap_cpu_data;
}
/*
 * Disable interrupts and return previous state.
 *
 * The PPL has its own interrupt state facility separately from
 * ml_set_interrupts_enable(), since that function is not part of the
 * PPL, and so doing things like manipulating untrusted data and
 *
 * @return The previous interrupt state, to be restored with
 *         pmap_interrupts_restore().
 */
static uint64_t __attribute__((warn_unused_result)) __used
pmap_interrupts_disable(void)
{
	uint64_t state = __builtin_arm_rsr64("DAIF");

	if ((state & DAIF_STANDARD_DISABLE) != DAIF_STANDARD_DISABLE) {
		__builtin_arm_wsr64("DAIFSet", DAIFSC_STANDARD_DISABLE);
	}

	return state;
}

/*
 * Restore previous interrupt state.
 *
 * @param state The previous interrupt state to restore.
 */
static void __used
pmap_interrupts_restore(uint64_t state)
{
	assert((state & ~DAIF_ALL) == 0);

	if (state != DAIF_STANDARD_DISABLE) {
		__builtin_arm_wsr64("DAIF", state);
	}
}

/*
 * Query interrupt state.
 *
 * ml_get_interrupts_enabled() is safe enough at the time of writing
 * this comment, but because it is not considered part of the PPL, so
 * could change without notice, and because it presently only checks
 * DAIF_IRQ, we have our own version.
 *
 * @return true if interrupts are enabled (not fully disabled).
 */
static bool __attribute__((warn_unused_result)) __used
pmap_interrupts_enabled(void)
{
	return (__builtin_arm_rsr64("DAIF") & DAIF_STANDARD_DISABLE) != DAIF_STANDARD_DISABLE;
}
#endif /* __arm64__ */
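/*
 * Usage sketch (illustrative only): PPL-side critical sections bracket their
 * work with the save/restore pair instead of ml_set_interrupts_enable():
 *
 *     uint64_t istate = pmap_interrupts_disable();
 *     ...work that must not be interrupted...
 *     pmap_interrupts_restore(istate);
 *
 * Because the full previous DAIF state (not a boolean) is saved and restored,
 * nested disable/restore pairs compose correctly.
 */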
/*
 * pmap_set_range_xprr_perm takes a range (specified using start and end) that
 * falls within the physical aperture.  All mappings within this range have
 * their protections changed from those specified by the expected_perm to those
 * specified by the new_perm.
 */
static void
pmap_set_range_xprr_perm(vm_address_t start,
    vm_address_t end,
    unsigned int expected_perm,
    unsigned int new_perm)
{
#if (__ARM_VMSA__ == 7)
#error This function is not supported on older ARM hardware
#else
	pmap_t pmap = kernel_pmap;

	vm_address_t va = 0;
	vm_address_t tte_start = 0;
	vm_address_t tte_end = 0;

	tt_entry_t *tte_p = NULL;
	pt_entry_t *pte_p = NULL;
	pt_entry_t *cpte_p = NULL;
	pt_entry_t *bpte_p = NULL;
	pt_entry_t *epte_p = NULL;

	tt_entry_t tte = 0;
	pt_entry_t cpte = 0;
	pt_entry_t template = 0;

	va = start;

	/*
	 * Validate our arguments; any invalid argument will be grounds for a
	 * panic.
	 */
	if ((start | end) % ARM_PGBYTES) {
		panic("%s: start or end not page aligned, "
		    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
		    __FUNCTION__,
		    (void *)start, (void *)end, new_perm, expected_perm);
	}

	if (start > end) {
		panic("%s: start > end, "
		    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
		    __FUNCTION__,
		    (void *)start, (void *)end, new_perm, expected_perm);
	}

	bool in_physmap = (start >= physmap_base) && (end < physmap_end);
	bool in_static = (start >= gVirtBase) && (end < static_memory_end);

	if (!(in_physmap || in_static)) {
		panic("%s: address not in static region or physical aperture, "
		    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
		    __FUNCTION__,
		    (void *)start, (void *)end, new_perm, expected_perm);
	}

	if ((new_perm > XPRR_MAX_PERM) || (expected_perm > XPRR_MAX_PERM)) {
		panic("%s: invalid XPRR index, "
		    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
		    __FUNCTION__,
		    (void *)start, (void *)end, new_perm, expected_perm);
	}

	/*
	 * Walk over the PTEs for the given range, and set the protections on
	 * those PTEs.
	 */
	while (va < end) {
		tte_end = ((va + pt_attr_twig_size(native_pt_attr)) & ~pt_attr_twig_offmask(native_pt_attr));

		if (tte_end > end) {
			tte_end = end;
		}

		tte_p = pmap_tte(pmap, va);

		/*
		 * The physical aperture should not have holes.
		 * The physical aperture should be contiguous.
		 * Do not make eye contact with the physical aperture.
		 */
		if (tte_p == NULL) {
			panic("%s: physical aperture tte is NULL, "
			    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
			    __FUNCTION__,
			    (void *)start, (void *)end, new_perm, expected_perm);
		}

		tte = *tte_p;

		if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
			/*
			 * Walk over the given L3 page table page and update the
			 * PTEs.
			 */
			pte_p = (pt_entry_t *)ttetokv(tte);
			bpte_p = &pte_p[pte_index(pmap, native_pt_attr, va)];
			epte_p = bpte_p + ((tte_end - va) >> pt_attr_leaf_shift(native_pt_attr));

			for (cpte_p = bpte_p; cpte_p < epte_p;
			    cpte_p += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
				int pai = (int)pa_index(pte_to_pa(*cpte_p));
				cpte = *cpte_p;

				/*
				 * Every valid PTE involved should
				 * not have the hint bit set and should have
				 * the expected APRR index.
				 */
				if ((cpte & ARM_PTE_TYPE_MASK) ==
				    ARM_PTE_TYPE_FAULT) {
					panic("%s: physical aperture PTE is invalid, va=%p, "
					    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
					    __FUNCTION__, (void *)va,
					    (void *)start, (void *)end, new_perm, expected_perm);
				}

				if (cpte & ARM_PTE_HINT_MASK) {
					panic("%s: physical aperture PTE has hint bit set, va=%p, cpte=0x%llx, "
					    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
					    __FUNCTION__, (void *)va, cpte,
					    (void *)start, (void *)end, new_perm, expected_perm);
				}

				if (pte_to_xprr_perm(cpte) != expected_perm) {
					panic("%s: perm=%llu does not match expected_perm, cpte=0x%llx, "
					    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
					    __FUNCTION__,
					    pte_to_xprr_perm(cpte), cpte,
					    (void *)start, (void *)end, new_perm, expected_perm);
				}

				template = cpte;
				template &= ~ARM_PTE_XPRR_MASK;
				template |= xprr_perm_to_pte(new_perm);

				WRITE_PTE_STRONG(cpte_p, template);
			}
		} else {
			panic("%s: tte=0x%llx is not a table type entry, "
			    "start=%p, end=%p, new_perm=%u, expected_perm=%u",
			    __FUNCTION__, tte,
			    (void *)start, (void *)end, new_perm, expected_perm);
		}

		va = tte_end;
	}

	PMAP_UPDATE_TLBS(pmap, start, end, false);
#endif /* (__ARM_VMSA__ == 7) */
}
/*
 * A convenience function for setting protections on a single page.
 */
static void
pmap_set_xprr_perm(vm_address_t page_kva,
    unsigned int expected_perm,
    unsigned int new_perm)
{
	pmap_set_range_xprr_perm(page_kva, page_kva + PAGE_SIZE, expected_perm, new_perm);
}
#endif /* XNU_MONITOR */
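/*
 * Usage sketch (illustrative only): handing a page to the PPL remaps its
 * physical-aperture mapping from kernel-writable to PPL-writable, and giving
 * it back reverses the change, e.g.
 *
 *     pmap_set_xprr_perm(phystokv(pa), XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
 *     ...page is now writable only by the PPL...
 *     pmap_set_xprr_perm(phystokv(pa), XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
 *
 * The expected_perm argument is a sanity check: the call panics if the
 * existing mapping does not already carry that XPRR index.
 */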
/*
 * pmap_pages_reclaim(): return a page by freeing an active pagetable page.
 * To be eligible, a pt page must be assigned to a non-kernel pmap.
 * It must not have any wired PTEs and must contain at least one valid PTE.
 * If no eligible page is found in the pt page list, return 0.
 */
static pmap_paddr_t
pmap_pages_reclaim(void)
{
	boolean_t found_page;
	unsigned i;
	pt_desc_t *ptdp;

	/*
	 * In a loop, check for a page in the reclaimed pt page list.
	 * if one is present, unlink that page and return the physical page address.
	 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
	 * If found, invoke pmap_remove_range() on its pmap and address range then
	 * deallocates that pt page. This will end up adding the pt page to the
	 * reclaimed pt page list.
	 */

	pmap_simple_lock(&pmap_pages_lock);
	pmap_pages_request_count++;
	pmap_pages_request_acum++;

	while (1) {
		if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
			page_free_entry_t *page_entry;

			page_entry = pmap_pages_reclaim_list;
			pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
			pmap_simple_unlock(&pmap_pages_lock);

			return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
		}

		pmap_simple_unlock(&pmap_pages_lock);

		pmap_simple_lock(&pt_pages_lock);
		ptdp = (pt_desc_t *)queue_first(&pt_page_list);
		found_page = FALSE;

		while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
			if ((ptdp->pmap->nested == FALSE)
			    && (pmap_try_lock(ptdp->pmap))) {
				assert(ptdp->pmap != kernel_pmap);
				unsigned refcnt_acc = 0;
				unsigned wiredcnt_acc = 0;

				for (i = 0; i < PT_INDEX_MAX; i++) {
					if (ptdp->ptd_info[i].refcnt == PT_DESC_REFCOUNT) {
						/* Do not attempt to free a page that contains an L2 table */
						refcnt_acc = 0;
						break;
					}
					refcnt_acc += ptdp->ptd_info[i].refcnt;
					wiredcnt_acc += ptdp->ptd_info[i].wiredcnt;
				}
				if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
					found_page = TRUE;
					/* Leave ptdp->pmap locked here.  We're about to reclaim
					 * a tt page from it, so we don't want anyone else messing
					 * with it while we do that. */
					break;
				}
				pmap_unlock(ptdp->pmap);
			}
			ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
		}
		if (!found_page) {
			pmap_simple_unlock(&pt_pages_lock);
			return (pmap_paddr_t)0;
		} else {
			bool need_strong_sync = false;
			vm_map_address_t va;
			pmap_t pmap;
			pt_entry_t *bpte, *epte;
			pt_entry_t *pte_p;
			tt_entry_t *tte_p;

			pmap_simple_unlock(&pt_pages_lock);
			pmap = ptdp->pmap;
			pmap_assert_locked_w(pmap); // pmap write lock should be held from loop above

			const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

			for (i = 0; i < (PAGE_SIZE / pt_attr_page_size(pt_attr)); i++) {
				va = ptdp->ptd_info[i].va;

				/* If the VA is bogus, this may represent an unallocated region
				 * or one which is in transition (already being freed or expanded).
				 * Don't try to remove mappings here. */
				if (va == (vm_offset_t)-1) {
					continue;
				}

				tte_p = pmap_tte(pmap, va);
				if ((tte_p != (tt_entry_t *) NULL)
				    && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
					pte_p = (pt_entry_t *) ttetokv(*tte_p);
					bpte = &pte_p[pte_index(pmap, pt_attr, va)];
					epte = bpte + pt_attr_page_size(pt_attr) / sizeof(pt_entry_t);
					/*
					 * Use PMAP_OPTIONS_REMOVE to clear any
					 * "compressed" markers and update the
					 * "compressed" counter in pmap->stats.
					 * This means that we lose accounting for
					 * any compressed pages in this range
					 * but the alternative is to not be able
					 * to account for their future decompression,
					 * which could cause the counter to drift
					 * more and more.
					 */
					pmap_remove_range_options(
						pmap, va, bpte, epte, NULL,
						&need_strong_sync, PMAP_OPTIONS_REMOVE);
					if (ptd_get_info(ptdp, pte_p)->refcnt != 0) {
						panic("%s: ptdp %p, count %d", __FUNCTION__, ptdp, ptd_get_info(ptdp, pte_p)->refcnt);
					}

					pmap_tte_deallocate(pmap, va, va + (size_t)pt_attr_leaf_table_size(pt_attr), need_strong_sync,
					    tte_p, pt_attr_twig_level(pt_attr));
				}
			}
			// Undo the lock we grabbed when we found ptdp above
			pmap_unlock(pmap);
		}
		pmap_simple_lock(&pmap_pages_lock);
	}
}
/*
 * Return a PPL page to the free list.
 */
MARK_AS_PMAP_TEXT static void
pmap_give_free_ppl_page(pmap_paddr_t paddr)
{
	assert((paddr & ARM_PGMASK) == 0);
	void ** new_head = (void **)phystokv(paddr);
	pmap_simple_lock(&pmap_ppl_free_page_lock);

	void * cur_head = pmap_ppl_free_page_list;
	*new_head = cur_head;
	pmap_ppl_free_page_list = new_head;
	pmap_ppl_free_page_count++;

	pmap_simple_unlock(&pmap_ppl_free_page_lock);
}
/*
 * Get a PPL page from the free list.
 */
MARK_AS_PMAP_TEXT static pmap_paddr_t
pmap_get_free_ppl_page(void)
{
	pmap_paddr_t result = 0;

	pmap_simple_lock(&pmap_ppl_free_page_lock);

	if (pmap_ppl_free_page_list != NULL) {
		void ** new_head = NULL;
		new_head = *((void**)pmap_ppl_free_page_list);
		result = kvtophys((vm_offset_t)pmap_ppl_free_page_list);
		pmap_ppl_free_page_list = new_head;
		pmap_ppl_free_page_count--;
	}

	pmap_simple_unlock(&pmap_ppl_free_page_lock);
	assert((result & ARM_PGMASK) == 0);

	return result;
}
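/*
 * Layout sketch (illustrative only): the PPL free page list is an intrusive
 * singly-linked stack threaded through the pages themselves.  The first word
 * of each free page (via its physical-aperture mapping) stores the kernel
 * virtual address of the next free page:
 *
 *     pmap_ppl_free_page_list -> [page A: next = B] -> [page B: next = NULL]
 *
 * pmap_give_free_ppl_page() pushes at the head; pmap_get_free_ppl_page()
 * pops the head and converts it back to a physical address with kvtophys().
 */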
/*
 * pmap_mark_page_as_ppl_page claims a page on behalf of the PPL by marking it
 * as PPL-owned and only allowing the PPL to write to it.
 */
MARK_AS_PMAP_TEXT static void
pmap_mark_page_as_ppl_page_internal(pmap_paddr_t pa, bool initially_free)
{
	vm_offset_t kva = 0;
	unsigned int pai = 0;
	pp_attr_t attr;

	/*
	 * Mark each page that we allocate as belonging to the monitor, as we
	 * intend to use it for monitor-y stuff (page tables, table pages, that
	 * sort of thing).
	 */
	if (!pa_valid(pa)) {
		panic("%s: bad address, "
		    "pa=%p",
		    __FUNCTION__, (void *)pa);
	}

	pai = (unsigned int)pa_index(pa);

	/* A page that the PPL already owns can't be given to the PPL. */
	if (pa_test_monitor(pa)) {
		panic("%s: page already belongs to PPL, "
		    "pa=0x%llx",
		    __FUNCTION__, pa);
	}
	/* The page cannot be mapped outside of the physical aperture. */
	if (!pmap_verify_free((ppnum_t)atop(pa))) {
		panic("%s: page is not free, "
		    "pa=0x%llx",
		    __FUNCTION__, pa);
	}

	do {
		attr = pp_attr_table[pai];
		if (attr & PP_ATTR_NO_MONITOR) {
			panic("%s: page excluded from PPL, "
			    "pa=0x%llx",
			    __FUNCTION__, pa);
		}
	} while (!OSCompareAndSwap16(attr, attr | PP_ATTR_MONITOR, &pp_attr_table[pai]));

	kva = phystokv(pa);
	pmap_set_xprr_perm(kva, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);

	if (initially_free) {
		pmap_give_free_ppl_page(pa);
	}
}

static void
pmap_mark_page_as_ppl_page(pmap_paddr_t pa)
{
	pmap_mark_page_as_ppl_page_ppl(pa, true);
}
MARK_AS_PMAP_TEXT static void
pmap_mark_page_as_kernel_page(pmap_paddr_t pa)
{
	vm_offset_t kva = 0;
	unsigned int pai = 0;

	pai = (unsigned int)pa_index(pa);

	if (!pa_test_monitor(pa)) {
		panic("%s: page is not a PPL page, "
		    "pa=0x%llx",
		    __FUNCTION__, pa);
	}

	pa_clear_monitor(pa);

	kva = phystokv(pa);
	pmap_set_xprr_perm(kva, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
}
MARK_AS_PMAP_TEXT static pmap_paddr_t
pmap_release_ppl_pages_to_kernel_internal(void)
{
	pmap_paddr_t pa = 0;

	if (pmap_ppl_free_page_count <= PMAP_MIN_FREE_PPL_PAGES) {
		goto done;
	}

	pa = pmap_get_free_ppl_page();

	if (!pa) {
		goto done;
	}

	pmap_mark_page_as_kernel_page(pa);

done:
	return pa;
}
uint64_t
pmap_release_ppl_pages_to_kernel(void)
{
	pmap_paddr_t pa = 0;
	vm_page_t m = VM_PAGE_NULL;
	vm_page_t local_freeq = VM_PAGE_NULL;
	uint64_t pmap_ppl_pages_returned_to_kernel_count = 0;

	while (pmap_ppl_free_page_count > PMAP_MIN_FREE_PPL_PAGES) {
		pa = pmap_release_ppl_pages_to_kernel_ppl();

		if (!pa) {
			break;
		}

		/* If we retrieved a page, add it to the free queue. */
		vm_object_lock(pmap_object);
		m = vm_page_lookup(pmap_object, (pa - gPhysBase));
		assert(m != VM_PAGE_NULL);
		assert(VM_PAGE_WIRED(m));

		m->vmp_snext = local_freeq;
		local_freeq = m;
		pmap_ppl_pages_returned_to_kernel_count++;
		pmap_ppl_pages_returned_to_kernel_count_total++;

		vm_object_unlock(pmap_object);
	}

	if (local_freeq) {
		/* We need to hold the object lock for freeing pages. */
		vm_object_lock(pmap_object);
		vm_page_free_list(local_freeq, TRUE);
		vm_object_unlock(pmap_object);
	}

	return pmap_ppl_pages_returned_to_kernel_count;
}
static void
pmap_enqueue_pages(vm_page_t m)
{
	vm_page_t m_prev;

	vm_object_lock(pmap_object);
	while (m != VM_PAGE_NULL) {
		vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
		m_prev = m;
		m = NEXT_PAGE(m_prev);
		*(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
	}
	vm_object_unlock(pmap_object);
}
static kern_return_t
pmap_pages_alloc_zeroed(
	pmap_paddr_t    *pa,
	unsigned        size,
	unsigned        option)
{
#if XNU_MONITOR
	ASSERT_NOT_HIBERNATING();

	if (size != PAGE_SIZE) {
		panic("%s: size != PAGE_SIZE, "
		    "pa=%p, size=%u, option=%u",
		    __FUNCTION__,
		    pa, size, option);
	}

	assert(option & PMAP_PAGES_ALLOCATE_NOWAIT);

	*pa = pmap_get_free_ppl_page();

	if ((*pa == 0) && (option & PMAP_PAGES_RECLAIM_NOWAIT)) {
		*pa = pmap_pages_reclaim();
	}

	if (*pa == 0) {
		return KERN_RESOURCE_SHORTAGE;
	}

	bzero((void*)phystokv(*pa), size);
	return KERN_SUCCESS;
#else
	vm_page_t m = VM_PAGE_NULL;

	thread_t self = current_thread();
	// We qualify to allocate reserved memory
	uint16_t thread_options = self->options;
	self->options |= TH_OPT_VMPRIV;
	if (__probable(size == PAGE_SIZE)) {
		while ((m = vm_page_grab()) == VM_PAGE_NULL) {
			if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
				break;
			}
			VM_PAGE_WAIT();
		}
		if (m != VM_PAGE_NULL) {
			vm_page_lock_queues();
			vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
			vm_page_unlock_queues();
		}
	} else if (size == 2 * PAGE_SIZE) {
		while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
			if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
				break;
			}
			VM_PAGE_WAIT();
		}
	} else {
		panic("%s: invalid size %u", __func__, size);
	}

	self->options = thread_options;

	if ((m == VM_PAGE_NULL) && (option & PMAP_PAGES_RECLAIM_NOWAIT)) {
		assert(size == PAGE_SIZE);
		*pa = pmap_pages_reclaim();
		if (*pa != 0) {
			bzero((void*)phystokv(*pa), size);
			return KERN_SUCCESS;
		}
	}

	if (m == VM_PAGE_NULL) {
		return KERN_RESOURCE_SHORTAGE;
	}

	*pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));

	pmap_enqueue_pages(m);

	OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
	OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);

	bzero((void*)phystokv(*pa), size);
	return KERN_SUCCESS;
#endif
}
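/*
 * Caller sketch (illustrative only): pagetable-related allocations in this
 * file typically request a single zeroed page without blocking and fall back
 * to reclaim, e.g.
 *
 *     pmap_paddr_t pa;
 *     if (pmap_pages_alloc_zeroed(&pa, PAGE_SIZE,
 *         PMAP_PAGES_ALLOCATE_NOWAIT | PMAP_PAGES_RECLAIM_NOWAIT) != KERN_SUCCESS) {
 *             ...propagate KERN_RESOURCE_SHORTAGE or retry...
 *     }
 *
 * Only the blocking (option == 0) path may call VM_PAGE_WAIT(); PPL callers
 * always pass PMAP_PAGES_ALLOCATE_NOWAIT, as asserted above.
 */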
static pmap_paddr_t
pmap_alloc_page_for_kern(unsigned int options)
{
	pmap_paddr_t paddr;
	vm_page_t m;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		if (options & PMAP_PAGES_ALLOCATE_NOWAIT) {
			return 0;
		}
		VM_PAGE_WAIT();
	}

	vm_page_lock_queues();
	vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
	vm_page_unlock_queues();

	paddr = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));

	if (__improbable(paddr == 0)) {
		panic("%s: paddr is 0", __func__);
	}

	pmap_enqueue_pages(m);

	OSAddAtomic(1, &inuse_pmap_pages_count);
	OSAddAtomic64(1, &alloc_pmap_pages_count);

	return paddr;
}
static void
pmap_alloc_page_for_ppl(unsigned int options)
{
	thread_t self = current_thread();
	// We qualify to allocate reserved memory
	uint16_t thread_options = self->options;
	self->options |= TH_OPT_VMPRIV;
	pmap_paddr_t paddr = pmap_alloc_page_for_kern(options);
	self->options = thread_options;
	if (paddr != 0) {
		pmap_mark_page_as_ppl_page(paddr);
	}
}
static pmap_t
pmap_alloc_pmap(void)
{
	pmap_t pmap = PMAP_NULL;

	pmap_simple_lock(&pmap_free_list_lock);

	if (pmap_free_list != PMAP_NULL) {
		pmap = pmap_free_list;
		pmap_free_list = *((pmap_t *)pmap);

		if (!PMAP_PTR_IS_VALID(pmap)) {
			panic("%s: allocated pmap is not valid, pmap=%p",
			    __FUNCTION__, pmap);
		}
	}

	pmap_simple_unlock(&pmap_free_list_lock);

	return pmap;
}
static void
pmap_free_pmap(pmap_t pmap)
{
	if (!PMAP_PTR_IS_VALID(pmap)) {
		panic("%s: pmap is not valid, "
		    "pmap=%p",
		    __FUNCTION__, pmap);
	}

	pmap_simple_lock(&pmap_free_list_lock);
	*((pmap_t *)pmap) = pmap_free_list;
	pmap_free_list = pmap;
	pmap_simple_unlock(&pmap_free_list_lock);
}
static void
pmap_bootstrap_pmap_free_list(void)
{
	pmap_t cur_head = PMAP_NULL;
	unsigned long i = 0;

	simple_lock_init(&pmap_free_list_lock, 0);

	for (i = 0; i < pmap_array_count; i++) {
		*((pmap_t *)(&pmap_array[i])) = cur_head;
		cur_head = &pmap_array[i];
	}

	pmap_free_list = cur_head;
}
static void
pmap_pages_free(
	pmap_paddr_t    pa,
	unsigned        size)
{
	if (__improbable(pmap_pages_request_count != 0)) {
		page_free_entry_t *page_entry;

		pmap_simple_lock(&pmap_pages_lock);

		if (pmap_pages_request_count != 0) {
			pmap_pages_request_count--;
			page_entry = (page_free_entry_t *)phystokv(pa);
			page_entry->next = pmap_pages_reclaim_list;
			pmap_pages_reclaim_list = page_entry;
			pmap_simple_unlock(&pmap_pages_lock);
			return;
		}

		pmap_simple_unlock(&pmap_pages_lock);
	}

#if XNU_MONITOR
	(void)size;

	pmap_give_free_ppl_page(pa);
#else
	vm_page_t m;
	pmap_paddr_t pa_max;

	OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);

	for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
		vm_object_lock(pmap_object);
		m = vm_page_lookup(pmap_object, (pa - gPhysBase));
		assert(m != VM_PAGE_NULL);
		assert(VM_PAGE_WIRED(m));
		vm_page_lock_queues();
		vm_page_free(m);
		vm_page_unlock_queues();
		vm_object_unlock(pmap_object);
	}
#endif
}
	pmap_t pmap, int bytes)

	pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);

	pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
static void
pmap_tt_ledger_credit(
	pmap_t          pmap,
	vm_size_t       size)
{
	if (pmap != kernel_pmap) {
		pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
		pmap_ledger_credit(pmap, task_ledgers.page_table, size);
	}
}

static void
pmap_tt_ledger_debit(
	pmap_t          pmap,
	vm_size_t       size)
{
	if (pmap != kernel_pmap) {
		pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
		pmap_ledger_debit(pmap, task_ledgers.page_table, size);
	}
}
static void
pmap_update_plru(uint16_t asid_index)
{
	if (__probable(pmap_asid_plru)) {
		unsigned plru_index = asid_index >> 6;
		if (__improbable(os_atomic_andnot(&asid_plru_bitmap[plru_index], (1ULL << (asid_index & 63)), relaxed) == 0)) {
			asid_plru_generation[plru_index] = ++asid_plru_gencount;
			asid_plru_bitmap[plru_index] = ((plru_index == (MAX_HW_ASIDS >> 6)) ? ~(1ULL << 63) : UINT64_MAX);
		}
	}
}
static bool
alloc_asid(pmap_t pmap)
{
	int vasid = -1;
	uint16_t hw_asid;

	pmap_simple_lock(&asid_lock);

	if (__probable(pmap_asid_plru)) {
		unsigned plru_index = 0;
		uint64_t lowest_gen = asid_plru_generation[0];
		uint64_t lowest_gen_bitmap = asid_plru_bitmap[0];
		for (unsigned i = 1; i < (sizeof(asid_plru_generation) / sizeof(asid_plru_generation[0])); ++i) {
			if (asid_plru_generation[i] < lowest_gen) {
				plru_index = i;
				lowest_gen = asid_plru_generation[i];
				lowest_gen_bitmap = asid_plru_bitmap[i];
			}
		}

		for (; plru_index < BITMAP_LEN(pmap_max_asids); plru_index += ((MAX_HW_ASIDS + 1) >> 6)) {
			uint64_t temp_plru = lowest_gen_bitmap & asid_bitmap[plru_index];
			if (temp_plru) {
				vasid = (plru_index << 6) + lsb_first(temp_plru);
#if DEVELOPMENT || DEBUG
#endif
				break;
			}
		}
	}
	if (__improbable(vasid < 0)) {
		// bitmap_first() returns highest-order bits first, but a 0-based scheme works
		// slightly better with the collision detection scheme used by pmap_switch_internal().
		vasid = bitmap_lsb_first(&asid_bitmap[0], pmap_max_asids);
#if DEVELOPMENT || DEBUG
#endif
	}
	if (__improbable(vasid < 0)) {
		pmap_simple_unlock(&asid_lock);
		return false;
	}
	assert((uint32_t)vasid < pmap_max_asids);
	assert(bitmap_test(&asid_bitmap[0], (unsigned int)vasid));
	bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
	pmap_simple_unlock(&asid_lock);
	hw_asid = vasid % asid_chunk_size;
	pmap->sw_asid = (uint8_t)(vasid / asid_chunk_size);
	if (__improbable(hw_asid == MAX_HW_ASIDS)) {
		/* If we took a PLRU "miss" and ended up with a hardware ASID we can't actually support,
		 * reassign to a reserved VASID. */
		assert(pmap->sw_asid < UINT8_MAX);
		pmap->sw_asid = UINT8_MAX;
		/* Allocate from the high end of the hardware ASID range to reduce the likelihood of
		 * aliasing with vital system processes, which are likely to have lower ASIDs. */
		hw_asid = MAX_HW_ASIDS - 1 - (uint16_t)(vasid / asid_chunk_size);
		assert(hw_asid < MAX_HW_ASIDS);
	}
	pmap_update_plru(hw_asid);
	hw_asid += 1;  // Account for ASID 0, which is reserved for the kernel
#if __ARM_KERNEL_PROTECT__
	hw_asid <<= 1;  // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
#endif /* __ARM_KERNEL_PROTECT__ */
	pmap->hw_asid = hw_asid;
	return true;
}
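/*
 * Worked example (illustrative numbers): a virtual ASID ("vasid") encodes a
 * software epoch and a hardware ASID as
 *
 *     hw_asid = vasid % asid_chunk_size;
 *     sw_asid = vasid / asid_chunk_size;
 *
 * so with asid_chunk_size == 257 (a hypothetical value), vasid 600 maps to
 * sw_asid 2 and hw_asid 86; the allocator then adds 1 to skip hardware ASID 0
 * (reserved for the kernel) and, under __ARM_KERNEL_PROTECT__, shifts left by
 * 1 because EL0 and EL1 consume a pair of hardware ASIDs.  free_asid()
 * applies the inverse transformation to recover the vasid bit to set.
 */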
static void
free_asid(pmap_t pmap)
{
	unsigned int vasid;
	uint16_t hw_asid = os_atomic_xchg(&pmap->hw_asid, 0, relaxed);
	if (__improbable(hw_asid == 0)) {
		return;
	}

#if __ARM_KERNEL_PROTECT__
	hw_asid >>= 1;
#endif /* __ARM_KERNEL_PROTECT__ */
	hw_asid -= 1;

	if (__improbable(pmap->sw_asid == UINT8_MAX)) {
		vasid = ((MAX_HW_ASIDS - 1 - hw_asid) * asid_chunk_size) + MAX_HW_ASIDS;
	} else {
		vasid = ((unsigned int)pmap->sw_asid * asid_chunk_size) + hw_asid;
	}

	if (__probable(pmap_asid_plru)) {
		os_atomic_or(&asid_plru_bitmap[hw_asid >> 6], (1ULL << (hw_asid & 63)), relaxed);
	}
	pmap_simple_lock(&asid_lock);
	assert(!bitmap_test(&asid_bitmap[0], vasid));
	bitmap_set(&asid_bitmap[0], vasid);
	pmap_simple_unlock(&asid_lock);
}
#if XNU_MONITOR
/*
 * Increase the padding for PPL devices to accommodate increased
 * mapping pressure from IOMMUs.  This isn't strictly necessary, but
 * will reduce the need to retry mappings due to PV allocation failure.
 */
#define PV_LOW_WATER_MARK_DEFAULT       (0x400)
#define PV_KERN_LOW_WATER_MARK_DEFAULT  (0x400)
#define PV_ALLOC_CHUNK_INITIAL          (0x400)
#define PV_KERN_ALLOC_CHUNK_INITIAL     (0x400)
#define PV_CPU_MIN                      (0x80)
#define PV_CPU_MAX                      (0x400)

#else

#define PV_LOW_WATER_MARK_DEFAULT       (0x200)
#define PV_KERN_LOW_WATER_MARK_DEFAULT  (0x200)
#define PV_ALLOC_CHUNK_INITIAL          (0x200)
#define PV_KERN_ALLOC_CHUNK_INITIAL     (0x200)
#define PV_CPU_MIN                      (0x40)
#define PV_CPU_MAX                      (0x200)

#endif

#define PV_ALLOC_INITIAL_TARGET         (PV_ALLOC_CHUNK_INITIAL * 5)
#define PV_KERN_ALLOC_INITIAL_TARGET    (PV_KERN_ALLOC_CHUNK_INITIAL)

uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;

uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA = PV_KERN_LOW_WATER_MARK_DEFAULT;
uint32_t pv_alloc_initial_target MARK_AS_PMAP_DATA = PV_ALLOC_INITIAL_TARGET;
uint32_t pv_kern_alloc_initial_target MARK_AS_PMAP_DATA = PV_KERN_ALLOC_INITIAL_TARGET;

unsigned pmap_reserve_replenish_stat MARK_AS_PMAP_DATA;
unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
static inline void pv_list_alloc(pv_entry_t **pv_ep);
static inline void pv_list_kern_alloc(pv_entry_t **pv_e);
static inline void pv_list_free(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target);

static pv_alloc_return_t
pv_alloc(
	pmap_t pmap,
	unsigned int pai,
	pv_entry_t **pvepp)
{
	if (pmap != NULL) {
		pmap_assert_locked_w(pmap);
	}
	ASSERT_PVH_LOCKED(pai);
	pv_list_alloc(pvepp);
	if (PV_ENTRY_NULL != *pvepp) {
		return PV_ALLOC_SUCCESS;
	}
#if XNU_MONITOR
	unsigned alloc_flags = PMAP_PAGES_ALLOCATE_NOWAIT;
#else
	unsigned alloc_flags = 0;
#endif
	if ((pmap == NULL) || (kernel_pmap == pmap)) {
		pv_list_kern_alloc(pvepp);

		if (PV_ENTRY_NULL != *pvepp) {
			return PV_ALLOC_SUCCESS;
		}
		alloc_flags = PMAP_PAGES_ALLOCATE_NOWAIT | PMAP_PAGES_RECLAIM_NOWAIT;
	}

	pv_entry_t *pv_e;
	pv_entry_t *pv_eh;
	pv_entry_t *pv_et;
	int pv_cnt;
	pmap_paddr_t pa;
	kern_return_t ret;
	pv_alloc_return_t pv_status = PV_ALLOC_RETRY;

	ret = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, alloc_flags);

	if (ret != KERN_SUCCESS) {
		pv_status = PV_ALLOC_FAIL;
		goto pv_alloc_cleanup;
	}

	pv_e = (pv_entry_t *)phystokv(pa);
	*pvepp = pv_e;
	pv_cnt = (PAGE_SIZE / sizeof(pv_entry_t)) - 1;
	pv_eh = pv_e + 1;
	pv_et = &pv_e[pv_cnt];

	pv_list_free(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);
pv_alloc_cleanup:
	return pv_status;
}
	pv_list_free(pvep, pvep, 1, pv_kern_low_water_mark);
static inline void
pv_free_list_alloc(pv_free_list_t *free_list, pv_entry_t **pv_ep)
{
	assert(((free_list->list != NULL) && (free_list->count > 0)) ||
	    ((free_list->list == NULL) && (free_list->count == 0)));

	if ((*pv_ep = free_list->list) != NULL) {
		pv_entry_t *pv_e = *pv_ep;
		if ((pv_e->pve_next == NULL) && (free_list->count > 1)) {
			free_list->list = pv_e + 1;
		} else {
			free_list->list = pv_e->pve_next;
		}
		pv_e->pve_next = PV_ENTRY_NULL;
		free_list->count--;
	}
}
static inline void
pv_list_alloc(pv_entry_t **pv_ep)
{
	assert(*pv_ep == PV_ENTRY_NULL);

	mp_disable_preemption();
	pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
	pv_free_list_alloc(&pmap_cpu_data->pv_free, pv_ep);
	mp_enable_preemption();

	if (*pv_ep != PV_ENTRY_NULL) {
		return;
	}

	if (pv_kern_free.count < pv_kern_low_water_mark) {
		/*
		 * If the kernel reserved pool is low, let non-kernel mappings wait for a page
		 */
		return;
	}

	pmap_simple_lock(&pv_free_list_lock);
	pv_free_list_alloc(&pv_free, pv_ep);
	pmap_simple_unlock(&pv_free_list_lock);
}
static inline void
pv_list_free(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target)
{
	bool limit_exceeded = false;

	mp_disable_preemption();

	pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
	pv_et->pve_next = pmap_cpu_data->pv_free.list;
	pmap_cpu_data->pv_free.list = pv_eh;
	if (pmap_cpu_data->pv_free.count == PV_CPU_MIN) {
		pmap_cpu_data->pv_free_tail = pv_et;
	}
	pmap_cpu_data->pv_free.count += pv_cnt;
	if (__improbable(pmap_cpu_data->pv_free.count > PV_CPU_MAX)) {
		pv_et = pmap_cpu_data->pv_free_tail;
		pv_cnt = pmap_cpu_data->pv_free.count - PV_CPU_MIN;
		pmap_cpu_data->pv_free.list = pmap_cpu_data->pv_free_tail->pve_next;
		pmap_cpu_data->pv_free.count = PV_CPU_MIN;
		limit_exceeded = true;
	}

	mp_enable_preemption();

	if (__probable(!limit_exceeded)) {
		return;
	}

	if (__improbable(pv_kern_free.count < kern_target)) {
		pmap_simple_lock(&pv_kern_free_list_lock);
		pv_et->pve_next = pv_kern_free.list;
		pv_kern_free.list = pv_eh;
		pv_kern_free.count += pv_cnt;
		pmap_simple_unlock(&pv_kern_free_list_lock);
	} else {
		pmap_simple_lock(&pv_free_list_lock);
		pv_et->pve_next = pv_free.list;
		pv_free.list = pv_eh;
		pv_free.count += pv_cnt;
		pmap_simple_unlock(&pv_free_list_lock);
	}
}
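/*
 * Structure sketch (illustrative only): pv_entry_t nodes are recycled through
 * three tiers -
 *
 *     per-CPU cache   (pmap_cpu_data->pv_free, bounded by PV_CPU_MIN/MAX)
 *     global list     (pv_free, spinlock protected)
 *     kernel reserve  (pv_kern_free, refilled toward pv_kern_low_water_mark)
 *
 * pv_list_free() always pushes onto the current CPU's cache first; only when
 * that cache overflows PV_CPU_MAX is the excess chunk spilled to the kernel
 * reserve (if it is below kern_target) or otherwise to the global list.
 */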
static inline void
pv_list_kern_alloc(pv_entry_t **pv_ep)
{
	assert(*pv_ep == PV_ENTRY_NULL);
	pmap_simple_lock(&pv_kern_free_list_lock);
	if (pv_kern_free.count > 0) {
		pmap_kern_reserve_alloc_stat++;
		pv_free_list_alloc(&pv_kern_free, pv_ep);
	}
	pmap_simple_unlock(&pv_kern_free_list_lock);
}
void
mapping_adjust(void)
{
	// Not implemented for arm/arm64
}
/*
 * Fills the kernel and general PV free lists back up to their low watermarks.
 */
MARK_AS_PMAP_TEXT static kern_return_t
mapping_replenish_internal(uint32_t kern_target_count, uint32_t user_target_count)
{
	pv_entry_t *pv_eh;
	pv_entry_t *pv_et;
	int pv_cnt;
	pmap_paddr_t pa;
	kern_return_t ret = KERN_SUCCESS;

	while ((pv_free.count < user_target_count) || (pv_kern_free.count < kern_target_count)) {
#if XNU_MONITOR
		if ((ret = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT)) != KERN_SUCCESS) {
			return ret;
		}
#else
		ret = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, 0);
		assert(ret == KERN_SUCCESS);
#endif

		pv_eh = (pv_entry_t *)phystokv(pa);
		pv_cnt = PAGE_SIZE / sizeof(pv_entry_t);
		pv_et = &pv_eh[pv_cnt - 1];

		pmap_reserve_replenish_stat += pv_cnt;
		pv_list_free(pv_eh, pv_et, pv_cnt, kern_target_count);
	}

	return ret;
}
/*
 * Creates a target number of free pv_entry_t objects for the kernel free list
 * and the general free list.
 */
MARK_AS_PMAP_TEXT static kern_return_t
mapping_free_prime_internal(void)
{
	return mapping_replenish_internal(pv_kern_alloc_initial_target, pv_alloc_initial_target);
}
void
mapping_free_prime(void)
{
	kern_return_t kr = KERN_FAILURE;

#if XNU_MONITOR
	unsigned int i = 0;

	/*
	 * Allocate the needed PPL pages up front, to minimize the chance that
	 * we will need to call into the PPL multiple times.
	 */
	for (i = 0; i < pv_alloc_initial_target; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
		pmap_alloc_page_for_ppl(0);
	}

	for (i = 0; i < pv_kern_alloc_initial_target; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
		pmap_alloc_page_for_ppl(0);
	}

	while ((kr = mapping_free_prime_ppl()) == KERN_RESOURCE_SHORTAGE) {
		pmap_alloc_page_for_ppl(0);
	}
#else
	kr = mapping_free_prime_internal();
#endif

	if (kr != KERN_SUCCESS) {
		panic("%s: failed, kr=%d",
		    __FUNCTION__, kr);
	}
}
)
3831 simple_lock_init(&ptd_free_list_lock
, 0);
3832 // Region represented by ptdp should be cleared by pmap_bootstrap()
3833 *((void**)(&ptdp
[ptd_cnt
- 1])) = (void*)ptd_free_list
;
3834 ptd_free_list
= ptdp
;
3835 ptd_free_count
+= ptd_cnt
;
3836 ptd_preboot
= FALSE
;
static pt_desc_t *
ptd_alloc_unlinked(void)
{
	pt_desc_t *ptdp;
	unsigned i;

	if (!ptd_preboot) {
		pmap_simple_lock(&ptd_free_list_lock);
	}

	assert(((ptd_free_list != NULL) && (ptd_free_count > 0)) ||
	    ((ptd_free_list == NULL) && (ptd_free_count == 0)));

	if (ptd_free_count == 0) {
		unsigned int ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);

		if (ptd_preboot) {
			ptdp = (pt_desc_t *)avail_start;
			avail_start += PAGE_SIZE;
			bzero(ptdp, PAGE_SIZE);
		} else {
			pmap_paddr_t pa;

			pmap_simple_unlock(&ptd_free_list_lock);

			if (pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
				return NULL;
			}
			ptdp = (pt_desc_t *)phystokv(pa);

			pmap_simple_lock(&ptd_free_list_lock);
		}

		*((void**)(&ptdp[ptd_cnt - 1])) = (void*)ptd_free_list;
		ptd_free_list = ptdp;
		ptd_free_count += ptd_cnt;
	}

	if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
		ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
		if ((ptd_free_list == NULL) && (ptd_free_count > 1)) {
			ptd_free_list = ptdp + 1;
		}
		ptd_free_count--;
	} else {
		panic("%s: out of ptd entry",
		    __FUNCTION__);
	}

	if (!ptd_preboot) {
		pmap_simple_unlock(&ptd_free_list_lock);
	}

	ptdp->pt_page.next = NULL;
	ptdp->pt_page.prev = NULL;

	for (i = 0; i < PT_INDEX_MAX; i++) {
		ptdp->ptd_info[i].va = (vm_offset_t)-1;
		ptdp->ptd_info[i].refcnt = 0;
		ptdp->ptd_info[i].wiredcnt = 0;
	}

	return ptdp;
}
static inline pt_desc_t *
ptd_alloc(pmap_t pmap)
{
	pt_desc_t *ptdp = ptd_alloc_unlinked();

	if (ptdp == NULL) {
		return NULL;
	}

	ptdp->pmap = pmap;
	if (pmap != kernel_pmap) {
		/* We should never try to reclaim kernel pagetable pages in
		 * pmap_pages_reclaim(), so don't enter them into the list. */
		pmap_simple_lock(&pt_pages_lock);
		queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
		pmap_simple_unlock(&pt_pages_lock);
	}

	pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
	return ptdp;
}
static void
ptd_deallocate(pt_desc_t *ptdp)
{
	pmap_t pmap = ptdp->pmap;

	if (ptd_preboot) {
		panic("%s: early boot, "
		    "ptdp=%p",
		    __FUNCTION__, ptdp);
	}

	if (ptdp->pt_page.next != NULL) {
		pmap_simple_lock(&pt_pages_lock);
		queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
		pmap_simple_unlock(&pt_pages_lock);
	}
	pmap_simple_lock(&ptd_free_list_lock);
	(*(void **)ptdp) = (void *)ptd_free_list;
	ptd_free_list = (pt_desc_t *)ptdp;
	ptd_free_count++;
	pmap_simple_unlock(&ptd_free_list_lock);
	if (pmap != NULL) {
		pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
	}
}
static void
ptd_init(
	pt_desc_t *ptdp,
	pmap_t pmap,
	vm_map_address_t va,
	unsigned int level,
	pt_entry_t *pte_p)
{
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	if (ptdp->pmap != pmap) {
		panic("%s: pmap mismatch, "
		    "ptdp=%p, pmap=%p, va=%p, level=%u, pte_p=%p",
		    __FUNCTION__,
		    ptdp, pmap, (void*)va, level, pte_p);
	}

	assert(level > pt_attr_root_level(pt_attr));
	ptd_info_t *ptd_info = ptd_get_info(ptdp, pte_p);
	ptd_info->va = (vm_offset_t) va & ~pt_attr_ln_pt_offmask(pt_attr, level - 1);

	if (level < pt_attr_leaf_level(pt_attr)) {
		ptd_info->refcnt = PT_DESC_REFCOUNT;
	}
}

boolean_t
pmap_valid_address(
	pmap_paddr_t addr)
{
	return pa_valid(addr);
}
#if (__ARM_VMSA__ == 7)

/*
 * Given an offset and a map, compute the address of the
 * corresponding translation table entry.
 */
static inline tt_entry_t *
pmap_tte(pmap_t pmap,
    vm_map_address_t addr)
{
	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	if (!(tte_index(pmap, pt_attr, addr) < pmap->tte_index_max)) {
		return (tt_entry_t *)NULL;
	}
	return &pmap->tte[tte_index(pmap, pt_attr, addr)];
}
/*
 * Given an offset and a map, compute the address of the
 * pte.  If the address is invalid with respect to the map
 * then PT_ENTRY_NULL is returned (and the map may need to grow).
 *
 * This is only used internally.
 */
static inline pt_entry_t *
pmap_pte(pmap_t pmap,
    vm_map_address_t addr)
{
	pt_entry_t *ptp;
	tt_entry_t *ttp;
	tt_entry_t tte;
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	ttp = pmap_tte(pmap, addr);
	if (ttp == (tt_entry_t *)NULL) {
		return PT_ENTRY_NULL;
	}
	tte = *ttp;
	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
		panic("%s: Attempt to demote L1 block, tte=0x%lx, "
		    "pmap=%p, addr=%p",
		    __FUNCTION__, (unsigned long)tte,
		    pmap, (void*)addr);
	}
	if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
		return PT_ENTRY_NULL;
	}
	ptp = (pt_entry_t *) ttetokv(tte) + pte_index(pmap, pt_attr, addr);
	return ptp;
}
static inline tt_entry_t
*
4044 pmap_ttne(pmap_t pmap
,
4045 unsigned int target_level
,
4046 vm_map_address_t addr
)
4048 tt_entry_t
* ret_ttep
= NULL
;
4050 switch (target_level
) {
4052 ret_ttep
= pmap_tte(pmap
, addr
);
4055 ret_ttep
= (tt_entry_t
*)pmap_pte(pmap
, addr
);
4058 panic("%s: bad level, "
4059 "pmap=%p, target_level=%u, addr=%p",
4061 pmap
, target_level
, (void *)addr
);
#else

static inline tt_entry_t *
pmap_ttne(pmap_t pmap,
    unsigned int target_level,
    vm_map_address_t addr)
{
	tt_entry_t * ttp = NULL;
	tt_entry_t * ttep = NULL;
	tt_entry_t tte = ARM_TTE_EMPTY;
	unsigned int cur_level;

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	ttp = pmap->tte;

	assert(target_level <= pt_attr->pta_max_level);

	for (cur_level = pt_attr->pta_root_level; cur_level <= target_level; cur_level++) {
		ttep = &ttp[ttn_index(pmap, pt_attr, addr, cur_level)];

		if (cur_level == target_level) {
			break;
		}

		tte = *ttep;

		if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
			panic("%s: Attempt to demote L%u block, tte=0x%llx, "
			    "pmap=%p, target_level=%u, addr=%p",
			    __FUNCTION__, cur_level, tte,
			    pmap, target_level, (void*)addr);
		}

		if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
			return TT_ENTRY_NULL;
		}

		ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
	}

	return ttep;
}
/*
 * Given an offset and a map, compute the address of level 1 translation table entry.
 * If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline tt_entry_t *
pmap_tt1e(pmap_t pmap,
    vm_map_address_t addr)
{
	return pmap_ttne(pmap, PMAP_TT_L1_LEVEL, addr);
}

/*
 * Given an offset and a map, compute the address of level 2 translation table entry.
 * If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline tt_entry_t *
pmap_tt2e(pmap_t pmap,
    vm_map_address_t addr)
{
	return pmap_ttne(pmap, PMAP_TT_L2_LEVEL, addr);
}

/*
 * Given an offset and a map, compute the address of level 3 translation table entry.
 * If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline pt_entry_t *
pmap_tt3e(pmap_t pmap,
    vm_map_address_t addr)
{
	return (pt_entry_t *)pmap_ttne(pmap, PMAP_TT_L3_LEVEL, addr);
}

static inline tt_entry_t *
pmap_tte(pmap_t pmap,
    vm_map_address_t addr)
{
	return pmap_tt2e(pmap, addr);
}

static inline pt_entry_t *
pmap_pte(pmap_t pmap,
    vm_map_address_t addr)
{
	return pmap_tt3e(pmap, addr);
}

#endif
/*
 * Map memory at initialization.  The physical addresses being
 * mapped are not managed and are never unmapped.
 *
 * For now, VM is already on, we only need to map the
 * specified memory.
 */
vm_map_address_t
pmap_map(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot,
	unsigned int flags)
{
	kern_return_t kr;
	vm_size_t ps;

	ps = PAGE_SIZE;
	while (start < end) {
		kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
		    prot, VM_PROT_NONE, flags, FALSE);

		if (kr != KERN_SUCCESS) {
			panic("%s: failed pmap_enter, "
			    "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
			    __FUNCTION__,
			    (void *) virt, (void *) start, (void *) end, prot, flags);
		}

		virt += ps;
		start += ps;
	}
	return virt;
}
vm_map_address_t
pmap_map_bd_with_options(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot,
	int32_t options)
{
	pt_entry_t tmplate;
	pt_entry_t *ptep;
	vm_map_address_t vaddr;
	vm_offset_t paddr;
	pt_entry_t mem_attr;

	switch (options & PMAP_MAP_BD_MASK) {
	case PMAP_MAP_BD_WCOMB:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
#if (__ARM_VMSA__ > 7)
		mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		mem_attr |= ARM_PTE_SH;
#endif
		break;
	case PMAP_MAP_BD_POSTED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
		break;
	case PMAP_MAP_BD_POSTED_REORDERED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
		break;
	case PMAP_MAP_BD_POSTED_COMBINED_REORDERED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
		break;
	default:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
		break;
	}

	tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
	    mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
#if __ARM_KERNEL_PROTECT__
	tmplate |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	vaddr = virt;
	paddr = start;
	while (paddr < end) {
		ptep = pmap_pte(kernel_pmap, vaddr);
		if (ptep == PT_ENTRY_NULL) {
			panic("%s: no PTE for vaddr=%p, "
			    "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
			    __FUNCTION__, (void*)vaddr,
			    (void*)virt, (void*)start, (void*)end, prot, options);
		}

		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		WRITE_PTE_STRONG(ptep, tmplate);

		pte_increment_pa(tmplate);
		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	flush_mmu_tlb_region(virt, (unsigned)(end - start));

	return vaddr;
}
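/*
 * Usage sketch (illustrative only; the physical range shown is made up):
 * early-boot code can map a device region write-combined through the
 * back-door mapper without any VM object bookkeeping, e.g.
 *
 *     vm_map_address_t next_va = pmap_map_bd_with_options(va,
 *         0x80000000ULL, 0x80004000ULL, VM_PROT_READ | VM_PROT_WRITE,
 *         PMAP_MAP_BD_WCOMB);
 *
 * The return value is the VA just past the last page mapped; the PTEs are
 * written directly (WRITE_PTE_STRONG) followed by a TLB flush of the range.
 */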
/*
 * Back-door routine for mapping kernel VM at initialization.
 * Useful for mapping memory outside the range
 * [vm_first_phys, vm_last_phys] (i.e., devices).
 * Otherwise like pmap_map.
 */
vm_map_address_t
pmap_map_bd(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot)
{
	pt_entry_t tmplate;
	pt_entry_t *ptep;
	vm_map_address_t vaddr;
	vm_offset_t paddr;

	/* not cacheable and not buffered */
	tmplate = pa_to_pte(start)
	    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
	    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
	    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
#if __ARM_KERNEL_PROTECT__
	tmplate |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	vaddr = virt;
	paddr = start;
	while (paddr < end) {
		ptep = pmap_pte(kernel_pmap, vaddr);
		if (ptep == PT_ENTRY_NULL) {
			panic("pmap_map_bd");
		}
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		WRITE_PTE_STRONG(ptep, tmplate);

		pte_increment_pa(tmplate);
		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	flush_mmu_tlb_region(virt, (unsigned)(end - start));

	return vaddr;
}
/*
 *	Back-door routine for mapping kernel VM at initialization.
 *	Useful for mapping memory specific physical addresses in early
 *	boot (i.e., before kernel_map is initialized).
 *
 *	Maps are in the VM_HIGH_KERNEL_WINDOW area.
 */
vm_map_address_t
pmap_map_high_window_bd(
	vm_offset_t pa_start,
	vm_size_t len,
	vm_prot_t prot)
{
	pt_entry_t              *ptep, pte;
#if (__ARM_VMSA__ == 7)
	vm_map_address_t        va_start = VM_HIGH_KERNEL_WINDOW;
	vm_map_address_t        va_max = VM_MAX_KERNEL_ADDRESS;
#else
	vm_map_address_t        va_start = VREGION1_START;
	vm_map_address_t        va_max = VREGION1_START + VREGION1_SIZE;
#endif
	vm_map_address_t        va_end;
	vm_map_address_t        va;
	vm_size_t               offset;

	offset = pa_start & PAGE_MASK;
	pa_start -= offset;
	len += offset;

	if (len > (va_max - va_start)) {
		panic("%s: area too large, "
		    "pa_start=%p, len=%p, prot=0x%x",
		    __FUNCTION__,
		    (void*)pa_start, (void*)len, prot);
	}

scan:
	for (; va_start < va_max; va_start += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va_start);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		if (*ptep == ARM_PTE_TYPE_FAULT) {
			break;
		}
	}
	if (va_start > va_max) {
		panic("%s: insufficient pages, "
		    "pa_start=%p, len=%p, prot=0x%x",
		    __FUNCTION__,
		    (void*)pa_start, (void*)len, prot);
	}

	for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va_end);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		if (*ptep != ARM_PTE_TYPE_FAULT) {
			va_start = va_end + PAGE_SIZE;
			goto scan;
		}
	}

	for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va);
		pte = pa_to_pte(pa_start)
		    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
		    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
		    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
#if __ARM_KERNEL_PROTECT__
		pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
		WRITE_PTE_STRONG(ptep, pte);
	}
	PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len, false);
#if KASAN
	kasan_notify_address(va_start, len);
#endif

	return va_start;
}
#define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))
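/*
 * Worked example of PMAP_ALIGN: because `+` binds tighter than `&`, the macro
 * computes ((addr + (align - 1)) & ~(align - 1)), i.e. it rounds `addr` up to
 * the next `align` boundary for power-of-two alignments. For instance:
 *
 *	PMAP_ALIGN(0x1001, 0x40) == (0x1001 + 0x3F) & ~0x3F == 0x1040
 *	PMAP_ALIGN(0x1040, 0x40) == 0x1040            (already aligned)
 */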
static void
pmap_compute_pv_targets(void)
{
	DTEntry entry;
	void const *prop = NULL;
	int err;
	unsigned int prop_size;

	err = SecureDTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	if (kSuccess == SecureDTGetProperty(entry, "pmap-pv-count", &prop, &prop_size)) {
		if (prop_size != sizeof(pv_alloc_initial_target)) {
			panic("pmap-pv-count property is not a 32-bit integer");
		}
		pv_alloc_initial_target = *((uint32_t const *)prop);
	}

	if (kSuccess == SecureDTGetProperty(entry, "pmap-kern-pv-count", &prop, &prop_size)) {
		if (prop_size != sizeof(pv_kern_alloc_initial_target)) {
			panic("pmap-kern-pv-count property is not a 32-bit integer");
		}
		pv_kern_alloc_initial_target = *((uint32_t const *)prop);
	}

	if (kSuccess == SecureDTGetProperty(entry, "pmap-kern-pv-min", &prop, &prop_size)) {
		if (prop_size != sizeof(pv_kern_low_water_mark)) {
			panic("pmap-kern-pv-min property is not a 32-bit integer");
		}
		pv_kern_low_water_mark = *((uint32_t const *)prop);
	}
}
static uint32_t
pmap_compute_max_asids(void)
{
	DTEntry entry;
	void const *prop = NULL;
	uint32_t max_asids;
	int err;
	unsigned int prop_size;

	err = SecureDTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	if (kSuccess != SecureDTGetProperty(entry, "pmap-max-asids", &prop, &prop_size)) {
		/* TODO: consider allowing maxproc limits to be scaled earlier so that
		 * we can choose a more flexible default value here. */
		return MAX_ASIDS;
	}

	if (prop_size != sizeof(max_asids)) {
		panic("pmap-max-asids property is not a 32-bit integer");
	}

	max_asids = *((uint32_t const *)prop);
	/* Round up to the nearest 64 to make things a bit easier for the Pseudo-LRU allocator. */
	max_asids = (max_asids + 63) & ~63UL;

	if (((max_asids + MAX_HW_ASIDS) / (MAX_HW_ASIDS + 1)) > MIN(MAX_HW_ASIDS, UINT8_MAX)) {
		/* currently capped by size of pmap->sw_asid */
		panic("pmap-max-asids too large");
	}
	if (max_asids == 0) {
		panic("pmap-max-asids cannot be zero");
	}

	return max_asids;
}
static vm_size_t
pmap_compute_io_rgns(void)
{
	DTEntry entry;
	pmap_io_range_t const *ranges;
	uint64_t rgn_end;
	void const *prop = NULL;
	int err;
	unsigned int prop_size;

	err = SecureDTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	if (kSuccess != SecureDTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
		return 0;
	}

	ranges = prop;
	for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
		if (ranges[i].addr & PAGE_MASK) {
			panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
		}
		if (ranges[i].len & PAGE_MASK) {
			panic("pmap I/O region %u length 0x%llx is not page-aligned", i, ranges[i].len);
		}
		if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end)) {
			panic("pmap I/O region %u addr 0x%llx length 0x%llx wraps around", i, ranges[i].addr, ranges[i].len);
		}
		if (((ranges[i].addr <= gPhysBase) && (rgn_end > gPhysBase)) ||
		    ((ranges[i].addr < avail_end) && (rgn_end >= avail_end)) ||
		    ((ranges[i].addr > gPhysBase) && (rgn_end < avail_end))) {
			panic("pmap I/O region %u addr 0x%llx length 0x%llx overlaps physical memory", i, ranges[i].addr, ranges[i].len);
		}

		++num_io_rgns;
	}

	return num_io_rgns * sizeof(*ranges);
}
4522 * return < 0 for a < b
4526 typedef int (*cmpfunc_t
)(const void *a
, const void *b
);
4529 qsort(void *a
, size_t n
, size_t es
, cmpfunc_t cmp
);
4532 cmp_io_rgns(const void *a
, const void *b
)
4534 const pmap_io_range_t
*range_a
= a
;
4535 const pmap_io_range_t
*range_b
= b
;
4536 if ((range_b
->addr
+ range_b
->len
) <= range_a
->addr
) {
4538 } else if ((range_a
->addr
+ range_a
->len
) <= range_b
->addr
) {
static void
pmap_load_io_rgns(void)
{
	DTEntry entry;
	pmap_io_range_t const *ranges;
	void const *prop = NULL;
	int err;
	unsigned int prop_size;

	if (num_io_rgns == 0) {
		return;
	}

	err = SecureDTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	err = SecureDTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
	assert(err == kSuccess);

	ranges = prop;
	for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
		io_attr_table[i] = ranges[i];
	}

	qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
}
/*
 * pmap_get_arm64_prot
 *
 * return effective armv8 VMSA block protections including
 *	table AP/PXN/XN overrides of a pmap entry
 *
 */
uint64_t
pmap_get_arm64_prot(
	pmap_t pmap,
	vm_offset_t addr)
{
	tt_entry_t tte = 0;
	unsigned int level = 0;
	uint64_t tte_type = 0;
	uint64_t effective_prot_bits = 0;
	uint64_t aggregate_tte = 0;
	uint64_t table_ap_bits = 0, table_xn = 0, table_pxn = 0;
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
		tte = *pmap_ttne(pmap, level, addr);

		if (!(tte & ARM_TTE_VALID)) {
			return 0;
		}

		tte_type = tte & ARM_TTE_TYPE_MASK;

		if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
		    (level == pt_attr->pta_max_level)) {
			/* Block or page mapping; both have the same protection bit layout. */
			break;
		} else if (tte_type == ARM_TTE_TYPE_TABLE) {
			/* All of the table bits we care about are overrides, so just OR them together. */
			aggregate_tte |= tte;
		}
	}

	table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
	table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
	table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);

	/* Start with the PTE bits. */
	effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);

	/* Table AP bits mask out block/page AP bits */
	effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));

	/* XN/PXN bits can be OR'd in. */
	effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
	effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);

	return effective_prot_bits;
}
#endif /* __arm64__ */
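/*
 * Illustrative sketch (not compiled, values hypothetical): how a table-level
 * XN override from the walk above dominates an executable leaf entry. The
 * table AP bits work the opposite way; they are used to mask out the leaf's
 * AP bits rather than add to them.
 */
#if 0
{
	uint64_t aggregate = ARM_TTE_TABLE_XN;      /* some intermediate table set XN */
	uint64_t leaf_bits = ARM_PTE_AP(AP_RONA);   /* leaf PTE: read-only, NX/PNX clear */
	uint64_t effective = leaf_bits & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);

	effective |= (aggregate & ARM_TTE_TABLE_XN) ? ARM_PTE_NX : 0;
	/* `effective` now reports the mapping as non-executable. */
}
#endif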
/*
 *	Bootstrap the system enough to run with virtual memory.
 *
 *	The early VM initialization code has already allocated
 *	the first CPU's translation table and made entries for
 *	all the one-to-one mappings to be found there.
 *
 *	We must set up the kernel pmap structures, the
 *	physical-to-virtual translation lookup tables for the
 *	physical memory to be managed (between avail_start and
 *	avail_end).
 *
 *	Map the kernel's code and data, and allocate the system page table.
 *	Page_size must already be set.
 *
 *	Parameters:
 *	first_avail	first available physical page -
 *			   after kernel page tables
 *	avail_start	PA of first managed physical page
 *	avail_end	PA of last managed physical page
 */
void
pmap_bootstrap(
	vm_offset_t vstart)
{
4658 pmap_paddr_t pmap_struct_start
;
4659 vm_size_t pv_head_size
;
4660 vm_size_t ptd_root_table_size
;
4661 vm_size_t pp_attr_table_size
;
4662 vm_size_t io_attr_table_size
;
4663 vm_size_t asid_table_size
;
4664 unsigned int npages
;
4665 vm_map_offset_t maxoffset
;
4667 lck_grp_init(&pmap_lck_grp
, "pmap", LCK_GRP_ATTR_NULL
);
4671 #if DEVELOPMENT || DEBUG
4672 PE_parse_boot_argn("-unsafe_kernel_text", &pmap_ppl_disable
, sizeof(pmap_ppl_disable
));
4675 #if CONFIG_CSR_FROM_DT
4676 if (csr_unsafe_kernel_text
) {
4677 pmap_ppl_disable
= true;
4679 #endif /* CONFIG_CSR_FROM_DT */
4681 #endif /* XNU_MONITOR */
4683 #if DEVELOPMENT || DEBUG
4684 if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask
, sizeof(pmap_trace_mask
))) {
4685 kprintf("Kernel traces for pmap operations enabled\n");
4690 * Initialize the kernel pmap.
4693 #if ARM_PARAMETERIZED_PMAP
4694 kernel_pmap
->pmap_pt_attr
= native_pt_attr
;
4695 #endif /* ARM_PARAMETERIZED_PMAP */
4697 kernel_pmap
->disable_jop
= 0;
4698 #endif /* HAS_APPLE_PAC */
4699 kernel_pmap
->tte
= cpu_tte
;
4700 kernel_pmap
->ttep
= cpu_ttep
;
4701 #if (__ARM_VMSA__ > 7)
4702 kernel_pmap
->min
= ARM64_TTBR1_MIN_ADDR
;
4704 kernel_pmap
->min
= VM_MIN_KERNEL_AND_KEXT_ADDRESS
;
4706 kernel_pmap
->max
= VM_MAX_KERNEL_ADDRESS
;
4707 os_atomic_init(&kernel_pmap
->ref_count
, 1);
4708 kernel_pmap
->gc_status
= 0;
4709 kernel_pmap
->nx_enabled
= TRUE
;
4711 kernel_pmap
->is_64bit
= TRUE
;
4713 kernel_pmap
->is_64bit
= FALSE
;
4715 kernel_pmap
->stamp
= os_atomic_inc(&pmap_stamp
, relaxed
);
4717 #if ARM_PARAMETERIZED_PMAP
4718 kernel_pmap
->pmap_pt_attr
= native_pt_attr
;
4719 #endif /* ARM_PARAMETERIZED_PMAP */
4721 kernel_pmap
->nested_region_addr
= 0x0ULL
;
4722 kernel_pmap
->nested_region_size
= 0x0ULL
;
4723 kernel_pmap
->nested_region_asid_bitmap
= NULL
;
4724 kernel_pmap
->nested_region_asid_bitmap_size
= 0x0UL
;
4726 #if (__ARM_VMSA__ == 7)
4727 kernel_pmap
->tte_index_max
= 4 * NTTES
;
4729 kernel_pmap
->hw_asid
= 0;
4730 kernel_pmap
->sw_asid
= 0;
4732 pmap_lock_init(kernel_pmap
);
4733 memset((void *) &kernel_pmap
->stats
, 0, sizeof(kernel_pmap
->stats
));
4735 /* allocate space for and initialize the bookkeeping structures */
4736 io_attr_table_size
= pmap_compute_io_rgns();
4737 npages
= (unsigned int)atop(mem_size
);
4738 pp_attr_table_size
= npages
* sizeof(pp_attr_t
);
4739 pv_head_size
= round_page(sizeof(pv_entry_t
*) * npages
);
4740 // allocate enough initial PTDs to map twice the available physical memory
4741 ptd_root_table_size
= sizeof(pt_desc_t
) * (mem_size
/ ((PAGE_SIZE
/ sizeof(pt_entry_t
)) * ARM_PGBYTES
)) * 2;
4742 pmap_max_asids
= pmap_compute_max_asids();
4743 pmap_asid_plru
= (pmap_max_asids
> MAX_HW_ASIDS
);
4744 PE_parse_boot_argn("pmap_asid_plru", &pmap_asid_plru
, sizeof(pmap_asid_plru
));
4745 /* Align the range of available hardware ASIDs to a multiple of 64 to enable the
4746 * masking used by the PLRU scheme. This means we must handle the case in which
4747 * the returned hardware ASID is MAX_HW_ASIDS, which we do in alloc_asid() and free_asid(). */
4748 _Static_assert(sizeof(asid_plru_bitmap
[0] == sizeof(uint64_t)), "bitmap_t is not a 64-bit integer");
4749 _Static_assert(((MAX_HW_ASIDS
+ 1) % 64) == 0, "MAX_HW_ASIDS + 1 is not divisible by 64");
4750 asid_chunk_size
= (pmap_asid_plru
? (MAX_HW_ASIDS
+ 1) : MAX_HW_ASIDS
);
4752 asid_table_size
= sizeof(*asid_bitmap
) * BITMAP_LEN(pmap_max_asids
);
4754 pmap_compute_pv_targets();
4756 pmap_struct_start
= avail_start
;
4758 pp_attr_table
= (pp_attr_t
*) phystokv(avail_start
);
4759 avail_start
= PMAP_ALIGN(avail_start
+ pp_attr_table_size
, __alignof(pp_attr_t
));
4760 io_attr_table
= (pmap_io_range_t
*) phystokv(avail_start
);
4761 avail_start
= PMAP_ALIGN(avail_start
+ io_attr_table_size
, __alignof(pv_entry_t
*));
4762 pv_head_table
= (pv_entry_t
**) phystokv(avail_start
);
4763 avail_start
= PMAP_ALIGN(avail_start
+ pv_head_size
, __alignof(pt_desc_t
));
4764 ptd_root_table
= (pt_desc_t
*)phystokv(avail_start
);
4765 avail_start
= PMAP_ALIGN(avail_start
+ ptd_root_table_size
, __alignof(bitmap_t
));
4766 asid_bitmap
= (bitmap_t
*)phystokv(avail_start
);
4767 avail_start
= round_page(avail_start
+ asid_table_size
);
4769 memset((char *)phystokv(pmap_struct_start
), 0, avail_start
- pmap_struct_start
);
4771 pmap_load_io_rgns();
4772 ptd_bootstrap(ptd_root_table
, (unsigned int)(ptd_root_table_size
/ sizeof(pt_desc_t
)));
4775 pmap_array_begin
= (void *)phystokv(avail_start
);
4776 pmap_array
= pmap_array_begin
;
4777 avail_start
+= round_page(PMAP_ARRAY_SIZE
* sizeof(struct pmap
));
4778 pmap_array_end
= (void *)phystokv(avail_start
);
4780 pmap_array_count
= ((pmap_array_end
- pmap_array_begin
) / sizeof(struct pmap
));
4782 pmap_bootstrap_pmap_free_list();
4784 pmap_ledger_ptr_array_begin
= (void *)phystokv(avail_start
);
4785 pmap_ledger_ptr_array
= pmap_ledger_ptr_array_begin
;
4786 avail_start
+= round_page(MAX_PMAP_LEDGERS
* sizeof(void*));
4787 pmap_ledger_ptr_array_end
= (void *)phystokv(avail_start
);
4789 pmap_ledger_refcnt_begin
= (void *)phystokv(avail_start
);
4790 pmap_ledger_refcnt
= pmap_ledger_refcnt_begin
;
4791 avail_start
+= round_page(MAX_PMAP_LEDGERS
* sizeof(os_refcnt_t
));
4792 pmap_ledger_refcnt_end
= (void *)phystokv(avail_start
);
4794 pmap_cpu_data_array_init();
4796 vm_first_phys
= gPhysBase
;
4797 vm_last_phys
= trunc_page(avail_end
);
4799 queue_init(&map_pmap_list
);
4800 queue_enter(&map_pmap_list
, kernel_pmap
, pmap_t
, pmaps
);
4801 free_page_size_tt_list
= TT_FREE_ENTRY_NULL
;
4802 free_page_size_tt_count
= 0;
4803 free_page_size_tt_max
= 0;
4804 free_two_page_size_tt_list
= TT_FREE_ENTRY_NULL
;
4805 free_two_page_size_tt_count
= 0;
4806 free_two_page_size_tt_max
= 0;
4807 free_tt_list
= TT_FREE_ENTRY_NULL
;
4811 queue_init(&pt_page_list
);
4813 pmap_pages_request_count
= 0;
4814 pmap_pages_request_acum
= 0;
4815 pmap_pages_reclaim_list
= PAGE_FREE_ENTRY_NULL
;
4817 virtual_space_start
= vstart
;
4818 virtual_space_end
= VM_MAX_KERNEL_ADDRESS
;
4820 bitmap_full(&asid_bitmap
[0], pmap_max_asids
);
4821 bitmap_full(&asid_plru_bitmap
[0], MAX_HW_ASIDS
);
4822 // Clear the highest-order bit, which corresponds to MAX_HW_ASIDS + 1
4823 asid_plru_bitmap
[MAX_HW_ASIDS
>> 6] = ~(1ULL << 63);
4827 if (PE_parse_boot_argn("arm_maxoffset", &maxoffset
, sizeof(maxoffset
))) {
4828 maxoffset
= trunc_page(maxoffset
);
4829 if ((maxoffset
>= pmap_max_offset(FALSE
, ARM_PMAP_MAX_OFFSET_MIN
))
4830 && (maxoffset
<= pmap_max_offset(FALSE
, ARM_PMAP_MAX_OFFSET_MAX
))) {
4831 arm_pmap_max_offset_default
= maxoffset
;
4834 #if defined(__arm64__)
4835 if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset
, sizeof(maxoffset
))) {
4836 maxoffset
= trunc_page(maxoffset
);
4837 if ((maxoffset
>= pmap_max_offset(TRUE
, ARM_PMAP_MAX_OFFSET_MIN
))
4838 && (maxoffset
<= pmap_max_offset(TRUE
, ARM_PMAP_MAX_OFFSET_MAX
))) {
4839 arm64_pmap_max_offset_default
= maxoffset
;
4844 PE_parse_boot_argn("pmap_panic_dev_wimg_on_managed", &pmap_panic_dev_wimg_on_managed
, sizeof(pmap_panic_dev_wimg_on_managed
));
4848 PE_parse_boot_argn("pmap_stats_assert",
4850 sizeof(pmap_stats_assert
));
4851 PE_parse_boot_argn("vm_footprint_suspend_allowed",
4852 &vm_footprint_suspend_allowed
,
4853 sizeof(vm_footprint_suspend_allowed
));
4854 #endif /* MACH_ASSERT */
4857 /* Shadow the CPU copy windows, as they fall outside of the physical aperture */
4858 kasan_map_shadow(CPUWINDOWS_BASE
, CPUWINDOWS_TOP
- CPUWINDOWS_BASE
, true);
4865 pa_set_range_monitor(pmap_paddr_t start_pa
, pmap_paddr_t end_pa
)
4867 pmap_paddr_t cur_pa
;
4868 for (cur_pa
= start_pa
; cur_pa
< end_pa
; cur_pa
+= ARM_PGBYTES
) {
4869 assert(pa_valid(cur_pa
));
4870 pa_set_monitor(cur_pa
);
4875 pa_set_range_xprr_perm(pmap_paddr_t start_pa
,
4876 pmap_paddr_t end_pa
,
4877 unsigned int expected_perm
,
4878 unsigned int new_perm
)
4880 vm_offset_t start_va
= phystokv(start_pa
);
4881 vm_offset_t end_va
= start_va
+ (end_pa
- start_pa
);
4883 pa_set_range_monitor(start_pa
, end_pa
);
4884 pmap_set_range_xprr_perm(start_va
, end_va
, expected_perm
, new_perm
);
4888 pmap_lockdown_kc(void)
4890 extern vm_offset_t vm_kernelcache_base
;
4891 extern vm_offset_t vm_kernelcache_top
;
4892 pmap_paddr_t start_pa
= kvtophys(vm_kernelcache_base
);
4893 pmap_paddr_t end_pa
= start_pa
+ (vm_kernelcache_top
- vm_kernelcache_base
);
4894 pmap_paddr_t cur_pa
= start_pa
;
4895 vm_offset_t cur_va
= vm_kernelcache_base
;
4896 while (cur_pa
< end_pa
) {
4897 vm_size_t range_size
= end_pa
- cur_pa
;
4898 vm_offset_t ptov_va
= phystokv_range(cur_pa
, &range_size
);
4899 if (ptov_va
!= cur_va
) {
4901 * If the physical address maps back to a virtual address that is non-linear
4902 * w.r.t. the kernelcache, that means it corresponds to memory that will be
4903 * reclaimed by the OS and should therefore not be locked down.
4905 cur_pa
+= range_size
;
4906 cur_va
+= range_size
;
4909 unsigned int pai
= (unsigned int)pa_index(cur_pa
);
4910 pv_entry_t
**pv_h
= pai_to_pvh(pai
);
4912 vm_offset_t pvh_flags
= pvh_get_flags(pv_h
);
4914 if (__improbable(pvh_flags
& PVH_FLAG_LOCKDOWN
)) {
4915 panic("pai %d already locked down", pai
);
4917 pvh_set_flags(pv_h
, pvh_flags
| PVH_FLAG_LOCKDOWN
);
4918 cur_pa
+= ARM_PGBYTES
;
4919 cur_va
+= ARM_PGBYTES
;
4921 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
4922 extern uint64_t ctrr_ro_test
;
4923 extern uint64_t ctrr_nx_test
;
4924 pmap_paddr_t exclude_pages
[] = {kvtophys((vm_offset_t
)&ctrr_ro_test
), kvtophys((vm_offset_t
)&ctrr_nx_test
)};
4925 for (unsigned i
= 0; i
< (sizeof(exclude_pages
) / sizeof(exclude_pages
[0])); ++i
) {
4926 pv_entry_t
**pv_h
= pai_to_pvh(pa_index(exclude_pages
[i
]));
4927 pvh_set_flags(pv_h
, pvh_get_flags(pv_h
) & ~PVH_FLAG_LOCKDOWN
);
4933 pmap_static_allocations_done(void)
4935 pmap_paddr_t monitor_start_pa
;
4936 pmap_paddr_t monitor_end_pa
;
4939 * Protect the bootstrap (V=P and V->P) page tables.
4941 * These bootstrap allocations will be used primarily for page tables.
4942 * If we wish to secure the page tables, we need to start by marking
4943 * these bootstrap allocations as pages that we want to protect.
4945 monitor_start_pa
= kvtophys((vm_offset_t
)&bootstrap_pagetables
);
4946 monitor_end_pa
= monitor_start_pa
+ BOOTSTRAP_TABLE_SIZE
;
4948 /* The bootstrap page tables are mapped RW at boostrap. */
4949 pa_set_range_xprr_perm(monitor_start_pa
, monitor_end_pa
, XPRR_KERN_RW_PERM
, XPRR_KERN_RO_PERM
);
4952 * We use avail_start as a pointer to the first address that has not
4953 * been reserved for bootstrap, so we know which pages to give to the
4954 * virtual memory layer.
4956 monitor_start_pa
= BootArgs
->topOfKernelData
;
4957 monitor_end_pa
= avail_start
;
4959 /* The other bootstrap allocations are mapped RW at bootstrap. */
4960 pa_set_range_xprr_perm(monitor_start_pa
, monitor_end_pa
, XPRR_KERN_RW_PERM
, XPRR_PPL_RW_PERM
);
4963 * The RO page tables are mapped RW in arm_vm_init() and later restricted
4964 * to RO in arm_vm_prot_finalize(), which is called after this function.
4965 * Here we only need to mark the underlying physical pages as PPL-owned to ensure
4966 * they can't be allocated for other uses. We don't need a special xPRR
4967 * protection index, as there is no PPL_RO index, and these pages are ultimately
4968 * protected by KTRR/CTRR. Furthermore, use of PPL_RW for these pages would
4969 * expose us to a functional issue on H11 devices where CTRR shifts the APRR
4970 * lookup table index to USER_XO before APRR is applied, leading the hardware
4971 * to believe we are dealing with an user XO page upon performing a translation.
4973 monitor_start_pa
= kvtophys((vm_offset_t
)&ropagetable_begin
);
4974 monitor_end_pa
= monitor_start_pa
+ ((vm_offset_t
)&ropagetable_end
- (vm_offset_t
)&ropagetable_begin
);
4975 pa_set_range_monitor(monitor_start_pa
, monitor_end_pa
);
4977 monitor_start_pa
= kvtophys(segPPLDATAB
);
4978 monitor_end_pa
= monitor_start_pa
+ segSizePPLDATA
;
4980 /* PPL data is RW for the PPL, RO for the kernel. */
4981 pa_set_range_xprr_perm(monitor_start_pa
, monitor_end_pa
, XPRR_KERN_RW_PERM
, XPRR_PPL_RW_PERM
);
4983 monitor_start_pa
= kvtophys(segPPLTEXTB
);
4984 monitor_end_pa
= monitor_start_pa
+ segSizePPLTEXT
;
4986 /* PPL text is RX for the PPL, RO for the kernel. */
4987 pa_set_range_xprr_perm(monitor_start_pa
, monitor_end_pa
, XPRR_KERN_RX_PERM
, XPRR_PPL_RX_PERM
);
4991 * In order to support DTrace, the save areas for the PPL must be
4992 * writable. This is due to the fact that DTrace will try to update
4995 if (pmap_ppl_disable
) {
4996 vm_offset_t monitor_start_va
= phystokv(ppl_cpu_save_area_start
);
4997 vm_offset_t monitor_end_va
= monitor_start_va
+ (ppl_cpu_save_area_end
- ppl_cpu_save_area_start
);
4999 pmap_set_range_xprr_perm(monitor_start_va
, monitor_end_va
, XPRR_PPL_RW_PERM
, XPRR_KERN_RW_PERM
);
5003 if (segSizePPLDATACONST
> 0) {
5004 monitor_start_pa
= kvtophys(segPPLDATACONSTB
);
5005 monitor_end_pa
= monitor_start_pa
+ segSizePPLDATACONST
;
5007 pa_set_range_xprr_perm(monitor_start_pa
, monitor_end_pa
, XPRR_KERN_RO_PERM
, XPRR_KERN_RO_PERM
);
5011 * Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
5012 * precaution. The real RW mappings are at a different location with guard pages.
5014 pa_set_range_xprr_perm(pmap_stacks_start_pa
, pmap_stacks_end_pa
, XPRR_PPL_RW_PERM
, XPRR_KERN_RO_PERM
);
5016 /* Prevent remapping of the kernelcache */
5022 pmap_lockdown_ppl(void)
5024 /* Mark the PPL as being locked down. */
5026 #error "XPRR configuration error"
5029 #endif /* XNU_MONITOR */
5033 vm_offset_t
*startp
,
5037 *startp
= virtual_space_start
;
5038 *endp
= virtual_space_end
;
5043 pmap_virtual_region(
5044 unsigned int region_select
,
5045 vm_map_offset_t
*startp
,
5049 boolean_t ret
= FALSE
;
5050 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
5051 if (region_select
== 0) {
5053 * In this config, the bootstrap mappings should occupy their own L2
5054 * TTs, as they should be immutable after boot. Having the associated
5055 * TTEs and PTEs in their own pages allows us to lock down those pages,
5056 * while allowing the rest of the kernel address range to be remapped.
5058 #if (__ARM_VMSA__ > 7)
5059 *startp
= LOW_GLOBAL_BASE_ADDRESS
& ~ARM_TT_L2_OFFMASK
;
5061 #error Unsupported configuration
5063 #if defined(ARM_LARGE_MEMORY)
5064 *size
= ((KERNEL_PMAP_HEAP_RANGE_START
- *startp
) & ~PAGE_MASK
);
5066 *size
= ((VM_MAX_KERNEL_ADDRESS
- *startp
) & ~PAGE_MASK
);
5071 #if (__ARM_VMSA__ > 7)
5072 unsigned long low_global_vr_mask
= 0;
5073 vm_map_size_t low_global_vr_size
= 0;
5076 if (region_select
== 0) {
5077 #if (__ARM_VMSA__ == 7)
5078 *startp
= gVirtBase
& 0xFFC00000;
5079 *size
= ((virtual_space_start
- (gVirtBase
& 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
5081 /* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
5082 if (!TEST_PAGE_SIZE_4K
) {
5083 *startp
= gVirtBase
& 0xFFFFFFFFFE000000;
5084 *size
= ((virtual_space_start
- (gVirtBase
& 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
5086 *startp
= gVirtBase
& 0xFFFFFFFFFF800000;
5087 *size
= ((virtual_space_start
- (gVirtBase
& 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
5092 if (region_select
== 1) {
5093 *startp
= VREGION1_START
;
5094 *size
= VREGION1_SIZE
;
5097 #if (__ARM_VMSA__ > 7)
5098 /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
5099 if (!TEST_PAGE_SIZE_4K
) {
5100 low_global_vr_mask
= 0xFFFFFFFFFE000000;
5101 low_global_vr_size
= 0x2000000;
5103 low_global_vr_mask
= 0xFFFFFFFFFF800000;
5104 low_global_vr_size
= 0x800000;
5107 if (((gVirtBase
& low_global_vr_mask
) != LOW_GLOBAL_BASE_ADDRESS
) && (region_select
== 2)) {
5108 *startp
= LOW_GLOBAL_BASE_ADDRESS
;
5109 *size
= low_global_vr_size
;
5113 if (region_select
== 3) {
5114 /* In this config, we allow the bootstrap mappings to occupy the same
5115 * page table pages as the heap.
5117 *startp
= VM_MIN_KERNEL_ADDRESS
;
5118 *size
= LOW_GLOBAL_BASE_ADDRESS
- *startp
;
5127 * Routines to track and allocate physical pages during early boot.
5128 * On most systems that memory runs from first_avail through to avail_end
5131 * However if the system supports ECC and bad_ram_pages_count > 0, we
5132 * need to be careful and skip those pages.
5134 static unsigned int avail_page_count
= 0;
5135 static bool need_ram_ranges_init
= true;
5137 #if defined(__arm64__)
5138 pmap_paddr_t
*bad_ram_pages
= NULL
;
5139 unsigned int bad_ram_pages_count
= 0;
5142 * We use this sub-range of bad_ram_pages for pmap_next_page()
5144 static pmap_paddr_t
*skip_pages
;
5145 static unsigned int skip_pages_count
= 0;
5147 #define MAX_BAD_RAM_PAGE_COUNT 64
5148 static pmap_paddr_t bad_ram_pages_arr
[MAX_BAD_RAM_PAGE_COUNT
];
5151 * XXX - temporary code to get the bad pages array from boot-args.
5152 * expects a comma separated list of offsets from the start
5153 * of physical memory to be considered bad.
5155 * HERE JOE -- will eventually be replaced by data provided by iboot
5158 parse_bad_ram_pages_boot_arg(void)
5160 char buf
[256] = {0};
5165 extern uint64_t strtouq(const char *, char **, int);
5167 if (!PE_parse_boot_arg_str("bad_ram_pages", buf
, sizeof(buf
))) {
5171 while (*s
&& count
< MAX_BAD_RAM_PAGE_COUNT
) {
5172 num
= (pmap_paddr_t
)strtouq(s
, &end
, 0);
5178 bad_ram_pages_arr
[count
++] = gDramBase
+ num
;
5188 bad_ram_pages
= bad_ram_pages_arr
;
5189 bad_ram_pages_count
= count
;
5193 * Comparison routine for qsort of array of physical addresses.
5196 pmap_paddr_cmp(void *a
, void *b
)
5198 pmap_paddr_t
*x
= a
;
5199 pmap_paddr_t
*y
= b
;
5205 #endif /* defined(__arm64__) */
/*
 * Look up ppn in the sorted bad_ram_pages array.
 */
bool
pmap_is_bad_ram(__unused ppnum_t ppn)
{
#if defined(__arm64__)
	pmap_paddr_t pa = ptoa(ppn);
	int low = 0;
	int high = bad_ram_pages_count - 1;
	int mid;

	while (low <= high) {
		mid = (low + high) / 2;
		if (bad_ram_pages[mid] < pa) {
			low = mid + 1;
		} else if (bad_ram_pages[mid] > pa) {
			high = mid - 1;
		} else {
			return true;
		}
	}
#endif /* defined(__arm64__) */
	return false;
}
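/*
 * Illustrative sketch (not compiled): with a sorted, hypothetical
 * bad_ram_pages array the binary search above resolves a lookup in
 * O(log n) probes.
 */
#if 0
{
	/* Hypothetical sorted bad-page list:
	 *   bad_ram_pages = { 0x80004000, 0x80200000, 0x81000000 };
	 *   bad_ram_pages_count = 3;
	 */
	assert(pmap_is_bad_ram((ppnum_t)atop(0x80200000)));   /* exact match found at mid */
	assert(!pmap_is_bad_ram((ppnum_t)atop(0x80201000)));  /* falls between entries */
}
#endif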
5234 * Initialize the count of available pages. If we have bad_ram_pages, then sort the list of them.
5235 * No lock needed here, as this code is called while kernel boot up is single threaded.
5238 initialize_ram_ranges(void)
5240 pmap_paddr_t first
= first_avail
;
5241 pmap_paddr_t end
= avail_end
;
5243 assert(first
<= end
);
5244 assert(first
== (first
& ~PAGE_MASK
));
5245 assert(end
== (end
& ~PAGE_MASK
));
5246 avail_page_count
= atop(end
- first
);
5248 #if defined(__arm64__)
5250 * XXX Temporary code for testing, until there is iboot support
5252 * Parse a list of known bad pages from a boot-args.
5254 parse_bad_ram_pages_boot_arg();
5257 * Sort and filter the bad pages list and adjust avail_page_count.
5259 if (bad_ram_pages_count
!= 0) {
5260 qsort(bad_ram_pages
, bad_ram_pages_count
, sizeof(*bad_ram_pages
), (cmpfunc_t
)pmap_paddr_cmp
);
5261 skip_pages
= bad_ram_pages
;
5262 skip_pages_count
= bad_ram_pages_count
;
5264 /* ignore any pages before first */
5265 while (skip_pages_count
> 0 && skip_pages
[0] < first
) {
5270 /* ignore any pages at or after end */
5271 while (skip_pages_count
> 0 && skip_pages
[skip_pages_count
- 1] >= end
) {
5275 avail_page_count
-= skip_pages_count
;
5277 #endif /* defined(__arm64__) */
5278 need_ram_ranges_init
= false;
5285 if (need_ram_ranges_init
) {
5286 initialize_ram_ranges();
5288 return avail_page_count
;
5292 pmap_free_pages_span(
5295 if (need_ram_ranges_init
) {
5296 initialize_ram_ranges();
5298 return (unsigned int)atop(avail_end
- first_avail
);
5305 __unused boolean_t might_free
)
5307 return pmap_next_page(pnum
);
5315 if (need_ram_ranges_init
) {
5316 initialize_ram_ranges();
5319 #if defined(__arm64__)
5321 * Skip over any known bad pages.
5323 while (skip_pages_count
> 0 && first_avail
== skip_pages
[0]) {
5324 first_avail
+= PAGE_SIZE
;
5328 #endif /* defined(__arm64__) */
5330 if (first_avail
!= avail_end
) {
5331 *pnum
= (ppnum_t
)atop(first_avail
);
5332 first_avail
+= PAGE_SIZE
;
5333 assert(avail_page_count
> 0);
5337 assert(avail_page_count
== 0);
5343 __unused ppnum_t pnum
)
5345 /* XXX Justin TBD - mark the page as unusable in pmap data structures */
5350 * Initialize the pmap module.
5351 * Called by vm_init, to initialize any structures that the pmap
5352 * system needs to map virtual memory.
5359 * Protect page zero in the kernel map.
5360 * (can be overruled by permanent transltion
5361 * table entries at page zero - see arm_vm_init).
5363 vm_protect(kernel_map
, 0, PAGE_SIZE
, TRUE
, VM_PROT_NONE
);
5365 pmap_initialized
= TRUE
;
5368 * Create the zone of physical maps
5369 * and the physical-to-virtual entries.
5371 pmap_zone
= zone_create_ext("pmap", sizeof(struct pmap
),
5372 ZC_ZFREE_CLEARMEM
, ZONE_ID_PMAP
, NULL
);
5376 * Initialize the pmap object (for tracking the vm_page_t
5377 * structures for pages we allocate to be page tables in
5380 _vm_object_allocate(mem_size
, pmap_object
);
5381 pmap_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
5384 * The values of [hard_]maxproc may have been scaled, make sure
5385 * they are still less than the value of pmap_max_asids.
5387 if ((uint32_t)maxproc
> pmap_max_asids
) {
5388 maxproc
= pmap_max_asids
;
5390 if ((uint32_t)hard_maxproc
> pmap_max_asids
) {
5391 hard_maxproc
= pmap_max_asids
;
5395 pmap_pgtrace_init();
5405 pmap_paddr_t phys
= ptoa(ppnum
);
5407 assert(phys
!= vm_page_fictitious_addr
);
5409 if (!pa_valid(phys
)) {
5413 pai
= (int)pa_index(phys
);
5414 pv_h
= pai_to_pvh(pai
);
5416 return pvh_test_type(pv_h
, PVH_TYPE_NULL
);
5421 pmap_assert_free(ppnum_t ppnum
)
5423 assertf(pmap_verify_free(ppnum
), "page = 0x%x", ppnum
);
5430 MARK_AS_PMAP_TEXT
static void
5431 pmap_ledger_alloc_init_internal(size_t size
)
5433 pmap_simple_lock(&pmap_ledger_lock
);
5435 if (pmap_ledger_alloc_initialized
) {
5436 panic("%s: already initialized, "
5442 if ((size
> sizeof(pmap_ledger_data_t
)) ||
5443 ((sizeof(pmap_ledger_data_t
) - size
) % sizeof(struct ledger_entry
))) {
5444 panic("%s: size mismatch, expected %lu, "
5446 __func__
, PMAP_LEDGER_DATA_BYTES
,
5450 pmap_ledger_alloc_initialized
= true;
5452 pmap_simple_unlock(&pmap_ledger_lock
);
5455 MARK_AS_PMAP_TEXT
static ledger_t
5456 pmap_ledger_alloc_internal(void)
5459 uint64_t vaddr
, vstart
, vend
;
5462 ledger_t new_ledger
;
5463 uint64_t array_index
;
5465 pmap_simple_lock(&pmap_ledger_lock
);
5466 if (pmap_ledger_free_list
== NULL
) {
5467 paddr
= pmap_get_free_ppl_page();
5470 pmap_simple_unlock(&pmap_ledger_lock
);
5474 vstart
= phystokv(paddr
);
5475 vend
= vstart
+ PAGE_SIZE
;
5477 for (vaddr
= vstart
; (vaddr
< vend
) && ((vaddr
+ sizeof(pmap_ledger_t
)) <= vend
); vaddr
+= sizeof(pmap_ledger_t
)) {
5478 pmap_ledger_t
*free_ledger
;
5480 index
= pmap_ledger_ptr_array_free_index
++;
5482 if (index
>= MAX_PMAP_LEDGERS
) {
5483 panic("%s: pmap_ledger_ptr_array is full, index=%llu",
5487 free_ledger
= (pmap_ledger_t
*)vaddr
;
5489 pmap_ledger_ptr_array
[index
] = free_ledger
;
5490 free_ledger
->back_ptr
= &pmap_ledger_ptr_array
[index
];
5492 free_ledger
->next
= pmap_ledger_free_list
;
5493 pmap_ledger_free_list
= free_ledger
;
5496 pa_set_range_xprr_perm(paddr
, paddr
+ PAGE_SIZE
, XPRR_PPL_RW_PERM
, XPRR_KERN_RW_PERM
);
5499 new_ledger
= (ledger_t
)pmap_ledger_free_list
;
5500 pmap_ledger_free_list
= pmap_ledger_free_list
->next
;
5502 array_index
= pmap_ledger_validate(new_ledger
);
5503 os_ref_init(&pmap_ledger_refcnt
[array_index
], NULL
);
5505 pmap_simple_unlock(&pmap_ledger_lock
);
5510 MARK_AS_PMAP_TEXT
static void
5511 pmap_ledger_free_internal(ledger_t ledger
)
5513 pmap_ledger_t
* free_ledger
;
5515 free_ledger
= (pmap_ledger_t
*)ledger
;
5517 pmap_simple_lock(&pmap_ledger_lock
);
5518 uint64_t array_index
= pmap_ledger_validate(ledger
);
5520 if (os_ref_release(&pmap_ledger_refcnt
[array_index
]) != 0) {
5521 panic("%s: ledger still referenced, "
5527 free_ledger
->next
= pmap_ledger_free_list
;
5528 pmap_ledger_free_list
= free_ledger
;
5529 pmap_simple_unlock(&pmap_ledger_lock
);
5534 pmap_ledger_retain(ledger_t ledger
)
5536 pmap_simple_lock(&pmap_ledger_lock
);
5537 uint64_t array_index
= pmap_ledger_validate(ledger
);
5538 os_ref_retain(&pmap_ledger_refcnt
[array_index
]);
5539 pmap_simple_unlock(&pmap_ledger_lock
);
5543 pmap_ledger_release(ledger_t ledger
)
5545 pmap_simple_lock(&pmap_ledger_lock
);
5546 uint64_t array_index
= pmap_ledger_validate(ledger
);
5547 os_ref_release_live(&pmap_ledger_refcnt
[array_index
]);
5548 pmap_simple_unlock(&pmap_ledger_lock
);
5552 pmap_ledger_alloc_init(size_t size
)
5554 pmap_ledger_alloc_init_ppl(size
);
5558 pmap_ledger_alloc(void)
5560 ledger_t retval
= NULL
;
5562 while ((retval
= pmap_ledger_alloc_ppl()) == NULL
) {
5563 pmap_alloc_page_for_ppl(0);
5570 pmap_ledger_free(ledger_t ledger
)
5572 pmap_ledger_free_ppl(ledger
);
5574 #else /* XNU_MONITOR */
5577 pmap_ledger_alloc_init(size_t size
)
5579 panic("%s: unsupported, "
5586 pmap_ledger_alloc(void)
5588 panic("%s: unsupported",
5594 pmap_ledger_free(ledger_t ledger
)
5596 panic("%s: unsupported, "
5600 #endif /* XNU_MONITOR */
5603 pmap_root_alloc_size(pmap_t pmap
)
5605 #if (__ARM_VMSA__ > 7)
5606 #pragma unused(pmap)
5607 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
5608 unsigned int root_level
= pt_attr_root_level(pt_attr
);
5609 return ((pt_attr_ln_index_mask(pt_attr
, root_level
) >> pt_attr_ln_shift(pt_attr
, root_level
)) + 1) * sizeof(tt_entry_t
);
5612 return PMAP_ROOT_ALLOC_SIZE
;
5617 * Create and return a physical map.
5619 * If the size specified for the map
5620 * is zero, the map is an actual physical
5621 * map, and may be referenced by the
5624 * If the size specified is non-zero,
5625 * the map will be used in software only, and
5626 * is bounded by that size.
5628 MARK_AS_PMAP_TEXT
static pmap_t
5629 pmap_create_options_internal(
5636 unsigned tte_index_max
;
5638 bool is_64bit
= flags
& PMAP_CREATE_64BIT
;
5639 #if defined(HAS_APPLE_PAC)
5640 bool disable_jop
= flags
& PMAP_CREATE_DISABLE_JOP
;
5641 #endif /* defined(HAS_APPLE_PAC) */
5642 kern_return_t local_kr
= KERN_SUCCESS
;
5645 * A software use-only map doesn't even need a pmap.
5651 if (0 != (flags
& ~PMAP_CREATE_KNOWN_FLAGS
)) {
5656 if ((p
= pmap_alloc_pmap()) == PMAP_NULL
) {
5657 local_kr
= KERN_NO_SPACE
;
5658 goto pmap_create_fail
;
5662 pmap_ledger_validate(ledger
);
5663 pmap_ledger_retain(ledger
);
5667 * Allocate a pmap struct from the pmap_zone. Then allocate
5668 * the translation table of the right size for the pmap.
5670 if ((p
= (pmap_t
) zalloc(pmap_zone
)) == PMAP_NULL
) {
5671 local_kr
= KERN_RESOURCE_SHORTAGE
;
5672 goto pmap_create_fail
;
5679 p
->pmap_vm_map_cs_enforced
= false;
5681 if (flags
& PMAP_CREATE_64BIT
) {
5682 p
->min
= MACH_VM_MIN_ADDRESS
;
5683 p
->max
= MACH_VM_MAX_ADDRESS
;
5685 p
->min
= VM_MIN_ADDRESS
;
5686 p
->max
= VM_MAX_ADDRESS
;
5688 #if defined(HAS_APPLE_PAC)
5689 p
->disable_jop
= disable_jop
;
5690 #endif /* defined(HAS_APPLE_PAC) */
5692 p
->nested_region_true_start
= 0;
5693 p
->nested_region_true_end
= ~0;
5695 os_atomic_init(&p
->ref_count
, 1);
5697 p
->stamp
= os_atomic_inc(&pmap_stamp
, relaxed
);
5698 p
->nx_enabled
= TRUE
;
5699 p
->is_64bit
= is_64bit
;
5701 p
->nested_pmap
= PMAP_NULL
;
5703 #if ARM_PARAMETERIZED_PMAP
5704 /* Default to the native pt_attr */
5705 p
->pmap_pt_attr
= native_pt_attr
;
5706 #endif /* ARM_PARAMETERIZED_PMAP */
5707 #if __ARM_MIXED_PAGE_SIZE__
5708 if (flags
& PMAP_CREATE_FORCE_4K_PAGES
) {
5709 p
->pmap_pt_attr
= &pmap_pt_attr_4k
;
5711 #endif /* __ARM_MIXED_PAGE_SIZE__ */
5713 if (!pmap_get_pt_ops(p
)->alloc_id(p
)) {
5714 local_kr
= KERN_NO_SPACE
;
5719 memset((void *) &p
->stats
, 0, sizeof(p
->stats
));
5721 p
->tt_entry_free
= (tt_entry_t
*)0;
5722 tte_index_max
= ((unsigned)pmap_root_alloc_size(p
) / sizeof(tt_entry_t
));
5724 #if (__ARM_VMSA__ == 7)
5725 p
->tte_index_max
= tte_index_max
;
5729 p
->tte
= pmap_tt1_allocate(p
, pmap_root_alloc_size(p
), PMAP_TT_ALLOCATE_NOWAIT
);
5731 p
->tte
= pmap_tt1_allocate(p
, pmap_root_alloc_size(p
), 0);
5734 local_kr
= KERN_RESOURCE_SHORTAGE
;
5735 goto tt1_alloc_fail
;
5738 p
->ttep
= ml_static_vtop((vm_offset_t
)p
->tte
);
5739 PMAP_TRACE(4, PMAP_CODE(PMAP__TTE
), VM_KERNEL_ADDRHIDE(p
), VM_KERNEL_ADDRHIDE(p
->min
), VM_KERNEL_ADDRHIDE(p
->max
), p
->ttep
);
5741 /* nullify the translation table */
5742 for (i
= 0; i
< tte_index_max
; i
++) {
5743 p
->tte
[i
] = ARM_TTE_TYPE_FAULT
;
5746 FLUSH_PTE_RANGE(p
->tte
, p
->tte
+ tte_index_max
);
5749 * initialize the rest of the structure
5751 p
->nested_region_addr
= 0x0ULL
;
5752 p
->nested_region_size
= 0x0ULL
;
5753 p
->nested_region_asid_bitmap
= NULL
;
5754 p
->nested_region_asid_bitmap_size
= 0x0UL
;
5756 p
->nested_has_no_bounds_ref
= false;
5757 p
->nested_no_bounds_refcnt
= 0;
5758 p
->nested_bounds_set
= false;
5762 p
->pmap_stats_assert
= TRUE
;
5764 strlcpy(p
->pmap_procname
, "<nil>", sizeof(p
->pmap_procname
));
5765 #endif /* MACH_ASSERT */
5766 #if DEVELOPMENT || DEBUG
5767 p
->footprint_was_suspended
= FALSE
;
5768 #endif /* DEVELOPMENT || DEBUG */
5770 pmap_simple_lock(&pmaps_lock
);
5771 queue_enter(&map_pmap_list
, p
, pmap_t
, pmaps
);
5772 pmap_simple_unlock(&pmaps_lock
);
5777 pmap_get_pt_ops(p
)->free_id(p
);
5783 pmap_ledger_release(ledger
);
5786 zfree(pmap_zone
, p
);
5790 pmap_pin_kernel_pages((vm_offset_t
)kr
, sizeof(*kr
));
5794 pmap_unpin_kernel_pages((vm_offset_t
)kr
, sizeof(*kr
));
5800 pmap_create_options(
5806 kern_return_t kr
= KERN_SUCCESS
;
5808 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE
) | DBG_FUNC_START
, size
, flags
);
5810 ledger_reference(ledger
);
5814 pmap
= pmap_create_options_ppl(ledger
, size
, flags
, &kr
);
5815 if (kr
!= KERN_RESOURCE_SHORTAGE
) {
5818 assert(pmap
== PMAP_NULL
);
5819 pmap_alloc_page_for_ppl(0);
5823 pmap
= pmap_create_options_internal(ledger
, size
, flags
, &kr
);
5826 if (pmap
== PMAP_NULL
) {
5827 ledger_dereference(ledger
);
5830 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE
) | DBG_FUNC_END
, VM_KERNEL_ADDRHIDE(pmap
), PMAP_VASID(pmap
), pmap
->hw_asid
);
5837 * This symbol remains in place when the PPL is enabled so that the dispatch
5838 * table does not change from development to release configurations.
5841 #if MACH_ASSERT || XNU_MONITOR
5842 MARK_AS_PMAP_TEXT
static void
5843 pmap_set_process_internal(
5844 __unused pmap_t pmap
,
5846 __unused
char *procname
)
5853 VALIDATE_PMAP(pmap
);
5855 pmap
->pmap_pid
= pid
;
5856 strlcpy(pmap
->pmap_procname
, procname
, sizeof(pmap
->pmap_procname
));
5857 if (pmap_ledgers_panic_leeway
) {
5860 * Some processes somehow trigger some issues that make
5861 * the pmap stats and ledgers go off track, causing
5862 * some assertion failures and ledger panics.
5863 * Turn off the sanity checks if we allow some ledger leeway
5864 * because of that. We'll still do a final check in
5865 * pmap_check_ledgers() for discrepancies larger than the
5866 * allowed leeway after the address space has been fully
5869 pmap
->pmap_stats_assert
= FALSE
;
5870 ledger_disable_panic_on_negative(pmap
->ledger
,
5871 task_ledgers
.phys_footprint
);
5872 ledger_disable_panic_on_negative(pmap
->ledger
,
5873 task_ledgers
.internal
);
5874 ledger_disable_panic_on_negative(pmap
->ledger
,
5875 task_ledgers
.internal_compressed
);
5876 ledger_disable_panic_on_negative(pmap
->ledger
,
5877 task_ledgers
.iokit_mapped
);
5878 ledger_disable_panic_on_negative(pmap
->ledger
,
5879 task_ledgers
.alternate_accounting
);
5880 ledger_disable_panic_on_negative(pmap
->ledger
,
5881 task_ledgers
.alternate_accounting_compressed
);
5883 #endif /* MACH_ASSERT */
5885 #endif /* MACH_ASSERT || XNU_MONITOR */
5895 pmap_set_process_ppl(pmap
, pid
, procname
);
5897 pmap_set_process_internal(pmap
, pid
, procname
);
5900 #endif /* MACH_ASSERT */
5902 #if (__ARM_VMSA__ > 7)
5904 * pmap_deallocate_all_leaf_tts:
5906 * Recursive function for deallocating all leaf TTEs. Walks the given TT,
5907 * removing and deallocating all TTEs.
5909 MARK_AS_PMAP_TEXT
static void
5910 pmap_deallocate_all_leaf_tts(pmap_t pmap
, tt_entry_t
* first_ttep
, unsigned level
)
5912 tt_entry_t tte
= ARM_TTE_EMPTY
;
5913 tt_entry_t
* ttep
= NULL
;
5914 tt_entry_t
* last_ttep
= NULL
;
5916 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
5918 assert(level
< pt_attr_leaf_level(pt_attr
));
5920 last_ttep
= &first_ttep
[ttn_index(pmap
, pt_attr
, ~0, level
)];
5922 for (ttep
= first_ttep
; ttep
<= last_ttep
; ttep
++) {
5925 if (!(tte
& ARM_TTE_VALID
)) {
5929 if ((tte
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_BLOCK
) {
5930 panic("%s: found block mapping, ttep=%p, tte=%p, "
5931 "pmap=%p, first_ttep=%p, level=%u",
5932 __FUNCTION__
, ttep
, (void *)tte
,
5933 pmap
, first_ttep
, level
);
5936 /* Must be valid, type table */
5937 if (level
< pt_attr_twig_level(pt_attr
)) {
5938 /* If we haven't reached the twig level, recurse to the next level. */
5939 pmap_deallocate_all_leaf_tts(pmap
, (tt_entry_t
*)phystokv((tte
) & ARM_TTE_TABLE_MASK
), level
+ 1);
5942 /* Remove the TTE. */
5944 pmap_tte_deallocate(pmap
, 0, 0, false, ttep
, level
);
5948 #endif /* (__ARM_VMSA__ > 7) */
5951 * We maintain stats and ledgers so that a task's physical footprint is:
5952 * phys_footprint = ((internal - alternate_accounting)
5953 * + (internal_compressed - alternate_accounting_compressed)
5955 * + purgeable_nonvolatile
5956 * + purgeable_nonvolatile_compressed
5958 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
5962 * Retire the given physical map from service.
5963 * Should only be called if the map contains
5964 * no valid mappings.
5966 MARK_AS_PMAP_TEXT
static void
5967 pmap_destroy_internal(
5970 if (pmap
== PMAP_NULL
) {
5974 VALIDATE_PMAP(pmap
);
5976 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
5978 int32_t ref_count
= os_atomic_dec(&pmap
->ref_count
, relaxed
);
5979 if (ref_count
> 0) {
5981 } else if (ref_count
< 0) {
5982 panic("pmap %p: refcount underflow", pmap
);
5983 } else if (pmap
== kernel_pmap
) {
5984 panic("pmap %p: attempt to destroy kernel pmap", pmap
);
5987 #if (__ARM_VMSA__ > 7)
5988 pmap_unmap_sharedpage(pmap
);
5989 #endif /* (__ARM_VMSA__ > 7) */
5991 pmap_simple_lock(&pmaps_lock
);
5992 while (pmap
->gc_status
& PMAP_GC_INFLIGHT
) {
5993 pmap
->gc_status
|= PMAP_GC_WAIT
;
5994 assert_wait((event_t
) &pmap
->gc_status
, THREAD_UNINT
);
5995 pmap_simple_unlock(&pmaps_lock
);
5996 (void) thread_block(THREAD_CONTINUE_NULL
);
5997 pmap_simple_lock(&pmaps_lock
);
5999 queue_remove(&map_pmap_list
, pmap
, pmap_t
, pmaps
);
6000 pmap_simple_unlock(&pmaps_lock
);
6002 pmap_trim_self(pmap
);
6005 * Free the memory maps, then the
6008 #if (__ARM_VMSA__ == 7)
6013 for (i
= 0; i
< pmap
->tte_index_max
; i
++) {
6014 ttep
= &pmap
->tte
[i
];
6015 if ((*ttep
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
6016 pmap_tte_deallocate(pmap
, 0, 0, false, ttep
, PMAP_TT_L1_LEVEL
);
6020 #else /* (__ARM_VMSA__ == 7) */
6021 pmap_deallocate_all_leaf_tts(pmap
, pmap
->tte
, pt_attr_root_level(pt_attr
));
6022 #endif /* (__ARM_VMSA__ == 7) */
6027 #if (__ARM_VMSA__ == 7)
6028 pmap_tt1_deallocate(pmap
, pmap
->tte
, pmap
->tte_index_max
* sizeof(tt_entry_t
), 0);
6029 pmap
->tte_index_max
= 0;
6030 #else /* (__ARM_VMSA__ == 7) */
6031 pmap_tt1_deallocate(pmap
, pmap
->tte
, pmap_root_alloc_size(pmap
), 0);
6032 #endif /* (__ARM_VMSA__ == 7) */
6033 pmap
->tte
= (tt_entry_t
*) NULL
;
6037 assert((tt_free_entry_t
*)pmap
->tt_entry_free
== NULL
);
6039 if (__improbable(pmap
->nested
)) {
6040 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(pmap
->nested_region_addr
, pmap
->nested_region_size
, pmap
);
6043 pmap_get_pt_ops(pmap
)->flush_tlb_async(pmap
);
6045 /* return its asid to the pool */
6046 pmap_get_pt_ops(pmap
)->free_id(pmap
);
6047 /* release the reference we hold on the nested pmap */
6048 pmap_destroy_internal(pmap
->nested_pmap
);
6051 pmap_check_ledgers(pmap
);
6053 if (pmap
->nested_region_asid_bitmap
) {
6055 pmap_pages_free(kvtophys((vm_offset_t
)(pmap
->nested_region_asid_bitmap
)), PAGE_SIZE
);
6057 kheap_free(KHEAP_DATA_BUFFERS
, pmap
->nested_region_asid_bitmap
,
6058 pmap
->nested_region_asid_bitmap_size
* sizeof(unsigned int));
6064 pmap_ledger_release(pmap
->ledger
);
6067 pmap_lock_destroy(pmap
);
6068 pmap_free_pmap(pmap
);
6070 pmap_lock_destroy(pmap
);
6071 zfree(pmap_zone
, pmap
);
6081 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY
) | DBG_FUNC_START
, VM_KERNEL_ADDRHIDE(pmap
), PMAP_VASID(pmap
), pmap
->hw_asid
);
6083 ledger
= pmap
->ledger
;
6086 pmap_destroy_ppl(pmap
);
6088 pmap_check_ledger_fields(ledger
);
6090 pmap_destroy_internal(pmap
);
6093 ledger_dereference(ledger
);
6095 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY
) | DBG_FUNC_END
);
6100 * Add a reference to the specified pmap.
6102 MARK_AS_PMAP_TEXT
static void
6103 pmap_reference_internal(
6106 if (pmap
!= PMAP_NULL
) {
6107 VALIDATE_PMAP(pmap
);
6108 os_atomic_inc(&pmap
->ref_count
, relaxed
);
6117 pmap_reference_ppl(pmap
);
6119 pmap_reference_internal(pmap
);
6129 tt_entry_t
*tt1
= NULL
;
6130 tt_free_entry_t
*tt1_free
;
6133 vm_address_t va_end
;
6136 if ((size
< PAGE_SIZE
) && (size
!= PMAP_ROOT_ALLOC_SIZE
)) {
6140 pmap_simple_lock(&tt1_lock
);
6141 if ((size
== PAGE_SIZE
) && (free_page_size_tt_count
!= 0)) {
6142 free_page_size_tt_count
--;
6143 tt1
= (tt_entry_t
*)free_page_size_tt_list
;
6144 free_page_size_tt_list
= ((tt_free_entry_t
*)tt1
)->next
;
6145 } else if ((size
== 2 * PAGE_SIZE
) && (free_two_page_size_tt_count
!= 0)) {
6146 free_two_page_size_tt_count
--;
6147 tt1
= (tt_entry_t
*)free_two_page_size_tt_list
;
6148 free_two_page_size_tt_list
= ((tt_free_entry_t
*)tt1
)->next
;
6149 } else if ((size
< PAGE_SIZE
) && (free_tt_count
!= 0)) {
6151 tt1
= (tt_entry_t
*)free_tt_list
;
6152 free_tt_list
= (tt_free_entry_t
*)((tt_free_entry_t
*)tt1
)->next
;
6155 pmap_simple_unlock(&tt1_lock
);
6158 pmap_tt_ledger_credit(pmap
, size
);
6159 return (tt_entry_t
*)tt1
;
6162 ret
= pmap_pages_alloc_zeroed(&pa
, (unsigned)((size
< PAGE_SIZE
)? PAGE_SIZE
: size
), ((option
& PMAP_TT_ALLOCATE_NOWAIT
)? PMAP_PAGES_ALLOCATE_NOWAIT
: 0));
6164 if (ret
== KERN_RESOURCE_SHORTAGE
) {
6165 return (tt_entry_t
*)0;
6172 if (size
< PAGE_SIZE
) {
6173 va
= phystokv(pa
) + size
;
6174 tt_free_entry_t
*local_free_list
= (tt_free_entry_t
*)va
;
6175 tt_free_entry_t
*next_free
= NULL
;
6176 for (va_end
= phystokv(pa
) + PAGE_SIZE
; va
< va_end
; va
= va
+ size
) {
6177 tt1_free
= (tt_free_entry_t
*)va
;
6178 tt1_free
->next
= next_free
;
6179 next_free
= tt1_free
;
6181 pmap_simple_lock(&tt1_lock
);
6182 local_free_list
->next
= free_tt_list
;
6183 free_tt_list
= next_free
;
6184 free_tt_count
+= ((PAGE_SIZE
/ size
) - 1);
6185 if (free_tt_count
> free_tt_max
) {
6186 free_tt_max
= free_tt_count
;
6188 pmap_simple_unlock(&tt1_lock
);
6191 /* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
6192 * Depending on the device, this can vary between 512b and 16K. */
6193 OSAddAtomic((uint32_t)(size
/ PMAP_ROOT_ALLOC_SIZE
), (pmap
== kernel_pmap
? &inuse_kernel_tteroot_count
: &inuse_user_tteroot_count
));
6194 OSAddAtomic64(size
/ PMAP_ROOT_ALLOC_SIZE
, &alloc_tteroot_count
);
6195 pmap_tt_ledger_credit(pmap
, size
);
6197 return (tt_entry_t
*) phystokv(pa
);
6201 pmap_tt1_deallocate(
6207 tt_free_entry_t
*tt_entry
;
6209 if ((size
< PAGE_SIZE
) && (size
!= PMAP_ROOT_ALLOC_SIZE
)) {
6213 tt_entry
= (tt_free_entry_t
*)tt
;
6215 pmap_simple_lock(&tt1_lock
);
6217 if (size
< PAGE_SIZE
) {
6219 if (free_tt_count
> free_tt_max
) {
6220 free_tt_max
= free_tt_count
;
6222 tt_entry
->next
= free_tt_list
;
6223 free_tt_list
= tt_entry
;
6226 if (size
== PAGE_SIZE
) {
6227 free_page_size_tt_count
++;
6228 if (free_page_size_tt_count
> free_page_size_tt_max
) {
6229 free_page_size_tt_max
= free_page_size_tt_count
;
6231 tt_entry
->next
= free_page_size_tt_list
;
6232 free_page_size_tt_list
= tt_entry
;
6235 if (size
== 2 * PAGE_SIZE
) {
6236 free_two_page_size_tt_count
++;
6237 if (free_two_page_size_tt_count
> free_two_page_size_tt_max
) {
6238 free_two_page_size_tt_max
= free_two_page_size_tt_count
;
6240 tt_entry
->next
= free_two_page_size_tt_list
;
6241 free_two_page_size_tt_list
= tt_entry
;
6244 if (option
& PMAP_TT_DEALLOCATE_NOBLOCK
) {
6245 pmap_simple_unlock(&tt1_lock
);
6246 pmap_tt_ledger_debit(pmap
, size
);
6250 while (free_page_size_tt_count
> FREE_PAGE_SIZE_TT_MAX
) {
6251 free_page_size_tt_count
--;
6252 tt
= (tt_entry_t
*)free_page_size_tt_list
;
6253 free_page_size_tt_list
= ((tt_free_entry_t
*)tt
)->next
;
6255 pmap_simple_unlock(&tt1_lock
);
6257 pmap_pages_free(ml_static_vtop((vm_offset_t
)tt
), PAGE_SIZE
);
6259 OSAddAtomic(-(int32_t)(PAGE_SIZE
/ PMAP_ROOT_ALLOC_SIZE
), (pmap
== kernel_pmap
? &inuse_kernel_tteroot_count
: &inuse_user_tteroot_count
));
6261 pmap_simple_lock(&tt1_lock
);
6264 while (free_two_page_size_tt_count
> FREE_TWO_PAGE_SIZE_TT_MAX
) {
6265 free_two_page_size_tt_count
--;
6266 tt
= (tt_entry_t
*)free_two_page_size_tt_list
;
6267 free_two_page_size_tt_list
= ((tt_free_entry_t
*)tt
)->next
;
6269 pmap_simple_unlock(&tt1_lock
);
6271 pmap_pages_free(ml_static_vtop((vm_offset_t
)tt
), 2 * PAGE_SIZE
);
6273 OSAddAtomic(-2 * (int32_t)(PAGE_SIZE
/ PMAP_ROOT_ALLOC_SIZE
), (pmap
== kernel_pmap
? &inuse_kernel_tteroot_count
: &inuse_user_tteroot_count
));
6275 pmap_simple_lock(&tt1_lock
);
6277 pmap_simple_unlock(&tt1_lock
);
6278 pmap_tt_ledger_debit(pmap
, size
);
6281 static kern_return_t
6286 unsigned int options
)
6292 if ((tt_free_entry_t
*)pmap
->tt_entry_free
!= NULL
) {
6293 tt_free_entry_t
*tt_free_cur
, *tt_free_next
;
6295 tt_free_cur
= ((tt_free_entry_t
*)pmap
->tt_entry_free
);
6296 tt_free_next
= tt_free_cur
->next
;
6297 tt_free_cur
->next
= NULL
;
6298 *ttp
= (tt_entry_t
*)tt_free_cur
;
6299 pmap
->tt_entry_free
= (tt_entry_t
*)tt_free_next
;
6307 * Allocate a VM page for the level x page table entries.
6309 while (pmap_pages_alloc_zeroed(&pa
, PAGE_SIZE
, ((options
& PMAP_TT_ALLOCATE_NOWAIT
)? PMAP_PAGES_ALLOCATE_NOWAIT
: 0)) != KERN_SUCCESS
) {
6310 if (options
& PMAP_OPTIONS_NOWAIT
) {
6311 return KERN_RESOURCE_SHORTAGE
;
6316 while ((ptdp
= ptd_alloc(pmap
)) == NULL
) {
6317 if (options
& PMAP_OPTIONS_NOWAIT
) {
6318 pmap_pages_free(pa
, PAGE_SIZE
);
6319 return KERN_RESOURCE_SHORTAGE
;
6324 if (level
< pt_attr_leaf_level(pmap_get_pt_attr(pmap
))) {
6325 OSAddAtomic64(1, &alloc_ttepages_count
);
6326 OSAddAtomic(1, (pmap
== kernel_pmap
? &inuse_kernel_ttepages_count
: &inuse_user_ttepages_count
));
6328 OSAddAtomic64(1, &alloc_ptepages_count
);
6329 OSAddAtomic(1, (pmap
== kernel_pmap
? &inuse_kernel_ptepages_count
: &inuse_user_ptepages_count
));
6332 pmap_tt_ledger_credit(pmap
, PAGE_SIZE
);
6334 PMAP_ZINFO_PALLOC(pmap
, PAGE_SIZE
);
6336 pvh_update_head_unlocked(pai_to_pvh(pa_index(pa
)), ptdp
, PVH_TYPE_PTDP
);
6338 uint64_t pmap_page_size
= pt_attr_page_size(pmap_get_pt_attr(pmap
));
6339 if (PAGE_SIZE
> pmap_page_size
) {
6341 vm_address_t va_end
;
6345 for (va_end
= phystokv(pa
) + PAGE_SIZE
, va
= phystokv(pa
) + pmap_page_size
; va
< va_end
; va
= va
+ pmap_page_size
) {
6346 ((tt_free_entry_t
*)va
)->next
= (tt_free_entry_t
*)pmap
->tt_entry_free
;
6347 pmap
->tt_entry_free
= (tt_entry_t
*)va
;
6352 *ttp
= (tt_entry_t
*)phystokv(pa
);
6359 return KERN_SUCCESS
;
6370 ptd_info_t
*ptd_info
;
6371 unsigned pt_acc_cnt
;
6373 vm_offset_t free_page
= 0;
6374 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
6375 unsigned max_pt_index
= PAGE_SIZE
/ pt_attr_page_size(pt_attr
);
6379 ptdp
= ptep_get_ptd((vm_offset_t
)ttp
);
6380 ptd_info
= ptd_get_info(ptdp
, ttp
);
6382 ptd_info
->va
= (vm_offset_t
)-1;
6384 if ((level
< pt_attr_leaf_level(pt_attr
)) && (ptd_info
->refcnt
== PT_DESC_REFCOUNT
)) {
6385 ptd_info
->refcnt
= 0;
6388 if (ptd_info
->refcnt
!= 0) {
6389 panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp
, ptd_info
->refcnt
);
6392 ptd_info
->refcnt
= 0;
6394 for (i
= 0, pt_acc_cnt
= 0; i
< max_pt_index
; i
++) {
6395 pt_acc_cnt
+= ptdp
->ptd_info
[i
].refcnt
;
6398 if (pt_acc_cnt
== 0) {
6399 tt_free_entry_t
*tt_free_list
= (tt_free_entry_t
*)&pmap
->tt_entry_free
;
6400 unsigned pt_free_entry_cnt
= 1;
6402 while (pt_free_entry_cnt
< max_pt_index
&& tt_free_list
) {
6403 tt_free_entry_t
*tt_free_list_next
;
6405 tt_free_list_next
= tt_free_list
->next
;
6406 if ((((vm_offset_t
)tt_free_list_next
) - ((vm_offset_t
)ttp
& ~PAGE_MASK
)) < PAGE_SIZE
) {
6407 pt_free_entry_cnt
++;
6409 tt_free_list
= tt_free_list_next
;
6411 if (pt_free_entry_cnt
== max_pt_index
) {
6412 tt_free_entry_t
*tt_free_list_cur
;
6414 free_page
= (vm_offset_t
)ttp
& ~PAGE_MASK
;
6415 tt_free_list
= (tt_free_entry_t
*)&pmap
->tt_entry_free
;
6416 tt_free_list_cur
= (tt_free_entry_t
*)&pmap
->tt_entry_free
;
6418 while (tt_free_list_cur
) {
6419 tt_free_entry_t
*tt_free_list_next
;
6421 tt_free_list_next
= tt_free_list_cur
->next
;
6422 if ((((vm_offset_t
)tt_free_list_next
) - free_page
) < PAGE_SIZE
) {
6423 tt_free_list
->next
= tt_free_list_next
->next
;
6425 tt_free_list
= tt_free_list_next
;
6427 tt_free_list_cur
= tt_free_list_next
;
6430 ((tt_free_entry_t
*)ttp
)->next
= (tt_free_entry_t
*)pmap
->tt_entry_free
;
6431 pmap
->tt_entry_free
= ttp
;
6434 ((tt_free_entry_t
*)ttp
)->next
= (tt_free_entry_t
*)pmap
->tt_entry_free
;
6435 pmap
->tt_entry_free
= ttp
;
6440 if (free_page
!= 0) {
6441 ptd_deallocate(ptep_get_ptd((vm_offset_t
)free_page
));
6442 *(pt_desc_t
**)pai_to_pvh(pa_index(ml_static_vtop(free_page
))) = NULL
;
6443 pmap_pages_free(ml_static_vtop(free_page
), PAGE_SIZE
);
6444 if (level
< pt_attr_leaf_level(pt_attr
)) {
6445 OSAddAtomic(-1, (pmap
== kernel_pmap
? &inuse_kernel_ttepages_count
: &inuse_user_ttepages_count
));
6447 OSAddAtomic(-1, (pmap
== kernel_pmap
? &inuse_kernel_ptepages_count
: &inuse_user_ptepages_count
));
6449 PMAP_ZINFO_PFREE(pmap
, PAGE_SIZE
);
6450 pmap_tt_ledger_debit(pmap
, PAGE_SIZE
);
/**
 * Safely clear out a translation table entry.
 *
 * @note If the TTE to clear out points to a leaf table, then that leaf table
 *       must have a refcnt of zero before the TTE can be removed.
 *
 * @param pmap The pmap containing the page table whose TTE is being removed.
 * @param va_start Beginning of the VA range mapped by the table being removed, for TLB maintenance
 * @param va_end Non-inclusive end of the VA range mapped by the table being removed, for TLB maintenance
 * @param need_strong_sync Indicates whether strong DSB should be used to synchronize TLB maintenance
 * @param ttep Pointer to the TTE that should be cleared out.
 * @param level The level of the page table that contains the TTE to be removed.
 */
static void
pmap_tte_remove(
	pmap_t pmap,
	vm_offset_t va_start,
	vm_offset_t va_end,
	bool need_strong_sync,
	tt_entry_t *ttep,
	unsigned int level)
{
	tt_entry_t tte = *ttep;

	if (__improbable(tte == 0)) {
		panic("%s: null tt_entry ttep==%p", __func__, ttep);
	}

	if (__improbable((level == pt_attr_twig_level(pmap_get_pt_attr(pmap))) &&
	    (ptep_get_info((pt_entry_t*)ttetokv(tte))->refcnt != 0))) {
		panic("%s: non-zero pagetable refcount: pmap=%p ttep=%p ptd=%p refcnt=0x%x", __func__,
		    pmap, ttep, tte_get_ptd(tte), ptep_get_info((pt_entry_t*)ttetokv(tte))->refcnt);
	}

#if (__ARM_VMSA__ == 7)
	{
		tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
		unsigned i;

		for (i = 0; i < 4; i++, ttep_4M++) {
			*ttep_4M = (tt_entry_t) 0;
		}
		FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
	}
#else
	*ttep = (tt_entry_t) 0;
	FLUSH_PTE_STRONG(ttep);
#endif /* (__ARM_VMSA__ == 7) */
	// If given a VA range, we're being asked to flush the TLB before the table in ttep is freed.
	if (va_end > va_start) {
#if (__ARM_VMSA__ == 7)
		// Ensure intermediate translations are flushed for each 1MB block
		flush_mmu_tlb_entry_async((va_start & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
		flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
		flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
		flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
#endif
		PMAP_UPDATE_TLBS(pmap, va_start, va_end, need_strong_sync);
	}
}
/**
 * Given a pointer to an entry within a `level` page table, delete the
 * page table at `level` + 1 that is represented by that entry.  For instance,
 * to delete an unused L3 table, `ttep` would be a pointer to the L2 entry that
 * contains the PA of the L3 table, and `level` would be "2".
 *
 * @note If the table getting deallocated is a leaf table, then that leaf table
 *       must have a refcnt of zero before getting deallocated.  All other levels
 *       must have a refcnt of PT_DESC_REFCOUNT in their page table descriptor.
 *
 * @param pmap The pmap that owns the page table to be deallocated.
 * @param va_start Beginning of the VA range mapped by the table being removed, for TLB maintenance.
 * @param va_end Non-inclusive end of the VA range mapped by the table being removed, for TLB maintenance.
 * @param need_strong_sync Indicates whether a strong DSB should be used to synchronize TLB maintenance.
 * @param ttep Pointer to the `level` TTE to remove.
 * @param level The level of the table that contains an entry pointing to the
 *              table to be removed.  The deallocated page table will be a
 *              `level` + 1 table (so if `level` is 2, then an L3 table will be
 *              deleted).
 */
static void
pmap_tte_deallocate(
	pmap_t pmap,
	vm_offset_t va_start,
	vm_offset_t va_end,
	bool need_strong_sync,
	tt_entry_t *ttep,
	unsigned int level)
{
	pmap_paddr_t pa;
	tt_entry_t tte;

	pmap_assert_locked_w(pmap);

	tte = *ttep;

#if MACH_ASSERT
	if (tte_get_ptd(tte)->pmap != pmap) {
		panic("%s: Passed in pmap doesn't own the page table to be deleted ptd=%p ptd->pmap=%p pmap=%p",
		    __func__, tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
	}
#endif /* MACH_ASSERT */

	pmap_tte_remove(pmap, va_start, va_end, need_strong_sync, ttep, level);

	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));
#if MACH_ASSERT
		pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~(pmap_page_size - 1)));

		for (unsigned i = 0; i < (pmap_page_size / sizeof(*pte_p)); i++, pte_p++) {
			if (__improbable(ARM_PTE_IS_COMPRESSED(*pte_p, pte_p))) {
				panic_plain("%s: Found compressed mapping in soon to be deleted "
				    "L%d table tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx",
				    __func__, level + 1, (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
			} else if (__improbable(((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT)) {
				panic_plain("%s: Found valid mapping in soon to be deleted L%d "
				    "table tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx",
				    __func__, level + 1, (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
			}
		}
#endif /* MACH_ASSERT */

		/* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
		 * aligned on 1K boundaries.  We clear the surrounding "chunk" of 4 TTEs above. */
		pa = tte_to_pa(tte) & ~(pmap_page_size - 1);
		pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
	}
}
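
/*
 * Illustrative sketch (not built): the "level"/"level + 1" relationship used by
 * pmap_tte_deallocate() above.  The constants below assume a 4KB translation
 * granule with 9 bits of index per level, which is a simplification of the
 * pt_attr machinery rather than the configuration actually used at runtime.
 */
#if 0
#include <stdint.h>

#define LEVEL_SHIFT(level)      (12 + 9 * (3 - (level)))        /* VA bits resolved below an L<level> entry */
#define LEVEL_INDEX(va, level)  (((va) >> LEVEL_SHIFT(level)) & 0x1FFULL)

/*
 * An L2 entry at index LEVEL_INDEX(va, 2) holds the physical address of the
 * L3 table that maps 'va'; deleting that L3 table means locating and clearing
 * exactly that L2 entry, which is what the (ttep, level) pair expresses.
 */
static uint64_t
entry_index_for(uint64_t va, unsigned level)
{
	return LEVEL_INDEX(va, level);
}
#endif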
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 *
 *	Returns the number of PTEs changed.
 */
static int
pmap_remove_range(
	pmap_t pmap,
	vm_map_address_t va,
	pt_entry_t *bpte,
	pt_entry_t *epte)
{
	bool need_strong_sync = false;
	int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, NULL,
	    &need_strong_sync, PMAP_OPTIONS_REMOVE);
	if (num_changed > 0) {
		PMAP_UPDATE_TLBS(pmap, va,
		    va + (pt_attr_page_size(pmap_get_pt_attr(pmap)) * (epte - bpte)), need_strong_sync);
	}
	return num_changed;
}
#ifdef PVH_FLAG_EXEC

/*
 * Update the access protection bits of the physical aperture mapping for a page.
 * This is useful, for example, in guaranteeing that a verified executable page
 * has no writable mappings anywhere in the system, including the physical
 * aperture.  flush_tlb_async can be set to true to avoid unnecessary TLB
 * synchronization overhead in cases where the call to this function is
 * guaranteed to be followed by other TLB operations.
 */
static void
pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
{
#if __ARM_PTE_PHYSMAP__
	ASSERT_PVH_LOCKED(pai);
	vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
	pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);

	pt_entry_t tmplate = *pte_p;
	if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
		return;
	}
	tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
#if (__ARM_VMSA__ > 7)
	if (tmplate & ARM_PTE_HINT_MASK) {
		panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
		    __func__, pte_p, (void *)kva, tmplate);
	}
#endif
	WRITE_PTE_STRONG(pte_p, tmplate);
	flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
	if (!flush_tlb_async) {
		sync_tlb_flush();
	}
#endif
}

#endif /* defined(PVH_FLAG_EXEC) */
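
/*
 * Illustrative sketch (not built): the read-modify-write pattern used above to
 * retarget the access-permission field of a PTE while leaving every other bit
 * intact.  The mask and field positions are hypothetical stand-ins, not the
 * real ARM_PTE_* encodings.
 */
#if 0
#include <stdint.h>

#define AP_MASK   (3ULL << 6)            /* hypothetical AP field position */
#define AP(x)     (((uint64_t)(x) & 3ULL) << 6)

static uint64_t
pte_with_new_ap(uint64_t pte, unsigned ap)
{
	if ((pte & AP_MASK) == AP(ap)) {
		return pte;                      /* already at the requested permission */
	}
	return (pte & ~AP_MASK) | AP(ap);        /* clear the field, then set the new value */
}
#endif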
static void
pmap_remove_pv(
	pmap_t pmap,
	pt_entry_t *cpte,
	int pai,
	int *num_internal,
	int *num_alt_internal,
	int *num_reusable,
	int *num_external)
{
	pv_entry_t **pv_h, **pve_pp;
	pv_entry_t *pve_p;

	ASSERT_NOT_HIBERNATING();
	ASSERT_PVH_LOCKED(pai);
	pv_h = pai_to_pvh(pai);
	vm_offset_t pvh_flags = pvh_get_flags(pv_h);

	if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
		panic("%d is locked down (%#lx), cannot remove", pai, pvh_flags);
	}

	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		if (__improbable((cpte != pvh_ptep(pv_h)))) {
			panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
		}
		if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
			assert(IS_INTERNAL_PAGE(pai));
			(*num_internal)++;
			(*num_alt_internal)++;
			CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
		} else if (IS_INTERNAL_PAGE(pai)) {
			if (IS_REUSABLE_PAGE(pai)) {
				(*num_reusable)++;
			} else {
				(*num_internal)++;
			}
		} else {
			(*num_external)++;
		}
		pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_pp = pv_h;
		pve_p = pvh_list(pv_h);

		while (pve_p != PV_ENTRY_NULL &&
		    (pve_get_ptep(pve_p) != cpte)) {
			pve_pp = pve_link_field(pve_p);
			pve_p = PVE_NEXT_PTR(pve_next(pve_p));
		}

		if (__improbable((pve_p == PV_ENTRY_NULL))) {
			panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
		}

		if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
			pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
			while (check_pve_p != PV_ENTRY_NULL) {
				if (pve_get_ptep(check_pve_p) == cpte) {
					panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
					    __func__, cpte, pmap, pv_h, pve_p, pai);
				}
				check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
			}
		}

		if (IS_ALTACCT_PAGE(pai, pve_p)) {
			assert(IS_INTERNAL_PAGE(pai));
			(*num_internal)++;
			(*num_alt_internal)++;
			CLR_ALTACCT_PAGE(pai, pve_p);
		} else if (IS_INTERNAL_PAGE(pai)) {
			if (IS_REUSABLE_PAGE(pai)) {
				(*num_reusable)++;
			} else {
				(*num_internal)++;
			}
		} else {
			(*num_external)++;
		}

		pvh_remove(pv_h, pve_pp, pve_p);
		pv_free_entry(pve_p);
		if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
			pvh_set_flags(pv_h, pvh_flags);
		}
	} else {
		panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
		    __func__, *pv_h, cpte, pmap, pv_h, pai);
	}

#ifdef PVH_FLAG_EXEC
	if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
		pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
	}
#endif
}
static int
pmap_remove_range_options(
	pmap_t pmap,
	vm_map_address_t va,
	pt_entry_t *bpte,
	pt_entry_t *epte,
	vm_map_address_t *eva,
	bool *need_strong_sync __unused,
	int options)
{
	pt_entry_t *cpte;
	size_t npages = 0;
	int num_removed, num_unwired;
	int num_pte_changed;
	int pai = 0;
	pmap_paddr_t pa;
	int num_external, num_internal, num_reusable;
	int num_alt_internal;
	uint64_t num_compressed, num_alt_compressed;
	int16_t refcnt = 0;

	pmap_assert_locked_w(pmap);

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	uint64_t pmap_page_size = PAGE_RATIO * pt_attr_page_size(pt_attr);

	if (__improbable((uintptr_t)epte > (((uintptr_t)bpte + pmap_page_size) & ~(pmap_page_size - 1)))) {
		panic("%s: PTE range [%p, %p) in pmap %p crosses page table boundary", __func__, bpte, epte, pmap);
	}

	num_removed = 0;
	num_unwired = 0;
	num_pte_changed = 0;
	num_external = 0;
	num_internal = 0;
	num_reusable = 0;
	num_compressed = 0;
	num_alt_internal = 0;
	num_alt_compressed = 0;

	for (cpte = bpte; cpte < epte;
	    cpte += PAGE_RATIO, va += pmap_page_size) {
		pt_entry_t spte;
		boolean_t managed = FALSE;

		/*
		 * Check for pending preemption on every iteration: the PV list may be arbitrarily long,
		 * so we need to be as aggressive as possible in checking for preemption when we can.
		 */
		if (__improbable((eva != NULL) && npages++ && pmap_pending_preemption())) {
			*eva = va;
			break;
		}

		spte = *((volatile pt_entry_t *)cpte);

		if (pgtrace_enabled) {
			pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
		}

		while (!managed) {
			if (pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_REMOVE) &&
			    (ARM_PTE_IS_COMPRESSED(spte, cpte))) {
				/*
				 * "pmap" must be locked at this point,
				 * so this should not race with another
				 * pmap_remove_range() or pmap_enter().
				 */

				/* one less "compressed"... */
				num_compressed++;
				if (spte & ARM_PTE_COMPRESSED_ALT) {
					/* ... but it used to be "ALTACCT" */
					num_alt_compressed++;
				}

				WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
				/*
				 * "refcnt" also accounts for
				 * our "compressed" markers,
				 * so let's update it here.
				 */
				--refcnt;
				spte = *((volatile pt_entry_t *)cpte);
			}
			/*
			 * It may be possible for the pte to transition from managed
			 * to unmanaged in this timeframe; for now, elide the assert.
			 * We should break out as a consequence of checking pa_valid.
			 */
			//assert(!ARM_PTE_IS_COMPRESSED(spte));
			pa = pte_to_pa(spte);
			if (!pa_valid(pa)) {
				unsigned int cacheattr = pmap_cache_attributes((ppnum_t)atop(pa));

				if (__improbable((cacheattr & PP_ATTR_MONITOR) &&
				    (pte_to_xprr_perm(spte) != XPRR_KERN_RO_PERM) && !pmap_ppl_disable)) {
					panic("%s: attempt to remove mapping of writable PPL-protected I/O address 0x%llx",
					    __func__, (uint64_t)pa);
				}
				break;
			}
			pai = (int)pa_index(pa);
			LOCK_PVH(pai);
			spte = *((volatile pt_entry_t *)cpte);
			pa = pte_to_pa(spte);
			if (pai == (int)pa_index(pa)) {
				managed = TRUE;
				break; // Leave pai locked as we will unlock it after we free the PV entry
			}
			UNLOCK_PVH(pai);
		}

		if (ARM_PTE_IS_COMPRESSED(*cpte, cpte)) {
			/*
			 * There used to be a valid mapping here but it
			 * has already been removed when the page was
			 * sent to the VM compressor, so nothing left to
			 * remove now...
			 */
			continue;
		}

		/* remove the translation, do not flush the TLB */
		if (*cpte != ARM_PTE_TYPE_FAULT) {
			assertf(!ARM_PTE_IS_COMPRESSED(*cpte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
			assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
			if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
				panic("pmap_remove_range_options(): VA mismatch: cpte=%p ptd=%p pte=0x%llx va=0x%llx, cpte va=0x%llx",
				    cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va, (uint64_t)ptep_get_va(cpte));
			}
			WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
			num_pte_changed++;
		}

		if ((spte != ARM_PTE_TYPE_FAULT) &&
		    (pmap != kernel_pmap)) {
			assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
			assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
			--refcnt;
		}

		if (pte_is_wired(spte)) {
			pte_set_wired(pmap, cpte, 0);
			num_unwired++;
		}
		/*
		 * if not managed, we're done
		 */
		if (!managed) {
			continue;
		}
		/*
		 * find and remove the mapping from the chain for this
		 * physical address.
		 */
		pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);

		num_removed++;
		UNLOCK_PVH(pai);
	}

	/*
	 *	Update the counts
	 */
	OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
	pmap_ledger_debit(pmap, task_ledgers.phys_mem, num_removed * pmap_page_size);

	if (pmap != kernel_pmap) {
		if ((refcnt != 0) && (OSAddAtomic16(refcnt, (SInt16 *) &(ptep_get_info(bpte)->refcnt)) <= 0)) {
			panic("pmap_remove_range_options: over-release of ptdp %p for pte [%p, %p)", ptep_get_ptd(bpte), bpte, epte);
		}
		/* update pmap stats... */
		OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
		if (num_external) {
			__assert_only int32_t orig_external = OSAddAtomic(-num_external, &pmap->stats.external);
			PMAP_STATS_ASSERTF(orig_external >= num_external,
			    pmap,
			    "pmap=%p bpte=%p epte=%p num_external=%d stats.external=%d",
			    pmap, bpte, epte, num_external, orig_external);
		}
		if (num_internal) {
			__assert_only int32_t orig_internal = OSAddAtomic(-num_internal, &pmap->stats.internal);
			PMAP_STATS_ASSERTF(orig_internal >= num_internal,
			    pmap,
			    "pmap=%p bpte=%p epte=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
			    pmap, bpte, epte,
			    num_internal, orig_internal,
			    num_reusable, pmap->stats.reusable);
		}
		if (num_reusable) {
			__assert_only int32_t orig_reusable = OSAddAtomic(-num_reusable, &pmap->stats.reusable);
			PMAP_STATS_ASSERTF(orig_reusable >= num_reusable,
			    pmap,
			    "pmap=%p bpte=%p epte=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
			    pmap, bpte, epte,
			    num_internal, pmap->stats.internal,
			    num_reusable, orig_reusable);
		}
		if (num_compressed) {
			__assert_only uint64_t orig_compressed = OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
			PMAP_STATS_ASSERTF(orig_compressed >= num_compressed,
			    pmap,
			    "pmap=%p bpte=%p epte=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
			    pmap, bpte, epte, num_compressed, num_alt_compressed,
			    orig_compressed);
		}
		/* ... and ledgers */
		pmap_ledger_debit(pmap, task_ledgers.wired_mem, (num_unwired) * pmap_page_size);
		pmap_ledger_debit(pmap, task_ledgers.internal, (num_internal) * pmap_page_size);
		pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, (num_alt_internal) * pmap_page_size);
		pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, (num_alt_compressed) * pmap_page_size);
		pmap_ledger_debit(pmap, task_ledgers.internal_compressed, (num_compressed) * pmap_page_size);
		/* make needed adjustments to phys_footprint */
		pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
		    ((num_internal -
		    num_alt_internal) +
		    (num_compressed -
		    num_alt_compressed)) * pmap_page_size);
	}

	/* flush the ptable entries we have written */
	if (num_pte_changed > 0) {
		FLUSH_PTE_RANGE_STRONG(bpte, epte);
	}

	return num_pte_changed;
}
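
/*
 * Illustrative sketch (not built): the early-exit pattern used above, where a
 * long removal loop periodically checks for pending preemption and, instead of
 * finishing, reports how far it got so the caller can resume later.  The
 * is_preemption_pending() helper and the entry type are hypothetical stand-ins.
 */
#if 0
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

extern bool is_preemption_pending(void);   /* hypothetical */

/* Returns the first index NOT processed; equal to 'count' when finished. */
static size_t
process_until_preempted(uint64_t *entries, size_t count)
{
	size_t i;

	for (i = 0; i < count; i++) {
		if ((i != 0) && is_preemption_pending()) {
			break;                 /* caller resumes from index 'i' later */
		}
		entries[i] = 0;                /* stand-in for the real per-entry work */
	}
	return i;
}
#endif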
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
void
pmap_remove(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end)
{
	pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
}
MARK_AS_PMAP_TEXT static vm_map_address_t
pmap_remove_options_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options)
{
	vm_map_address_t eva = end;
	pt_entry_t *bpte, *epte;
	pt_entry_t *pte_p;
	tt_entry_t *tte_p;
	int remove_count = 0;
	bool need_strong_sync = false;

	if (__improbable(end < start)) {
		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
	}

	VALIDATE_PMAP(pmap);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	pmap_lock(pmap);

	tte_p = pmap_tte(pmap, start);

	if (tte_p == (tt_entry_t *) NULL) {
		goto done;
	}

	if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		pte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte = &pte_p[pte_index(pmap, pt_attr, start)];
		epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));

		remove_count = pmap_remove_range_options(pmap, start, bpte, epte, &eva,
		    &need_strong_sync, options);

		if ((pmap != kernel_pmap) && (pmap->nested == FALSE) && (ptep_get_info(pte_p)->refcnt == 0)) {
			pmap_tte_deallocate(pmap, start, eva, need_strong_sync, tte_p, pt_attr_twig_level(pt_attr));
			remove_count = 0; // pmap_tte_deallocate has flushed the TLB for us
		}
	}

done:
	pmap_unlock(pmap);

	if (remove_count > 0) {
		PMAP_UPDATE_TLBS(pmap, start, eva, need_strong_sync);
	}
	return eva;
}
void
pmap_remove_options(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options)
{
	vm_map_address_t va;

	if (pmap == PMAP_NULL) {
		return;
	}

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
	    VM_KERNEL_ADDRHIDE(end));

	if ((start | end) & pt_attr_leaf_offmask(pt_attr)) {
		panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
		    pmap, (uint64_t)start, (uint64_t)end);
	}
	if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
		panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
		    pmap, (uint64_t)start, (uint64_t)end);
	}

	assert(get_preemption_level() == 0);

	/*
	 *      Invalidate the translation buffer first
	 */
	va = start;
	while (va < end) {
		vm_map_address_t l;

		l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
		if (l > end) {
			l = end;
		}

#if XNU_MONITOR
		va = pmap_remove_options_ppl(pmap, va, l, options);

		pmap_ledger_check_balance(pmap);
#else
		va = pmap_remove_options_internal(pmap, va, l, options);
#endif
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
}
/*
 *	Remove phys addr if mapped in specified map
 */
void
pmap_remove_some_phys(
	__unused pmap_t map,
	__unused ppnum_t pn)
{
	/* Implement to support working set code */
}
void
pmap_set_pmap(
	pmap_t pmap,
#if !__ARM_USER_PROTECT__
	__unused
#endif
	thread_t thread)
{
	pmap_switch(pmap);
#if __ARM_USER_PROTECT__
	thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
	thread->machine.asid = pmap->hw_asid;
#endif
}
static void
pmap_flush_core_tlb_asid_async(pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	flush_core_tlb_asid_async(pmap->hw_asid);
#else
	flush_core_tlb_asid_async(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
#endif
}

static bool
pmap_user_ttb_is_clear(void)
{
#if (__ARM_VMSA__ > 7)
	return get_mmu_ttb() == (invalid_ttep & TTBR_BADDR_MASK);
#else
	return get_mmu_ttb() == kernel_pmap->ttep;
#endif
}
MARK_AS_PMAP_TEXT static void
pmap_switch_internal(
	pmap_t pmap)
{
	VALIDATE_PMAP(pmap);
	pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
	uint16_t asid_index = pmap->hw_asid;
	bool do_asid_flush = false;

	if (__improbable((asid_index == 0) && (pmap != kernel_pmap))) {
		panic("%s: attempt to activate pmap with invalid ASID %p", __func__, pmap);
	}
#if __ARM_KERNEL_PROTECT__
	asid_index >>= 1;
#endif

#if (__ARM_VMSA__ > 7)
	pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
	__unused const pt_attr_t *last_nested_pmap_attr = cpu_data_ptr->cpu_nested_pmap_attr;
	__unused vm_map_address_t last_nested_region_addr = cpu_data_ptr->cpu_nested_region_addr;
	__unused vm_map_offset_t last_nested_region_size = cpu_data_ptr->cpu_nested_region_size;
	bool do_shared_region_flush = ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap));
	bool break_before_make = do_shared_region_flush;
#else
	bool do_shared_region_flush = false;
	bool break_before_make = false;
#endif

	if ((pmap_max_asids > MAX_HW_ASIDS) && (asid_index > 0)) {
		asid_index -= 1;
		pmap_update_plru(asid_index);

		assert(asid_index < (sizeof(cpu_data_ptr->cpu_sw_asids) / sizeof(*cpu_data_ptr->cpu_sw_asids)));

		/* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
		uint8_t new_sw_asid = pmap->sw_asid;
		uint8_t last_sw_asid = cpu_data_ptr->cpu_sw_asids[asid_index];

		if (new_sw_asid != last_sw_asid) {
			/*
			 * If the virtual ASID of the new pmap does not match the virtual ASID
			 * last seen on this CPU for the physical ASID (that was a mouthful),
			 * then this switch runs the risk of aliasing.  We need to flush the
			 * TLB for this physical ASID in this case.
			 */
			cpu_data_ptr->cpu_sw_asids[asid_index] = new_sw_asid;
			do_asid_flush = true;
			break_before_make = true;
		}
	}

#if __ARM_MIXED_PAGE_SIZE__
	if (pmap_get_pt_attr(pmap)->pta_tcr_value != get_tcr()) {
		break_before_make = true;
	}
#endif
	if (__improbable(break_before_make && !pmap_user_ttb_is_clear())) {
		PMAP_TRACE(1, PMAP_CODE(PMAP__CLEAR_USER_TTB), VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
		pmap_clear_user_ttb_internal();
	}

#if (__ARM_VMSA__ > 7)
	/* If we're switching to a different nested pmap (i.e. shared region), we'll need
	 * to flush the userspace mappings for that region.  Those mappings are global
	 * and will not be protected by the ASID.  It should also be cheaper to flush the
	 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
	if (__improbable(do_shared_region_flush)) {
#if __ARM_RANGE_TLBI__
		uint64_t page_shift_prev = pt_attr_leaf_shift(last_nested_pmap_attr);
		vm_map_offset_t npages_prev = last_nested_region_size >> page_shift_prev;

		/* NOTE: here we flush the global TLB entries for the previous nested region only.
		 * There may still be non-global entries that overlap with the incoming pmap's
		 * nested region.  On Apple SoCs at least, this is acceptable.  Those non-global entries
		 * must necessarily belong to a different ASID than the incoming pmap, or they would
		 * be flushed in the do_asid_flush case below.  This will prevent them from conflicting
		 * with the incoming pmap's nested region.  However, the ARMv8 ARM is not crystal clear
		 * on whether such a global/inactive-nonglobal overlap is acceptable, so we may need
		 * to consider additional invalidation here in the future. */
		if (npages_prev <= ARM64_TLB_RANGE_PAGES) {
			flush_core_tlb_allrange_async(generate_rtlbi_param((ppnum_t)npages_prev, 0, last_nested_region_addr, page_shift_prev));
		} else {
			do_asid_flush = false;
			flush_core_tlb_async();
		}
#else
		do_asid_flush = false;
		flush_core_tlb_async();
#endif // __ARM_RANGE_TLBI__
	}
#endif // (__ARM_VMSA__ > 7)

	if (__improbable(do_asid_flush)) {
		pmap_flush_core_tlb_asid_async(pmap);
#if DEVELOPMENT || DEBUG
		os_atomic_inc(&pmap_asid_flushes, relaxed);
#endif
	}
	if (__improbable(do_asid_flush || do_shared_region_flush)) {
		sync_tlb_flush();
	}

	pmap_switch_user_ttb_internal(pmap);
}
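
/*
 * Illustrative sketch (not built): the software-ASID aliasing check performed
 * above.  When more address spaces exist than hardware ASIDs, each hardware
 * ASID carries a per-CPU "virtual" tag; if the incoming pmap's tag differs
 * from the one last seen for that hardware ASID on this CPU, stale
 * translations could alias and the ASID must be flushed.  Types and sizes
 * below are hypothetical simplifications.
 */
#if 0
#include <stdbool.h>
#include <stdint.h>

#define HW_ASID_COUNT 256

typedef struct {
	uint8_t last_sw_asid[HW_ASID_COUNT];   /* per-CPU table */
} cpu_asid_state_t;

/* Returns true when the caller must flush TLB entries tagged with hw_asid. */
static bool
asid_switch_needs_flush(cpu_asid_state_t *cpu, uint16_t hw_asid, uint8_t new_sw_asid)
{
	if (cpu->last_sw_asid[hw_asid] == new_sw_asid) {
		return false;
	}
	cpu->last_sw_asid[hw_asid] = new_sw_asid;  /* record the new owner of this hardware ASID */
	return true;
}
#endif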
void
pmap_switch(
	pmap_t pmap)
{
	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
#if XNU_MONITOR
	pmap_switch_ppl(pmap);
#else
	pmap_switch_internal(pmap);
#endif
	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
}
void
pmap_require(pmap_t pmap)
{
#if XNU_MONITOR
	VALIDATE_PMAP(pmap);
#else
	if (pmap != kernel_pmap) {
		zone_id_require(ZONE_ID_PMAP, sizeof(struct pmap), pmap);
	}
#endif
}
void
pmap_page_protect(
	ppnum_t ppnum,
	vm_prot_t prot)
{
	pmap_page_protect_options(ppnum, prot, 0, NULL);
}

/*
 *	Routine:	pmap_page_protect_options
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
MARK_AS_PMAP_TEXT static void
pmap_page_protect_options_with_flush_range(
	ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options,
	pmap_tlb_flush_range_t *flush_range)
{
	pmap_paddr_t phys = ptoa(ppnum);
	pv_entry_t **pv_h;
	pv_entry_t **pve_pp;
	pv_entry_t *pve_p;
	pv_entry_t *pveh_p;
	pv_entry_t *pvet_p;
	pt_entry_t *pte_p;
	pv_entry_t *new_pve_p;
	pt_entry_t *new_pte_p;
	vm_offset_t pvh_flags;
	int pai;
	boolean_t remove;
	boolean_t set_NX;
	boolean_t tlb_flush_needed = FALSE;
	unsigned int pvh_cnt = 0;

	assert(ppnum != vm_page_fictitious_addr);

	/* Only work with managed pages. */
	if (!pa_valid(phys)) {
		return;
	}

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_ALL:
		return;         /* nothing to do */
	case VM_PROT_READ:
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	default:
		/* PPL security model requires that we flush TLBs before we exit if the page may be recycled. */
		options = options & ~PMAP_OPTIONS_NOFLUSH;
		remove = TRUE;
		break;
	}

	pai = (int)pa_index(phys);
	LOCK_PVH(pai);
	pv_h = pai_to_pvh(pai);
	pvh_flags = pvh_get_flags(pv_h);

	if (__improbable(remove && (pvh_flags & PVH_FLAG_LOCKDOWN))) {
		panic("%d is locked down (%#llx), cannot remove", pai, pvh_get_flags(pv_h));
	}

	pte_p = PT_ENTRY_NULL;
	pve_p = PV_ENTRY_NULL;
	pve_pp = pv_h;
	pveh_p = PV_ENTRY_NULL;
	pvet_p = PV_ENTRY_NULL;
	new_pve_p = PV_ENTRY_NULL;
	new_pte_p = PT_ENTRY_NULL;
	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		pte_p = pvh_ptep(pv_h);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_p = pvh_list(pv_h);
		pveh_p = pve_p;
	}

	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		vm_map_address_t va = 0;
		pmap_t pmap = NULL;
		pt_entry_t tmplate = ARM_PTE_TYPE_FAULT;
		boolean_t update = FALSE;

		if (pve_p != PV_ENTRY_NULL) {
			pte_p = pve_get_ptep(pve_p);
		}

#ifdef PVH_FLAG_IOMMU
		if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
			if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
				panic("pmap_page_protect: ppnum 0x%x locked down, cannot be owned by iommu 0x%llx, pve_p=%p",
				    ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
			}
			if (remove) {
				if (options & PMAP_OPTIONS_COMPRESSOR) {
					panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
					    ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
				}
				if (pve_p != PV_ENTRY_NULL) {
					pv_entry_t *temp_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
					pvh_remove(pv_h, pve_pp, pve_p);
					pveh_p = pvh_list(pv_h);
					pve_next(pve_p) = new_pve_p;
					new_pve_p = pve_p;
					pve_p = temp_pve_p;
					continue;
				} else {
					new_pte_p = pte_p;
					break;
				}
			}
			goto protect_skip_pve;
		}
#endif

		pmap = ptep_get_pmap(pte_p);
		va = ptep_get_va(pte_p);

		if (pte_p == PT_ENTRY_NULL) {
			panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
			    pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
		} else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
			if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
				pv_entry_t *check_pve_p = pveh_p;
				while (check_pve_p != PV_ENTRY_NULL) {
					if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
						panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
						    pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
					}
					check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
				}
			}
			panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
			    pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
		}

#if DEVELOPMENT || DEBUG
		if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
		if ((prot & VM_PROT_EXECUTE))
#endif
		{
			set_NX = FALSE;
		} else {
			set_NX = TRUE;
		}

		/* Remove the mapping if new protection is NONE */
		if (remove) {
			boolean_t is_altacct = FALSE;
			const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
			pt_entry_t spte = *pte_p;

			if (IS_ALTACCT_PAGE(pai, pve_p)) {
				is_altacct = TRUE;
			} else {
				is_altacct = FALSE;
			}

			if (pte_is_wired(spte)) {
				pte_set_wired(pmap, pte_p, 0);
				spte = *pte_p;
				if (pmap != kernel_pmap) {
					pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
				}
			}

			if (spte != ARM_PTE_TYPE_FAULT &&
			    pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_COMPRESSOR) &&
			    IS_INTERNAL_PAGE(pai)) {
				assert(!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p));
				/* mark this PTE as having been "compressed" */
				tmplate = ARM_PTE_COMPRESSED;
				if (is_altacct) {
					tmplate |= ARM_PTE_COMPRESSED_ALT;
				}
			} else {
				tmplate = ARM_PTE_TYPE_FAULT;
			}

			/*
			 * The entry must be written before the refcnt is decremented to
			 * prevent use-after-free races with code paths that deallocate page
			 * tables based on a zero refcnt.
			 */
			if (spte != tmplate) {
				WRITE_PTE_STRONG(pte_p, tmplate);
				update = TRUE;
			}

			if ((spte != ARM_PTE_TYPE_FAULT) &&
			    (tmplate == ARM_PTE_TYPE_FAULT) &&
			    (pmap != kernel_pmap)) {
				if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_info(pte_p)->refcnt)) <= 0) {
					panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
				}
			}

			if (spte != ARM_PTE_TYPE_FAULT) {
				pmap_ledger_debit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
				OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
			}

			/*
			 * We only ever compress internal pages.
			 */
			if (options & PMAP_OPTIONS_COMPRESSOR) {
				assert(IS_INTERNAL_PAGE(pai));
			}

			if (pmap != kernel_pmap) {
				if (IS_REUSABLE_PAGE(pai) &&
				    IS_INTERNAL_PAGE(pai) &&
				    !is_altacct) {
					__assert_only int32_t orig_reusable = OSAddAtomic(-1, &pmap->stats.reusable);
					PMAP_STATS_ASSERTF(orig_reusable > 0, pmap, "stats.reusable %d", orig_reusable);
				} else if (IS_INTERNAL_PAGE(pai)) {
					__assert_only int32_t orig_internal = OSAddAtomic(-1, &pmap->stats.internal);
					PMAP_STATS_ASSERTF(orig_internal > 0, pmap, "stats.internal %d", orig_internal);
				} else {
					__assert_only int32_t orig_external = OSAddAtomic(-1, &pmap->stats.external);
					PMAP_STATS_ASSERTF(orig_external > 0, pmap, "stats.external %d", orig_external);
				}
				if ((options & PMAP_OPTIONS_COMPRESSOR) &&
				    IS_INTERNAL_PAGE(pai)) {
					/* adjust "compressed" stats */
					OSAddAtomic64(+1, &pmap->stats.compressed);
					PMAP_STATS_PEAK(pmap->stats.compressed);
					pmap->stats.compressed_lifetime++;
				}

				if (IS_ALTACCT_PAGE(pai, pve_p)) {
					assert(IS_INTERNAL_PAGE(pai));
					pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
						pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					}

					/*
					 * Cleanup our marker before
					 * we free this pv_entry.
					 */
					CLR_ALTACCT_PAGE(pai, pve_p);
				} else if (IS_REUSABLE_PAGE(pai)) {
					assert(IS_INTERNAL_PAGE(pai));
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
						/* was not in footprint, but is now */
						pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					}
				} else if (IS_INTERNAL_PAGE(pai)) {
					pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);

					/*
					 * Update all stats related to physical footprint, which only
					 * deals with internal pages.
					 */
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						/*
						 * This removal is only being done so we can send this page to
						 * the compressor; therefore it mustn't affect total task footprint.
						 */
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					} else {
						/*
						 * This internal page isn't going to the compressor, so adjust stats to keep
						 * phys_footprint up to date.
						 */
						pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
					}
				} else {
					/* external page: no impact on ledgers */
				}
			}

			if (pve_p != PV_ENTRY_NULL) {
				assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
			}
		} else {
			const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
			pt_entry_t spte = *pte_p;

			if (pmap == kernel_pmap) {
				tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
			} else {
				tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
			}

			pte_set_was_writeable(tmplate, false);
			/*
			 * While the naive implementation of this would serve to add execute
			 * permission, this is not how the VM uses this interface, or how
			 * x86_64 implements it.  So ignore requests to add execute permissions.
			 */
			if (set_NX) {
				tmplate |= pt_attr_leaf_xn(pt_attr);
			}

			if (*pte_p != ARM_PTE_TYPE_FAULT &&
			    !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p) &&
			    *pte_p != tmplate) {
				if (options & PMAP_OPTIONS_NOFLUSH) {
					WRITE_PTE_FAST(pte_p, tmplate);
				} else {
					WRITE_PTE_STRONG(pte_p, tmplate);
				}
				update = TRUE;
			}
		}

		/* Invalidate TLBs for all CPUs using it */
		if (update && !(options & PMAP_OPTIONS_NOFLUSH)) {
			if (remove || !flush_range ||
			    ((flush_range->ptfr_pmap != pmap) || va >= flush_range->ptfr_end || va < flush_range->ptfr_start)) {
				pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
				    pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap);
			}
			tlb_flush_needed = TRUE;
		}

#ifdef PVH_FLAG_IOMMU
protect_skip_pve:
#endif
		pte_p = PT_ENTRY_NULL;
		pvet_p = pve_p;
		if (pve_p != PV_ENTRY_NULL) {
			if (remove) {
				assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
				pvh_cnt++;
			}
			pve_pp = pve_link_field(pve_p);
			pve_p = PVE_NEXT_PTR(pve_next(pve_p));
		}
	}

#ifdef PVH_FLAG_EXEC
	if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
		pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
	}
#endif
	/* if we removed a bunch of entries, take care of them now */
	if (remove) {
		if (new_pve_p != PV_ENTRY_NULL) {
			pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
			pvh_set_flags(pv_h, pvh_flags);
		} else if (new_pte_p != PT_ENTRY_NULL) {
			pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
			pvh_set_flags(pv_h, pvh_flags);
		} else {
			pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
		}
	}

	if (flush_range && tlb_flush_needed) {
		if (!remove) {
			flush_range->ptfr_flush_needed = true;
			tlb_flush_needed = FALSE;
		}
	}

	/*
	 * If we removed PV entries, ensure prior TLB flushes are complete before we drop the PVH
	 * lock to allow the backing pages to be repurposed.  This is a security precaution, aimed
	 * primarily at XNU_MONITOR configurations, to reduce the likelihood of an attacker causing
	 * a page to be repurposed while it is still live in the TLBs.
	 */
	if (remove && tlb_flush_needed) {
		sync_tlb_flush();
	}

	UNLOCK_PVH(pai);

	if (!remove && tlb_flush_needed) {
		sync_tlb_flush();
	}

	if (remove && (pvet_p != PV_ENTRY_NULL)) {
		pv_list_free(pveh_p, pvet_p, pvh_cnt, pv_kern_low_water_mark);
	}
}
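
/*
 * Illustrative sketch (not built): the "determine the new protection" decision
 * made at the top of the routine above.  Requests that retain read permission
 * only downgrade the mappings; anything else (e.g. VM_PROT_NONE) removes them,
 * and a request for all permissions is a no-op.  The prot_t values are
 * hypothetical stand-ins for vm_prot_t.
 */
#if 0
#include <stdbool.h>

typedef enum {
	PROT_NONE  = 0x0,
	PROT_READ  = 0x1,
	PROT_WRITE = 0x2,
	PROT_EXEC  = 0x4,
} prot_t;

/* Returns true when every mapping of the page must be removed outright. */
static bool
page_protect_means_remove(prot_t prot)
{
	switch (prot) {
	case PROT_READ:
	case PROT_READ | PROT_EXEC:
		return false;        /* keep the mappings, but make them read-only */
	default:
		return true;         /* no read permission left: remove the mappings */
	}
}
#endif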
MARK_AS_PMAP_TEXT static void
pmap_page_protect_options_internal(
	ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options,
	void *arg)
{
	if (arg != NULL) {
		/*
		 * If the argument is non-NULL, the VM layer is conveying its intention that the TLBs should
		 * ultimately be flushed.  The nature of ARM TLB maintenance is such that we can flush the
		 * TLBs much more precisely if we do so inline with the pagetable updates, and the PPL security
		 * model requires that we not exit the PPL without performing required TLB flushes anyway.
		 * In that case, force the flush to take place.
		 */
		options &= ~PMAP_OPTIONS_NOFLUSH;
	}
	pmap_page_protect_options_with_flush_range(ppnum, prot, options, NULL);
}
void
pmap_page_protect_options(
	ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options,
	void *arg)
{
	pmap_paddr_t phys = ptoa(ppnum);

	assert(ppnum != vm_page_fictitious_addr);

	/* Only work with managed pages. */
	if (!pa_valid(phys)) {
		return;
	}

	/*
	 * Determine the new protection.
	 */
	if (prot == VM_PROT_ALL) {
		return;         /* nothing to do */
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);

#if XNU_MONITOR
	pmap_page_protect_options_ppl(ppnum, prot, options, arg);
#else
	pmap_page_protect_options_internal(ppnum, prot, options, arg);
#endif

	PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
}
#if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
MARK_AS_PMAP_TEXT void
pmap_disable_user_jop_internal(pmap_t pmap)
{
	if (pmap == kernel_pmap) {
		panic("%s: called with kernel_pmap\n", __func__);
	}
	VALIDATE_PMAP(pmap);
	pmap->disable_jop = true;
}

void
pmap_disable_user_jop(pmap_t pmap)
{
#if XNU_MONITOR
	pmap_disable_user_jop_ppl(pmap);
#else
	pmap_disable_user_jop_internal(pmap);
#endif
}
#endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
/*
 * Indicates if the pmap layer enforces some additional restrictions on the
 * given set of protections.
 */
bool
pmap_has_prot_policy(__unused pmap_t pmap, __unused bool translated_allow_execute, __unused vm_prot_t prot)
{
	return false;
}
/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *	VERY IMPORTANT: Will not increase permissions.
 *	VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
 */
void
pmap_protect(
	pmap_t pmap,
	vm_map_address_t b,
	vm_map_address_t e,
	vm_prot_t prot)
{
	pmap_protect_options(pmap, b, e, prot, 0, NULL);
}
MARK_AS_PMAP_TEXT static vm_map_address_t
pmap_protect_options_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	vm_prot_t prot,
	unsigned int options,
	__unused void *args)
{
	const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
	tt_entry_t *tte_p;
	pt_entry_t *bpte_p, *epte_p;
	pt_entry_t *pte_p;
	boolean_t set_NX = TRUE;
#if (__ARM_VMSA__ > 7)
	boolean_t set_XO = FALSE;
#endif
	boolean_t should_have_removed = FALSE;
	bool need_strong_sync = false;

	if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
	}

#if DEVELOPMENT || DEBUG
	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
			should_have_removed = TRUE;
		}
	} else
#endif
	{
		/* Determine the new protection. */
		switch (prot) {
#if (__ARM_VMSA__ > 7)
		case VM_PROT_EXECUTE:
			set_XO = TRUE;
			/* FALLTHROUGH */
#endif
		case VM_PROT_READ:
		case VM_PROT_READ | VM_PROT_EXECUTE:
			break;
		case VM_PROT_READ | VM_PROT_WRITE:
		case VM_PROT_ALL:
			return end;         /* nothing to do */
		default:
			should_have_removed = TRUE;
		}
	}

	if (should_have_removed) {
		panic("%s: should have been a remove operation, "
		    "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
		    __func__,
		    pmap, (void *)start, (void *)end, prot, options, args);
	}

#if DEVELOPMENT || DEBUG
	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
	if ((prot & VM_PROT_EXECUTE))
#endif
	{
		set_NX = FALSE;
	} else {
		set_NX = TRUE;
	}

	const uint64_t pmap_page_size = PAGE_RATIO * pt_attr_page_size(pt_attr);
	vm_map_address_t va = start;
	unsigned int npages = 0;

	VALIDATE_PMAP(pmap);
	pmap_lock(pmap);

	tte_p = pmap_tte(pmap, start);

	if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		bpte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte_p = &bpte_p[pte_index(pmap, pt_attr, start)];
		epte_p = bpte_p + ((end - start) >> pt_attr_leaf_shift(pt_attr));

		for (pte_p = bpte_p;
		    pte_p < epte_p;
		    pte_p += PAGE_RATIO, va += pmap_page_size) {
			++npages;
			if (__improbable(!(npages % PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL) &&
			    pmap_pending_preemption())) {
				break;
			}
			pt_entry_t spte;
			pt_entry_t tmplate;
#if DEVELOPMENT || DEBUG
			boolean_t force_write = FALSE;
#endif

			spte = *((volatile pt_entry_t *)pte_p);

			if ((spte == ARM_PTE_TYPE_FAULT) ||
			    ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
				continue;
			}

			pmap_paddr_t pa;
			int pai = 0;
			boolean_t managed = FALSE;

			while (!managed) {
				/*
				 * It may be possible for the pte to transition from managed
				 * to unmanaged in this timeframe; for now, elide the assert.
				 * We should break out as a consequence of checking pa_valid.
				 */
				// assert(!ARM_PTE_IS_COMPRESSED(spte));
				pa = pte_to_pa(spte);
				if (!pa_valid(pa)) {
					break;
				}
				pai = (int)pa_index(pa);
				LOCK_PVH(pai);
				spte = *((volatile pt_entry_t *)pte_p);
				pa = pte_to_pa(spte);
				if (pai == (int)pa_index(pa)) {
					managed = TRUE;
					break; // Leave the PVH locked as we will unlock it after we free the PTE
				}
				UNLOCK_PVH(pai);
			}

			if ((spte == ARM_PTE_TYPE_FAULT) ||
			    ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
				continue;
			}

			if (pmap == kernel_pmap) {
#if DEVELOPMENT || DEBUG
				if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
					force_write = TRUE;
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
				} else
#endif
				{
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
				}
			} else {
#if DEVELOPMENT || DEBUG
				if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
					force_write = TRUE;
					tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pt_attr));
				} else
#endif
				{
					tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
				}
			}

			/*
			 * XXX Removing "NX" would
			 * grant "execute" access
			 * immediately, bypassing any
			 * checks VM might want to do
			 * in its soft fault path.
			 * pmap_protect() and co. are
			 * not allowed to increase
			 * access permissions.
			 */
			if (set_NX) {
				tmplate |= pt_attr_leaf_xn(pt_attr);
			} else {
#if (__ARM_VMSA__ > 7)
				if (pmap == kernel_pmap) {
					/* do NOT clear "PNX"! */
					tmplate |= ARM_PTE_NX;
				} else {
					/* do NOT clear "NX"! */
					tmplate |= pt_attr_leaf_x(pt_attr);
					if (set_XO) {
						tmplate &= ~ARM_PTE_APMASK;
						tmplate |= pt_attr_leaf_rona(pt_attr);
					}
				}
#endif
			}

#if DEVELOPMENT || DEBUG
			if (force_write) {
				/*
				 * TODO: Run CS/Monitor checks here.
				 */
				if (managed) {
					/*
					 * We are marking the page as writable,
					 * so we consider it to be modified and
					 * referenced.
					 */
					pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
					tmplate |= ARM_PTE_AF;

					if (IS_REFFAULT_PAGE(pai)) {
						CLR_REFFAULT_PAGE(pai);
					}

					if (IS_MODFAULT_PAGE(pai)) {
						CLR_MODFAULT_PAGE(pai);
					}
				}
			} else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
				/*
				 * An immediate request for anything other than
				 * write should still mark the page as
				 * referenced if managed.
				 */
				if (managed) {
					pa_set_bits(pa, PP_ATTR_REFERENCED);
					tmplate |= ARM_PTE_AF;

					if (IS_REFFAULT_PAGE(pai)) {
						CLR_REFFAULT_PAGE(pai);
					}
				}
			}
#endif

			/* We do not expect to write fast fault the entry. */
			pte_set_was_writeable(tmplate, false);

			WRITE_PTE_FAST(pte_p, tmplate);

			if (managed) {
				ASSERT_PVH_LOCKED(pai);
				UNLOCK_PVH(pai);
			}
		}
		FLUSH_PTE_RANGE_STRONG(bpte_p, pte_p);
		PMAP_UPDATE_TLBS(pmap, start, va, need_strong_sync);
	} else {
		va = end;
	}

	pmap_unlock(pmap);
	return va;
}
void
pmap_protect_options(
	pmap_t pmap,
	vm_map_address_t b,
	vm_map_address_t e,
	vm_prot_t prot,
	unsigned int options,
	__unused void *args)
{
	vm_map_address_t l, beg;

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	if ((b | e) & pt_attr_leaf_offmask(pt_attr)) {
		panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
		    pmap, (uint64_t)b, (uint64_t)e);
	}

	assert(get_preemption_level() == 0);

#if DEVELOPMENT || DEBUG
	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
			pmap_remove_options(pmap, b, e, options);
			return;
		}
	} else
#endif
	{
		/* Determine the new protection. */
		switch (prot) {
		case VM_PROT_EXECUTE:
		case VM_PROT_READ:
		case VM_PROT_READ | VM_PROT_EXECUTE:
			break;
		case VM_PROT_READ | VM_PROT_WRITE:
		case VM_PROT_ALL:
			return;         /* nothing to do */
		default:
			pmap_remove_options(pmap, b, e, options);
			return;
		}
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
	    VM_KERNEL_ADDRHIDE(e));

	beg = b;

	while (beg < e) {
		l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));

		if (l > e) {
			l = e;
		}

#if XNU_MONITOR
		beg = pmap_protect_options_ppl(pmap, beg, l, prot, options, args);
#else
		beg = pmap_protect_options_internal(pmap, beg, l, prot, options, args);
#endif
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
}
/* Map a (possibly) autogenned block */
kern_return_t
pmap_map_block(
	pmap_t pmap,
	addr64_t va,
	ppnum_t pa,
	uint32_t size,
	vm_prot_t prot,
	int attr,
	__unused unsigned int flags)
{
	kern_return_t kr;
	addr64_t original_va = va;
	uint32_t page;

	for (page = 0; page < size; page++) {
		kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);

		if (kr != KERN_SUCCESS) {
			/*
			 * This will panic for now, as it is unclear that
			 * removing the mappings is correct.
			 */
			panic("%s: failed pmap_enter, "
			    "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
			    __func__,
			    pmap, va, pa, size, prot, flags);

			pmap_remove(pmap, original_va, va - original_va);
			return kr;
		}

		va += PAGE_SIZE;
		pa++;
	}

	return KERN_SUCCESS;
}
kern_return_t
pmap_enter_addr(
	pmap_t pmap,
	vm_map_address_t v,
	pmap_paddr_t pa,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired)
{
	return pmap_enter_options_addr(pmap, v, pa, prot, fault_type, flags, wired, 0, NULL);
}
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map eventually (must make
 *	forward progress eventually).
 */
kern_return_t
pmap_enter(
	pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired)
{
	return pmap_enter_addr(pmap, v, ((pmap_paddr_t)pn) << PAGE_SHIFT, prot, fault_type, flags, wired);
}
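
/*
 * Illustrative sketch (not built): the overall shape of the PTE template that
 * the enter path constructs further below: the physical page address is
 * combined with a valid/type marker, access permissions, execute-never bits,
 * and a memory-attribute index.  All field positions here are hypothetical
 * stand-ins, not the architectural ARM encodings.
 */
#if 0
#include <stdint.h>

#define PTE_VALID      (1ULL << 0)
#define PTE_AP_RO      (1ULL << 7)     /* hypothetical read-only AP encoding */
#define PTE_XN         (1ULL << 54)
#define PTE_ATTRIDX(i) (((uint64_t)(i) & 0x7) << 2)
#define PA_MASK        0x0000FFFFFFFFF000ULL

static uint64_t
make_pte(uint64_t pa, int writable, int executable, unsigned attridx)
{
	uint64_t pte = (pa & PA_MASK) | PTE_VALID | PTE_ATTRIDX(attridx);

	if (!writable) {
		pte |= PTE_AP_RO;
	}
	if (!executable) {
		pte |= PTE_XN;
	}
	return pte;
}
#endif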
static void
pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
{
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
		SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_info(pte_p)->wiredcnt);
		if (pte & ARM_PTE_WIRED) {
			OSAddAtomic16(1, ptd_wiredcnt_ptr);
			pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
		} else {
			OSAddAtomic16(-1, ptd_wiredcnt_ptr);
			pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
		}
	}

	if (*pte_p != ARM_PTE_TYPE_FAULT &&
	    !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
		WRITE_PTE_STRONG(pte_p, pte);
		PMAP_UPDATE_TLBS(pmap, v, v + (pt_attr_page_size(pt_attr) * PAGE_RATIO), false);
	} else {
		WRITE_PTE(pte_p, pte);
		__builtin_arm_isb(ISB_SY);
	}

	PMAP_TRACE(4 + pt_attr_leaf_level(pt_attr), PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap),
	    VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + (pt_attr_page_size(pt_attr) * PAGE_RATIO)), pte);
}
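
/*
 * Illustrative sketch (not built): the update policy pmap_enter_pte() applies
 * above.  Replacing a live, valid entry requires a strong write followed by
 * TLB invalidation of the covered VA, whereas populating an empty slot only
 * needs the write plus a barrier.  The helper names below are hypothetical
 * stand-ins for the real WRITE_PTE*/TLB primitives.
 */
#if 0
#include <stdint.h>

extern void write_pte_with_barrier(uint64_t *slot, uint64_t pte);   /* hypothetical */
extern void write_pte(uint64_t *slot, uint64_t pte);                /* hypothetical */
extern void tlb_invalidate_va(uint64_t va, uint64_t len);           /* hypothetical */

#define PTE_VALID (1ULL << 0)

static void
install_pte(uint64_t *slot, uint64_t new_pte, uint64_t va, uint64_t page_size)
{
	if (*slot & PTE_VALID) {
		/* The old translation may be cached: write, then invalidate. */
		write_pte_with_barrier(slot, new_pte);
		tlb_invalidate_va(va, page_size);
	} else {
		/* Nothing could have been cached for an invalid entry. */
		write_pte(slot, new_pte);
	}
}
#endif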
MARK_AS_PMAP_TEXT static pt_entry_t
wimg_to_pte(unsigned int wimg)
{
	pt_entry_t pte;

	switch (wimg & (VM_WIMG_MASK)) {
	case VM_WIMG_IO:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_POSTED:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_POSTED_REORDERED:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_POSTED_COMBINED_REORDERED:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_WCOMB:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_WTHRU:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
		break;
	case VM_WIMG_COPYBACK:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
		break;
	case VM_WIMG_INNERWBACK:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_INNER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
		break;
	default:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
	}

	return pte;
}
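
/*
 * Illustrative sketch (not built): the same WIMG-to-memory-attribute policy
 * expressed as a lookup table rather than a switch.  The wimg_kind_t values
 * and attribute indices are hypothetical simplifications of the VM_WIMG_* and
 * CACHE_ATTRINDX_* constants used above.
 */
#if 0
#include <stdint.h>

typedef enum {
	WIMG_DEFAULT = 0,    /* cacheable, write-back */
	WIMG_IO,             /* device, strongly ordered */
	WIMG_WCOMB,          /* normal, non-cacheable (write-combining) */
	WIMG_WTHRU,          /* cacheable, write-through */
	WIMG_KIND_COUNT,
} wimg_kind_t;

static const struct {
	uint8_t attr_index;      /* index into a MAIR-style attribute table */
	uint8_t never_execute;   /* device-like memory is always non-executable */
} wimg_policy[WIMG_KIND_COUNT] = {
	[WIMG_DEFAULT] = { .attr_index = 0, .never_execute = 0 },
	[WIMG_IO]      = { .attr_index = 1, .never_execute = 1 },
	[WIMG_WCOMB]   = { .attr_index = 2, .never_execute = 1 },
	[WIMG_WTHRU]   = { .attr_index = 3, .never_execute = 0 },
};
#endif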
static pv_alloc_return_t
pmap_enter_pv(
	pmap_t pmap,
	pt_entry_t *pte_p,
	int pai,
	unsigned int options,
	pv_entry_t **pve_p,
	boolean_t *is_altacct)
{
	pv_entry_t **pv_h;
	pv_h = pai_to_pvh(pai);
	boolean_t first_cpu_mapping;

	ASSERT_NOT_HIBERNATING();
	ASSERT_PVH_LOCKED(pai);

	vm_offset_t pvh_flags = pvh_get_flags(pv_h);

	if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
		panic("%d is locked down (%#lx), cannot enter", pai, pvh_flags);
	}

#ifdef PVH_FLAG_CPU
	/* An IOMMU mapping may already be present for a page that hasn't yet
	 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
	 * if this is the first CPU mapping.  We base internal/reusable
	 * accounting on the options specified for the first CPU mapping.
	 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
	 * as there are *any* mappings of the page.  The accounting for a
	 * page should not need to change until the page is recycled by the
	 * VM layer, and we assert that there are no mappings when a page
	 * is recycled.  An IOMMU mapping of a freed/recycled page is
	 * considered a security violation & potential DMA corruption path. */
	first_cpu_mapping = ((pmap != NULL) && !(pvh_flags & PVH_FLAG_CPU));
	if (first_cpu_mapping) {
		pvh_flags |= PVH_FLAG_CPU;
	}
#else
	first_cpu_mapping = pvh_test_type(pv_h, PVH_TYPE_NULL);
#endif

	if (first_cpu_mapping) {
		if (options & PMAP_OPTIONS_INTERNAL) {
			SET_INTERNAL_PAGE(pai);
		} else {
			CLR_INTERNAL_PAGE(pai);
		}
		if ((options & PMAP_OPTIONS_INTERNAL) &&
		    (options & PMAP_OPTIONS_REUSABLE)) {
			SET_REUSABLE_PAGE(pai);
		} else {
			CLR_REUSABLE_PAGE(pai);
		}
	}

	if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
		pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
		if (pmap != NULL && pmap != kernel_pmap &&
		    ((options & PMAP_OPTIONS_ALT_ACCT) ||
		    PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
		    IS_INTERNAL_PAGE(pai)) {
			/*
			 * Make a note to ourselves that this mapping is using alternative
			 * accounting.  We'll need this in order to know which ledger to
			 * debit when the mapping is removed.
			 *
			 * The altacct bit must be set while the pv head is locked.  Defer
			 * the ledger accounting until after we've dropped the lock.
			 */
			SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
			*is_altacct = TRUE;
		} else {
			CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
		}
	} else {
		pv_alloc_return_t ret;
		if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
			pt_entry_t *pte1_p;

			/*
			 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
			 */
			pte1_p = pvh_ptep(pv_h);
			pvh_set_flags(pv_h, pvh_flags);
			if ((*pve_p == PV_ENTRY_NULL) && ((ret = pv_alloc(pmap, pai, pve_p)) != PV_ALLOC_SUCCESS)) {
				return ret;
			}

			pve_set_ptep(*pve_p, pte1_p);
			(*pve_p)->pve_next = PV_ENTRY_NULL;

			if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
				/*
				 * transfer "altacct" from
				 * pp_attr to this pve
				 */
				CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
				SET_ALTACCT_PAGE(pai, *pve_p);
			}
			pvh_update_head(pv_h, *pve_p, PVH_TYPE_PVEP);
			*pve_p = PV_ENTRY_NULL;
		} else if (!pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
			panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
			    __func__, *pv_h, pte_p, pmap, pv_h);
		}
		/*
		 * Set up pv_entry for this new mapping and then
		 * add it to the list for this physical page.
		 */
		pvh_set_flags(pv_h, pvh_flags);
		if ((*pve_p == PV_ENTRY_NULL) && ((ret = pv_alloc(pmap, pai, pve_p)) != PV_ALLOC_SUCCESS)) {
			return ret;
		}

		pve_set_ptep(*pve_p, pte_p);
		(*pve_p)->pve_next = PV_ENTRY_NULL;

		pvh_add(pv_h, *pve_p);

		if (pmap != NULL && pmap != kernel_pmap &&
		    ((options & PMAP_OPTIONS_ALT_ACCT) ||
		    PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
		    IS_INTERNAL_PAGE(pai)) {
			/*
			 * Make a note to ourselves that this
			 * mapping is using alternative
			 * accounting.  We'll need this in order
			 * to know which ledger to debit when
			 * the mapping is removed.
			 *
			 * The altacct bit must be set while
			 * the pv head is locked.  Defer the
			 * ledger accounting until after we've
			 * dropped the lock.
			 */
			SET_ALTACCT_PAGE(pai, *pve_p);
			*is_altacct = TRUE;
		}

		*pve_p = PV_ENTRY_NULL;
	}

	pvh_set_flags(pv_h, pvh_flags);

	return PV_ALLOC_SUCCESS;
}
8435 MARK_AS_PMAP_TEXT
static kern_return_t
8436 pmap_enter_options_internal(
8441 vm_prot_t fault_type
,
8444 unsigned int options
)
8446 ppnum_t pn
= (ppnum_t
)atop(pa
);
8452 boolean_t set_XO
= FALSE
;
8453 boolean_t refcnt_updated
;
8454 boolean_t wiredcnt_updated
;
8455 unsigned int wimg_bits
;
8456 boolean_t was_compressed
, was_alt_compressed
;
8457 kern_return_t kr
= KERN_SUCCESS
;
8459 VALIDATE_PMAP(pmap
);
8462 if (__improbable((options
& PMAP_OPTIONS_NOWAIT
) == 0)) {
8463 panic("pmap_enter_options() called without PMAP_OPTIONS_NOWAIT set");
8467 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
8469 if ((v
) & pt_attr_leaf_offmask(pt_attr
)) {
8470 panic("pmap_enter_options() pmap %p v 0x%llx\n",
8474 if ((pa
) & pt_attr_leaf_offmask(pt_attr
)) {
8475 panic("pmap_enter_options() pmap %p pa 0x%llx\n",
8476 pmap
, (uint64_t)pa
);
8479 if ((prot
& VM_PROT_EXECUTE
) && (pmap
== kernel_pmap
)) {
8480 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
8481 extern vm_offset_t ctrr_test_page
;
8482 if (__probable(v
!= ctrr_test_page
))
8484 panic("pmap_enter_options(): attempt to add executable mapping to kernel_pmap");
8487 #if DEVELOPMENT || DEBUG
8488 if ((prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !pmap
->nx_enabled
)
8490 if ((prot
& VM_PROT_EXECUTE
))
8498 #if (__ARM_VMSA__ > 7)
8499 if (prot
== VM_PROT_EXECUTE
) {
8504 assert(pn
!= vm_page_fictitious_addr
);
8506 refcnt_updated
= FALSE
;
8507 wiredcnt_updated
= FALSE
;
8508 pve_p
= PV_ENTRY_NULL
;
8509 was_compressed
= FALSE
;
8510 was_alt_compressed
= FALSE
;
8515 * Expand pmap to include this pte. Assume that
8516 * pmap is always expanded to include enough hardware
8517 * pages to map one VM page.
8519 while ((pte_p
= pmap_pte(pmap
, v
)) == PT_ENTRY_NULL
) {
8520 /* Must unlock to expand the pmap. */
8523 kr
= pmap_expand(pmap
, v
, options
, pt_attr_leaf_level(pt_attr
));
8525 if (kr
!= KERN_SUCCESS
) {
8532 if (options
& PMAP_OPTIONS_NOENTER
) {
8534 return KERN_SUCCESS
;
8541 if (ARM_PTE_IS_COMPRESSED(spte
, pte_p
) && !refcnt_updated
) {
8543 * "pmap" should be locked at this point, so this should
8544 * not race with another pmap_enter() or pmap_remove_range().
8546 assert(pmap
!= kernel_pmap
);
8548 /* one less "compressed" */
8549 OSAddAtomic64(-1, &pmap
->stats
.compressed
);
8550 pmap_ledger_debit(pmap
, task_ledgers
.internal_compressed
,
8551 pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
8553 was_compressed
= TRUE
;
8554 if (spte
& ARM_PTE_COMPRESSED_ALT
) {
8555 was_alt_compressed
= TRUE
;
8556 pmap_ledger_debit(pmap
, task_ledgers
.alternate_accounting_compressed
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
8558 /* was part of the footprint */
8559 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
8562 /* clear "compressed" marker */
8563 /* XXX is it necessary since we're about to overwrite it ? */
8564 WRITE_PTE_FAST(pte_p
, ARM_PTE_TYPE_FAULT
);
8565 spte
= ARM_PTE_TYPE_FAULT
;
8568 * We're replacing a "compressed" marker with a valid PTE,
8569 * so no change for "refcnt".
8571 refcnt_updated
= TRUE
;
8574 if ((spte
!= ARM_PTE_TYPE_FAULT
) && (pte_to_pa(spte
) != pa
)) {
8575 pmap_remove_range(pmap
, v
, pte_p
, pte_p
+ PAGE_RATIO
);
8578 pte
= pa_to_pte(pa
) | ARM_PTE_TYPE
;
8581 pte
|= ARM_PTE_WIRED
;
8585 pte
|= pt_attr_leaf_xn(pt_attr
);
8587 #if (__ARM_VMSA__ > 7)
8588 if (pmap
== kernel_pmap
) {
8591 pte
|= pt_attr_leaf_x(pt_attr
);
8596 if (pmap
== kernel_pmap
) {
8597 #if __ARM_KERNEL_PROTECT__
8599 #endif /* __ARM_KERNEL_PROTECT__ */
8600 if (prot
& VM_PROT_WRITE
) {
8601 pte
|= ARM_PTE_AP(AP_RWNA
);
8602 pa_set_bits(pa
, PP_ATTR_MODIFIED
| PP_ATTR_REFERENCED
);
8604 pte
|= ARM_PTE_AP(AP_RONA
);
8605 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
8607 #if (__ARM_VMSA__ == 7)
8608 if ((_COMM_PAGE_BASE_ADDRESS
<= v
) && (v
< _COMM_PAGE_BASE_ADDRESS
+ _COMM_PAGE_AREA_LENGTH
)) {
8609 pte
= (pte
& ~(ARM_PTE_APMASK
)) | ARM_PTE_AP(AP_RORO
);
8613 if (!pmap
->nested
) {
8615 } else if ((pmap
->nested_region_asid_bitmap
)
8616 && (v
>= pmap
->nested_region_addr
)
8617 && (v
< (pmap
->nested_region_addr
+ pmap
->nested_region_size
))) {
8618 unsigned int index
= (unsigned int)((v
- pmap
->nested_region_addr
) >> pt_attr_twig_shift(pt_attr
));
8620 if ((pmap
->nested_region_asid_bitmap
)
8621 && testbit(index
, (int *)pmap
->nested_region_asid_bitmap
)) {
8626 if (pmap
->nested_pmap
!= NULL
) {
8627 vm_map_address_t nest_vaddr
;
8628 pt_entry_t
*nest_pte_p
;
8632 if ((nest_vaddr
>= pmap
->nested_region_addr
)
8633 && (nest_vaddr
< (pmap
->nested_region_addr
+ pmap
->nested_region_size
))
8634 && ((nest_pte_p
= pmap_pte(pmap
->nested_pmap
, nest_vaddr
)) != PT_ENTRY_NULL
)
8635 && (*nest_pte_p
!= ARM_PTE_TYPE_FAULT
)
8636 && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p
, nest_pte_p
))
8637 && (((*nest_pte_p
) & ARM_PTE_NG
) != ARM_PTE_NG
)) {
8638 unsigned int index
= (unsigned int)((v
- pmap
->nested_region_addr
) >> pt_attr_twig_shift(pt_attr
));
8640 if ((pmap
->nested_pmap
->nested_region_asid_bitmap
)
8641 && !testbit(index
, (int *)pmap
->nested_pmap
->nested_region_asid_bitmap
)) {
8642 panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
8643 nest_pte_p
, pmap
, (uint64_t)v
, (uint64_t)*nest_pte_p
);
8648 if (prot
& VM_PROT_WRITE
) {
8649 if (pa_valid(pa
) && (!pa_test_bits(pa
, PP_ATTR_MODIFIED
))) {
8650 assert(!pmap
->nested
); /* no write access in a nested pmap */
8651 if (fault_type
& VM_PROT_WRITE
) {
8653 pte
|= pt_attr_leaf_rwna(pt_attr
);
8655 pte
|= pt_attr_leaf_rw(pt_attr
);
8657 pa_set_bits(pa
, PP_ATTR_REFERENCED
| PP_ATTR_MODIFIED
);
8660 pte
|= pt_attr_leaf_rona(pt_attr
);
8662 pte
|= pt_attr_leaf_ro(pt_attr
);
8665 * Mark the page as MODFAULT so that a subsequent write
8666 * may be handled through arm_fast_fault().
8668 pa_set_bits(pa
, PP_ATTR_REFERENCED
| PP_ATTR_MODFAULT
);
8669 pte_set_was_writeable(pte
, true);
8673 pte
|= pt_attr_leaf_rwna(pt_attr
);
8675 pte
|= pt_attr_leaf_rw(pt_attr
);
8677 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
8681 pte
|= pt_attr_leaf_rona(pt_attr
);
8683 pte
|= pt_attr_leaf_ro(pt_attr
);;
8685 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
	volatile uint16_t *refcnt = NULL;
	volatile uint16_t *wiredcnt = NULL;
	if (pmap != kernel_pmap) {
		ptd_info_t *ptd_info = ptep_get_info(pte_p);
		refcnt = &ptd_info->refcnt;
		wiredcnt = &ptd_info->wiredcnt;
		/* Bump the wired count to keep the PTE page from being reclaimed.  We need this because
		 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
		 * a new PV entry. */
		if (!wiredcnt_updated) {
			OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
			wiredcnt_updated = TRUE;
		}
		if (!refcnt_updated) {
			OSAddAtomic16(1, (volatile int16_t*)refcnt);
			refcnt_updated = TRUE;
		}
	}
	boolean_t is_altacct, is_internal;

	is_internal = FALSE;

	pai = (int)pa_index(pa);

	if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
		wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
	} else {
		wimg_bits = pmap_cache_attributes(pn);
	}

	/* We may be retrying this operation after dropping the PVH lock.
	 * Cache attributes for the physical page may have changed while the lock
	 * was dropped, so clear any cache attributes we may have previously set
	 * in the PTE template. */
	pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
	pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
	/* The regular old kernel is not allowed to remap PPL pages. */
	if (__improbable(pa_test_monitor(pa))) {
		panic("%s: page belongs to PPL, "
		    "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
		    __FUNCTION__,
		    pmap, v, (void*)pa, prot, fault_type, flags, wired, options);
	}

	if (__improbable(pvh_get_flags(pai_to_pvh(pai)) & PVH_FLAG_LOCKDOWN)) {
		panic("%s: page locked down, "
		    "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
		    __FUNCTION__,
		    pmap, v, (void *)pa, prot, fault_type, flags, wired, options);
	}
	if (pte == *pte_p) {
		/*
		 * This pmap_enter operation has been completed by another thread;
		 * undo refcnt on pt and return.
		 */
		goto Pmap_enter_cleanup;
	} else if (pte_to_pa(*pte_p) == pa) {
		pmap_enter_pte(pmap, pte_p, pte, v);
		goto Pmap_enter_cleanup;
	} else if (*pte_p != ARM_PTE_TYPE_FAULT) {
		/*
		 * The pte has been modified by another thread;
		 * hold refcnt on pt and retry the pmap_enter operation.
		 */
		goto Pmap_enter_retry;
	}

	pv_alloc_return_t pv_status = pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct);
	if (pv_status == PV_ALLOC_RETRY) {
		goto Pmap_enter_loop;
	} else if (pv_status == PV_ALLOC_FAIL) {
		kr = KERN_RESOURCE_SHORTAGE;
		goto Pmap_enter_cleanup;
	}

	pmap_enter_pte(pmap, pte_p, pte, v);
	if (pmap != kernel_pmap) {
		if (IS_REUSABLE_PAGE(pai) &&
		    !is_altacct) {
			assert(IS_INTERNAL_PAGE(pai));
			OSAddAtomic(+1, &pmap->stats.reusable);
			PMAP_STATS_PEAK(pmap->stats.reusable);
		} else if (IS_INTERNAL_PAGE(pai)) {
			OSAddAtomic(+1, &pmap->stats.internal);
			PMAP_STATS_PEAK(pmap->stats.internal);
			is_internal = TRUE;
		} else {
			OSAddAtomic(+1, &pmap->stats.external);
			PMAP_STATS_PEAK(pmap->stats.external);
		}
	}
	if (pmap != kernel_pmap) {
		pmap_ledger_credit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);

		if (is_internal) {
			/*
			 * Make corresponding adjustments to
			 * phys_footprint statistics.
			 */
			pmap_ledger_credit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			if (is_altacct) {
				/*
				 * If this page is internal and
				 * in an IOKit region, credit
				 * the task's total count of
				 * dirty, internal IOKit pages.
				 * It should *not* count towards
				 * the task's total physical
				 * memory footprint, because
				 * this entire region was
				 * already billed to the task
				 * at the time the mapping was
				 * created.
				 *
				 * Put another way, this is
				 * internal++ and
				 * alternate_accounting++, so
				 * net effect on phys_footprint
				 * is 0. That means: don't
				 * touch phys_footprint here.
				 */
				pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			} else {
				pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			}
		}
	}

	OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
	if (pmap->stats.resident_count > pmap->stats.resident_max) {
		pmap->stats.resident_max = pmap->stats.resident_count;
	}
	if (prot & VM_PROT_EXECUTE) {
		kr = KERN_FAILURE;
		goto Pmap_enter_cleanup;
	}

	wimg_bits = pmap_cache_attributes(pn);
	if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
		wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
	}

	pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);

#if XNU_MONITOR
	if ((wimg_bits & PP_ATTR_MONITOR) && !pmap_ppl_disable) {
		uint64_t xprr_perm = pte_to_xprr_perm(pte);
		switch (xprr_perm) {
		case XPRR_KERN_RO_PERM:
			break;
		case XPRR_KERN_RW_PERM:
			pte &= ~ARM_PTE_XPRR_MASK;
			pte |= xprr_perm_to_pte(XPRR_PPL_RW_PERM);
			break;
		default:
			panic("Unsupported xPRR perm %llu for pte 0x%llx", xprr_perm, (uint64_t)pte);
		}
	}
#endif /* XNU_MONITOR */
	pmap_enter_pte(pmap, pte_p, pte, v);

	goto Pmap_enter_return;
Pmap_enter_cleanup:

	if (refcnt != NULL) {
		assert(refcnt_updated);
		if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
			panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
		}
	}

Pmap_enter_return:

#if CONFIG_PGTRACE
	if (pgtrace_enabled) {
		// Clone and invalidate original mapping if eligible
		pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES, 0, 0);
	}
#endif /* CONFIG_PGTRACE */

	if (pve_p != PV_ENTRY_NULL) {
		pv_free_entry(pve_p);
	}

	if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
		panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
	}
kern_return_t
pmap_enter_options_addr(
	pmap_t pmap,
	vm_map_address_t v,
	pmap_paddr_t pa,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options,
	__unused void *arg)
{
	kern_return_t kr = KERN_FAILURE;

	PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pa, prot);

#if XNU_MONITOR
	/*
	 * If NOWAIT was not requested, loop until the enter does not
	 * fail due to lack of resources.
	 */
	while ((kr = pmap_enter_options_ppl(pmap, v, pa, prot, fault_type, flags, wired, options | PMAP_OPTIONS_NOWAIT)) == KERN_RESOURCE_SHORTAGE) {
		pmap_alloc_page_for_ppl((options & PMAP_OPTIONS_NOWAIT) ? PMAP_PAGES_ALLOCATE_NOWAIT : 0);
		if (options & PMAP_OPTIONS_NOWAIT) {
			break;
		}
	}

	pmap_ledger_check_balance(pmap);
#else
	kr = pmap_enter_options_internal(pmap, v, pa, prot, fault_type, flags, wired, options);
#endif

	PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);

	return kr;
}

kern_return_t
pmap_enter_options(
	pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options,
	void *arg)
{
	return pmap_enter_options_addr(pmap, v, ((pmap_paddr_t)pn) << PAGE_SHIFT, prot, fault_type, flags, wired, options, arg);
}
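/*
 * Illustrative sketch (hypothetical caller, not compiled): how a client of
 * pmap_enter_options_addr() above might establish a single wired, writable,
 * non-executable mapping.  The helper name `example_enter_wired_page' and its
 * argument choices are assumptions made purely for the example.
 */
#if 0
static kern_return_t
example_enter_wired_page(pmap_t pmap, vm_map_address_t va, pmap_paddr_t pa)
{
	/* prot, fault_type, flags (WIMG), wired, options, arg */
	return pmap_enter_options_addr(pmap, va, pa,
	    VM_PROT_READ | VM_PROT_WRITE,
	    VM_PROT_NONE,
	    0,
	    TRUE,
	    0,
	    NULL);
}
#endif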
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *			The mapping must already exist in the pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_change_wiring_internal(
	pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired)
{
	pt_entry_t *pte_p;
	pmap_paddr_t pa;

	VALIDATE_PMAP(pmap);

	pmap_lock(pmap);

	const pt_attr_t * pt_attr = pmap_get_pt_attr(pmap);

	pte_p = pmap_pte(pmap, v);
	if (pte_p == PT_ENTRY_NULL) {
		if (!wired) {
			/*
			 * The PTE may have already been cleared by a disconnect/remove operation, and the L3 table
			 * may have been freed by a remove operation.
			 */
			goto pmap_change_wiring_return;
		} else {
			panic("%s: Attempt to wire nonexistent PTE for pmap %p", __func__, pmap);
		}
	}
	/*
	 * Use volatile loads to prevent the compiler from collapsing references to 'pa' back to loads of pte_p
	 * until we've grabbed the final PVH lock; PTE contents may change during this time.
	 */
	pa = pte_to_pa(*((volatile pt_entry_t*)pte_p));

	while (pa_valid(pa)) {
		pmap_paddr_t new_pa;

		LOCK_PVH((int)pa_index(pa));
		new_pa = pte_to_pa(*((volatile pt_entry_t*)pte_p));

		if (pa == new_pa) {
			break;
		}

		UNLOCK_PVH((int)pa_index(pa));
		pa = new_pa;
	}

	/* PTE checks must be performed after acquiring the PVH lock (if applicable for the PA) */
	if ((*pte_p == ARM_PTE_EMPTY) || (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p))) {
		if (!wired) {
			/* PTE cleared by prior remove/disconnect operation */
			goto pmap_change_wiring_cleanup;
		} else {
			panic("%s: Attempt to wire empty/compressed PTE %p (=0x%llx) for pmap %p",
			    __func__, pte_p, (uint64_t)*pte_p, pmap);
		}
	}

	assertf((*pte_p & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", pte_p, (uint64_t)*pte_p);
	if (wired != pte_is_wired(*pte_p)) {
		pte_set_wired(pmap, pte_p, wired);
		if (pmap != kernel_pmap) {
			if (wired) {
				OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
				pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			} else if (!wired) {
				__assert_only int32_t orig_wired = OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
				PMAP_STATS_ASSERTF(orig_wired > 0, pmap, "stats.wired_count %d", orig_wired);
				pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
			}
		}
	}

pmap_change_wiring_cleanup:
	if (pa_valid(pa)) {
		UNLOCK_PVH((int)pa_index(pa));
	}

pmap_change_wiring_return:
	pmap_unlock(pmap);
}

void
pmap_change_wiring(
	pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired)
{
#if XNU_MONITOR
	pmap_change_wiring_ppl(pmap, v, wired);

	pmap_ledger_check_balance(pmap);
#else
	pmap_change_wiring_internal(pmap, v, wired);
#endif
}
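/*
 * Illustrative sketch (hypothetical caller, not compiled): wire/unwire calls
 * through pmap_change_wiring() above are expected to be balanced so that the
 * wired_count statistic and the wired_mem ledger return to their prior values.
 */
#if 0
static void
example_wire_then_unwire(pmap_t pmap, vm_map_address_t va)
{
	pmap_change_wiring(pmap, va, TRUE);     /* bump wired_count, credit wired_mem */
	/* ... period during which the mapping is accounted as wired ... */
	pmap_change_wiring(pmap, va, FALSE);    /* drop wired_count, debit wired_mem */
}
#endif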
MARK_AS_PMAP_TEXT static pmap_paddr_t
pmap_find_pa_internal(
	pmap_t pmap,
	addr64_t va)
{
	pmap_paddr_t pa = 0;

	VALIDATE_PMAP(pmap);

	if (pmap != kernel_pmap) {
		pmap_lock_ro(pmap);
	}

	pa = pmap_vtophys(pmap, va);

	if (pmap != kernel_pmap) {
		pmap_unlock_ro(pmap);
	}

	return pa;
}

pmap_paddr_t
pmap_find_pa_nofault(pmap_t pmap, addr64_t va)
{
	pmap_paddr_t pa = 0;

	if (pmap == kernel_pmap) {
		pa = mmu_kvtop(va);
	} else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
		/*
		 * Note that this doesn't account for PAN: mmu_uvtop() may return a valid
		 * translation even if PAN would prevent kernel access through the translation.
		 * It's therefore assumed the UVA will be accessed in a PAN-disabled context.
		 */
		pa = mmu_uvtop(va);
	}
	return pa;
}

pmap_paddr_t
pmap_find_pa(
	pmap_t pmap,
	addr64_t va)
{
	pmap_paddr_t pa = pmap_find_pa_nofault(pmap, va);

	if (pa != 0) {
		return pa;
	}

	if (not_in_kdp) {
#if XNU_MONITOR
		return pmap_find_pa_ppl(pmap, va);
#else
		return pmap_find_pa_internal(pmap, va);
#endif
	} else {
		return pmap_vtophys(pmap, va);
	}
}

ppnum_t
pmap_find_phys_nofault(
	pmap_t pmap,
	addr64_t va)
{
	ppnum_t ppn;
	ppn = atop(pmap_find_pa_nofault(pmap, va));
	return ppn;
}

ppnum_t
pmap_find_phys(
	pmap_t pmap,
	addr64_t va)
{
	ppnum_t ppn;
	ppn = atop(pmap_find_pa(pmap, va));
	return ppn;
}

pmap_paddr_t
kvtophys(
	vm_offset_t va)
{
	pmap_paddr_t pa;

	pa = mmu_kvtop(va);
	if (pa) {
		return pa;
	}
	return pmap_vtophys(kernel_pmap, va);
}
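/*
 * Illustrative sketch (hypothetical helper, not compiled): translating a
 * kernel virtual address to a physical page number with the lookup routines
 * above.  As noted in pmap_find_pa_nofault(), the no-fault variant does not
 * account for PAN, so it is only meaningful where the address is accessible
 * in the current context.
 */
#if 0
static ppnum_t
example_kva_to_ppnum(vm_offset_t kva)
{
	/* Returns 0 if no valid translation currently exists. */
	return pmap_find_phys_nofault(kernel_pmap, (addr64_t)kva);
}
#endif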
9167 if ((va
< pmap
->min
) || (va
>= pmap
->max
)) {
9171 #if (__ARM_VMSA__ == 7)
9172 tt_entry_t
*tte_p
, tte
;
9176 tte_p
= pmap_tte(pmap
, va
);
9177 if (tte_p
== (tt_entry_t
*) NULL
) {
9178 return (pmap_paddr_t
) 0;
9182 if ((tte
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
9183 pte_p
= (pt_entry_t
*) ttetokv(tte
) + pte_index(pmap
, pt_attr
, va
);
9184 pa
= pte_to_pa(*pte_p
) | (va
& ARM_PGMASK
);
9185 //LIONEL ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
9186 #if DEVELOPMENT || DEBUG
9187 if (atop(pa
) != 0 &&
9188 ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
)) {
9189 panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
9190 pmap
, va
, pte_p
, (uint64_t) (*pte_p
), atop(pa
));
9192 #endif /* DEVELOPMENT || DEBUG */
9193 } else if ((tte
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_BLOCK
) {
9194 if ((tte
& ARM_TTE_BLOCK_SUPER
) == ARM_TTE_BLOCK_SUPER
) {
9195 pa
= suptte_to_pa(tte
) | (va
& ARM_TT_L1_SUPER_OFFMASK
);
9197 pa
= sectte_to_pa(tte
) | (va
& ARM_TT_L1_BLOCK_OFFMASK
);
9203 tt_entry_t
* ttp
= NULL
;
9204 tt_entry_t
* ttep
= NULL
;
9205 tt_entry_t tte
= ARM_TTE_EMPTY
;
9206 pmap_paddr_t pa
= 0;
9207 unsigned int cur_level
;
9209 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
9213 for (cur_level
= pt_attr_root_level(pt_attr
); cur_level
<= pt_attr_leaf_level(pt_attr
); cur_level
++) {
9214 ttep
= &ttp
[ttn_index(pmap
, pt_attr
, va
, cur_level
)];
9218 const uint64_t valid_mask
= pt_attr
->pta_level_info
[cur_level
].valid_mask
;
9219 const uint64_t type_mask
= pt_attr
->pta_level_info
[cur_level
].type_mask
;
9220 const uint64_t type_block
= pt_attr
->pta_level_info
[cur_level
].type_block
;
9221 const uint64_t offmask
= pt_attr
->pta_level_info
[cur_level
].offmask
;
9223 if ((tte
& valid_mask
) != valid_mask
) {
9224 return (pmap_paddr_t
) 0;
9227 /* This detects both leaf entries and intermediate block mappings. */
9228 if ((tte
& type_mask
) == type_block
) {
9229 pa
= ((tte
& ARM_TTE_PA_MASK
& ~offmask
) | (va
& offmask
));
9233 ttp
= (tt_entry_t
*)phystokv(tte
& ARM_TTE_TABLE_MASK
);
/*
 *	pmap_init_pte_page - Initialize a page table page.
 */
void
pmap_init_pte_page(
	pmap_t pmap,
	pt_entry_t *pte_p,
	vm_offset_t va,
	unsigned int ttlevel,
	boolean_t alloc_ptd)
{
	pt_desc_t *ptdp = NULL;
	vm_offset_t *pvh;

	pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));

	if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
		if (alloc_ptd) {
			/*
			 * This path should only be invoked from arm_vm_init.  If we are emulating 16KB pages
			 * on 4KB hardware, we may already have allocated a page table descriptor for a
			 * bootstrap request, so we check for an existing PTD here.
			 */
			ptdp = ptd_alloc(pmap);
			if (ptdp == NULL) {
				panic("%s: unable to allocate PTD", __func__);
			}
			pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
		} else {
			panic("pmap_init_pte_page(): pte_p %p", pte_p);
		}
	} else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
		ptdp = (pt_desc_t *)(pvh_list(pvh));
	} else {
		panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
	}

	// below barrier ensures previous updates to the page are visible to PTW before
	// it is linked to the PTE of previous level
	__builtin_arm_dmb(DMB_ISHST);
	ptd_init(ptdp, pmap, va, ttlevel, pte_p);
}
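/*
 * Illustrative sketch (not compiled) of the publication ordering used above:
 * stores that initialize a new page-table page must be made visible to the
 * hardware table walker before the page is linked into the higher-level
 * table.  The names `parent_ttep' and `new_tte' below are placeholders.
 */
#if 0
static void
example_publish_table(tt_entry_t *parent_ttep, tt_entry_t new_tte)
{
	/* 1. initialize the new table page (zero it, set up descriptors, ...) */
	/* 2. make those stores visible before the link becomes reachable */
	__builtin_arm_dmb(DMB_ISHST);
	/* 3. only then link the page into its parent table entry */
	*parent_ttep = new_tte;
}
#endif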
/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new memory for the default (COARSE) translation table
 *	entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
 *	also allocates space for the corresponding pv entries.
 *
 *	Nothing should be locked.
 */
static kern_return_t
pmap_expand(
	pmap_t pmap,
	vm_map_address_t v,
	unsigned int options,
	unsigned int level)
{
	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

#if (__ARM_VMSA__ == 7)
	vm_offset_t     pa;
	tt_entry_t      *tte_p;
	tt_entry_t      *tt_p;
	unsigned int    i;

#if DEVELOPMENT || DEBUG
	/*
	 * We no longer support root level expansion; panic in case something
	 * still attempts to trigger it.
	 */
	i = tte_index(pmap, pt_attr, v);

	if (i >= pmap->tte_index_max) {
		panic("%s: index out of range, index=%u, max=%u, "
		    "pmap=%p, addr=%p, options=%u, level=%u",
		    __func__, i, pmap->tte_index_max,
		    pmap, (void *)v, options, level);
	}
#endif /* DEVELOPMENT || DEBUG */

	if (level == 1) {
		return KERN_SUCCESS;
	}

	{
		tt_entry_t     *tte_next_p;

		pmap_lock(pmap);
		pa = 0;
		if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
			pmap_unlock(pmap);
			return KERN_SUCCESS;
		}
		tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
		for (i = 0, tte_next_p = tte_p; i < 4; i++) {
			if (tte_to_pa(*tte_next_p)) {
				pa = tte_to_pa(*tte_next_p);
				break;
			}
			tte_next_p++;
		}
		pa = pa & ~PAGE_MASK;
		if (pa) {
			tte_p = &pmap->tte[ttenum(v)];
			*tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
			PMAP_TRACE(5, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
			    VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
			pmap_unlock(pmap);
			return KERN_SUCCESS;
		}
		pmap_unlock(pmap);
	}
	v = v & ~ARM_TT_L1_PT_OFFMASK;

	while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
		/*
		 *	Allocate a VM page for the level 2 page table entries.
		 */
		while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
			if (options & PMAP_OPTIONS_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}
			VM_PAGE_WAIT();
		}

		pmap_lock(pmap);
		/*
		 *	See if someone else expanded us first
		 */
		if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
			tt_entry_t     *tte_next_p;

			pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
			pa = kvtophys((vm_offset_t)tt_p);
			tte_p = &pmap->tte[ttenum(v)];
			for (i = 0, tte_next_p = tte_p; i < 4; i++) {
				*tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
				PMAP_TRACE(5, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
				    VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
				tte_next_p++;
				pa = pa + 0x400;
			}
			FLUSH_PTE_RANGE(tte_p, tte_p + 4);

			pa = 0x0ULL;
			tt_p = (tt_entry_t *)NULL;
		}
		pmap_unlock(pmap);
		if (tt_p != (tt_entry_t *)NULL) {
			pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
			tt_p = (tt_entry_t *)NULL;
		}
	}

	return KERN_SUCCESS;
#else
	pmap_paddr_t    pa;
	unsigned int    ttlevel = pt_attr_root_level(pt_attr);
	tt_entry_t      *tte_p;
	tt_entry_t      *tt_p;

	pa = 0x0ULL;
	tt_p = (tt_entry_t *)NULL;

	for (; ttlevel < level; ttlevel++) {
		pmap_lock_ro(pmap);

		if (pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL) {
			pmap_unlock_ro(pmap);
			while (pmap_tt_allocate(pmap, &tt_p, ttlevel + 1, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
				if (options & PMAP_OPTIONS_NOWAIT) {
					return KERN_RESOURCE_SHORTAGE;
				}
#if XNU_MONITOR
				panic("%s: failed to allocate tt, "
				    "pmap=%p, v=%p, options=0x%x, level=%u",
				    __FUNCTION__,
				    pmap, (void *)v, options, level);
#else
				VM_PAGE_WAIT();
#endif
			}
			pmap_lock(pmap);
			if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
				pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE);
				pa = kvtophys((vm_offset_t)tt_p);
				tte_p = pmap_ttne(pmap, ttlevel, v);
				*tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
				PMAP_TRACE(4 + ttlevel, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
				    VM_KERNEL_ADDRHIDE((v & ~pt_attr_ln_offmask(pt_attr, ttlevel)) + pt_attr_ln_size(pt_attr, ttlevel)), *tte_p);
				pa = 0x0ULL;
				tt_p = (tt_entry_t *)NULL;
			}
			pmap_unlock(pmap);
		} else {
			pmap_unlock_ro(pmap);
		}

		if (tt_p != (tt_entry_t *)NULL) {
			pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
			tt_p = (tt_entry_t *)NULL;
		}
	}

	return KERN_SUCCESS;
#endif
}
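/*
 * Illustrative sketch (hypothetical in-file caller of the static pmap_expand()
 * above, not compiled): honoring the PMAP_OPTIONS_NOWAIT contract when growing
 * the page-table hierarchy ahead of an enter.  The wrapper name is an
 * assumption made for the example.
 */
#if 0
static kern_return_t
example_expand_for_va(pmap_t pmap, vm_map_address_t va, unsigned int target_level)
{
	kern_return_t kr = pmap_expand(pmap, va, PMAP_OPTIONS_NOWAIT, target_level);

	if (kr == KERN_RESOURCE_SHORTAGE) {
		/* Without PMAP_OPTIONS_NOWAIT, pmap_expand() instead waits for
		 * a page (or, on PPL-enabled configurations, panics) rather
		 * than returning a shortage to the caller. */
	}
	return kr;
}
#endif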
9454 * Routine: pmap_collect
9456 * Garbage collects the physical map system for
9457 * pages which are no longer used.
9458 * Success need not be guaranteed -- that is, there
9459 * may well be pages which are not referenced, but
9460 * others may be collected.
9463 pmap_collect(pmap_t pmap
)
9465 if (pmap
== PMAP_NULL
) {
9471 if ((pmap
->nested
== FALSE
) && (pmap
!= kernel_pmap
)) {
9472 /* TODO: Scan for vm page assigned to top level page tables with no reference */
9483 * Pmap garbage collection
9484 * Called by the pageout daemon when pages are scarce.
9493 * We cannot invoke the scheduler from the PPL, so for now we elide the
9494 * GC logic if the PPL is enabled.
9498 pmap_t pmap
, pmap_next
;
9501 if (pmap_gc_allowed
&&
9502 (pmap_gc_allowed_by_time_throttle
||
9504 pmap_gc_forced
= FALSE
;
9505 pmap_gc_allowed_by_time_throttle
= FALSE
;
9506 pmap_simple_lock(&pmaps_lock
);
9507 pmap
= CAST_DOWN_EXPLICIT(pmap_t
, queue_first(&map_pmap_list
));
9508 while (!queue_end(&map_pmap_list
, (queue_entry_t
)pmap
)) {
9509 if (!(pmap
->gc_status
& PMAP_GC_INFLIGHT
)) {
9510 pmap
->gc_status
|= PMAP_GC_INFLIGHT
;
9512 pmap_simple_unlock(&pmaps_lock
);
9516 pmap_simple_lock(&pmaps_lock
);
9517 gc_wait
= (pmap
->gc_status
& PMAP_GC_WAIT
);
9518 pmap
->gc_status
&= ~(PMAP_GC_INFLIGHT
| PMAP_GC_WAIT
);
9519 pmap_next
= CAST_DOWN_EXPLICIT(pmap_t
, queue_next(&pmap
->pmaps
));
9521 if (!queue_end(&map_pmap_list
, (queue_entry_t
)pmap_next
)) {
9522 pmap_next
->gc_status
|= PMAP_GC_INFLIGHT
;
9524 pmap_simple_unlock(&pmaps_lock
);
9525 thread_wakeup((event_t
) &pmap
->gc_status
);
9526 pmap_simple_lock(&pmaps_lock
);
9530 pmap_simple_unlock(&pmaps_lock
);
9536 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
9539 pmap_release_pages_fast(void)
9542 return pmap_release_ppl_pages_to_kernel();
9543 #else /* XNU_MONITOR */
9549 * By default, don't attempt pmap GC more frequently
9550 * than once / 1 minutes.
9554 compute_pmap_gc_throttle(
9557 pmap_gc_allowed_by_time_throttle
= TRUE
;
9561 * pmap_attribute_cache_sync(vm_offset_t pa)
9563 * Invalidates all of the instruction cache on a physical page and
9564 * pushes any dirty data from the data cache for the same physical page
9568 pmap_attribute_cache_sync(
9571 __unused vm_machine_attribute_t attribute
,
9572 __unused vm_machine_attribute_val_t
* value
)
9574 if (size
> PAGE_SIZE
) {
9575 panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size
);
9577 cache_sync_page(pp
);
9580 return KERN_SUCCESS
;
9584 * pmap_sync_page_data_phys(ppnum_t pp)
9586 * Invalidates all of the instruction cache on a physical page and
9587 * pushes any dirty data from the data cache for the same physical page
9590 pmap_sync_page_data_phys(
9593 cache_sync_page(pp
);
9597 * pmap_sync_page_attributes_phys(ppnum_t pp)
9599 * Write back and invalidate all cachelines on a physical page.
9602 pmap_sync_page_attributes_phys(
9605 flush_dcache((vm_offset_t
) (pp
<< PAGE_SHIFT
), PAGE_SIZE
, TRUE
);
9609 /* temporary workaround */
9613 mach_vm_offset_t va
)
9618 pte_p
= pmap_pte(map
->pmap
, va
);
9623 return (spte
& ARM_PTE_ATTRINDXMASK
) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT
);
9635 addr
= (unsigned int *) phystokv(ptoa(pn
));
9636 count
= PAGE_SIZE
/ sizeof(unsigned int);
9642 extern void mapping_set_mod(ppnum_t pn
);
9648 pmap_set_modify(pn
);
9651 extern void mapping_set_ref(ppnum_t pn
);
9657 pmap_set_reference(pn
);
9661 * Clear specified attribute bits.
9663 * Try to force an arm_fast_fault() for all mappings of
9664 * the page - to force attributes to be set again at fault time.
9665 * If the forcing succeeds, clear the cached bits at the head.
9666 * Otherwise, something must have been wired, so leave the cached
9669 MARK_AS_PMAP_TEXT
static void
9670 phys_attribute_clear_with_flush_range(
9675 pmap_tlb_flush_range_t
*flush_range
)
9677 pmap_paddr_t pa
= ptoa(pn
);
9678 vm_prot_t allow_mode
= VM_PROT_ALL
;
9681 if (__improbable(bits
& PP_ATTR_PPL_OWNED_BITS
)) {
9682 panic("%s: illegal request, "
9683 "pn=%u, bits=%#x, options=%#x, arg=%p, flush_range=%p",
9685 pn
, bits
, options
, arg
, flush_range
);
9688 if ((arg
!= NULL
) || (flush_range
!= NULL
)) {
9689 options
= options
& ~PMAP_OPTIONS_NOFLUSH
;
9692 if (__improbable((bits
& PP_ATTR_MODIFIED
) &&
9693 (options
& PMAP_OPTIONS_NOFLUSH
))) {
9694 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p,%p): "
9695 "should not clear 'modified' without flushing TLBs\n",
9696 pn
, bits
, options
, arg
, flush_range
);
9699 assert(pn
!= vm_page_fictitious_addr
);
9701 if (options
& PMAP_OPTIONS_CLEAR_WRITE
) {
9702 assert(bits
== PP_ATTR_MODIFIED
);
9704 pmap_page_protect_options_with_flush_range(pn
, (VM_PROT_ALL
& ~VM_PROT_WRITE
), options
, flush_range
);
9706 * We short circuit this case; it should not need to
9707 * invoke arm_force_fast_fault, so just clear the modified bit.
9708 * pmap_page_protect has taken care of resetting
9709 * the state so that we'll see the next write as a fault to
9710 * the VM (i.e. we don't want a fast fault).
9712 pa_clear_bits(pa
, bits
);
9715 if (bits
& PP_ATTR_REFERENCED
) {
9716 allow_mode
&= ~(VM_PROT_READ
| VM_PROT_EXECUTE
);
9718 if (bits
& PP_ATTR_MODIFIED
) {
9719 allow_mode
&= ~VM_PROT_WRITE
;
9722 if (bits
== PP_ATTR_NOENCRYPT
) {
9724 * We short circuit this case; it should not need to
9725 * invoke arm_force_fast_fault, so just clear and
9726 * return. On ARM, this bit is just a debugging aid.
9728 pa_clear_bits(pa
, bits
);
9732 if (arm_force_fast_fault_with_flush_range(pn
, allow_mode
, options
, flush_range
)) {
9733 pa_clear_bits(pa
, bits
);
9737 MARK_AS_PMAP_TEXT
static void
9738 phys_attribute_clear_internal(
9744 phys_attribute_clear_with_flush_range(pn
, bits
, options
, arg
, NULL
);
9747 #if __ARM_RANGE_TLBI__
9748 MARK_AS_PMAP_TEXT
static vm_map_address_t
9749 phys_attribute_clear_twig_internal(
9751 vm_map_address_t start
,
9752 vm_map_address_t end
,
9754 unsigned int options
,
9755 pmap_tlb_flush_range_t
*flush_range
)
9757 pmap_assert_locked_r(pmap
);
9758 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
9759 assert(end
>= start
);
9760 assert((end
- start
) <= pt_attr_twig_size(pt_attr
));
9761 const uint64_t pmap_page_size
= pt_attr_page_size(pt_attr
);
9762 vm_map_address_t va
= start
;
9763 pt_entry_t
*pte_p
, *start_pte_p
, *end_pte_p
, *curr_pte_p
;
9765 tte_p
= pmap_tte(pmap
, start
);
9766 unsigned int npages
= 0;
9768 if (tte_p
== (tt_entry_t
*) NULL
) {
9772 if ((*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
9773 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
9775 start_pte_p
= &pte_p
[pte_index(pmap
, pt_attr
, start
)];
9776 end_pte_p
= start_pte_p
+ ((end
- start
) >> pt_attr_leaf_shift(pt_attr
));
9777 assert(end_pte_p
>= start_pte_p
);
9778 for (curr_pte_p
= start_pte_p
; curr_pte_p
< end_pte_p
; curr_pte_p
++, va
+= pmap_page_size
) {
9779 if (__improbable(npages
++ && pmap_pending_preemption())) {
9782 pmap_paddr_t pa
= pte_to_pa(*((volatile pt_entry_t
*)curr_pte_p
));
9784 ppnum_t pn
= (ppnum_t
) atop(pa
);
9785 phys_attribute_clear_with_flush_range(pn
, bits
, options
, NULL
, flush_range
);
9792 MARK_AS_PMAP_TEXT
static vm_map_address_t
9793 phys_attribute_clear_range_internal(
9795 vm_map_address_t start
,
9796 vm_map_address_t end
,
9798 unsigned int options
)
9800 if (__improbable(end
< start
)) {
9801 panic("%s: invalid address range %p, %p", __func__
, (void*)start
, (void*)end
);
9803 VALIDATE_PMAP(pmap
);
9805 vm_map_address_t va
= start
;
9806 pmap_tlb_flush_range_t flush_range
= {
9808 .ptfr_start
= start
,
9810 .ptfr_flush_needed
= false
9814 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
9817 vm_map_address_t curr_end
;
9819 curr_end
= ((va
+ pt_attr_twig_size(pt_attr
)) & ~pt_attr_twig_offmask(pt_attr
));
9820 if (curr_end
> end
) {
9824 va
= phys_attribute_clear_twig_internal(pmap
, va
, curr_end
, bits
, options
, &flush_range
);
9825 if ((va
< curr_end
) || pmap_pending_preemption()) {
9829 pmap_unlock_ro(pmap
);
9830 if (flush_range
.ptfr_flush_needed
) {
9831 flush_range
.ptfr_end
= va
;
9832 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(
9833 flush_range
.ptfr_start
,
9834 flush_range
.ptfr_end
- flush_range
.ptfr_start
,
9835 flush_range
.ptfr_pmap
);
9842 phys_attribute_clear_range(
9844 vm_map_address_t start
,
9845 vm_map_address_t end
,
9847 unsigned int options
)
9849 assert(get_preemption_level() == 0);
9851 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE
) | DBG_FUNC_START
, bits
);
9853 while (start
< end
) {
9855 start
= phys_attribute_clear_range_ppl(pmap
, start
, end
, bits
, options
);
9857 start
= phys_attribute_clear_range_internal(pmap
, start
, end
, bits
, options
);
9861 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE
) | DBG_FUNC_END
);
9863 #endif /* __ARM_RANGE_TLBI__ */
9866 phys_attribute_clear(
9873 * Do we really want this tracepoint? It will be extremely chatty.
9874 * Also, should we have a corresponding trace point for the set path?
9876 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_START
, pn
, bits
);
9879 phys_attribute_clear_ppl(pn
, bits
, options
, arg
);
9881 phys_attribute_clear_internal(pn
, bits
, options
, arg
);
9884 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_END
);
9888 * Set specified attribute bits.
9890 * Set cached value in the pv head because we have
9891 * no per-mapping hardware support for referenced and
9894 MARK_AS_PMAP_TEXT
static void
9895 phys_attribute_set_internal(
9899 pmap_paddr_t pa
= ptoa(pn
);
9900 assert(pn
!= vm_page_fictitious_addr
);
9903 if (bits
& PP_ATTR_PPL_OWNED_BITS
) {
9904 panic("%s: illegal request, "
9911 pa_set_bits(pa
, (uint16_t)bits
);
9922 phys_attribute_set_ppl(pn
, bits
);
9924 phys_attribute_set_internal(pn
, bits
);
9930 * Check specified attribute bits.
9932 * use the software cached bits (since no hw support).
9935 phys_attribute_test(
9939 pmap_paddr_t pa
= ptoa(pn
);
9940 assert(pn
!= vm_page_fictitious_addr
);
9941 return pa_test_bits(pa
, bits
);
9946 * Set the modify/reference bits on the specified physical page.
9949 pmap_set_modify(ppnum_t pn
)
9951 phys_attribute_set(pn
, PP_ATTR_MODIFIED
);
9956 * Clear the modify bits on the specified physical page.
9962 phys_attribute_clear(pn
, PP_ATTR_MODIFIED
, 0, NULL
);
9969 * Return whether or not the specified physical page is modified
9970 * by any physical maps.
9976 return phys_attribute_test(pn
, PP_ATTR_MODIFIED
);
9981 * Set the reference bit on the specified physical page.
9987 phys_attribute_set(pn
, PP_ATTR_REFERENCED
);
9991 * Clear the reference bits on the specified physical page.
9994 pmap_clear_reference(
9997 phys_attribute_clear(pn
, PP_ATTR_REFERENCED
, 0, NULL
);
10002 * pmap_is_referenced:
10004 * Return whether or not the specified physical page is referenced
10005 * by any physical maps.
10008 pmap_is_referenced(
10011 return phys_attribute_test(pn
, PP_ATTR_REFERENCED
);
/*
 * pmap_get_refmod(phys)
 *  returns the referenced and modified bits of the specified
 *  physical page.
 */
unsigned int
pmap_get_refmod(
	ppnum_t pn)
{
	return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
	       | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
}

static inline unsigned int
pmap_clear_refmod_mask_to_modified_bits(const unsigned int mask)
{
	return ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
	       ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
}
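/*
 * Worked example for the conversion helper above (values illustrative):
 * a VM_MEM_MODIFIED request maps to PP_ATTR_MODIFIED, and a combined
 * (VM_MEM_MODIFIED | VM_MEM_REFERENCED) mask maps to
 * (PP_ATTR_MODIFIED | PP_ATTR_REFERENCED); any other bits are ignored.
 */
#if 0
	unsigned int example_bits = pmap_clear_refmod_mask_to_modified_bits(VM_MEM_MODIFIED | VM_MEM_REFERENCED);
	assert(example_bits == (PP_ATTR_MODIFIED | PP_ATTR_REFERENCED));
#endif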
/*
 * pmap_clear_refmod(phys, mask)
 *  clears the referenced and modified bits as specified by the mask
 *  of the specified physical page.
 */
void
pmap_clear_refmod_options(
	ppnum_t pn,
	unsigned int mask,
	unsigned int options,
	void *arg)
{
	unsigned int bits;

	bits = pmap_clear_refmod_mask_to_modified_bits(mask);
	phys_attribute_clear(pn, bits, options, arg);
}

/*
 * Perform pmap_clear_refmod_options on a virtual address range.
 * The operation will be performed in bulk & TLB flushes will be coalesced
 * where possible.
 *
 * Returns true if the operation is supported on this platform.
 * If this function returns false, the operation is not supported and
 * nothing has been modified in the pmap.
 */
bool
pmap_clear_refmod_range_options(
	pmap_t pmap __unused,
	vm_map_address_t start __unused,
	vm_map_address_t end __unused,
	unsigned int mask __unused,
	unsigned int options __unused)
{
#if __ARM_RANGE_TLBI__
	unsigned int bits;
	bits = pmap_clear_refmod_mask_to_modified_bits(mask);
	phys_attribute_clear_range(pmap, start, end, bits, options);
	return true;
#else /* __ARM_RANGE_TLBI__ */
#pragma unused(pmap, start, end, mask, options)
	/*
	 * This operation allows the VM to bulk modify refmod bits on a virtually
	 * contiguous range of addresses.  This is a large performance improvement on
	 * platforms that support ranged TLBI instructions.  But on older platforms,
	 * we can only flush per-page or the entire ASID, so we currently only
	 * support this operation on platforms that support ranged TLBI
	 * instructions.  On other platforms, we require that the VM modify the
	 * bits on a per-page basis.
	 */
	return false;
#endif /* __ARM_RANGE_TLBI__ */
}

void
pmap_clear_refmod(
	ppnum_t pn,
	unsigned int mask)
{
	pmap_clear_refmod_options(pn, mask, 0, NULL);
}
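/*
 * Illustrative sketch (hypothetical VM-side caller, not compiled): prefer the
 * bulk range interface above and fall back to per-page clearing when ranged
 * TLBI is not supported.  The per-page iteration is left as a comment because
 * it depends on bookkeeping the caller already has.
 */
#if 0
static void
example_clear_modified_range(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
	if (!pmap_clear_refmod_range_options(pmap, start, end, VM_MEM_MODIFIED, 0)) {
		/* Older platforms: clear one page at a time, e.g. for each
		 * ppnum_t pn mapped in [start, end):
		 *     pmap_clear_refmod_options(pn, VM_MEM_MODIFIED, 0, NULL);
		 */
	}
}
#endif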
unsigned int
pmap_disconnect_options(
	ppnum_t pn,
	unsigned int options,
	void *arg)
{
	if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
		/*
		 * On ARM, the "modified" bit is managed by software, so
		 * we know up-front if the physical page is "modified",
		 * without having to scan all the PTEs pointing to it.
		 * The caller should have made the VM page "busy" so no one
		 * should be able to establish any new mapping and "modify"
		 * the page behind us.
		 */
		if (pmap_is_modified(pn)) {
			/*
			 * The page has been modified and will be sent to
			 * the VM compressor.
			 */
			options |= PMAP_OPTIONS_COMPRESSOR;
		} else {
			/*
			 * The page hasn't been modified and will be freed
			 * instead of compressed.
			 */
		}
	}

	/* disconnect the page */
	pmap_page_protect_options(pn, 0, options, arg);

	/* return ref/chg status */
	return pmap_get_refmod(pn);
}
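/*
 * Illustrative sketch (hypothetical caller, not compiled): disconnecting a
 * page that is headed to the VM compressor only if it was actually modified,
 * using the option handled above.  The page is assumed to already be busy so
 * no new mapping can dirty it underneath us.
 */
#if 0
static unsigned int
example_disconnect_for_compression(ppnum_t pn)
{
	return pmap_disconnect_options(pn, PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED, NULL);
}
#endif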
/*
 *	Disconnect all mappings for this page and return reference and change status
 *	in generic format.
 */
unsigned int
pmap_disconnect(
	ppnum_t pn)
{
	pmap_page_protect(pn, 0);       /* disconnect the page */
	return pmap_get_refmod(pn);     /* return ref/chg status */
}
10151 pmap_has_managed_page(ppnum_t first
, ppnum_t last
)
10153 if (ptoa(first
) >= vm_last_phys
) {
10156 if (ptoa(last
) < vm_first_phys
) {
10164 * The state maintained by the noencrypt functions is used as a
10165 * debugging aid on ARM. This incurs some overhead on the part
10166 * of the caller. A special case check in phys_attribute_clear
10167 * (the most expensive path) currently minimizes this overhead,
10168 * but stubbing these functions out on RELEASE kernels yields
10175 #if DEVELOPMENT || DEBUG
10176 boolean_t result
= FALSE
;
10178 if (!pa_valid(ptoa(pn
))) {
10182 result
= (phys_attribute_test(pn
, PP_ATTR_NOENCRYPT
));
10192 pmap_set_noencrypt(
10195 #if DEVELOPMENT || DEBUG
10196 if (!pa_valid(ptoa(pn
))) {
10200 phys_attribute_set(pn
, PP_ATTR_NOENCRYPT
);
10207 pmap_clear_noencrypt(
10210 #if DEVELOPMENT || DEBUG
10211 if (!pa_valid(ptoa(pn
))) {
10215 phys_attribute_clear(pn
, PP_ATTR_NOENCRYPT
, 0, NULL
);
10223 pmap_is_monitor(ppnum_t pn
)
10225 assert(pa_valid(ptoa(pn
)));
10226 return phys_attribute_test(pn
, PP_ATTR_MONITOR
);
10231 pmap_lock_phys_page(ppnum_t pn
)
10235 pmap_paddr_t phys
= ptoa(pn
);
10237 if (pa_valid(phys
)) {
10238 pai
= (int)pa_index(phys
);
10244 { simple_lock(&phys_backup_lock
, LCK_GRP_NULL
);}
10249 pmap_unlock_phys_page(ppnum_t pn
)
10253 pmap_paddr_t phys
= ptoa(pn
);
10255 if (pa_valid(phys
)) {
10256 pai
= (int)pa_index(phys
);
10262 { simple_unlock(&phys_backup_lock
);}
10265 MARK_AS_PMAP_TEXT
static void
10266 pmap_switch_user_ttb_internal(
10269 VALIDATE_PMAP(pmap
);
10270 pmap_cpu_data_t
*cpu_data_ptr
;
10271 cpu_data_ptr
= pmap_get_cpu_data();
10273 #if (__ARM_VMSA__ == 7)
10274 cpu_data_ptr
->cpu_user_pmap
= pmap
;
10275 cpu_data_ptr
->cpu_user_pmap_stamp
= pmap
->stamp
;
10277 #if MACH_ASSERT && __ARM_USER_PROTECT__
10279 unsigned int ttbr0_val
, ttbr1_val
;
10280 __asm__
volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val
));
10281 __asm__
volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val
));
10282 if (ttbr0_val
!= ttbr1_val
) {
10283 panic("Misaligned ttbr0 %08X\n", ttbr0_val
);
10285 if (pmap
->ttep
& 0x1000) {
10286 panic("Misaligned ttbr0 %08X\n", pmap
->ttep
);
10290 #if !__ARM_USER_PROTECT__
10291 set_mmu_ttb(pmap
->ttep
);
10292 set_context_id(pmap
->hw_asid
);
10295 #else /* (__ARM_VMSA__ == 7) */
10297 if (pmap
!= kernel_pmap
) {
10298 cpu_data_ptr
->cpu_nested_pmap
= pmap
->nested_pmap
;
10299 cpu_data_ptr
->cpu_nested_pmap_attr
= (cpu_data_ptr
->cpu_nested_pmap
== NULL
) ?
10300 NULL
: pmap_get_pt_attr(cpu_data_ptr
->cpu_nested_pmap
);
10301 cpu_data_ptr
->cpu_nested_region_addr
= pmap
->nested_region_addr
;
10302 cpu_data_ptr
->cpu_nested_region_size
= pmap
->nested_region_size
;
10306 #if __ARM_MIXED_PAGE_SIZE__
10307 if ((pmap
!= kernel_pmap
) && (pmap_get_pt_attr(pmap
)->pta_tcr_value
!= get_tcr())) {
10308 set_tcr(pmap_get_pt_attr(pmap
)->pta_tcr_value
);
10310 #endif /* __ARM_MIXED_PAGE_SIZE__ */
10312 if (pmap
!= kernel_pmap
) {
10313 set_mmu_ttb((pmap
->ttep
& TTBR_BADDR_MASK
) | (((uint64_t)pmap
->hw_asid
) << TTBR_ASID_SHIFT
));
10314 } else if (!pmap_user_ttb_is_clear()) {
10315 pmap_clear_user_ttb_internal();
10318 #endif /* (__ARM_VMSA__ == 7) */
10322 pmap_switch_user_ttb(
10325 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB
) | DBG_FUNC_START
, VM_KERNEL_ADDRHIDE(pmap
), PMAP_VASID(pmap
), pmap
->hw_asid
);
10327 pmap_switch_user_ttb_ppl(pmap
);
10329 pmap_switch_user_ttb_internal(pmap
);
10331 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB
) | DBG_FUNC_END
);
10334 MARK_AS_PMAP_TEXT
static void
10335 pmap_clear_user_ttb_internal(void)
10337 #if (__ARM_VMSA__ > 7)
10338 set_mmu_ttb(invalid_ttep
& TTBR_BADDR_MASK
);
10340 set_mmu_ttb(kernel_pmap
->ttep
);
10345 pmap_clear_user_ttb(void)
10347 PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB
) | DBG_FUNC_START
, NULL
, 0, 0);
10349 pmap_clear_user_ttb_ppl();
10351 pmap_clear_user_ttb_internal();
10353 PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB
) | DBG_FUNC_END
);
10356 MARK_AS_PMAP_TEXT
static boolean_t
10357 arm_force_fast_fault_with_flush_range(
10359 vm_prot_t allow_mode
,
10361 pmap_tlb_flush_range_t
*flush_range
)
10363 pmap_paddr_t phys
= ptoa(ppnum
);
10369 boolean_t is_reusable
, is_internal
;
10370 boolean_t tlb_flush_needed
= FALSE
;
10371 boolean_t ref_fault
;
10372 boolean_t mod_fault
;
10373 boolean_t clear_write_fault
= FALSE
;
10374 boolean_t ref_aliases_mod
= FALSE
;
10375 bool mustsynch
= ((options
& PMAP_OPTIONS_FF_LOCKED
) == 0);
10377 assert(ppnum
!= vm_page_fictitious_addr
);
10379 if (!pa_valid(phys
)) {
10380 return FALSE
; /* Not a managed page. */
10386 pai
= (int)pa_index(phys
);
10387 if (__probable(mustsynch
)) {
10390 pv_h
= pai_to_pvh(pai
);
10392 pte_p
= PT_ENTRY_NULL
;
10393 pve_p
= PV_ENTRY_NULL
;
10394 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
10395 pte_p
= pvh_ptep(pv_h
);
10396 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
10397 pve_p
= pvh_list(pv_h
);
10400 is_reusable
= IS_REUSABLE_PAGE(pai
);
10401 is_internal
= IS_INTERNAL_PAGE(pai
);
10403 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
10404 vm_map_address_t va
;
10406 pt_entry_t tmplate
;
10408 boolean_t update_pte
;
10410 if (pve_p
!= PV_ENTRY_NULL
) {
10411 pte_p
= pve_get_ptep(pve_p
);
10414 if (pte_p
== PT_ENTRY_NULL
) {
10415 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p
, ppnum
);
10417 #ifdef PVH_FLAG_IOMMU
10418 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
10422 if (*pte_p
== ARM_PTE_EMPTY
) {
10423 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
10425 if (ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
)) {
10426 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
10429 pmap
= ptep_get_pmap(pte_p
);
10430 const pt_attr_t
* pt_attr
= pmap_get_pt_attr(pmap
);
10431 va
= ptep_get_va(pte_p
);
10433 assert(va
>= pmap
->min
&& va
< pmap
->max
);
10435 /* update pmap stats and ledgers */
10436 if (IS_ALTACCT_PAGE(pai
, pve_p
)) {
10438 * We do not track "reusable" status for
10439 * "alternate accounting" mappings.
10441 } else if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
10444 pmap
!= kernel_pmap
) {
10445 /* one less "reusable" */
10446 __assert_only
int32_t orig_reusable
= OSAddAtomic(-1, &pmap
->stats
.reusable
);
10447 PMAP_STATS_ASSERTF(orig_reusable
> 0, pmap
, "stats.reusable %d", orig_reusable
);
10448 /* one more "internal" */
10449 __assert_only
int32_t orig_internal
= OSAddAtomic(+1, &pmap
->stats
.internal
);
10450 PMAP_STATS_PEAK(pmap
->stats
.internal
);
10451 PMAP_STATS_ASSERTF(orig_internal
>= 0, pmap
, "stats.internal %d", orig_internal
);
10452 pmap_ledger_credit(pmap
, task_ledgers
.internal
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
10453 assert(!IS_ALTACCT_PAGE(pai
, pve_p
));
10454 assert(IS_INTERNAL_PAGE(pai
));
10455 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
10458 * Since the page is being marked non-reusable, we assume that it will be
10459 * modified soon. Avoid the cost of another trap to handle the fast
10460 * fault when we next write to this page.
10462 clear_write_fault
= TRUE
;
10463 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
10466 pmap
!= kernel_pmap
) {
10467 /* one more "reusable" */
10468 __assert_only
int32_t orig_reusable
= OSAddAtomic(+1, &pmap
->stats
.reusable
);
10469 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
10470 PMAP_STATS_ASSERTF(orig_reusable
>= 0, pmap
, "stats.reusable %d", orig_reusable
);
10471 /* one less "internal" */
10472 __assert_only
int32_t orig_internal
= OSAddAtomic(-1, &pmap
->stats
.internal
);
10473 PMAP_STATS_ASSERTF(orig_internal
> 0, pmap
, "stats.internal %d", orig_internal
);
10474 pmap_ledger_debit(pmap
, task_ledgers
.internal
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
10475 assert(!IS_ALTACCT_PAGE(pai
, pve_p
));
10476 assert(IS_INTERNAL_PAGE(pai
));
10477 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, pt_attr_page_size(pt_attr
) * PAGE_RATIO
);
10480 bool wiredskip
= pte_is_wired(*pte_p
) &&
10481 ((options
& PMAP_OPTIONS_FF_WIRED
) == 0);
10490 update_pte
= FALSE
;
10492 if ((allow_mode
& VM_PROT_READ
) != VM_PROT_READ
) {
10493 /* read protection sets the pte to fault */
10494 tmplate
= tmplate
& ~ARM_PTE_AF
;
10498 if ((allow_mode
& VM_PROT_WRITE
) != VM_PROT_WRITE
) {
10499 /* take away write permission if set */
10500 if (pmap
== kernel_pmap
) {
10501 if ((tmplate
& ARM_PTE_APMASK
) == ARM_PTE_AP(AP_RWNA
)) {
10502 tmplate
= ((tmplate
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RONA
));
10503 pte_set_was_writeable(tmplate
, true);
10508 if ((tmplate
& ARM_PTE_APMASK
) == pt_attr_leaf_rw(pt_attr
)) {
10509 tmplate
= ((tmplate
& ~ARM_PTE_APMASK
) | pt_attr_leaf_ro(pt_attr
));
10510 pte_set_was_writeable(tmplate
, true);
10517 #if MACH_ASSERT && XNU_MONITOR
10518 if (is_pte_xprr_protected(pmap
, spte
)) {
10519 if (pte_to_xprr_perm(spte
) != pte_to_xprr_perm(tmplate
)) {
10520 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
10521 "ppnum=0x%x, options=0x%x, allow_mode=0x%x",
10522 __FUNCTION__
, pte_p
, pmap
, pv_h
, pve_p
, (unsigned long long)spte
, (unsigned long long)tmplate
, (unsigned long long)va
,
10523 ppnum
, options
, allow_mode
);
10526 #endif /* MACH_ASSERT && XNU_MONITOR */
10528 if (result
&& update_pte
) {
10529 if (options
& PMAP_OPTIONS_NOFLUSH
) {
10530 WRITE_PTE_FAST(pte_p
, tmplate
);
10532 WRITE_PTE_STRONG(pte_p
, tmplate
);
10533 if (!flush_range
||
10534 ((flush_range
->ptfr_pmap
!= pmap
) || va
>= flush_range
->ptfr_end
|| va
< flush_range
->ptfr_start
)) {
10535 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
,
10536 pt_attr_page_size(pt_attr
) * PAGE_RATIO
, pmap
);
10538 tlb_flush_needed
= TRUE
;
10543 pte_p
= PT_ENTRY_NULL
;
10544 if (pve_p
!= PV_ENTRY_NULL
) {
10545 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
10550 * If we are using the same approach for ref and mod
10551 * faults on this PTE, do not clear the write fault;
10552 * this would cause both ref and mod to be set on the
10553 * page again, and prevent us from taking ANY read/write
10554 * fault on the mapping.
10556 if (clear_write_fault
&& !ref_aliases_mod
) {
10557 arm_clear_fast_fault(ppnum
, VM_PROT_WRITE
);
10559 if (tlb_flush_needed
) {
10561 /* Delayed flush. Signal to the caller that the flush is needed. */
10562 flush_range
->ptfr_flush_needed
= true;
10568 /* update global "reusable" status for this page */
10570 if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
10572 CLR_REUSABLE_PAGE(pai
);
10573 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
10575 SET_REUSABLE_PAGE(pai
);
10580 SET_MODFAULT_PAGE(pai
);
10583 SET_REFFAULT_PAGE(pai
);
10585 if (__probable(mustsynch
)) {
10591 MARK_AS_PMAP_TEXT
static boolean_t
10592 arm_force_fast_fault_internal(
10594 vm_prot_t allow_mode
,
10597 if (__improbable((options
& (PMAP_OPTIONS_FF_LOCKED
| PMAP_OPTIONS_NOFLUSH
)) != 0)) {
10598 panic("arm_force_fast_fault(0x%x, 0x%x, 0x%x): invalid options", ppnum
, allow_mode
, options
);
10600 return arm_force_fast_fault_with_flush_range(ppnum
, allow_mode
, options
, NULL
);
10604 * Routine: arm_force_fast_fault
10607 * Force all mappings for this page to fault according
10608 * to the access modes allowed, so we can gather ref/modify
10613 arm_force_fast_fault(
10615 vm_prot_t allow_mode
,
10617 __unused
void *arg
)
10619 pmap_paddr_t phys
= ptoa(ppnum
);
10621 assert(ppnum
!= vm_page_fictitious_addr
);
10623 if (!pa_valid(phys
)) {
10624 return FALSE
; /* Not a managed page. */
10628 return arm_force_fast_fault_ppl(ppnum
, allow_mode
, options
);
10630 return arm_force_fast_fault_internal(ppnum
, allow_mode
, options
);
10635 * Routine: arm_clear_fast_fault
10638 * Clear pending force fault for all mappings for this page based on
10639 * the observed fault type, update ref/modify bits.
10641 MARK_AS_PMAP_TEXT
static boolean_t
10642 arm_clear_fast_fault(
10644 vm_prot_t fault_type
)
10646 pmap_paddr_t pa
= ptoa(ppnum
);
10651 boolean_t tlb_flush_needed
= FALSE
;
10654 assert(ppnum
!= vm_page_fictitious_addr
);
10656 if (!pa_valid(pa
)) {
10657 return FALSE
; /* Not a managed page. */
10661 pai
= (int)pa_index(pa
);
10662 ASSERT_PVH_LOCKED(pai
);
10663 pv_h
= pai_to_pvh(pai
);
10665 pte_p
= PT_ENTRY_NULL
;
10666 pve_p
= PV_ENTRY_NULL
;
10667 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
10668 pte_p
= pvh_ptep(pv_h
);
10669 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
10670 pve_p
= pvh_list(pv_h
);
10673 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
10674 vm_map_address_t va
;
10676 pt_entry_t tmplate
;
10679 if (pve_p
!= PV_ENTRY_NULL
) {
10680 pte_p
= pve_get_ptep(pve_p
);
10683 if (pte_p
== PT_ENTRY_NULL
) {
10684 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p
, ppnum
);
10686 #ifdef PVH_FLAG_IOMMU
10687 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
10691 if (*pte_p
== ARM_PTE_EMPTY
) {
10692 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
10695 pmap
= ptep_get_pmap(pte_p
);
10696 va
= ptep_get_va(pte_p
);
10698 assert(va
>= pmap
->min
&& va
< pmap
->max
);
10703 if ((fault_type
& VM_PROT_WRITE
) && (pte_was_writeable(spte
))) {
10705 if (pmap
== kernel_pmap
) {
10706 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RWNA
));
10708 assert(!pmap
->nested
); /* no write access in a nested pmap */
10709 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap
)));
10713 tmplate
|= ARM_PTE_AF
;
10715 pte_set_was_writeable(tmplate
, false);
10716 pa_set_bits(pa
, PP_ATTR_REFERENCED
| PP_ATTR_MODIFIED
);
10717 } else if ((fault_type
& VM_PROT_READ
) && ((spte
& ARM_PTE_AF
) != ARM_PTE_AF
)) {
10718 tmplate
= spte
| ARM_PTE_AF
;
10721 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
10725 #if MACH_ASSERT && XNU_MONITOR
10726 if (is_pte_xprr_protected(pmap
, spte
)) {
10727 if (pte_to_xprr_perm(spte
) != pte_to_xprr_perm(tmplate
)) {
10728 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
10729 "ppnum=0x%x, fault_type=0x%x",
10730 __FUNCTION__
, pte_p
, pmap
, pv_h
, pve_p
, (unsigned long long)spte
, (unsigned long long)tmplate
, (unsigned long long)va
,
10731 ppnum
, fault_type
);
10734 #endif /* MACH_ASSERT && XNU_MONITOR */
10736 if (spte
!= tmplate
) {
10737 if (spte
!= ARM_PTE_TYPE_FAULT
) {
10738 WRITE_PTE_STRONG(pte_p
, tmplate
);
10739 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
,
10740 pt_attr_page_size(pmap_get_pt_attr(pmap
)) * PAGE_RATIO
, pmap
);
10741 tlb_flush_needed
= TRUE
;
10743 WRITE_PTE(pte_p
, tmplate
);
10744 __builtin_arm_isb(ISB_SY
);
10749 #ifdef PVH_FLAG_IOMMU
10752 pte_p
= PT_ENTRY_NULL
;
10753 if (pve_p
!= PV_ENTRY_NULL
) {
10754 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
10757 if (tlb_flush_needed
) {
10764 * Determine if the fault was induced by software tracking of
10765 * modify/reference bits. If so, re-enable the mapping (and set
10766 * the appropriate bits).
10768 * Returns KERN_SUCCESS if the fault was induced and was
10769 * successfully handled.
10771 * Returns KERN_FAILURE if the fault was not induced and
10772 * the function was unable to deal with it.
 * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
 * disallows this type of access.
10777 MARK_AS_PMAP_TEXT
static kern_return_t
10778 arm_fast_fault_internal(
10780 vm_map_address_t va
,
10781 vm_prot_t fault_type
,
10782 __unused
bool was_af_fault
,
10783 __unused
bool from_user
)
10785 kern_return_t result
= KERN_FAILURE
;
10787 pt_entry_t spte
= ARM_PTE_TYPE_FAULT
;
10790 VALIDATE_PMAP(pmap
);
10792 pmap_lock_ro(pmap
);
10795 * If the entry doesn't exist, is completely invalid, or is already
10796 * valid, we can't fix it here.
10799 ptep
= pmap_pte(pmap
, va
);
10800 if (ptep
!= PT_ENTRY_NULL
) {
10802 spte
= *((volatile pt_entry_t
*)ptep
);
10804 pa
= pte_to_pa(spte
);
10806 if ((spte
== ARM_PTE_TYPE_FAULT
) ||
10807 ARM_PTE_IS_COMPRESSED(spte
, ptep
)) {
10808 pmap_unlock_ro(pmap
);
10812 if (!pa_valid(pa
)) {
10813 pmap_unlock_ro(pmap
);
10815 if (pmap_cache_attributes((ppnum_t
)atop(pa
)) & PP_ATTR_MONITOR
) {
10816 return KERN_PROTECTION_FAILURE
;
10821 pai
= (int)pa_index(pa
);
10826 pmap_unlock_ro(pmap
);
10831 if ((result
!= KERN_SUCCESS
) &&
10832 ((IS_REFFAULT_PAGE(pai
)) || ((fault_type
& VM_PROT_WRITE
) && IS_MODFAULT_PAGE(pai
)))) {
10834 * An attempted access will always clear ref/mod fault state, as
10835 * appropriate for the fault type. arm_clear_fast_fault will
10836 * update the associated PTEs for the page as appropriate; if
10837 * any PTEs are updated, we redrive the access. If the mapping
10838 * does not actually allow for the attempted access, the
10839 * following fault will (hopefully) fail to update any PTEs, and
10840 * thus cause arm_fast_fault to decide that it failed to handle
10843 if (IS_REFFAULT_PAGE(pai
)) {
10844 CLR_REFFAULT_PAGE(pai
);
10846 if ((fault_type
& VM_PROT_WRITE
) && IS_MODFAULT_PAGE(pai
)) {
10847 CLR_MODFAULT_PAGE(pai
);
10850 if (arm_clear_fast_fault((ppnum_t
)atop(pa
), fault_type
)) {
10852 * Should this preserve KERN_PROTECTION_FAILURE? The
10853 * cost of not doing so is a another fault in a case
10854 * that should already result in an exception.
10856 result
= KERN_SUCCESS
;
10861 * If the PTE already has sufficient permissions, we can report the fault as handled.
10862 * This may happen, for example, if multiple threads trigger roughly simultaneous faults
10863 * on mappings of the same page
10865 if ((result
== KERN_FAILURE
) && (spte
& ARM_PTE_AF
)) {
10866 uintptr_t ap_ro
, ap_rw
, ap_x
;
10867 if (pmap
== kernel_pmap
) {
10868 ap_ro
= ARM_PTE_AP(AP_RONA
);
10869 ap_rw
= ARM_PTE_AP(AP_RWNA
);
10872 ap_ro
= pt_attr_leaf_ro(pmap_get_pt_attr(pmap
));
10873 ap_rw
= pt_attr_leaf_rw(pmap_get_pt_attr(pmap
));
10874 ap_x
= pt_attr_leaf_x(pmap_get_pt_attr(pmap
));
10877 * NOTE: this doesn't currently handle user-XO mappings. Depending upon the
10878 * hardware they may be xPRR-protected, in which case they'll be handled
10879 * by the is_pte_xprr_protected() case above. Additionally, the exception
10880 * handling path currently does not call arm_fast_fault() without at least
10881 * VM_PROT_READ in fault_type.
10883 if (((spte
& ARM_PTE_APMASK
) == ap_rw
) ||
10884 (!(fault_type
& VM_PROT_WRITE
) && ((spte
& ARM_PTE_APMASK
) == ap_ro
))) {
10885 if (!(fault_type
& VM_PROT_EXECUTE
) || ((spte
& ARM_PTE_XMASK
) == ap_x
)) {
10886 result
= KERN_SUCCESS
;
10892 pmap_unlock_ro(pmap
);
10899 vm_map_address_t va
,
10900 vm_prot_t fault_type
,
10902 __unused
bool from_user
)
10904 kern_return_t result
= KERN_FAILURE
;
10906 if (va
< pmap
->min
|| va
>= pmap
->max
) {
10910 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT
) | DBG_FUNC_START
,
10911 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(va
), fault_type
,
10914 #if (__ARM_VMSA__ == 7)
10915 if (pmap
!= kernel_pmap
) {
10916 pmap_cpu_data_t
*cpu_data_ptr
= pmap_get_cpu_data();
10918 pmap_t cur_user_pmap
;
10920 cur_pmap
= current_pmap();
10921 cur_user_pmap
= cpu_data_ptr
->cpu_user_pmap
;
10923 if ((cur_user_pmap
== cur_pmap
) && (cur_pmap
== pmap
)) {
10924 if (cpu_data_ptr
->cpu_user_pmap_stamp
!= pmap
->stamp
) {
10925 pmap_set_pmap(pmap
, current_thread());
10926 result
= KERN_SUCCESS
;
10934 result
= arm_fast_fault_ppl(pmap
, va
, fault_type
, was_af_fault
, from_user
);
10936 result
= arm_fast_fault_internal(pmap
, va
, fault_type
, was_af_fault
, from_user
);
10939 #if (__ARM_VMSA__ == 7)
10943 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT
) | DBG_FUNC_END
, result
);
10953 bcopy_phys((addr64_t
) (ptoa(psrc
)),
10954 (addr64_t
) (ptoa(pdst
)),
10960 * pmap_copy_page copies the specified (machine independent) pages.
10963 pmap_copy_part_page(
10965 vm_offset_t src_offset
,
10967 vm_offset_t dst_offset
,
10970 bcopy_phys((addr64_t
) (ptoa(psrc
) + src_offset
),
10971 (addr64_t
) (ptoa(pdst
) + dst_offset
),
10977 * pmap_zero_page zeros the specified (machine independent) page.
10983 assert(pn
!= vm_page_fictitious_addr
);
10984 bzero_phys((addr64_t
) ptoa(pn
), PAGE_SIZE
);
10988 * pmap_zero_part_page
10989 * zeros the specified (machine independent) part of a page.
10992 pmap_zero_part_page(
10994 vm_offset_t offset
,
10997 assert(pn
!= vm_page_fictitious_addr
);
10998 assert(offset
+ len
<= PAGE_SIZE
);
10999 bzero_phys((addr64_t
) (ptoa(pn
) + offset
), len
);
11006 pt_entry_t
*ptep
, pte
;
11008 ptep
= pmap_pte(kernel_pmap
, LOWGLOBAL_ALIAS
);
11009 assert(ptep
!= PT_ENTRY_NULL
);
11010 assert(*ptep
== ARM_PTE_EMPTY
);
11012 pte
= pa_to_pte(ml_static_vtop((vm_offset_t
)&lowGlo
)) | AP_RONA
| ARM_PTE_NX
| ARM_PTE_PNX
| ARM_PTE_AF
| ARM_PTE_TYPE
;
11013 #if __ARM_KERNEL_PROTECT__
11015 #endif /* __ARM_KERNEL_PROTECT__ */
11016 pte
|= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK
);
11017 #if (__ARM_VMSA__ > 7)
11018 pte
|= ARM_PTE_SH(SH_OUTER_MEMORY
);
11023 FLUSH_PTE_RANGE(ptep
, (ptep
+ 1));
11024 PMAP_UPDATE_TLBS(kernel_pmap
, LOWGLOBAL_ALIAS
, LOWGLOBAL_ALIAS
+ PAGE_SIZE
, false);
11028 pmap_cpu_windows_copy_addr(int cpu_num
, unsigned int index
)
11030 if (__improbable(index
>= CPUWINDOWS_MAX
)) {
11031 panic("%s: invalid index %u", __func__
, index
);
11033 return (vm_offset_t
)(CPUWINDOWS_BASE
+ (PAGE_SIZE
* ((CPUWINDOWS_MAX
* cpu_num
) + index
)));
11036 MARK_AS_PMAP_TEXT
static unsigned int
11037 pmap_map_cpu_windows_copy_internal(
11040 unsigned int wimg_bits
)
11042 pt_entry_t
*ptep
= NULL
, pte
;
11043 pmap_cpu_data_t
*pmap_cpu_data
= pmap_get_cpu_data();
11044 unsigned int cpu_num
;
11046 vm_offset_t cpu_copywindow_vaddr
= 0;
11047 bool need_strong_sync
= false;
11050 unsigned int cacheattr
= (!pa_valid(ptoa(pn
)) ? pmap_cache_attributes(pn
) : 0);
11051 need_strong_sync
= ((cacheattr
& PMAP_IO_RANGE_STRONG_SYNC
) != 0);
11055 #ifdef __ARM_COHERENT_IO__
11056 if (__improbable(pa_valid(ptoa(pn
)) && !pmap_ppl_disable
)) {
11057 panic("%s: attempted to map a managed page, "
11058 "pn=%u, prot=0x%x, wimg_bits=0x%x",
11060 pn
, prot
, wimg_bits
);
11062 if (__improbable((cacheattr
& PP_ATTR_MONITOR
) && (prot
!= VM_PROT_READ
) && !pmap_ppl_disable
)) {
11063 panic("%s: attempt to map PPL-protected I/O address 0x%llx as writable", __func__
, (uint64_t)ptoa(pn
));
11066 #else /* __ARM_COHERENT_IO__ */
11067 #error CPU copy windows are not properly supported with both the PPL and incoherent IO
11068 #endif /* __ARM_COHERENT_IO__ */
11069 #endif /* XNU_MONITOR */
11070 cpu_num
= pmap_cpu_data
->cpu_number
;
11072 for (i
= 0; i
< CPUWINDOWS_MAX
; i
++) {
11073 cpu_copywindow_vaddr
= pmap_cpu_windows_copy_addr(cpu_num
, i
);
11074 ptep
= pmap_pte(kernel_pmap
, cpu_copywindow_vaddr
);
11075 assert(!ARM_PTE_IS_COMPRESSED(*ptep
, ptep
));
11076 if (*ptep
== ARM_PTE_TYPE_FAULT
) {
11080 if (i
== CPUWINDOWS_MAX
) {
11081 panic("pmap_map_cpu_windows_copy: out of window\n");
11084 pte
= pa_to_pte(ptoa(pn
)) | ARM_PTE_TYPE
| ARM_PTE_AF
| ARM_PTE_NX
| ARM_PTE_PNX
;
11085 #if __ARM_KERNEL_PROTECT__
11087 #endif /* __ARM_KERNEL_PROTECT__ */
11089 pte
|= wimg_to_pte(wimg_bits
);
11091 if (prot
& VM_PROT_WRITE
) {
11092 pte
|= ARM_PTE_AP(AP_RWNA
);
11094 pte
|= ARM_PTE_AP(AP_RONA
);
11097 WRITE_PTE_FAST(ptep
, pte
);
11099 * Invalidate tlb. Cover nested cpu_copywindow_vaddr usage with the interrupted context
11100 * in pmap_unmap_cpu_windows_copy() after clearing the pte and before tlb invalidate.
11102 FLUSH_PTE_STRONG(ptep
);
11103 PMAP_UPDATE_TLBS(kernel_pmap
, cpu_copywindow_vaddr
, cpu_copywindow_vaddr
+ PAGE_SIZE
, pmap_cpu_data
->copywindow_strong_sync
[i
]);
11104 pmap_cpu_data
->copywindow_strong_sync
[i
] = need_strong_sync
;
11110 pmap_map_cpu_windows_copy(
11113 unsigned int wimg_bits
)
11116 return pmap_map_cpu_windows_copy_ppl(pn
, prot
, wimg_bits
);
11118 return pmap_map_cpu_windows_copy_internal(pn
, prot
, wimg_bits
);
11122 MARK_AS_PMAP_TEXT
static void
11123 pmap_unmap_cpu_windows_copy_internal(
11124 unsigned int index
)
11127 unsigned int cpu_num
;
11128 vm_offset_t cpu_copywindow_vaddr
= 0;
11129 pmap_cpu_data_t
*pmap_cpu_data
= pmap_get_cpu_data();
11131 cpu_num
= pmap_cpu_data
->cpu_number
;
11133 cpu_copywindow_vaddr
= pmap_cpu_windows_copy_addr(cpu_num
, index
);
11134 /* Issue full-system DSB to ensure prior operations on the per-CPU window
11135 * (which are likely to have been on I/O memory) are complete before
11136 * tearing down the mapping. */
11137 __builtin_arm_dsb(DSB_SY
);
11138 ptep
= pmap_pte(kernel_pmap
, cpu_copywindow_vaddr
);
11139 WRITE_PTE_STRONG(ptep
, ARM_PTE_TYPE_FAULT
);
11140 PMAP_UPDATE_TLBS(kernel_pmap
, cpu_copywindow_vaddr
, cpu_copywindow_vaddr
+ PAGE_SIZE
, pmap_cpu_data
->copywindow_strong_sync
[index
]);
11144 pmap_unmap_cpu_windows_copy(
11145 unsigned int index
)
11148 return pmap_unmap_cpu_windows_copy_ppl(index
);
11150 return pmap_unmap_cpu_windows_copy_internal(index
);
MARK_AS_PMAP_TEXT void
pmap_invoke_with_page(
	ppnum_t page_number,
	void *ctx,
	void (*callback)(void *ctx, ppnum_t page_number, const void *page))
{
	#pragma unused(page_number, ctx, callback)
}

/**
 * Loop over every pmap_io_range (I/O ranges marked as owned by
 * the PPL in the device tree) and conditionally call callback() on each range
 * that needs to be included in the hibernation image.
 *
 * @param ctx Will be passed as-is into the callback method. Use NULL if no
 *            context is needed in the callback.
 * @param callback Callback function invoked on each range (gated by flag).
 */
MARK_AS_PMAP_TEXT void
pmap_hibernate_invoke(void *ctx, void (*callback)(void *ctx, uint64_t addr, uint64_t len))
{
	for (unsigned int i = 0; i < num_io_rgns; ++i) {
		if (io_attr_table[i].wimg & PMAP_IO_RANGE_NEEDS_HIBERNATING) {
			callback(ctx, io_attr_table[i].addr, io_attr_table[i].len);
		}
	}
}
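/*
 * Illustrative usage sketch (the callback name below is hypothetical):
 *
 *	static void hib_copy_io_range(void *ctx, uint64_t addr, uint64_t len)
 *	{
 *		// hash/copy [addr, addr + len) into the hibernation image
 *	}
 *
 *	pmap_hibernate_invoke(NULL, hib_copy_io_range);
 */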
/**
 * Set the HASHED pv_head_table flag for the passed in physical page if it's a
 * PPL-owned page. Otherwise, do nothing.
 *
 * @param addr Physical address of the page to set the HASHED flag on.
 */
MARK_AS_PMAP_TEXT void
pmap_set_ppl_hashed_flag(const pmap_paddr_t addr)
{
	/* Ignore non-managed kernel memory. */
	if (!pa_valid(addr)) {
		return;
	}

	const int pai = (int)pa_index(addr);
	if (pp_attr_table[pai] & PP_ATTR_MONITOR) {
		pv_entry_t **pv_h = pai_to_pvh(pai);

		/* Mark that the PPL-owned page has been hashed into the hibernation image. */
		LOCK_PVH(pai);
		pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_HASHED);
		UNLOCK_PVH(pai);
	}
}
/**
 * Loop through every physical page in the system and clear out the HASHED flag
 * on every PPL-owned page. That flag is used to keep track of which pages have
 * been hashed into the hibernation image during the hibernation entry process.
 *
 * The HASHED flag needs to be cleared out between hibernation cycles because the
 * pv_head_table and pp_attr_table might have been copied into the hibernation
 * image with the HASHED flag set on certain pages. It's important to clear the
 * HASHED flag to ensure that the enforcement of all PPL-owned memory being hashed
 * into the hibernation image can't be compromised across hibernation cycles.
 */
MARK_AS_PMAP_TEXT void
pmap_clear_ppl_hashed_flag_all(void)
{
	const int last_index = (int)pa_index(vm_last_phys);
	pv_entry_t **pv_h = NULL;

	for (int pai = 0; pai < last_index; ++pai) {
		pv_h = pai_to_pvh(pai);

		/* Test for PPL-owned pages that have the HASHED flag set in their pv_head_table entry. */
		if ((pvh_get_flags(pv_h) & PVH_FLAG_HASHED) &&
		    (pp_attr_table[pai] & PP_ATTR_MONITOR)) {
			LOCK_PVH(pai);
			pvh_set_flags(pv_h, pvh_get_flags(pv_h) & ~PVH_FLAG_HASHED);
			UNLOCK_PVH(pai);
		}
	}
}
/**
 * Enforce that all PPL-owned pages were hashed into the hibernation image. The
 * ppl_hib driver will call this after all wired pages have been copied into the
 * hibernation image.
 */
MARK_AS_PMAP_TEXT void
pmap_check_ppl_hashed_flag_all(void)
{
	const int last_index = (int)pa_index(vm_last_phys);
	pv_entry_t **pv_h = NULL;

	for (int pai = 0; pai < last_index; ++pai) {
		pv_h = pai_to_pvh(pai);

		/**
		 * The PMAP stacks are explicitly not saved into the image so skip checking
		 * the pages that contain the PMAP stacks.
		 */
		const bool is_pmap_stack = (pai >= (int)pa_index(pmap_stacks_start_pa)) &&
		    (pai < (int)pa_index(pmap_stacks_end_pa));

		if (!is_pmap_stack &&
		    (pp_attr_table[pai] & PP_ATTR_MONITOR) &&
		    !(pvh_get_flags(pv_h) & PVH_FLAG_HASHED)) {
			panic("Found PPL-owned page that was not hashed into the hibernation image: pai %d", pai);
		}
	}
}

#endif /* XNU_MONITOR */
/*
 * Indicate that a pmap is intended to be used as a nested pmap
 * within one or more larger address spaces. This must be set
 * before pmap_nest() is called with this pmap as the 'subordinate'.
 */
MARK_AS_PMAP_TEXT static void
pmap_set_nested_internal(
	pmap_t pmap)
{
	VALIDATE_PMAP(pmap);
	pmap->nested = TRUE;
	pmap_get_pt_ops(pmap)->free_id(pmap);
}

void
pmap_set_nested(
	pmap_t pmap)
{
#if XNU_MONITOR
	pmap_set_nested_ppl(pmap);
#else
	pmap_set_nested_internal(pmap);
#endif
}
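/*
 * Illustrative call order (per the comment above, the nested flag must be set
 * before the pmap is nested):
 *
 *	pmap_set_nested(subord);
 *	kr = pmap_nest(grand, subord, vstart, size);
 */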
/*
 * pmap_trim_range(pmap, start, end)
 *
 * pmap = pmap to operate on
 * start = start of the range
 * end = end of the range
 *
 * Attempts to deallocate TTEs for the given range in the nested range.
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_range(
	pmap_t pmap,
	addr64_t start,
	addr64_t end)
{
11311 addr64_t nested_region_start
;
11312 addr64_t nested_region_end
;
11313 addr64_t adjusted_start
;
11314 addr64_t adjusted_end
;
11315 addr64_t adjust_offmask
;
11316 tt_entry_t
* tte_p
;
11317 pt_entry_t
* pte_p
;
11318 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
11320 if (__improbable(end
< start
)) {
11321 panic("%s: invalid address range, "
11322 "pmap=%p, start=%p, end=%p",
11324 pmap
, (void*)start
, (void*)end
);
11327 nested_region_start
= pmap
->nested_region_addr
;
11328 nested_region_end
= nested_region_start
+ pmap
->nested_region_size
;
11330 if (__improbable((start
< nested_region_start
) || (end
> nested_region_end
))) {
11331 panic("%s: range outside nested region %p-%p, "
11332 "pmap=%p, start=%p, end=%p",
11333 __func__
, (void *)nested_region_start
, (void *)nested_region_end
,
11334 pmap
, (void*)start
, (void*)end
);
11337 /* Contract the range to TT page boundaries. */
11338 adjust_offmask
= pt_attr_leaf_table_offmask(pt_attr
);
11339 adjusted_start
= ((start
+ adjust_offmask
) & ~adjust_offmask
);
11340 adjusted_end
= end
& ~adjust_offmask
;
11342 /* Iterate over the range, trying to remove TTEs. */
11343 for (cur
= adjusted_start
; (cur
< adjusted_end
) && (cur
>= adjusted_start
); cur
+= pt_attr_twig_size(pt_attr
)) {
11346 tte_p
= pmap_tte(pmap
, cur
);
11348 if (tte_p
== (tt_entry_t
*) NULL
) {
11352 if ((*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
11353 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
11355 if ((ptep_get_info(pte_p
)->refcnt
== 0) &&
11356 (pmap
!= kernel_pmap
)) {
11357 if (pmap
->nested
== TRUE
) {
11358 /* Deallocate for the nested map. */
11359 pmap_tte_deallocate(pmap
, cur
, cur
+ PAGE_SIZE
, false, tte_p
, pt_attr_twig_level(pt_attr
));
11361 /* Just remove for the parent map. */
11362 pmap_tte_remove(pmap
, cur
, cur
+ PAGE_SIZE
, false, tte_p
, pt_attr_twig_level(pt_attr
));
11371 #if (__ARM_VMSA__ > 7)
11372 /* Remove empty L2 TTs. */
11373 adjusted_start
= ((start
+ pt_attr_ln_offmask(pt_attr
, PMAP_TT_L1_LEVEL
)) & ~pt_attr_ln_offmask(pt_attr
, PMAP_TT_L1_LEVEL
));
11374 adjusted_end
= end
& ~pt_attr_ln_offmask(pt_attr
, PMAP_TT_L1_LEVEL
);
11376 for (cur
= adjusted_start
; (cur
< adjusted_end
) && (cur
>= adjusted_start
); cur
+= pt_attr_ln_size(pt_attr
, PMAP_TT_L1_LEVEL
)) {
11377 /* For each L1 entry in our range... */
11380 bool remove_tt1e
= true;
11381 tt_entry_t
* tt1e_p
= pmap_tt1e(pmap
, cur
);
11382 tt_entry_t
* tt2e_start
;
11383 tt_entry_t
* tt2e_end
;
11384 tt_entry_t
* tt2e_p
;
11387 if (tt1e_p
== NULL
) {
11394 if (tt1e
== ARM_TTE_TYPE_FAULT
) {
11399 tt2e_start
= &((tt_entry_t
*) phystokv(tt1e
& ARM_TTE_TABLE_MASK
))[0];
11400 tt2e_end
= &tt2e_start
[pt_attr_page_size(pt_attr
) / sizeof(*tt2e_start
)];
11402 for (tt2e_p
= tt2e_start
; tt2e_p
< tt2e_end
; tt2e_p
++) {
11403 if (*tt2e_p
!= ARM_TTE_TYPE_FAULT
) {
11405 * If any TTEs are populated, don't remove the
11408 remove_tt1e
= false;
11413 pmap_tte_deallocate(pmap
, cur
, cur
+ PAGE_SIZE
, false, tt1e_p
, PMAP_TT_L1_LEVEL
);
11418 #endif /* (__ARM_VMSA__ > 7) */
11422 * pmap_trim_internal(grand, subord, vstart, size)
11424 * grand = pmap subord is nested in
11425 * subord = nested pmap
11426 * vstart = start of the used range in grand
11427 * size = size of the used range
11429 * Attempts to trim the shared region page tables down to only cover the given
11430 * range in subord and grand.
11432 MARK_AS_PMAP_TEXT
static void
11433 pmap_trim_internal(
11440 addr64_t adjust_offmask
;
11442 if (__improbable(os_add_overflow(vstart
, size
, &vend
))) {
11443 panic("%s: grand addr wraps around, "
11444 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11445 __func__
, grand
, subord
, (void*)vstart
, size
);
11448 VALIDATE_PMAP(grand
);
11449 VALIDATE_PMAP(subord
);
11451 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(grand
);
11455 if (__improbable(!subord
->nested
)) {
11456 panic("%s: subord is not nestable, "
11457 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11458 __func__
, grand
, subord
, (void*)vstart
, size
);
11461 if (__improbable(grand
->nested
)) {
11462 panic("%s: grand is nestable, "
11463 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11464 __func__
, grand
, subord
, (void*)vstart
, size
);
11467 if (__improbable(grand
->nested_pmap
!= subord
)) {
11468 panic("%s: grand->nested != subord, "
11469 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11470 __func__
, grand
, subord
, (void*)vstart
, size
);
11473 if (__improbable((size
!= 0) &&
11474 ((vstart
< grand
->nested_region_addr
) || (vend
> (grand
->nested_region_addr
+ grand
->nested_region_size
))))) {
11475 panic("%s: grand range not in nested region, "
11476 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11477 __func__
, grand
, subord
, (void*)vstart
, size
);
11481 if (!grand
->nested_has_no_bounds_ref
) {
11482 assert(subord
->nested_bounds_set
);
11484 if (!grand
->nested_bounds_set
) {
11485 /* Inherit the bounds from subord. */
11486 grand
->nested_region_true_start
= subord
->nested_region_true_start
;
11487 grand
->nested_region_true_end
= subord
->nested_region_true_end
;
11488 grand
->nested_bounds_set
= true;
11491 pmap_unlock(subord
);
11495 if ((!subord
->nested_bounds_set
) && size
) {
11496 adjust_offmask
= pt_attr_leaf_table_offmask(pt_attr
);
11498 subord
->nested_region_true_start
= vstart
;
11499 subord
->nested_region_true_end
= vend
;
11500 subord
->nested_region_true_start
&= ~adjust_offmask
;
11502 if (__improbable(os_add_overflow(subord
->nested_region_true_end
, adjust_offmask
, &subord
->nested_region_true_end
))) {
11503 panic("%s: padded true end wraps around, "
11504 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11505 __func__
, grand
, subord
, (void*)vstart
, size
);
11508 subord
->nested_region_true_end
&= ~adjust_offmask
;
11509 subord
->nested_bounds_set
= true;
11512 if (subord
->nested_bounds_set
) {
11513 /* Inherit the bounds from subord. */
11514 grand
->nested_region_true_start
= subord
->nested_region_true_start
;
11515 grand
->nested_region_true_end
= subord
->nested_region_true_end
;
11516 grand
->nested_bounds_set
= true;
11518 /* If we know the bounds, we can trim the pmap. */
11519 grand
->nested_has_no_bounds_ref
= false;
11520 pmap_unlock(subord
);
11522 /* Don't trim if we don't know the bounds. */
11523 pmap_unlock(subord
);
11527 /* Trim grand to only cover the given range. */
11528 pmap_trim_range(grand
, grand
->nested_region_addr
, grand
->nested_region_true_start
);
11529 pmap_trim_range(grand
, grand
->nested_region_true_end
, (grand
->nested_region_addr
+ grand
->nested_region_size
));
11531 /* Try to trim subord. */
11532 pmap_trim_subord(subord
);
MARK_AS_PMAP_TEXT static void
pmap_trim_self(pmap_t pmap)
{
	if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
		/* If we have a no bounds ref, we need to drop it. */
		pmap_lock_ro(pmap->nested_pmap);
		pmap->nested_has_no_bounds_ref = false;
		boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
		vm_map_offset_t nested_region_true_start = pmap->nested_pmap->nested_region_true_start;
		vm_map_offset_t nested_region_true_end = pmap->nested_pmap->nested_region_true_end;
		pmap_unlock_ro(pmap->nested_pmap);

		if (nested_bounds_set) {
			pmap_trim_range(pmap, pmap->nested_region_addr, nested_region_true_start);
			pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_addr + pmap->nested_region_size));
		}
		/*
		 * Try trimming the nested pmap, in case we had the
		 * last reference.
		 */
		pmap_trim_subord(pmap->nested_pmap);
	}
}
/*
 * pmap_trim_subord(grand, subord)
 *
 * grand = pmap that we have nested subord in
 * subord = nested pmap we are attempting to trim
 *
 * Trims subord if possible
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_subord(pmap_t subord)
{
	bool contract_subord = false;

	pmap_lock(subord);

	subord->nested_no_bounds_refcnt--;

	if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
		/* If this was the last no bounds reference, trim subord. */
		contract_subord = true;
	}

	pmap_unlock(subord);

	if (contract_subord) {
		pmap_trim_range(subord, subord->nested_region_addr, subord->nested_region_true_start);
		pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_addr + subord->nested_region_size);
	}
}
void
pmap_trim(
	pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	uint64_t size)
{
#if XNU_MONITOR
	pmap_trim_ppl(grand, subord, vstart, size);

	pmap_ledger_check_balance(grand);
	pmap_ledger_check_balance(subord);
#else
	pmap_trim_internal(grand, subord, vstart, size);
#endif
}
#if HAS_APPLE_PAC
MARK_AS_PMAP_TEXT static void *
pmap_sign_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
{
	void *res = NULL;
	uint64_t current_intr_state = pmap_interrupts_disable();

	uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
	switch (key) {
	case ptrauth_key_asia:
		res = ptrauth_sign_unauthenticated(value, ptrauth_key_asia, discriminator);
		break;
	case ptrauth_key_asda:
		res = ptrauth_sign_unauthenticated(value, ptrauth_key_asda, discriminator);
		break;
	default:
		panic("attempt to sign user pointer without process independent key");
	}
	ml_disable_user_jop_key(jop_key, saved_jop_state);

	pmap_interrupts_restore(current_intr_state);

	return res;
}

void *
pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
{
	return pmap_sign_user_ptr_internal(value, key, discriminator, jop_key);
}

MARK_AS_PMAP_TEXT static void *
pmap_auth_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
{
	if ((key != ptrauth_key_asia) && (key != ptrauth_key_asda)) {
		panic("attempt to auth user pointer without process independent key");
	}

	void *res = NULL;
	uint64_t current_intr_state = pmap_interrupts_disable();

	uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
	res = ml_auth_ptr_unchecked(value, key, discriminator);
	ml_disable_user_jop_key(jop_key, saved_jop_state);

	pmap_interrupts_restore(current_intr_state);

	return res;
}

void *
pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
{
	return pmap_auth_user_ptr_internal(value, key, discriminator, jop_key);
}
#endif /* HAS_APPLE_PAC */
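/*
 * Expected round trip (illustrative sketch; 'ptr', 'disc' and 'jop_key' are
 * placeholders): a pointer signed with a given key/discriminator pair should
 * authenticate back to the original value with the same pair.
 *
 *	void *signed_ptr = pmap_sign_user_ptr(ptr, ptrauth_key_asia, disc, jop_key);
 *	void *plain_ptr  = pmap_auth_user_ptr(signed_ptr, ptrauth_key_asia, disc, jop_key);
 */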
11664 * kern_return_t pmap_nest(grand, subord, vstart, size)
11666 * grand = the pmap that we will nest subord into
11667 * subord = the pmap that goes into the grand
11668 * vstart = start of range in pmap to be inserted
11669 * size = Size of nest area (up to 16TB)
11671 * Inserts a pmap into another. This is used to implement shared segments.
11675 MARK_AS_PMAP_TEXT
static kern_return_t
11676 pmap_nest_internal(
11682 kern_return_t kr
= KERN_FAILURE
;
11683 vm_map_offset_t vaddr
;
11684 tt_entry_t
*stte_p
;
11685 tt_entry_t
*gtte_p
;
11687 unsigned int num_tte
;
11688 unsigned int nested_region_asid_bitmap_size
;
11689 unsigned int* nested_region_asid_bitmap
;
11690 int expand_options
= 0;
11691 bool deref_subord
= true;
11692 pmap_t __ptrauth_only subord_addr
;
11695 if (__improbable(os_add_overflow(vstart
, size
, &vend
))) {
11696 panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__
, grand
, vstart
, size
);
11699 VALIDATE_PMAP(grand
);
11700 pmap_reference_internal(subord
); // This call will also validate subord
11702 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(grand
);
11703 assert(pmap_get_pt_attr(subord
) == pt_attr
);
11706 expand_options
|= PMAP_TT_ALLOCATE_NOWAIT
;
11709 if (__improbable(((size
| vstart
) & (pt_attr_leaf_table_offmask(pt_attr
))) != 0x0ULL
)) {
11710 panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx\n", grand
, vstart
, size
);
11713 if (__improbable(!subord
->nested
)) {
11714 panic("%s: subordinate pmap %p is not nestable", __func__
, subord
);
11717 if (subord
->nested_region_asid_bitmap
== NULL
) {
11718 nested_region_asid_bitmap_size
= (unsigned int)(size
>> pt_attr_twig_shift(pt_attr
)) / (sizeof(unsigned int) * NBBY
);
11721 pmap_paddr_t pa
= 0;
11723 if (__improbable((nested_region_asid_bitmap_size
* sizeof(unsigned int)) > PAGE_SIZE
)) {
11724 panic("%s: nested_region_asid_bitmap_size=%u will not fit in a page, "
11725 "grand=%p, subord=%p, vstart=0x%llx, size=%llx",
11726 __FUNCTION__
, nested_region_asid_bitmap_size
,
11727 grand
, subord
, vstart
, size
);
11730 kr
= pmap_pages_alloc_zeroed(&pa
, PAGE_SIZE
, PMAP_PAGES_ALLOCATE_NOWAIT
);
11732 if (kr
!= KERN_SUCCESS
) {
11738 nested_region_asid_bitmap
= (unsigned int *)phystokv(pa
);
11740 nested_region_asid_bitmap
= kheap_alloc(KHEAP_DATA_BUFFERS
,
11741 nested_region_asid_bitmap_size
* sizeof(unsigned int),
11742 Z_WAITOK
| Z_ZERO
);
11746 if (subord
->nested_region_asid_bitmap
== NULL
) {
11747 subord
->nested_region_asid_bitmap_size
= nested_region_asid_bitmap_size
;
11748 subord
->nested_region_addr
= vstart
;
11749 subord
->nested_region_size
= (mach_vm_offset_t
) size
;
11752 * Ensure that the rest of the subord->nested_region_* fields are
11753 * initialized and visible before setting the nested_region_asid_bitmap
11754 * field (which is used as the flag to say that the rest are initialized).
11756 __builtin_arm_dmb(DMB_ISHST
);
11757 subord
->nested_region_asid_bitmap
= nested_region_asid_bitmap
;
11758 nested_region_asid_bitmap
= NULL
;
11760 pmap_unlock(subord
);
11761 if (nested_region_asid_bitmap
!= NULL
) {
11763 pmap_pages_free(kvtophys((vm_offset_t
)nested_region_asid_bitmap
), PAGE_SIZE
);
11765 kheap_free(KHEAP_DATA_BUFFERS
, nested_region_asid_bitmap
,
11766 nested_region_asid_bitmap_size
* sizeof(unsigned int));
11772 * Ensure subsequent reads of the subord->nested_region_* fields don't get
11773 * speculated before their initialization.
11775 __builtin_arm_dmb(DMB_ISHLD
);
11777 if ((subord
->nested_region_addr
+ subord
->nested_region_size
) < vend
) {
11779 unsigned int new_nested_region_asid_bitmap_size
;
11780 unsigned int* new_nested_region_asid_bitmap
;
11782 nested_region_asid_bitmap
= NULL
;
11783 nested_region_asid_bitmap_size
= 0;
11784 new_size
= vend
- subord
->nested_region_addr
;
11786 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
11787 new_nested_region_asid_bitmap_size
= (unsigned int)((new_size
>> pt_attr_twig_shift(pt_attr
)) / (sizeof(unsigned int) * NBBY
)) + 1;
11790 pmap_paddr_t pa
= 0;
11792 if (__improbable((new_nested_region_asid_bitmap_size
* sizeof(unsigned int)) > PAGE_SIZE
)) {
11793 panic("%s: new_nested_region_asid_bitmap_size=%u will not fit in a page, "
11794 "grand=%p, subord=%p, vstart=0x%llx, new_size=%llx",
11795 __FUNCTION__
, new_nested_region_asid_bitmap_size
,
11796 grand
, subord
, vstart
, new_size
);
11799 kr
= pmap_pages_alloc_zeroed(&pa
, PAGE_SIZE
, PMAP_PAGES_ALLOCATE_NOWAIT
);
11801 if (kr
!= KERN_SUCCESS
) {
11807 new_nested_region_asid_bitmap
= (unsigned int *)phystokv(pa
);
11809 new_nested_region_asid_bitmap
= kheap_alloc(KHEAP_DATA_BUFFERS
,
11810 new_nested_region_asid_bitmap_size
* sizeof(unsigned int),
11811 Z_WAITOK
| Z_ZERO
);
11814 if (subord
->nested_region_size
< new_size
) {
11815 bcopy(subord
->nested_region_asid_bitmap
,
11816 new_nested_region_asid_bitmap
, subord
->nested_region_asid_bitmap_size
);
11817 nested_region_asid_bitmap_size
= subord
->nested_region_asid_bitmap_size
;
11818 nested_region_asid_bitmap
= subord
->nested_region_asid_bitmap
;
11819 subord
->nested_region_asid_bitmap
= new_nested_region_asid_bitmap
;
11820 subord
->nested_region_asid_bitmap_size
= new_nested_region_asid_bitmap_size
;
11821 subord
->nested_region_size
= new_size
;
11822 new_nested_region_asid_bitmap
= NULL
;
11824 pmap_unlock(subord
);
11825 if (nested_region_asid_bitmap
!= NULL
) {
11827 pmap_pages_free(kvtophys((vm_offset_t
)nested_region_asid_bitmap
), PAGE_SIZE
);
11829 kheap_free(KHEAP_DATA_BUFFERS
, nested_region_asid_bitmap
,
11830 nested_region_asid_bitmap_size
* sizeof(unsigned int));
11833 if (new_nested_region_asid_bitmap
!= NULL
) {
11835 pmap_pages_free(kvtophys((vm_offset_t
)new_nested_region_asid_bitmap
), PAGE_SIZE
);
11837 kheap_free(KHEAP_DATA_BUFFERS
, new_nested_region_asid_bitmap
,
11838 new_nested_region_asid_bitmap_size
* sizeof(unsigned int));
11845 #if __has_feature(ptrauth_calls)
11846 subord_addr
= ptrauth_sign_unauthenticated(subord
,
11847 ptrauth_key_process_independent_data
,
11848 ptrauth_blend_discriminator(&grand
->nested_pmap
, ptrauth_string_discriminator("pmap.nested_pmap")));
11850 subord_addr
= subord
;
11851 #endif // __has_feature(ptrauth_calls)
11853 if (os_atomic_cmpxchg(&grand
->nested_pmap
, PMAP_NULL
, subord_addr
, relaxed
)) {
11855 * If this is grand's first nesting operation, keep the reference on subord.
11856 * It will be released by pmap_destroy_internal() when grand is destroyed.
11858 deref_subord
= false;
11860 if (!subord
->nested_bounds_set
) {
11862 * We are nesting without the shared regions bounds
11863 * being known. We'll have to trim the pmap later.
11865 grand
->nested_has_no_bounds_ref
= true;
11866 subord
->nested_no_bounds_refcnt
++;
11869 grand
->nested_region_addr
= vstart
;
11870 grand
->nested_region_size
= (mach_vm_offset_t
) size
;
11872 if (__improbable(grand
->nested_pmap
!= subord
)) {
11873 panic("pmap_nest() pmap %p has a nested pmap\n", grand
);
11874 } else if (__improbable(grand
->nested_region_addr
> vstart
)) {
11875 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand
);
11876 } else if ((grand
->nested_region_addr
+ grand
->nested_region_size
) < vend
) {
11877 grand
->nested_region_size
= (mach_vm_offset_t
)(vstart
- grand
->nested_region_addr
+ size
);
11881 #if (__ARM_VMSA__ == 7)
11882 vaddr
= (vm_map_offset_t
) vstart
;
11883 num_tte
= size
>> ARM_TT_L1_SHIFT
;
11885 for (i
= 0; i
< num_tte
; i
++) {
11886 if (((subord
->nested_region_true_start
) > vaddr
) || ((subord
->nested_region_true_end
) <= vaddr
)) {
11890 stte_p
= pmap_tte(subord
, vaddr
);
11891 if ((stte_p
== (tt_entry_t
*)NULL
) || (((*stte_p
) & ARM_TTE_TYPE_MASK
) != ARM_TTE_TYPE_TABLE
)) {
11892 pmap_unlock(subord
);
11893 kr
= pmap_expand(subord
, vaddr
, expand_options
, PMAP_TT_L2_LEVEL
);
11895 if (kr
!= KERN_SUCCESS
) {
11902 pmap_unlock(subord
);
11904 stte_p
= pmap_tte(grand
, vaddr
);
11905 if (stte_p
== (tt_entry_t
*)NULL
) {
11906 pmap_unlock(grand
);
11907 kr
= pmap_expand(grand
, vaddr
, expand_options
, PMAP_TT_L1_LEVEL
);
11909 if (kr
!= KERN_SUCCESS
) {
11914 pmap_unlock(grand
);
11920 vaddr
+= ARM_TT_L1_SIZE
;
11924 vaddr
= (vm_map_offset_t
) vstart
;
11925 num_tte
= (unsigned int)(size
>> pt_attr_twig_shift(pt_attr
));
11927 for (i
= 0; i
< num_tte
; i
++) {
11928 if (((subord
->nested_region_true_start
) > vaddr
) || ((subord
->nested_region_true_end
) <= vaddr
)) {
11932 stte_p
= pmap_tte(subord
, vaddr
);
11933 if (stte_p
== PT_ENTRY_NULL
|| *stte_p
== ARM_TTE_EMPTY
) {
11934 pmap_unlock(subord
);
11935 kr
= pmap_expand(subord
, vaddr
, expand_options
, pt_attr_leaf_level(pt_attr
));
11937 if (kr
!= KERN_SUCCESS
) {
11945 vaddr
+= pt_attr_twig_size(pt_attr
);
11948 pmap_unlock(subord
);
11951 * copy tte's from subord pmap into grand pmap
11955 vaddr
= (vm_map_offset_t
) vstart
;
11958 #if (__ARM_VMSA__ == 7)
11959 for (i
= 0; i
< num_tte
; i
++) {
11960 if (((subord
->nested_region_true_start
) > vaddr
) || ((subord
->nested_region_true_end
) <= vaddr
)) {
11964 stte_p
= pmap_tte(subord
, vaddr
);
11965 gtte_p
= pmap_tte(grand
, vaddr
);
11969 vaddr
+= ARM_TT_L1_SIZE
;
11972 for (i
= 0; i
< num_tte
; i
++) {
11973 if (((subord
->nested_region_true_start
) > vaddr
) || ((subord
->nested_region_true_end
) <= vaddr
)) {
11977 stte_p
= pmap_tte(subord
, vaddr
);
11978 gtte_p
= pmap_tte(grand
, vaddr
);
11979 if (gtte_p
== PT_ENTRY_NULL
) {
11980 pmap_unlock(grand
);
11981 kr
= pmap_expand(grand
, vaddr
, expand_options
, pt_attr_twig_level(pt_attr
));
11984 if (kr
!= KERN_SUCCESS
) {
11988 gtte_p
= pmap_tt2e(grand
, vaddr
);
11993 vaddr
+= pt_attr_twig_size(pt_attr
);
12000 stte_p
= pmap_tte(grand
, vstart
);
12001 FLUSH_PTE_RANGE_STRONG(stte_p
, stte_p
+ num_tte
);
12002 PMAP_UPDATE_TLBS(grand
, vstart
, vend
, false);
12004 pmap_unlock(grand
);
	if (deref_subord) {
		pmap_destroy_internal(subord);
	}
	return kr;
}

kern_return_t
pmap_nest(
	pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	uint64_t size)
{
	kern_return_t kr = KERN_FAILURE;

	PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
	    VM_KERNEL_ADDRHIDE(vstart));

#if XNU_MONITOR
	while ((kr = pmap_nest_ppl(grand, subord, vstart, size)) == KERN_RESOURCE_SHORTAGE) {
		pmap_alloc_page_for_ppl(0);
	}

	pmap_ledger_check_balance(grand);
	pmap_ledger_check_balance(subord);
#else
	kr = pmap_nest_internal(grand, subord, vstart, size);
#endif

	PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);

	return kr;
}
/*
 * kern_return_t pmap_unnest(grand, vaddr)
 *
 * grand = the pmap that will have the virtual range unnested
 * vaddr = start of range in pmap to be unnested
 * size = size of range in pmap to be unnested
 */
kern_return_t
pmap_unnest(
	pmap_t grand,
	addr64_t vaddr,
	uint64_t size)
{
	return pmap_unnest_options(grand, vaddr, size, 0);
}
MARK_AS_PMAP_TEXT static kern_return_t
pmap_unnest_options_internal(
	pmap_t grand,
	addr64_t vaddr,
	uint64_t size,
	unsigned int option)
{
	vm_map_offset_t start;
	vm_map_offset_t addr;
	tt_entry_t *tte_p;
	unsigned int current_index;
	unsigned int start_index;
	unsigned int max_index;
	unsigned int num_tte;
	unsigned int i;
	addr64_t vend;
12078 if (__improbable(os_add_overflow(vaddr
, size
, &vend
))) {
12079 panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__
, grand
, vaddr
, size
);
12082 VALIDATE_PMAP(grand
);
12084 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(grand
);
12086 if (((size
| vaddr
) & pt_attr_twig_offmask(pt_attr
)) != 0x0ULL
) {
12087 panic("pmap_unnest(): unaligned request");
12090 if ((option
& PMAP_UNNEST_CLEAN
) == 0) {
12091 if (grand
->nested_pmap
== NULL
) {
12092 panic("%s: %p has no nested pmap", __func__
, grand
);
12095 if ((vaddr
< grand
->nested_region_addr
) || (vend
> (grand
->nested_region_addr
+ grand
->nested_region_size
))) {
12096 panic("%s: %p: unnest request to region not-fully-nested region [%p, %p)", __func__
, grand
, (void*)vaddr
, (void*)vend
);
12099 pmap_lock(grand
->nested_pmap
);
12102 start_index
= (unsigned int)((vaddr
- grand
->nested_region_addr
) >> pt_attr_twig_shift(pt_attr
));
12103 max_index
= (unsigned int)(start_index
+ (size
>> pt_attr_twig_shift(pt_attr
)));
12104 num_tte
= (unsigned int)(size
>> pt_attr_twig_shift(pt_attr
));
12106 for (current_index
= start_index
, addr
= start
; current_index
< max_index
; current_index
++, addr
+= pt_attr_twig_size(pt_attr
)) {
12107 pt_entry_t
*bpte
, *epte
, *cpte
;
12109 if (addr
< grand
->nested_pmap
->nested_region_true_start
) {
12110 /* We haven't reached the interesting range. */
12114 if (addr
>= grand
->nested_pmap
->nested_region_true_end
) {
12115 /* We're done with the interesting range. */
12119 bpte
= pmap_pte(grand
->nested_pmap
, addr
);
12120 epte
= bpte
+ (pt_attr_leaf_index_mask(pt_attr
) >> pt_attr_leaf_shift(pt_attr
));
12122 if (!testbit(current_index
, (int *)grand
->nested_pmap
->nested_region_asid_bitmap
)) {
12123 setbit(current_index
, (int *)grand
->nested_pmap
->nested_region_asid_bitmap
);
12125 for (cpte
= bpte
; cpte
<= epte
; cpte
++) {
12128 boolean_t managed
= FALSE
;
12131 if ((*cpte
!= ARM_PTE_TYPE_FAULT
)
12132 && (!ARM_PTE_IS_COMPRESSED(*cpte
, cpte
))) {
12133 spte
= *((volatile pt_entry_t
*)cpte
);
12135 pa
= pte_to_pa(spte
);
12136 if (!pa_valid(pa
)) {
12139 pai
= (int)pa_index(pa
);
12141 spte
= *((volatile pt_entry_t
*)cpte
);
12142 pa
= pte_to_pa(spte
);
12143 if (pai
== (int)pa_index(pa
)) {
12145 break; // Leave the PVH locked as we'll unlock it after we update the PTE
12150 if (((spte
& ARM_PTE_NG
) != ARM_PTE_NG
)) {
12151 WRITE_PTE_FAST(cpte
, (spte
| ARM_PTE_NG
));
12155 ASSERT_PVH_LOCKED(pai
);
12162 FLUSH_PTE_RANGE_STRONG(bpte
, epte
);
12165 flush_mmu_tlb_region_asid_async(vaddr
, (unsigned)size
, grand
->nested_pmap
);
12168 pmap_unlock(grand
->nested_pmap
);
12174 * invalidate all pdes for segment at vaddr in pmap grand
12179 num_tte
= (unsigned int)(size
>> pt_attr_twig_shift(pt_attr
));
12181 for (i
= 0; i
< num_tte
; i
++, addr
+= pt_attr_twig_size(pt_attr
)) {
12182 if (addr
< grand
->nested_pmap
->nested_region_true_start
) {
12183 /* We haven't reached the interesting range. */
12187 if (addr
>= grand
->nested_pmap
->nested_region_true_end
) {
12188 /* We're done with the interesting range. */
12192 tte_p
= pmap_tte(grand
, addr
);
12193 *tte_p
= ARM_TTE_TYPE_FAULT
;
12196 tte_p
= pmap_tte(grand
, start
);
12197 FLUSH_PTE_RANGE_STRONG(tte_p
, tte_p
+ num_tte
);
12198 PMAP_UPDATE_TLBS(grand
, start
, vend
, false);
12200 pmap_unlock(grand
);
12202 return KERN_SUCCESS
;
}

kern_return_t
pmap_unnest_options(
	pmap_t grand,
	addr64_t vaddr,
	uint64_t size,
	unsigned int option)
{
	kern_return_t kr = KERN_FAILURE;

	PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));

#if XNU_MONITOR
	kr = pmap_unnest_options_ppl(grand, vaddr, size, option);
#else
	kr = pmap_unnest_options_internal(grand, vaddr, size, option);
#endif

	PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);

	return kr;
}
boolean_t
pmap_adjust_unnest_parameters(
	__unused pmap_t p,
	__unused vm_map_offset_t *s,
	__unused vm_map_offset_t *e)
{
	return TRUE; /* to get to log_unnest_badness()... */
}

/*
 * disable no-execute capability on
 * the specified pmap
 */
#if DEVELOPMENT || DEBUG
void
pmap_disable_NX(
	pmap_t pmap)
{
	pmap->nx_enabled = FALSE;
}
#else
void
pmap_disable_NX(
	__unused pmap_t pmap)
{
}
#endif
12257 * flush a range of hardware TLB entries.
12258 * NOTE: assumes the smallest TLB entry in use will be for
12259 * an ARM small page (4K).
12262 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
12264 #if __ARM_RANGE_TLBI__
12265 #define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
12266 #define ARM64_FULL_TLB_FLUSH_THRESHOLD ARM64_TLB_RANGE_PAGES
12268 #define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
12269 #endif // __ARM_RANGE_TLBI__
12272 flush_mmu_tlb_region_asid_async(
12277 #if (__ARM_VMSA__ == 7)
12278 vm_offset_t end
= va
+ length
;
12281 asid
= pmap
->hw_asid
;
12283 if (length
/ ARM_SMALL_PAGE_SIZE
> ARM_FULL_TLB_FLUSH_THRESHOLD
) {
12284 boolean_t flush_all
= FALSE
;
12286 if ((asid
== 0) || (pmap
->nested
== TRUE
)) {
12290 flush_mmu_tlb_async();
12292 flush_mmu_tlb_asid_async(asid
);
12297 if (pmap
->nested
== TRUE
) {
12298 #if !__ARM_MP_EXT__
12301 va
= arm_trunc_page(va
);
12303 flush_mmu_tlb_mva_entries_async(va
);
12304 va
+= ARM_SMALL_PAGE_SIZE
;
12309 va
= arm_trunc_page(va
) | (asid
& 0xff);
12310 flush_mmu_tlb_entries_async(va
, end
);
12313 unsigned long pmap_page_shift
= pt_attr_leaf_shift(pmap_get_pt_attr(pmap
));
12314 const uint64_t pmap_page_size
= 1ULL << pmap_page_shift
;
12315 ppnum_t npages
= (ppnum_t
)(length
>> pmap_page_shift
);
12318 asid
= pmap
->hw_asid
;
12320 if (npages
> ARM64_FULL_TLB_FLUSH_THRESHOLD
) {
12321 boolean_t flush_all
= FALSE
;
12323 if ((asid
== 0) || (pmap
->nested
== TRUE
)) {
12327 flush_mmu_tlb_async();
12329 flush_mmu_tlb_asid_async((uint64_t)asid
<< TLBI_ASID_SHIFT
);
12333 #if __ARM_RANGE_TLBI__
12334 if (npages
> ARM64_RANGE_TLB_FLUSH_THRESHOLD
) {
12335 va
= generate_rtlbi_param(npages
, asid
, va
, pmap_page_shift
);
12336 if (pmap
->nested
== TRUE
) {
12337 flush_mmu_tlb_allrange_async(va
);
12339 flush_mmu_tlb_range_async(va
);
12344 vm_offset_t end
= tlbi_asid(asid
) | tlbi_addr(va
+ length
);
12345 va
= tlbi_asid(asid
) | tlbi_addr(va
);
12347 if (pmap
->nested
== TRUE
) {
12348 flush_mmu_tlb_allentries_async(va
, end
, pmap_page_size
);
12350 flush_mmu_tlb_entries_async(va
, end
, pmap_page_size
);
MARK_AS_PMAP_TEXT static void
flush_mmu_tlb_full_asid_async(pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	flush_mmu_tlb_asid_async(pmap->hw_asid);
#else /* (__ARM_VMSA__ == 7) */
	flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
#endif /* (__ARM_VMSA__ == 7) */
}

void
flush_mmu_tlb_region(
	vm_offset_t va,
	unsigned length)
{
	flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
	sync_tlb_flush();
}
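/*
 * The *_async helpers above only issue the TLB invalidates; flush_mmu_tlb_region()
 * pairs the asynchronous invalidate with sync_tlb_flush() so the flush has
 * completed before it returns.
 */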
static pmap_io_range_t *
pmap_find_io_attr(pmap_paddr_t paddr)
{
	pmap_io_range_t find_range = {.addr = paddr & ~PAGE_MASK, .len = PAGE_SIZE};
	unsigned int begin = 0, end = num_io_rgns - 1;
	if ((num_io_rgns == 0) || (paddr < io_attr_table[begin].addr) ||
	    (paddr >= (io_attr_table[end].addr + io_attr_table[end].len))) {
		return NULL;
	}

	for (;;) {
		unsigned int middle = (begin + end) / 2;
		int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
		if (cmp == 0) {
			return &io_attr_table[middle];
		} else if (begin == end) {
			break;
		} else if (cmp > 0) {
			begin = middle + 1;
		} else {
			end = middle;
		}
	}

	return NULL;
}
unsigned int
pmap_cache_attributes(
	ppnum_t pn)
{
	pmap_paddr_t paddr;
	int pai;
	unsigned int result;
	pp_attr_t pp_attr_current;

	paddr = ptoa(pn);

	assert(vm_last_phys > vm_first_phys); // Check that pmap has been bootstrapped

	if (!pa_valid(paddr)) {
		pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
		return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
	}

	result = VM_WIMG_DEFAULT;

	pai = (int)pa_index(paddr);

	pp_attr_current = pp_attr_table[pai];
	if (pp_attr_current & PP_ATTR_WIMG_MASK) {
		result = pp_attr_current & PP_ATTR_WIMG_MASK;
	}
	return result;
}
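/*
 * In other words: unmanaged addresses report the matching pmap_io_range's WIMG
 * (or VM_WIMG_IO if there is none), while managed pages report VM_WIMG_DEFAULT
 * unless an explicit attribute has been recorded in pp_attr_table.
 */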
MARK_AS_PMAP_TEXT static void
pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
{
	if ((wimg_bits_prev != wimg_bits_new)
	    && ((wimg_bits_prev == VM_WIMG_COPYBACK)
	    || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
	    && (wimg_bits_new != VM_WIMG_COPYBACK))
	    || ((wimg_bits_prev == VM_WIMG_WTHRU)
	    && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
		pmap_sync_page_attributes_phys(pn);
	}

	if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
		pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
	}
}
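/*
 * pmap_sync_wimg() cleans the physical page when it leaves a cacheable mode so
 * no dirty lines linger under the new attribute, and additionally forces a
 * dcache clean the first time a page becomes VM_WIMG_RT.
 */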
MARK_AS_PMAP_TEXT static __unused void
pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
{
	pmap_paddr_t paddr = ptoa(pn);
	int pai = (int)pa_index(paddr);

	if (__improbable(!pa_valid(paddr))) {
		panic("%s called on non-managed page 0x%08x", __func__, pn);
	}

	LOCK_PVH(pai);

#if XNU_MONITOR
	if (__improbable(pa_test_monitor(paddr))) {
		panic("%s invoked on PPL page 0x%08x", __func__, pn);
	}
#endif

	pmap_update_cache_attributes_locked(pn, new_cacheattr);

	UNLOCK_PVH(pai);

	pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
}
void *
pmap_map_compressor_page(ppnum_t pn)
{
#if __ARM_PTE_PHYSMAP__
	unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
	if (cacheattr != VM_WIMG_DEFAULT) {
#if XNU_MONITOR
		pmap_update_compressor_page_ppl(pn, cacheattr, VM_WIMG_DEFAULT);
#else
		pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
#endif
	}
#endif
	return (void*)phystokv(ptoa(pn));
}

void
pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
{
#if __ARM_PTE_PHYSMAP__
	unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
	if (cacheattr != VM_WIMG_DEFAULT) {
#if XNU_MONITOR
		pmap_update_compressor_page_ppl(pn, VM_WIMG_DEFAULT, cacheattr);
#else
		pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
#endif
	}
#endif
}
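/*
 * The compressor map/unmap helpers above temporarily force VM_WIMG_DEFAULT on a
 * page whose attribute differs, so compressor accesses through the physical
 * aperture stay cacheable; the original attribute is restored on unmap.
 */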
12504 MARK_AS_PMAP_TEXT
static boolean_t
12505 pmap_batch_set_cache_attributes_internal(
12507 unsigned int cacheattr
,
12508 unsigned int page_cnt
,
12509 unsigned int page_index
,
12513 pmap_paddr_t paddr
;
12515 pp_attr_t pp_attr_current
;
12516 pp_attr_t pp_attr_template
;
12517 unsigned int wimg_bits_prev
, wimg_bits_new
;
12519 if (cacheattr
& VM_WIMG_USE_DEFAULT
) {
12520 cacheattr
= VM_WIMG_DEFAULT
;
12523 if ((doit
== FALSE
) && (*res
== 0)) {
12524 pmap_pin_kernel_pages((vm_offset_t
)res
, sizeof(*res
));
12526 pmap_unpin_kernel_pages((vm_offset_t
)res
, sizeof(*res
));
12527 if (platform_cache_batch_wimg(cacheattr
& (VM_WIMG_MASK
), page_cnt
<< PAGE_SHIFT
) == FALSE
) {
12534 if (!pa_valid(paddr
)) {
12535 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn
);
12538 pai
= (int)pa_index(paddr
);
12543 if (pa_test_monitor(paddr
)) {
12544 panic("%s invoked on PPL page 0x%llx", __func__
, (uint64_t)paddr
);
12550 pp_attr_current
= pp_attr_table
[pai
];
12551 wimg_bits_prev
= VM_WIMG_DEFAULT
;
12552 if (pp_attr_current
& PP_ATTR_WIMG_MASK
) {
12553 wimg_bits_prev
= pp_attr_current
& PP_ATTR_WIMG_MASK
;
12556 pp_attr_template
= (pp_attr_current
& ~PP_ATTR_WIMG_MASK
) | PP_ATTR_WIMG(cacheattr
& (VM_WIMG_MASK
));
12562 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
12563 * to avoid losing simultaneous updates to other bits like refmod. */
12564 } while (!OSCompareAndSwap16(pp_attr_current
, pp_attr_template
, &pp_attr_table
[pai
]));
12566 wimg_bits_new
= VM_WIMG_DEFAULT
;
12567 if (pp_attr_template
& PP_ATTR_WIMG_MASK
) {
12568 wimg_bits_new
= pp_attr_template
& PP_ATTR_WIMG_MASK
;
12572 if (wimg_bits_new
!= wimg_bits_prev
) {
12573 pmap_update_cache_attributes_locked(pn
, cacheattr
);
12576 if ((wimg_bits_new
== VM_WIMG_RT
) && (wimg_bits_prev
!= VM_WIMG_RT
)) {
12577 pmap_force_dcache_clean(phystokv(paddr
), PAGE_SIZE
);
12580 if (wimg_bits_new
== VM_WIMG_COPYBACK
) {
12583 if (wimg_bits_prev
== wimg_bits_new
) {
12584 pmap_pin_kernel_pages((vm_offset_t
)res
, sizeof(*res
));
12586 pmap_unpin_kernel_pages((vm_offset_t
)res
, sizeof(*res
));
12587 if (!platform_cache_batch_wimg(wimg_bits_new
, (*res
) << PAGE_SHIFT
)) {
12594 if (page_cnt
== (page_index
+ 1)) {
12595 wimg_bits_prev
= VM_WIMG_COPYBACK
;
12596 if (((wimg_bits_prev
!= wimg_bits_new
))
12597 && ((wimg_bits_prev
== VM_WIMG_COPYBACK
)
12598 || ((wimg_bits_prev
== VM_WIMG_INNERWBACK
)
12599 && (wimg_bits_new
!= VM_WIMG_COPYBACK
))
12600 || ((wimg_bits_prev
== VM_WIMG_WTHRU
)
12601 && ((wimg_bits_new
!= VM_WIMG_COPYBACK
) || (wimg_bits_new
!= VM_WIMG_INNERWBACK
))))) {
12602 platform_cache_flush_wimg(wimg_bits_new
);
boolean_t
pmap_batch_set_cache_attributes(
	ppnum_t pn,
	unsigned int cacheattr,
	unsigned int page_cnt,
	unsigned int page_index,
	boolean_t doit,
	unsigned int *res)
{
#if XNU_MONITOR
	return pmap_batch_set_cache_attributes_ppl(pn, cacheattr, page_cnt, page_index, doit, res);
#else
	return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
#endif
}
12625 MARK_AS_PMAP_TEXT
static void
12626 pmap_set_cache_attributes_priv(
12628 unsigned int cacheattr
,
12629 boolean_t external __unused
)
12631 pmap_paddr_t paddr
;
12633 pp_attr_t pp_attr_current
;
12634 pp_attr_t pp_attr_template
;
12635 unsigned int wimg_bits_prev
, wimg_bits_new
;
12639 if (!pa_valid(paddr
)) {
12640 return; /* Not a managed page. */
12643 if (cacheattr
& VM_WIMG_USE_DEFAULT
) {
12644 cacheattr
= VM_WIMG_DEFAULT
;
12647 pai
= (int)pa_index(paddr
);
12652 if (external
&& pa_test_monitor(paddr
)) {
12653 panic("%s invoked on PPL page 0x%llx", __func__
, (uint64_t)paddr
);
12654 } else if (!external
&& !pa_test_monitor(paddr
)) {
12655 panic("%s invoked on non-PPL page 0x%llx", __func__
, (uint64_t)paddr
);
12660 pp_attr_current
= pp_attr_table
[pai
];
12661 wimg_bits_prev
= VM_WIMG_DEFAULT
;
12662 if (pp_attr_current
& PP_ATTR_WIMG_MASK
) {
12663 wimg_bits_prev
= pp_attr_current
& PP_ATTR_WIMG_MASK
;
12666 pp_attr_template
= (pp_attr_current
& ~PP_ATTR_WIMG_MASK
) | PP_ATTR_WIMG(cacheattr
& (VM_WIMG_MASK
));
12668 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
12669 * to avoid losing simultaneous updates to other bits like refmod. */
12670 } while (!OSCompareAndSwap16(pp_attr_current
, pp_attr_template
, &pp_attr_table
[pai
]));
12672 wimg_bits_new
= VM_WIMG_DEFAULT
;
12673 if (pp_attr_template
& PP_ATTR_WIMG_MASK
) {
12674 wimg_bits_new
= pp_attr_template
& PP_ATTR_WIMG_MASK
;
12677 if (wimg_bits_new
!= wimg_bits_prev
) {
12678 pmap_update_cache_attributes_locked(pn
, cacheattr
);
12683 pmap_sync_wimg(pn
, wimg_bits_prev
, wimg_bits_new
);
12686 MARK_AS_PMAP_TEXT
static void
12687 pmap_set_cache_attributes_internal(
12689 unsigned int cacheattr
)
12691 pmap_set_cache_attributes_priv(pn
, cacheattr
, TRUE
);
12695 pmap_set_cache_attributes(
12697 unsigned int cacheattr
)
12700 pmap_set_cache_attributes_ppl(pn
, cacheattr
);
12702 pmap_set_cache_attributes_internal(pn
, cacheattr
);
12706 MARK_AS_PMAP_TEXT
void
12707 pmap_update_cache_attributes_locked(
12709 unsigned attributes
)
12711 pmap_paddr_t phys
= ptoa(ppnum
);
12715 pt_entry_t tmplate
;
12717 boolean_t tlb_flush_needed
= FALSE
;
12719 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING
) | DBG_FUNC_START
, ppnum
, attributes
);
12721 if (pmap_panic_dev_wimg_on_managed
) {
12722 switch (attributes
& VM_WIMG_MASK
) {
12723 case VM_WIMG_IO
: // nGnRnE
12724 case VM_WIMG_POSTED
: // nGnRE
12725 /* supported on DRAM, but slow, so we disallow */
12727 case VM_WIMG_POSTED_REORDERED
: // nGRE
12728 case VM_WIMG_POSTED_COMBINED_REORDERED
: // GRE
12729 /* unsupported on DRAM */
12731 panic("%s: trying to use unsupported VM_WIMG type for managed page, VM_WIMG=%x, ppnum=%#x",
12732 __FUNCTION__
, attributes
& VM_WIMG_MASK
, ppnum
);
12736 /* not device type memory, all good */
12742 #if __ARM_PTE_PHYSMAP__
12743 vm_offset_t kva
= phystokv(phys
);
12744 pte_p
= pmap_pte(kernel_pmap
, kva
);
12747 tmplate
&= ~(ARM_PTE_ATTRINDXMASK
| ARM_PTE_SHMASK
);
12749 tmplate
|= (wimg_to_pte(attributes
) & ~ARM_PTE_XPRR_MASK
);
12751 tmplate
|= wimg_to_pte(attributes
);
12753 #if (__ARM_VMSA__ > 7)
12754 if (tmplate
& ARM_PTE_HINT_MASK
) {
12755 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
12756 __FUNCTION__
, pte_p
, (void *)kva
, tmplate
);
12759 WRITE_PTE_STRONG(pte_p
, tmplate
);
12760 flush_mmu_tlb_region_asid_async(kva
, PAGE_SIZE
, kernel_pmap
);
12761 tlb_flush_needed
= TRUE
;
12764 pai
= (unsigned int)pa_index(phys
);
12766 pv_h
= pai_to_pvh(pai
);
12768 pte_p
= PT_ENTRY_NULL
;
12769 pve_p
= PV_ENTRY_NULL
;
12770 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
12771 pte_p
= pvh_ptep(pv_h
);
12772 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
12773 pve_p
= pvh_list(pv_h
);
12774 pte_p
= PT_ENTRY_NULL
;
12777 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
12778 vm_map_address_t va
;
12781 if (pve_p
!= PV_ENTRY_NULL
) {
12782 pte_p
= pve_get_ptep(pve_p
);
12784 #ifdef PVH_FLAG_IOMMU
12785 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
12786 goto cache_skip_pve
;
12789 pmap
= ptep_get_pmap(pte_p
);
12790 va
= ptep_get_va(pte_p
);
12793 tmplate
&= ~(ARM_PTE_ATTRINDXMASK
| ARM_PTE_SHMASK
);
12794 tmplate
|= pmap_get_pt_ops(pmap
)->wimg_to_pte(attributes
);
12796 WRITE_PTE_STRONG(pte_p
, tmplate
);
12797 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
,
12798 pt_attr_page_size(pmap_get_pt_attr(pmap
)) * PAGE_RATIO
, pmap
);
12799 tlb_flush_needed
= TRUE
;
12801 #ifdef PVH_FLAG_IOMMU
12804 pte_p
= PT_ENTRY_NULL
;
12805 if (pve_p
!= PV_ENTRY_NULL
) {
12806 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
12809 if (tlb_flush_needed
) {
12810 pmap_sync_tlb((attributes
& VM_WIMG_MASK
) == VM_WIMG_RT
);
12813 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING
) | DBG_FUNC_END
, ppnum
, attributes
);
12816 #if (__ARM_VMSA__ == 7)
12818 pmap_create_sharedpages(vm_map_address_t
*kernel_data_addr
, vm_map_address_t
*kernel_text_addr
,
12819 vm_map_address_t
*user_commpage_addr
)
12824 assert(kernel_data_addr
!= NULL
);
12825 assert(kernel_text_addr
!= NULL
);
12826 assert(user_commpage_addr
!= NULL
);
12828 (void) pmap_pages_alloc_zeroed(&pa
, PAGE_SIZE
, 0);
12830 kr
= pmap_enter(kernel_pmap
, _COMM_PAGE_BASE_ADDRESS
, atop(pa
), VM_PROT_READ
| VM_PROT_WRITE
, VM_PROT_NONE
, VM_WIMG_USE_DEFAULT
, TRUE
);
12831 assert(kr
== KERN_SUCCESS
);
12833 *kernel_data_addr
= phystokv(pa
);
12834 // We don't have PFZ for 32 bit arm, always NULL
12835 *kernel_text_addr
= 0;
12836 *user_commpage_addr
= 0;
12839 #else /* __ARM_VMSA__ == 7 */
12844 vm_address_t address
,
12845 tt_entry_t
template)
12847 tt_entry_t
*ptep
, pte
;
12849 ptep
= pmap_tt3e(pmap
, address
);
12850 if (ptep
== NULL
) {
12851 panic("%s: no ptep?\n", __FUNCTION__
);
12855 pte
= tte_to_pa(pte
) | template;
12856 WRITE_PTE_STRONG(ptep
, pte
);
12859 /* Note absence of non-global bit */
12860 #define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
12861 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
12862 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
12863 | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
12865 /* Note absence of non-global bit and no-execute bit. */
12866 #define PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
12867 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
12868 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_PNX \
12869 | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
12872 pmap_create_sharedpages(vm_map_address_t
*kernel_data_addr
, vm_map_address_t
*kernel_text_addr
,
12873 vm_map_address_t
*user_text_addr
)
12876 pmap_paddr_t data_pa
= 0; // data address
12877 pmap_paddr_t text_pa
= 0; // text address
12879 *kernel_data_addr
= 0;
12880 *kernel_text_addr
= 0;
12881 *user_text_addr
= 0;
12884 data_pa
= pmap_alloc_page_for_kern(0);
12886 memset((char *) phystokv(data_pa
), 0, PAGE_SIZE
);
12888 text_pa
= pmap_alloc_page_for_kern(0);
12890 memset((char *) phystokv(text_pa
), 0, PAGE_SIZE
);
12893 #else /* XNU_MONITOR */
12894 (void) pmap_pages_alloc_zeroed(&data_pa
, PAGE_SIZE
, 0);
12896 (void) pmap_pages_alloc_zeroed(&text_pa
, PAGE_SIZE
, 0);
12899 #endif /* XNU_MONITOR */
12901 #ifdef CONFIG_XNUPOST
12903 * The kernel pmap maintains a user accessible mapping of the commpage
12906 kr
= pmap_enter(kernel_pmap
, _COMM_HIGH_PAGE64_BASE_ADDRESS
, (ppnum_t
)atop(data_pa
), VM_PROT_READ
, VM_PROT_NONE
, VM_WIMG_USE_DEFAULT
, TRUE
);
12907 assert(kr
== KERN_SUCCESS
);
12910 * This mapping should not be global (as we only expect to reference it
12913 pmap_update_tt3e(kernel_pmap
, _COMM_HIGH_PAGE64_BASE_ADDRESS
, PMAP_COMM_PAGE_PTE_TEMPLATE
| ARM_PTE_NG
);
12916 kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS
, PAGE_SIZE
, true);
12918 #endif /* CONFIG_XNUPOST */
12921 * In order to avoid burning extra pages on mapping the shared page, we
12922 * create a dedicated pmap for the shared page. We forcibly nest the
12923 * translation tables from this pmap into other pmaps. The level we
12924 * will nest at depends on the MMU configuration (page size, TTBR range,
12925 * etc). Typically, this is at L1 for 4K tasks and L2 for 16K tasks.
12927 * Note that this is NOT "the nested pmap" (which is used to nest the
12930 * Note that we update parameters of the entry for our unique needs (NG
12933 sharedpage_pmap_default
= pmap_create_options(NULL
, 0x0, 0);
12934 assert(sharedpage_pmap_default
!= NULL
);
12936 /* The user 64-bit mapping... */
12937 kr
= pmap_enter_addr(sharedpage_pmap_default
, _COMM_PAGE64_BASE_ADDRESS
, data_pa
, VM_PROT_READ
, VM_PROT_NONE
, VM_WIMG_USE_DEFAULT
, TRUE
);
12938 assert(kr
== KERN_SUCCESS
);
12939 pmap_update_tt3e(sharedpage_pmap_default
, _COMM_PAGE64_BASE_ADDRESS
, PMAP_COMM_PAGE_PTE_TEMPLATE
);
12941 /* User mapping of comm page text section for 64 bit mapping only
12943 * We don't insert it into the 32 bit mapping because we don't want 32 bit
12944 * user processes to get this page mapped in, they should never call into
12947 * The data comm page is in a pre-reserved L3 VA range and the text commpage
12948 * is slid in the same L3 as the data commpage. It is either outside the
12949 * max of user VA or is pre-reserved in the vm_map_exec(). This means that
12950 * it is reserved and unavailable to mach VM for future mappings.
12952 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(sharedpage_pmap_default
);
12953 int num_ptes
= pt_attr_leaf_size(pt_attr
) >> PTE_SHIFT
;
12955 vm_map_address_t commpage_text_va
= 0;
12958 int text_leaf_index
= random() % num_ptes
;
12960 // Generate a VA for the commpage text with the same root and twig index as data
12961 // comm page, but with new leaf index we've just generated.
12962 commpage_text_va
= (_COMM_PAGE64_BASE_ADDRESS
& ~pt_attr_leaf_index_mask(pt_attr
));
12963 commpage_text_va
|= (text_leaf_index
<< pt_attr_leaf_shift(pt_attr
));
12964 } while (commpage_text_va
== _COMM_PAGE64_BASE_ADDRESS
); // Try again if we collide (should be unlikely)
12966 // Assert that this is empty
12967 __assert_only pt_entry_t
*ptep
= pmap_pte(sharedpage_pmap_default
, commpage_text_va
);
12968 assert(ptep
!= PT_ENTRY_NULL
);
12969 assert(*ptep
== ARM_TTE_EMPTY
);
12971 // At this point, we've found the address we want to insert our comm page at
12972 kr
= pmap_enter_addr(sharedpage_pmap_default
, commpage_text_va
, text_pa
, VM_PROT_READ
| VM_PROT_EXECUTE
, VM_PROT_NONE
, VM_WIMG_USE_DEFAULT
, TRUE
);
12973 assert(kr
== KERN_SUCCESS
);
12974 // Mark it as global page R/X so that it doesn't get thrown out on tlb flush
12975 pmap_update_tt3e(sharedpage_pmap_default
, commpage_text_va
, PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE
);
12977 *user_text_addr
= commpage_text_va
;
12980 /* ...and the user 32-bit mapping. */
12981 kr
= pmap_enter_addr(sharedpage_pmap_default
, _COMM_PAGE32_BASE_ADDRESS
, data_pa
, VM_PROT_READ
, VM_PROT_NONE
, VM_WIMG_USE_DEFAULT
, TRUE
);
12982 assert(kr
== KERN_SUCCESS
);
12983 pmap_update_tt3e(sharedpage_pmap_default
, _COMM_PAGE32_BASE_ADDRESS
, PMAP_COMM_PAGE_PTE_TEMPLATE
);
12985 #if __ARM_MIXED_PAGE_SIZE__
12987 * To handle 4K tasks a new view/pmap of the shared page is needed. These are a
12988 * new set of page tables that point to the exact same 16K shared page as
12989 * before. Only the first 4K of the 16K shared page is mapped since that's
12990 * the only part that contains relevant data.
12992 sharedpage_pmap_4k
= pmap_create_options(NULL
, 0x0, PMAP_CREATE_FORCE_4K_PAGES
);
12993 assert(sharedpage_pmap_4k
!= NULL
);
12995 /* The user 64-bit mapping... */
12996 kr
= pmap_enter_addr(sharedpage_pmap_4k
, _COMM_PAGE64_BASE_ADDRESS
, data_pa
, VM_PROT_READ
, VM_PROT_NONE
, VM_WIMG_USE_DEFAULT
, TRUE
);
12997 assert(kr
== KERN_SUCCESS
);
12998 pmap_update_tt3e(sharedpage_pmap_4k
, _COMM_PAGE64_BASE_ADDRESS
, PMAP_COMM_PAGE_PTE_TEMPLATE
);
13000 /* ...and the user 32-bit mapping. */
13001 kr
= pmap_enter_addr(sharedpage_pmap_4k
, _COMM_PAGE32_BASE_ADDRESS
, data_pa
, VM_PROT_READ
, VM_PROT_NONE
, VM_WIMG_USE_DEFAULT
, TRUE
);
13002 assert(kr
== KERN_SUCCESS
);
13003 pmap_update_tt3e(sharedpage_pmap_4k
, _COMM_PAGE32_BASE_ADDRESS
, PMAP_COMM_PAGE_PTE_TEMPLATE
);
13007 /* For manipulation in kernel, go straight to physical page */
13008 *kernel_data_addr
= phystokv(data_pa
);
13009 *kernel_text_addr
= (text_pa
) ? phystokv(text_pa
) : 0;
13016 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
13017 * with user controlled TTEs for regions that aren't explicitly reserved by the
13018 * VM (e.g., _COMM_PAGE64_NESTING_START/_COMM_PAGE64_BASE_ADDRESS).
13020 #if (ARM_PGSHIFT == 14)
13021 static_assert((_COMM_PAGE32_BASE_ADDRESS
& ~ARM_TT_L2_OFFMASK
) >= VM_MAX_ADDRESS
);
13022 #elif (ARM_PGSHIFT == 12)
13023 static_assert((_COMM_PAGE32_BASE_ADDRESS
& ~ARM_TT_L1_OFFMASK
) >= VM_MAX_ADDRESS
);
13025 #error Nested shared page mapping is unsupported on this config
13028 MARK_AS_PMAP_TEXT
static kern_return_t
13029 pmap_insert_sharedpage_internal(
13032 kern_return_t kr
= KERN_SUCCESS
;
13033 vm_offset_t sharedpage_vaddr
;
13034 pt_entry_t
*ttep
, *src_ttep
;
13036 pmap_t sharedpage_pmap
= sharedpage_pmap_default
;
13038 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
13039 const unsigned int sharedpage_level
= pt_attr_sharedpage_level(pt_attr
);
13041 #if __ARM_MIXED_PAGE_SIZE__
13042 #if !__ARM_16K_PG__
13043 /* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
13044 #error "pmap_insert_sharedpage_internal requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
13045 #endif /* !__ARM_16K_PG__ */
13047 /* Choose the correct shared page pmap to use. */
13048 const uint64_t pmap_page_size
= pt_attr_page_size(pt_attr
);
13049 if (pmap_page_size
== 16384) {
13050 sharedpage_pmap
= sharedpage_pmap_default
;
13051 } else if (pmap_page_size
== 4096) {
13052 sharedpage_pmap
= sharedpage_pmap_4k
;
13054 panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size
);
13056 #endif /* __ARM_MIXED_PAGE_SIZE__ */
13058 VALIDATE_PMAP(pmap
);
13060 options
|= PMAP_OPTIONS_NOWAIT
;
13061 #endif /* XNU_MONITOR */
13063 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
13064 #error We assume a single page.
13067 if (pmap_is_64bit(pmap
)) {
13068 sharedpage_vaddr
= _COMM_PAGE64_BASE_ADDRESS
;
13070 sharedpage_vaddr
= _COMM_PAGE32_BASE_ADDRESS
;
13077 * For 4KB pages, we either "nest" at the level one page table (1GB) or level
13078 * two (2MB) depending on the address space layout. For 16KB pages, each level
13079 * one entry is 64GB, so we must go to the second level entry (32MB) in order
13082 * Note: This is not "nesting" in the shared cache sense. This definition of
13083 * nesting just means inserting pointers to pre-allocated tables inside of
13084 * the passed in pmap to allow us to share page tables (which map the shared
13085 * page) for every task. This saves at least one page of memory per process
13086 * compared to creating new page tables in every process for mapping the
13091 * Allocate the twig page tables if needed, and slam a pointer to the shared
13092 * page's tables into place.
13094 while ((ttep
= pmap_ttne(pmap
, sharedpage_level
, sharedpage_vaddr
)) == TT_ENTRY_NULL
) {
13097 kr
= pmap_expand(pmap
, sharedpage_vaddr
, options
, sharedpage_level
);
13099 if (kr
!= KERN_SUCCESS
) {
13101 if (kr
== KERN_RESOURCE_SHORTAGE
) {
13106 panic("Failed to pmap_expand for commpage, pmap=%p", pmap
);
13113 if (*ttep
!= ARM_PTE_EMPTY
) {
13114 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__
);
13117 src_ttep
= pmap_ttne(sharedpage_pmap
, sharedpage_level
, sharedpage_vaddr
);
13120 FLUSH_PTE_STRONG(ttep
);
13128 pmap_unmap_sharedpage(
13132 vm_offset_t sharedpage_vaddr
;
13133 pmap_t sharedpage_pmap
= sharedpage_pmap_default
;
13135 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
13136 const unsigned int sharedpage_level
= pt_attr_sharedpage_level(pt_attr
);
13138 #if __ARM_MIXED_PAGE_SIZE__
13139 #if !__ARM_16K_PG__
13140 /* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
13141 #error "pmap_unmap_sharedpage requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
13142 #endif /* !__ARM_16K_PG__ */
13144 /* Choose the correct shared page pmap to use. */
13145 const uint64_t pmap_page_size
= pt_attr_page_size(pt_attr
);
13146 if (pmap_page_size
== 16384) {
13147 sharedpage_pmap
= sharedpage_pmap_default
;
13148 } else if (pmap_page_size
== 4096) {
13149 sharedpage_pmap
= sharedpage_pmap_4k
;
13151 panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size
);
13153 #endif /* __ARM_MIXED_PAGE_SIZE__ */
13155 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
13156 #error We assume a single page.
13159 if (pmap_is_64bit(pmap
)) {
13160 sharedpage_vaddr
= _COMM_PAGE64_BASE_ADDRESS
;
13162 sharedpage_vaddr
= _COMM_PAGE32_BASE_ADDRESS
;
13166 ttep
= pmap_ttne(pmap
, sharedpage_level
, sharedpage_vaddr
);
13168 if (ttep
== NULL
) {
13172 /* It had better be mapped to the shared page. */
13173 if (*ttep
!= ARM_TTE_EMPTY
&& *ttep
!= *pmap_ttne(sharedpage_pmap
, sharedpage_level
, sharedpage_vaddr
)) {
13174 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__
);
13177 *ttep
= ARM_TTE_EMPTY
;
13178 FLUSH_PTE_STRONG(ttep
);
13180 flush_mmu_tlb_region_asid_async(sharedpage_vaddr
, PAGE_SIZE
, pmap
);
	sync_tlb_flush();
}

void
pmap_insert_sharedpage(
	pmap_t pmap)
{
#if XNU_MONITOR
	kern_return_t kr = KERN_FAILURE;

	while ((kr = pmap_insert_sharedpage_ppl(pmap)) == KERN_RESOURCE_SHORTAGE) {
		pmap_alloc_page_for_ppl(0);
	}

	pmap_ledger_check_balance(pmap);

	if (kr != KERN_SUCCESS) {
		panic("%s: failed to insert the shared page, kr=%d, "
		    "pmap=%p", __FUNCTION__, kr, pmap);
	}
#else
	pmap_insert_sharedpage_internal(pmap);
#endif
}

boolean_t
pmap_is_64bit(
	pmap_t pmap)
{
	return pmap->is_64bit;
}
13217 pmap_t pmap __unused
)
/* ARMTODO -- an implementation that accounts for
 * holes in the physical map, if any.
 */
boolean_t
pmap_valid_page(
	ppnum_t pn)
{
	return pa_valid(ptoa(pn));
}

boolean_t
pmap_bootloader_page(
	ppnum_t pn)
{
	pmap_paddr_t paddr = ptoa(pn);

	if (pa_valid(paddr)) {
		return FALSE;
	}
	pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
	return (io_rgn != NULL) && (io_rgn->wimg & PMAP_IO_RANGE_CARVEOUT);
}
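/*
 * A "bootloader page" is therefore physical memory the kernel does not manage
 * (pa_valid() fails) but which falls inside an I/O range tagged
 * PMAP_IO_RANGE_CARVEOUT in the device tree.
 */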
MARK_AS_PMAP_TEXT static boolean_t
pmap_is_empty_internal(
	pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end)
{
	vm_map_offset_t block_start, block_end;
	tt_entry_t *tte_p;

	if (pmap == NULL) {
		return TRUE;
	}

	VALIDATE_PMAP(pmap);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	unsigned int initial_not_in_kdp = not_in_kdp;

	if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
		pmap_lock_ro(pmap);
	}

#if (__ARM_VMSA__ == 7)
	if (tte_index(pmap, pt_attr, va_end) >= pmap->tte_index_max) {
		if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
			pmap_unlock_ro(pmap);
		}
		return TRUE;
	}
#endif

	/* TODO: This will be faster if we increment ttep at each level. */
	block_start = va_start;

	while (block_start < va_end) {
		pt_entry_t *bpte_p, *epte_p;
		pt_entry_t *pte_p;

		block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
		if (block_end > va_end) {
			block_end = va_end;
		}

		tte_p = pmap_tte(pmap, block_start);
		if ((tte_p != PT_ENTRY_NULL)
		    && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
			pte_p = (pt_entry_t *) ttetokv(*tte_p);
			bpte_p = &pte_p[pte_index(pmap, pt_attr, block_start)];
			epte_p = &pte_p[pte_index(pmap, pt_attr, block_end)];

			for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
				if (*pte_p != ARM_PTE_EMPTY) {
					if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
						pmap_unlock_ro(pmap);
					}
					return FALSE;
				}
			}
		}
		block_start = block_end;
	}

	if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
		pmap_unlock_ro(pmap);
	}

	return TRUE;
}
boolean_t
pmap_is_empty(
	pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end)
{
#if XNU_MONITOR
	return pmap_is_empty_ppl(pmap, va_start, va_end);
#else
	return pmap_is_empty_internal(pmap, va_start, va_end);
#endif
}

vm_map_offset_t
pmap_max_offset(
	boolean_t is64,
	unsigned int option)
{
	return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
}
vm_map_offset_t
pmap_max_64bit_offset(
	__unused unsigned int option)
{
	vm_map_offset_t max_offset_ret = 0;

#if defined(__arm64__)
	#define ARM64_MIN_MAX_ADDRESS (SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000) // end of shared region + 512MB for various purposes
	_Static_assert((ARM64_MIN_MAX_ADDRESS > SHARED_REGION_BASE_ARM64) && (ARM64_MIN_MAX_ADDRESS <= MACH_VM_MAX_ADDRESS),
	    "Minimum address space size outside allowable range");
	const vm_map_offset_t min_max_offset = ARM64_MIN_MAX_ADDRESS; // end of shared region + 512MB for various purposes
	if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
		max_offset_ret = arm64_pmap_max_offset_default;
	} else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
		max_offset_ret = min_max_offset;
	} else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
		max_offset_ret = MACH_VM_MAX_ADDRESS;
	} else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
		if (arm64_pmap_max_offset_default) {
			max_offset_ret = arm64_pmap_max_offset_default;
		} else if (max_mem > 0xC0000000) {
			max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
		} else if (max_mem > 0x40000000) {
			max_offset_ret = min_max_offset + 0x38000000;  // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
		} else {
			max_offset_ret = min_max_offset;
		}
	} else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
		if (arm64_pmap_max_offset_default) {
			// Allow the boot-arg to override jumbo size
			max_offset_ret = arm64_pmap_max_offset_default;
		} else {
			max_offset_ret = MACH_VM_MAX_ADDRESS;   // Max offset is 64GB for pmaps with special "jumbo" blessing
		}
	} else {
		panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
	}

	assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
	assert(max_offset_ret >= min_max_offset);
#else
	panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
#endif

	return max_offset_ret;
}
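/*
 * Summary of the option handling above: ARM_PMAP_MAX_OFFSET_DEFAULT honors the
 * arm64_pmap_max_offset_default boot-arg when one was provided, MIN clamps the
 * address space to the floor just past the shared region, MAX and JUMBO allow
 * the full MACH_VM_MAX_ADDRESS, and DEVICE scales the ceiling with the amount
 * of physical memory (max_mem) present on the device.
 */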
vm_map_offset_t
pmap_max_32bit_offset(
	unsigned int option)
{
	vm_map_offset_t max_offset_ret = 0;

	if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
		max_offset_ret = arm_pmap_max_offset_default;
	} else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
		max_offset_ret = 0x80000000;
	} else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
		max_offset_ret = VM_MAX_ADDRESS;
	} else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
		if (arm_pmap_max_offset_default) {
			max_offset_ret = arm_pmap_max_offset_default;
		} else if (max_mem > 0x20000000) {
			max_offset_ret = 0x80000000;
		} else {
			max_offset_ret = 0x80000000;
		}
	} else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
		max_offset_ret = 0x80000000;
	} else {
		panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
	}

	assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
	return max_offset_ret;
}
#if CONFIG_DTRACE
/*
 * Constrain DTrace copyin/copyout actions
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

kern_return_t
dtrace_copyio_preflight(
	__unused addr64_t va)
{
	if (current_map() == kernel_map) {
		return KERN_FAILURE;
	} else {
		return KERN_SUCCESS;
	}
}

kern_return_t
dtrace_copyio_postflight(
	__unused addr64_t va)
{
	return KERN_SUCCESS;
}
#endif /* CONFIG_DTRACE */
void
pmap_flush_context_init(__unused pmap_flush_context *pfc)
{
}

void
pmap_flush(
	__unused pmap_flush_context *cpus_to_flush)
{
	/* not implemented yet */
	return;
}
#if XNU_MONITOR

/*
 * Enforce that the address range described by kva and nbytes is not currently
 * PPL-owned, and won't become PPL-owned while pinned. This is to prevent
 * unintentionally writing to PPL-owned memory.
 */
static void
pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes)
{
	vm_offset_t end;
	if (os_add_overflow(kva, nbytes, &end)) {
		panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
	}
	for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
		pmap_paddr_t pa = kvtophys(ckva);
		if (!pa_valid(pa)) {
			panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
		}
		pp_attr_t attr;
		unsigned int pai = (unsigned int)pa_index(pa);
		if (ckva == phystokv(pa)) {
			panic("%s(%p): attempt to pin static mapping for page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
		}
		do {
			attr = pp_attr_table[pai] & ~PP_ATTR_NO_MONITOR;
			if (attr & PP_ATTR_MONITOR) {
				panic("%s(%p): physical page 0x%llx belongs to PPL", __func__, (void*)kva, (uint64_t)pa);
			}
		} while (!OSCompareAndSwap16(attr, attr | PP_ATTR_NO_MONITOR, &pp_attr_table[pai]));
	}
}
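/*
 * The compare-and-swap loop above publishes PP_ATTR_NO_MONITOR on each physical
 * page in the range, which keeps the page from being handed over to the PPL
 * while it is pinned. Callers pair every pmap_pin_kernel_pages() call with a
 * matching pmap_unpin_kernel_pages() over the same range (see below).
 */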
static void
pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes)
{
	vm_offset_t end;
	if (os_add_overflow(kva, nbytes, &end)) {
		panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
	}
	for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
		pmap_paddr_t pa = kvtophys(ckva);
		if (!pa_valid(pa)) {
			panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
		}
		if (!(pp_attr_table[pa_index(pa)] & PP_ATTR_NO_MONITOR)) {
			panic("%s(%p): physical page 0x%llx not pinned", __func__, (void*)kva, (uint64_t)pa);
		}
		assert(!(pp_attr_table[pa_index(pa)] & PP_ATTR_MONITOR));
		pa_clear_no_monitor(pa);
	}
}
/*
 * Lock down a page, making all mappings read-only, and preventing
 * further mappings or removal of this particular kva's mapping.
 * Effectively, it makes the page at kva immutable.
 */
MARK_AS_PMAP_TEXT static void
pmap_ppl_lockdown_page(vm_address_t kva)
{
	pmap_paddr_t pa = kvtophys(kva);
	unsigned int pai = (unsigned int)pa_index(pa);
	LOCK_PVH(pai);
	pv_entry_t **pv_h = pai_to_pvh(pai);

	if (__improbable(pa_test_monitor(pa))) {
		panic("%#lx: page %llx belongs to PPL", kva, pa);
	}

	if (__improbable(pvh_get_flags(pv_h) & (PVH_FLAG_LOCKDOWN | PVH_FLAG_EXEC))) {
		panic("%#lx: already locked down/executable (%#llx)", kva, pvh_get_flags(pv_h));
	}

	pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);

	if (pte_p == PT_ENTRY_NULL) {
		panic("%#lx: NULL pte", kva);
	}

	pt_entry_t tmplate = *pte_p;
	if (__improbable((tmplate & ARM_PTE_APMASK) != ARM_PTE_AP(AP_RWNA))) {
		panic("%#lx: not a kernel r/w page (%#llx)", kva, tmplate & ARM_PTE_APMASK);
	}

	pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_LOCKDOWN);

	pmap_set_ptov_ap(pai, AP_RONA, FALSE);

	UNLOCK_PVH(pai);

	pmap_page_protect_options_internal((ppnum_t)atop(pa), VM_PROT_READ, 0, NULL);
}
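/*
 * After PVH_FLAG_LOCKDOWN is set above, pmap_page_protect_options_internal()
 * downgrades every existing mapping of the page to read-only, and the flag
 * keeps new writable mappings from being created until
 * pmap_ppl_unlockdown_page() clears it (see below).
 */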
/*
 * Release a page from being locked down to the PPL, making it writable
 * to the kernel once again.
 */
MARK_AS_PMAP_TEXT static void
pmap_ppl_unlockdown_page(vm_address_t kva)
{
	pmap_paddr_t pa = kvtophys(kva);
	unsigned int pai = (unsigned int)pa_index(pa);
	LOCK_PVH(pai);
	pv_entry_t **pv_h = pai_to_pvh(pai);

	vm_offset_t pvh_flags = pvh_get_flags(pv_h);

	if (__improbable(!(pvh_flags & PVH_FLAG_LOCKDOWN))) {
		panic("unlockdown attempt on not locked down virtual %#lx/pai %d", kva, pai);
	}

	pvh_set_flags(pv_h, pvh_flags & ~PVH_FLAG_LOCKDOWN);
	pmap_set_ptov_ap(pai, AP_RWNA, FALSE);

	UNLOCK_PVH(pai);
}

#else /* XNU_MONITOR */

static void __unused
pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}

static void __unused
pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}

#endif /* !XNU_MONITOR */
#define PMAP_RESIDENT_INVALID   ((mach_vm_size_t)-1)

MARK_AS_PMAP_TEXT static mach_vm_size_t
pmap_query_resident_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	mach_vm_size_t *compressed_bytes_p)
{
	mach_vm_size_t resident_bytes = 0;
	mach_vm_size_t compressed_bytes = 0;

	pt_entry_t *bpte, *epte;
	pt_entry_t *pte_p;
	tt_entry_t *tte_p;

	if (pmap == NULL) {
		return PMAP_RESIDENT_INVALID;
	}

	VALIDATE_PMAP(pmap);

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	/* Ensure that this request is valid, and addresses exactly one TTE. */
	if (__improbable((start % pt_attr_page_size(pt_attr)) ||
	    (end % pt_attr_page_size(pt_attr)))) {
		panic("%s: address range %p, %p not page-aligned to 0x%llx", __func__, (void*)start, (void*)end, pt_attr_page_size(pt_attr));
	}

	if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
	}

	pmap_lock_ro(pmap);
	tte_p = pmap_tte(pmap, start);
	if (tte_p == (tt_entry_t *) NULL) {
		pmap_unlock_ro(pmap);
		return PMAP_RESIDENT_INVALID;
	}
	if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		pte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte = &pte_p[pte_index(pmap, pt_attr, start)];
		epte = &pte_p[pte_index(pmap, pt_attr, end)];

		for (; bpte < epte; bpte++) {
			if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
				compressed_bytes += pt_attr_page_size(pt_attr);
			} else if (pa_valid(pte_to_pa(*bpte))) {
				resident_bytes += pt_attr_page_size(pt_attr);
			}
		}
	}
	pmap_unlock_ro(pmap);

	if (compressed_bytes_p) {
		pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
		*compressed_bytes_p += compressed_bytes;
		pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
	}

	return resident_bytes;
}
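/*
 * pmap_query_resident_internal() deliberately handles at most one twig-level
 * (TTE-sized) chunk per call; the alignment checks above panic otherwise. The
 * wrapper below, pmap_query_resident(), walks an arbitrary VA range in
 * twig-sized pieces and accumulates the per-chunk results.
 */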
mach_vm_size_t
pmap_query_resident(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	mach_vm_size_t *compressed_bytes_p)
{
	mach_vm_size_t total_resident_bytes;
	mach_vm_size_t compressed_bytes;
	vm_map_address_t va;

	if (pmap == PMAP_NULL) {
		if (compressed_bytes_p) {
			*compressed_bytes_p = 0;
		}
		return 0;
	}

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	total_resident_bytes = 0;
	compressed_bytes = 0;

	PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
	    VM_KERNEL_ADDRHIDE(end));

	va = start;
	while (va < end) {
		vm_map_address_t l;
		mach_vm_size_t resident_bytes;

		l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
		if (l > end) {
			l = end;
		}
#if XNU_MONITOR
		resident_bytes = pmap_query_resident_ppl(pmap, va, l, compressed_bytes_p);
#else
		resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
#endif
		if (resident_bytes == PMAP_RESIDENT_INVALID) {
			break;
		}

		total_resident_bytes += resident_bytes;

		va = l;
	}

	if (compressed_bytes_p) {
		*compressed_bytes_p = compressed_bytes;
	}

	PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
	    total_resident_bytes);

	return total_resident_bytes;
}
#if MACH_ASSERT
static void
pmap_check_ledgers(
	pmap_t pmap)
{
	int pid;
	char *procname;

	if (pmap->pmap_pid == 0) {
		/*
		 * This pmap was not or is no longer fully associated
		 * with a task (e.g. the old pmap after a fork()/exec() or
		 * spawn()). Its "ledger" still points at a task that is
		 * now using a different (and active) address space, so
		 * we can't check that all the pmap ledgers are balanced here.
		 *
		 * If the "pid" is set, that means that we went through
		 * pmap_set_process() in task_terminate_internal(), so
		 * this task's ledger should not have been re-used and
		 * all the pmap ledgers should be back to 0.
		 */
		return;
	}

	pid = pmap->pmap_pid;
	procname = pmap->pmap_procname;

	vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);

	PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
#if 00
	PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
#endif
	PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
	PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
	PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
	PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
	PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
}
#endif /* MACH_ASSERT */
void
pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
{
}
13761 #define PROF_START uint64_t t, nanot;\
13762 t = mach_absolute_time();
13764 #define PROF_END absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
13765 kprintf("%s: took %llu ns\n", __func__, nanot);
13767 #define PMAP_PGTRACE_LOCK(p) \
13769 *(p) = pmap_interrupts_disable(); \
13770 if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
13771 pmap_interrupts_restore(*(p)); \
13774 #define PMAP_PGTRACE_UNLOCK(p) \
13776 simple_unlock(&(pmap_pgtrace.lock)); \
13777 pmap_interrupts_restore(*(p)); \
13780 #define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
13782 *(pte_p) = (pte_entry); \
13783 FLUSH_PTE(pte_p); \
13786 #define PGTRACE_MAX_MAP 16 // maximum supported va to same pa
13793 } pmap_pgtrace_page_state_t
;
13796 queue_chain_t chain
;
13800 * maps - list of va maps to upper pa
13801 * map_pool - map pool
13802 * map_waste - waste can
13807 queue_head_t map_pool
;
13808 queue_head_t map_waste
;
13809 pmap_pgtrace_page_state_t state
;
13810 } pmap_pgtrace_page_t
;
13813 queue_chain_t chain
;
13815 vm_map_offset_t va
;
13818 static ZONE_VIEW_DEFINE(ZV_PMAP_VA
, "pmap va",
13819 KHEAP_ID_DEFAULT
, sizeof(pmap_va_t
));
13821 static ZONE_VIEW_DEFINE(ZV_PMAP_PGTRACE
, "pmap pgtrace",
13822 KHEAP_ID_DEFAULT
, sizeof(pmap_pgtrace_page_t
));
13826 * pages - list of tracing page info
13828 queue_head_t pages
;
13829 decl_simple_lock_data(, lock
);
13830 } pmap_pgtrace
= {};
13833 pmap_pgtrace_init(void)
13835 queue_init(&(pmap_pgtrace
.pages
));
13836 simple_lock_init(&(pmap_pgtrace
.lock
), 0);
13840 if (PE_parse_boot_argn("pgtrace", &enabled
, sizeof(enabled
))) {
13841 pgtrace_enabled
= enabled
;
13845 // find a page with given pa - pmap_pgtrace should be locked
13846 inline static pmap_pgtrace_page_t
*
13847 pmap_pgtrace_find_page(pmap_paddr_t pa
)
13849 queue_head_t
*q
= &(pmap_pgtrace
.pages
);
13850 pmap_pgtrace_page_t
*p
;
13852 queue_iterate(q
, p
, pmap_pgtrace_page_t
*, chain
) {
13853 if (p
->state
== UNDEFINED
) {
13856 if (p
->state
== PA_UNDEFINED
) {
13867 // enter clone of given pmap, va page and range - pmap should be locked
13869 pmap_pgtrace_enter_clone(pmap_t pmap
, vm_map_offset_t va_page
, vm_map_offset_t start
, vm_map_offset_t end
)
13872 queue_head_t
*q
= &(pmap_pgtrace
.pages
);
13873 pmap_paddr_t pa_page
;
13874 pt_entry_t
*ptep
, *cptep
;
13875 pmap_pgtrace_page_t
*p
;
13876 bool found
= false;
13878 pmap_assert_locked_w(pmap
);
13879 assert(va_page
== arm_trunc_page(va_page
));
13881 PMAP_PGTRACE_LOCK(&ints
);
13883 ptep
= pmap_pte(pmap
, va_page
);
13885 // target pte should exist
13886 if (!ptep
|| !(*ptep
& ARM_PTE_TYPE_VALID
)) {
13887 PMAP_PGTRACE_UNLOCK(&ints
);
13891 queue_head_t
*mapq
;
13892 queue_head_t
*mappool
;
13893 pmap_pgtrace_map_t
*map
= NULL
;
13895 pa_page
= pte_to_pa(*ptep
);
13897 // find if we have a page info defined for this
13898 queue_iterate(q
, p
, pmap_pgtrace_page_t
*, chain
) {
13900 mappool
= &(p
->map_pool
);
13902 switch (p
->state
) {
13904 queue_iterate(mapq
, map
, pmap_pgtrace_map_t
*, chain
) {
13905 if (map
->cloned
== false && map
->pmap
== pmap
&& map
->ova
== va_page
) {
13907 map
->range
.start
= start
;
13908 map
->range
.end
= end
;
13916 if (p
->pa
!= pa_page
) {
13919 queue_iterate(mapq
, map
, pmap_pgtrace_map_t
*, chain
) {
13920 if (map
->cloned
== false) {
13922 map
->ova
= va_page
;
13923 map
->range
.start
= start
;
13924 map
->range
.end
= end
;
13932 if (p
->pa
!= pa_page
) {
13935 queue_iterate(mapq
, map
, pmap_pgtrace_map_t
*, chain
) {
13936 if (map
->cloned
== true && map
->pmap
== pmap
&& map
->ova
== va_page
) {
13937 kprintf("%s: skip existing mapping at va=%llx\n", __func__
, va_page
);
13939 } else if (map
->cloned
== true && map
->pmap
== kernel_pmap
&& map
->cva
[1] == va_page
) {
13940 kprintf("%s: skip clone mapping at va=%llx\n", __func__
, va_page
);
13942 } else if (map
->cloned
== false && map
->pmap
== pmap
&& map
->ova
== va_page
) {
13943 // range should be already defined as well
13951 panic("invalid state p->state=%x\n", p
->state
);
13954 if (found
== true) {
13959 // do not clone if no page info found
13960 if (found
== false) {
13961 PMAP_PGTRACE_UNLOCK(&ints
);
13965 // copy pre, target and post ptes to clone ptes
13966 for (int i
= 0; i
< 3; i
++) {
13967 ptep
= pmap_pte(pmap
, va_page
+ (i
- 1) * ARM_PGBYTES
);
13968 cptep
= pmap_pte(kernel_pmap
, map
->cva
[i
]);
13969 assert(cptep
!= NULL
);
13970 if (ptep
== NULL
) {
13971 PGTRACE_WRITE_PTE(cptep
, (pt_entry_t
)NULL
);
13973 PGTRACE_WRITE_PTE(cptep
, *ptep
);
13975 PMAP_UPDATE_TLBS(kernel_pmap
, map
->cva
[i
], map
->cva
[i
] + ARM_PGBYTES
, false);
13978 // get ptes for original and clone
13979 ptep
= pmap_pte(pmap
, va_page
);
13980 cptep
= pmap_pte(kernel_pmap
, map
->cva
[1]);
13982 // invalidate original pte and mark it as a pgtrace page
13983 PGTRACE_WRITE_PTE(ptep
, (*ptep
| ARM_PTE_PGTRACE
) & ~ARM_PTE_TYPE_VALID
);
13984 PMAP_UPDATE_TLBS(pmap
, map
->ova
, map
->ova
+ ARM_PGBYTES
, false);
13986 map
->cloned
= true;
13987 p
->state
= DEFINED
;
13989 kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__
, pa_page
, va_page
, map
->cva
[1], pmap
, ptep
, cptep
);
13991 PMAP_PGTRACE_UNLOCK(&ints
);
13996 // This function removes trace bit and validate pte if applicable. Pmap must be locked.
13998 pmap_pgtrace_remove_clone(pmap_t pmap
, pmap_paddr_t pa
, vm_map_offset_t va
)
14000 uint64_t ints
, found
= false;
14001 pmap_pgtrace_page_t
*p
;
14004 PMAP_PGTRACE_LOCK(&ints
);
14006 // we must have this page info
14007 p
= pmap_pgtrace_find_page(pa
);
14012 // find matching map
14013 queue_head_t
*mapq
= &(p
->maps
);
14014 queue_head_t
*mappool
= &(p
->map_pool
);
14015 pmap_pgtrace_map_t
*map
;
14017 queue_iterate(mapq
, map
, pmap_pgtrace_map_t
*, chain
) {
14018 if (map
->pmap
== pmap
&& map
->ova
== va
) {
14028 if (map
->cloned
== true) {
14029 // Restore back the pte to original state
14030 ptep
= pmap_pte(pmap
, map
->ova
);
14032 PGTRACE_WRITE_PTE(ptep
, *ptep
| ARM_PTE_TYPE_VALID
);
14033 PMAP_UPDATE_TLBS(pmap
, va
, va
+ ARM_PGBYTES
, false);
14035 // revert clone pages
14036 for (int i
= 0; i
< 3; i
++) {
14037 ptep
= pmap_pte(kernel_pmap
, map
->cva
[i
]);
14038 assert(ptep
!= NULL
);
14039 PGTRACE_WRITE_PTE(ptep
, map
->cva_spte
[i
]);
14040 PMAP_UPDATE_TLBS(kernel_pmap
, map
->cva
[i
], map
->cva
[i
] + ARM_PGBYTES
, false);
14044 queue_remove(mapq
, map
, pmap_pgtrace_map_t
*, chain
);
14046 map
->ova
= (vm_map_offset_t
)NULL
;
14047 map
->cloned
= false;
14048 queue_enter_first(mappool
, map
, pmap_pgtrace_map_t
*, chain
);
14050 kprintf("%s: p=%p pa=%llx va=%llx\n", __func__
, p
, pa
, va
);
14053 PMAP_PGTRACE_UNLOCK(&ints
);
14056 // remove all clones of given pa - pmap must be locked
14058 pmap_pgtrace_remove_all_clone(pmap_paddr_t pa
)
14061 pmap_pgtrace_page_t
*p
;
14064 PMAP_PGTRACE_LOCK(&ints
);
14066 // we must have this page info
14067 p
= pmap_pgtrace_find_page(pa
);
14069 PMAP_PGTRACE_UNLOCK(&ints
);
14073 queue_head_t
*mapq
= &(p
->maps
);
14074 queue_head_t
*mappool
= &(p
->map_pool
);
14075 queue_head_t
*mapwaste
= &(p
->map_waste
);
14076 pmap_pgtrace_map_t
*map
;
14078 // move maps to waste
14079 while (!queue_empty(mapq
)) {
14080 queue_remove_first(mapq
, map
, pmap_pgtrace_map_t
*, chain
);
14081 queue_enter_first(mapwaste
, map
, pmap_pgtrace_map_t
*, chain
);
14084 PMAP_PGTRACE_UNLOCK(&ints
);
14086 // sanitize maps in waste
14087 queue_iterate(mapwaste
, map
, pmap_pgtrace_map_t
*, chain
) {
14088 if (map
->cloned
== true) {
14089 pmap_lock(map
->pmap
);
14091 // restore back original pte
14092 ptep
= pmap_pte(map
->pmap
, map
->ova
);
14094 PGTRACE_WRITE_PTE(ptep
, *ptep
| ARM_PTE_TYPE_VALID
);
14095 PMAP_UPDATE_TLBS(map
->pmap
, map
->ova
, map
->ova
+ ARM_PGBYTES
, false);
14097 // revert clone ptes
14098 for (int i
= 0; i
< 3; i
++) {
14099 ptep
= pmap_pte(kernel_pmap
, map
->cva
[i
]);
14100 assert(ptep
!= NULL
);
14101 PGTRACE_WRITE_PTE(ptep
, map
->cva_spte
[i
]);
14102 PMAP_UPDATE_TLBS(kernel_pmap
, map
->cva
[i
], map
->cva
[i
] + ARM_PGBYTES
, false);
14105 pmap_unlock(map
->pmap
);
14109 map
->ova
= (vm_map_offset_t
)NULL
;
14110 map
->cloned
= false;
14113 PMAP_PGTRACE_LOCK(&ints
);
14115 // recycle maps back to map_pool
14116 while (!queue_empty(mapwaste
)) {
14117 queue_remove_first(mapwaste
, map
, pmap_pgtrace_map_t
*, chain
);
14118 queue_enter_first(mappool
, map
, pmap_pgtrace_map_t
*, chain
);
14121 PMAP_PGTRACE_UNLOCK(&ints
);
14125 pmap_pgtrace_get_search_space(pmap_t pmap
, vm_map_offset_t
*startp
, vm_map_offset_t
*endp
)
14128 vm_map_offset_t end
;
14130 if (pmap
== kernel_pmap
) {
14131 tsz
= (get_tcr() >> TCR_T1SZ_SHIFT
) & TCR_TSZ_MASK
;
14132 *startp
= MAX(VM_MIN_KERNEL_ADDRESS
, (UINT64_MAX
>> (64 - tsz
)) << (64 - tsz
));
14133 *endp
= VM_MAX_KERNEL_ADDRESS
;
14135 tsz
= (get_tcr() >> TCR_T0SZ_SHIFT
) & TCR_TSZ_MASK
;
14139 end
= ((uint64_t)1 << (64 - tsz
)) - 1;
14146 assert(*endp
> *startp
);
14151 // has pa mapped in given pmap? then clone it
14153 pmap_pgtrace_clone_from_pa(pmap_t pmap
, pmap_paddr_t pa
, vm_map_offset_t start_offset
, vm_map_offset_t end_offset
)
14156 vm_map_offset_t min
, max
;
14157 vm_map_offset_t cur_page
, end_page
;
14161 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
14163 pmap_pgtrace_get_search_space(pmap
, &min
, &max
);
14165 cur_page
= arm_trunc_page(min
);
14166 end_page
= arm_trunc_page(max
);
14167 while (cur_page
<= end_page
) {
14168 vm_map_offset_t add
= 0;
14172 // skip uninterested space
14173 if (pmap
== kernel_pmap
&&
14174 ((vm_kernel_base
<= cur_page
&& cur_page
< vm_kernel_top
) ||
14175 (vm_kext_base
<= cur_page
&& cur_page
< vm_kext_top
))) {
14177 goto unlock_continue
;
14180 // check whether we can skip l1
14181 ttep
= pmap_tt1e(pmap
, cur_page
);
14184 if ((tte
& (ARM_TTE_TYPE_MASK
| ARM_TTE_VALID
)) != (ARM_TTE_TYPE_TABLE
| ARM_TTE_VALID
)) {
14185 add
= ARM_TT_L1_SIZE
;
14186 goto unlock_continue
;
14190 tte
= ((tt_entry_t
*) phystokv(tte
& ARM_TTE_TABLE_MASK
))[tt2_index(pmap
, pt_attr
, cur_page
)];
14192 if ((tte
& (ARM_TTE_TYPE_MASK
| ARM_TTE_VALID
)) != (ARM_TTE_TYPE_TABLE
| ARM_TTE_VALID
)) {
14193 add
= ARM_TT_L2_SIZE
;
14194 goto unlock_continue
;
14198 ptep
= &(((pt_entry_t
*) phystokv(tte
& ARM_TTE_TABLE_MASK
))[tt3_index(pmap
, pt_attr
, cur_page
)]);
14199 if (ptep
== PT_ENTRY_NULL
) {
14200 add
= ARM_TT_L3_SIZE
;
14201 goto unlock_continue
;
14204 if (arm_trunc_page(pa
) == pte_to_pa(*ptep
)) {
14205 if (pmap_pgtrace_enter_clone(pmap
, cur_page
, start_offset
, end_offset
) == true) {
14216 if (cur_page
+ add
< cur_page
) {
14227 // search pv table and clone vas of given pa
14229 pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa
, vm_map_offset_t start_offset
, vm_map_offset_t end_offset
)
14237 queue_head_t pmapvaq
;
14240 queue_init(&pmapvaq
);
14242 pai
= pa_index(pa
);
14244 pvh
= pai_to_pvh(pai
);
14246 // collect pmap/va pair from pvh
14247 if (pvh_test_type(pvh
, PVH_TYPE_PTEP
)) {
14248 ptep
= pvh_ptep(pvh
);
14249 pmap
= ptep_get_pmap(ptep
);
14251 pmapva
= (pmap_va_t
*)zalloc(ZV_PMAP_VA
);
14252 pmapva
->pmap
= pmap
;
14253 pmapva
->va
= ptep_get_va(ptep
);
14255 queue_enter_first(&pmapvaq
, pmapva
, pmap_va_t
*, chain
);
14256 } else if (pvh_test_type(pvh
, PVH_TYPE_PVEP
)) {
14259 pvep
= pvh_list(pvh
);
14261 ptep
= pve_get_ptep(pvep
);
14262 pmap
= ptep_get_pmap(ptep
);
14264 pmapva
= (pmap_va_t
*)zalloc(ZV_PMAP_VA
);
14265 pmapva
->pmap
= pmap
;
14266 pmapva
->va
= ptep_get_va(ptep
);
14268 queue_enter_first(&pmapvaq
, pmapva
, pmap_va_t
*, chain
);
14270 pvep
= PVE_NEXT_PTR(pve_next(pvep
));
14276 // clone them while making sure mapping still exists
14277 queue_iterate(&pmapvaq
, pmapva
, pmap_va_t
*, chain
) {
14278 pmap_lock(pmapva
->pmap
);
14279 ptep
= pmap_pte(pmapva
->pmap
, pmapva
->va
);
14280 if (pte_to_pa(*ptep
) == pa
) {
14281 if (pmap_pgtrace_enter_clone(pmapva
->pmap
, pmapva
->va
, start_offset
, end_offset
) == true) {
14285 pmap_unlock(pmapva
->pmap
);
14287 zfree(ZV_PMAP_VA
, pmapva
);
14293 // allocate a page info
14294 static pmap_pgtrace_page_t
*
14295 pmap_pgtrace_alloc_page(void)
14297 pmap_pgtrace_page_t
*p
;
14298 queue_head_t
*mapq
;
14299 queue_head_t
*mappool
;
14300 queue_head_t
*mapwaste
;
14301 pmap_pgtrace_map_t
*map
;
14303 p
= zalloc(ZV_PMAP_PGTRACE
);
14306 p
->state
= UNDEFINED
;
14309 mappool
= &(p
->map_pool
);
14310 mapwaste
= &(p
->map_waste
);
14312 queue_init(mappool
);
14313 queue_init(mapwaste
);
14315 for (int i
= 0; i
< PGTRACE_MAX_MAP
; i
++) {
14316 vm_map_offset_t newcva
;
14319 vm_map_entry_t entry
;
14322 vm_object_reference(kernel_object
);
14323 kr
= vm_map_find_space(kernel_map
, &newcva
, vm_map_round_page(3 * ARM_PGBYTES
, PAGE_MASK
), 0, 0, VM_MAP_KERNEL_FLAGS_NONE
, VM_KERN_MEMORY_DIAG
, &entry
);
14324 if (kr
!= KERN_SUCCESS
) {
14325 panic("%s VM couldn't find any space kr=%d\n", __func__
, kr
);
14327 VME_OBJECT_SET(entry
, kernel_object
);
14328 VME_OFFSET_SET(entry
, newcva
);
14329 vm_map_unlock(kernel_map
);
14331 // fill default clone page info and add to pool
14332 map
= zalloc(ZV_PMAP_PGTRACE
);
14333 for (int j
= 0; j
< 3; j
++) {
14334 vm_map_offset_t addr
= newcva
+ j
* ARM_PGBYTES
;
14336 // pre-expand pmap while preemption enabled
14337 kr
= pmap_expand(kernel_pmap
, addr
, 0, PMAP_TT_L3_LEVEL
);
14338 if (kr
!= KERN_SUCCESS
) {
14339 panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__
, addr
, kr
);
14342 cptep
= pmap_pte(kernel_pmap
, addr
);
14343 assert(cptep
!= NULL
);
14345 map
->cva
[j
] = addr
;
14346 map
->cva_spte
[j
] = *cptep
;
14348 map
->range
.start
= map
->range
.end
= 0;
14349 map
->cloned
= false;
14350 queue_enter_first(mappool
, map
, pmap_pgtrace_map_t
*, chain
);
14356 // free a page info
14358 pmap_pgtrace_free_page(pmap_pgtrace_page_t
*p
)
14360 queue_head_t
*mapq
;
14361 queue_head_t
*mappool
;
14362 queue_head_t
*mapwaste
;
14363 pmap_pgtrace_map_t
*map
;
14368 mappool
= &(p
->map_pool
);
14369 mapwaste
= &(p
->map_waste
);
14371 while (!queue_empty(mapq
)) {
14372 queue_remove_first(mapq
, map
, pmap_pgtrace_map_t
*, chain
);
14373 zfree(ZV_PMAP_PGTRACE
, map
);
14376 while (!queue_empty(mappool
)) {
14377 queue_remove_first(mappool
, map
, pmap_pgtrace_map_t
*, chain
);
14378 zfree(ZV_PMAP_PGTRACE
, map
);
14381 while (!queue_empty(mapwaste
)) {
14382 queue_remove_first(mapwaste
, map
, pmap_pgtrace_map_t
*, chain
);
14383 zfree(ZV_PMAP_PGTRACE
, map
);
14386 zfree(ZV_PMAP_PGTRACE
, p
);
14389 // construct page infos with the given address range
14391 pmap_pgtrace_add_page(pmap_t pmap
, vm_map_offset_t start
, vm_map_offset_t end
)
14395 queue_head_t
*q
= &(pmap_pgtrace
.pages
);
14397 vm_map_offset_t cur_page
, end_page
;
14400 kprintf("%s: invalid start=%llx > end=%llx\n", __func__
, start
, end
);
14406 // add each page in given range
14407 cur_page
= arm_trunc_page(start
);
14408 end_page
= arm_trunc_page(end
);
14409 while (cur_page
<= end_page
) {
14410 pmap_paddr_t pa_page
= 0;
14411 uint64_t num_cloned
= 0;
14412 pmap_pgtrace_page_t
*p
= NULL
, *newp
;
14413 bool free_newp
= true;
14414 pmap_pgtrace_page_state_t state
;
14416 // do all allocations outside of spinlocks
14417 newp
= pmap_pgtrace_alloc_page();
14419 // keep lock orders in pmap, kernel_pmap and pgtrace lock
14420 if (pmap
!= NULL
) {
14421 pmap_lock_ro(pmap
);
14423 if (pmap
!= kernel_pmap
) {
14424 pmap_lock_ro(kernel_pmap
);
14427 // addresses are physical if pmap is null
14428 if (pmap
== NULL
) {
14430 pa_page
= cur_page
;
14431 state
= VA_UNDEFINED
;
14433 ptep
= pmap_pte(pmap
, cur_page
);
14434 if (ptep
!= NULL
) {
14435 pa_page
= pte_to_pa(*ptep
);
14438 state
= PA_UNDEFINED
;
14442 // search if we have a page info already
14443 PMAP_PGTRACE_LOCK(&ints
);
14444 if (state
!= PA_UNDEFINED
) {
14445 p
= pmap_pgtrace_find_page(pa_page
);
14448 // add pre-allocated page info if nothing found
14450 queue_enter_first(q
, newp
, pmap_pgtrace_page_t
*, chain
);
14455 // now p points what we want
14458 queue_head_t
*mapq
= &(p
->maps
);
14459 queue_head_t
*mappool
= &(p
->map_pool
);
14460 pmap_pgtrace_map_t
*map
;
14461 vm_map_offset_t start_offset
, end_offset
;
14463 // calculate trace offsets in the page
14464 if (cur_page
> start
) {
14467 start_offset
= start
- cur_page
;
14469 if (cur_page
== end_page
) {
14470 end_offset
= end
- end_page
;
14472 end_offset
= ARM_PGBYTES
- 1;
14475 kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__
, pmap
, cur_page
, ptep
, state
, start_offset
, end_offset
);
14478 assert(!queue_empty(mappool
));
14479 queue_remove_first(mappool
, map
, pmap_pgtrace_map_t
*, chain
);
14480 if (p
->state
== PA_UNDEFINED
) {
14482 map
->ova
= cur_page
;
14483 map
->range
.start
= start_offset
;
14484 map
->range
.end
= end_offset
;
14485 } else if (p
->state
== VA_UNDEFINED
) {
14487 map
->range
.start
= start_offset
;
14488 map
->range
.end
= end_offset
;
14489 } else if (p
->state
== DEFINED
) {
14492 map
->ova
= cur_page
;
14493 map
->range
.start
= start_offset
;
14494 map
->range
.end
= end_offset
;
14496 panic("invalid p->state=%d\n", p
->state
);
14500 map
->cloned
= false;
14501 queue_enter(mapq
, map
, pmap_pgtrace_map_t
*, chain
);
14504 PMAP_PGTRACE_UNLOCK(&ints
);
14505 if (pmap
!= kernel_pmap
) {
14506 pmap_unlock_ro(kernel_pmap
);
14508 if (pmap
!= NULL
) {
14509 pmap_unlock_ro(pmap
);
14513 if (pa_valid(pa_page
)) {
14514 num_cloned
= pmap_pgtrace_clone_from_pvtable(pa_page
, start_offset
, end_offset
);
14516 if (pmap
== NULL
) {
14517 num_cloned
+= pmap_pgtrace_clone_from_pa(kernel_pmap
, pa_page
, start_offset
, end_offset
);
14519 num_cloned
+= pmap_pgtrace_clone_from_pa(pmap
, pa_page
, start_offset
, end_offset
);
14522 // free pre-allocations if we didn't add it to the q
14524 pmap_pgtrace_free_page(newp
);
14527 if (num_cloned
== 0) {
14528 kprintf("%s: no mapping found for pa_page=%llx but will be added when a page entered\n", __func__
, pa_page
);
14534 if (cur_page
+ ARM_PGBYTES
< cur_page
) {
14537 cur_page
+= ARM_PGBYTES
;
14546 // delete page infos for given address range
14548 pmap_pgtrace_delete_page(pmap_t pmap
, vm_map_offset_t start
, vm_map_offset_t end
)
14552 queue_head_t
*q
= &(pmap_pgtrace
.pages
);
14553 pmap_pgtrace_page_t
*p
;
14554 vm_map_offset_t cur_page
, end_page
;
14556 kprintf("%s start=%llx end=%llx\n", __func__
, start
, end
);
14561 pmap_paddr_t pa_page
;
14563 // remove page info from start to end
14564 cur_page
= arm_trunc_page(start
);
14565 end_page
= arm_trunc_page(end
);
14566 while (cur_page
<= end_page
) {
14569 if (pmap
== NULL
) {
14570 pa_page
= cur_page
;
14573 ptep
= pmap_pte(pmap
, cur_page
);
14574 if (ptep
== NULL
) {
14578 pa_page
= pte_to_pa(*ptep
);
14582 // remove all clones and validate
14583 pmap_pgtrace_remove_all_clone(pa_page
);
14585 // find page info and delete
14586 PMAP_PGTRACE_LOCK(&ints
);
14587 p
= pmap_pgtrace_find_page(pa_page
);
14589 queue_remove(q
, p
, pmap_pgtrace_page_t
*, chain
);
14592 PMAP_PGTRACE_UNLOCK(&ints
);
14594 // free outside of locks
14596 pmap_pgtrace_free_page(p
);
14601 if (cur_page
+ ARM_PGBYTES
< cur_page
) {
14604 cur_page
+= ARM_PGBYTES
;
14614 pmap_pgtrace_fault(pmap_t pmap
, vm_map_offset_t va
, arm_saved_state_t
*ss
)
14617 pgtrace_run_result_t res
;
14618 pmap_pgtrace_page_t
*p
;
14619 uint64_t ints
, found
= false;
14622 // Quick check if we are interested
14623 ptep
= pmap_pte(pmap
, va
);
14624 if (!ptep
|| !(*ptep
& ARM_PTE_PGTRACE
)) {
14625 return KERN_FAILURE
;
14628 PMAP_PGTRACE_LOCK(&ints
);
14630 // Check again since access is serialized
14631 ptep
= pmap_pte(pmap
, va
);
14632 if (!ptep
|| !(*ptep
& ARM_PTE_PGTRACE
)) {
14633 PMAP_PGTRACE_UNLOCK(&ints
);
14634 return KERN_FAILURE
;
14635 } else if ((*ptep
& ARM_PTE_TYPE_VALID
) == ARM_PTE_TYPE_VALID
) {
14636 // Somehow this cpu's tlb has not updated
14637 kprintf("%s Somehow this cpu's tlb has not updated?\n", __func__
);
14638 PMAP_UPDATE_TLBS(pmap
, va
, va
+ ARM_PGBYTES
, false);
14640 PMAP_PGTRACE_UNLOCK(&ints
);
14641 return KERN_SUCCESS
;
14644 // Find if this pa is what we are tracing
14645 pa
= pte_to_pa(*ptep
);
14647 p
= pmap_pgtrace_find_page(arm_trunc_page(pa
));
14649 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__
, va
, pa
);
14652 // find if pmap and va are also matching
14653 queue_head_t
*mapq
= &(p
->maps
);
14654 queue_head_t
*mapwaste
= &(p
->map_waste
);
14655 pmap_pgtrace_map_t
*map
;
14657 queue_iterate(mapq
, map
, pmap_pgtrace_map_t
*, chain
) {
14658 if (map
->pmap
== pmap
&& map
->ova
== arm_trunc_page(va
)) {
14664 // if not found, search map waste as they are still valid
14666 queue_iterate(mapwaste
, map
, pmap_pgtrace_map_t
*, chain
) {
14667 if (map
->pmap
== pmap
&& map
->ova
== arm_trunc_page(va
)) {
14675 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__
, va
, pa
);
14678 // Decode and run it on the clone map
14679 bzero(&res
, sizeof(res
));
14680 pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss
), // instruction
14681 va
, map
->cva
, // fault va and clone page vas
14684 // write a log if in range
14685 vm_map_offset_t offset
= va
- map
->ova
;
14686 if (map
->range
.start
<= offset
&& offset
<= map
->range
.end
) {
14687 pgtrace_write_log(res
);
14690 PMAP_PGTRACE_UNLOCK(&ints
);
14692 // Return to next instruction
14693 add_saved_state_pc(ss
, sizeof(uint32_t));
14695 return KERN_SUCCESS
;
/**
 * The minimum shared region nesting size is used by the VM to determine when to
 * break up large mappings to nested regions. The smallest size that these
 * mappings can be broken into is determined by what page table level those
 * regions are being nested in at and the size of the page tables.
 *
 * For instance, if a nested region is nesting at L2 for a process utilizing
 * 16KB page tables, then the minimum nesting size would be 32MB (size of an L2
 * block entry).
 *
 * @param pmap The target pmap to determine the block size based on whether it's
 *             using 16KB or 4KB page tables.
 *
 * @return The minimum size of a nested shared region.
 */
vm_map_offset_t
pmap_shared_region_size_min(__unused pmap_t pmap)
{
#if (__ARM_VMSA__ > 7)
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	/*
	 * We always nest the shared region at L2 (32MB for 16KB pages, 2MB for
	 * 4KB pages). This means that a target pmap will contain L2 entries that
	 * point to shared L3 page tables in the shared region pmap.
	 */
	return pt_attr_twig_size(pt_attr);
#else
	return ARM_NESTING_SIZE_MIN;
#endif
}

/*
 * The concept of a nesting size maximum was made to accommodate restrictions in
 * place for nesting regions on PowerPC. There are no restrictions to max nesting
 * sizes on x86/armv7/armv8 and this should get removed.
 *
 * TODO: <rdar://problem/65247502> Completely remove pmap_nesting_size_max()
 */
vm_map_offset_t
pmap_nesting_size_max(__unused pmap_t pmap)
{
	return ARM_NESTING_SIZE_MAX;
}
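/*
 * Worked example for pmap_shared_region_size_min() above: with a 16KB page
 * geometry each L2 (twig) entry spans 2048 8-byte page-table entries * 16KB =
 * 32MB, while a 4KB geometry gives 512 entries * 4KB = 2MB, matching the
 * values called out in the block comment.
 */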
14744 pmap_enforces_execute_only(
14745 #if (__ARM_VMSA__ == 7)
14750 #if (__ARM_VMSA__ > 7)
14751 return pmap
!= kernel_pmap
;
14757 MARK_AS_PMAP_TEXT
void
14758 pmap_set_vm_map_cs_enforced_internal(
14762 VALIDATE_PMAP(pmap
);
14763 pmap
->pmap_vm_map_cs_enforced
= new_value
;
14767 pmap_set_vm_map_cs_enforced(
14772 pmap_set_vm_map_cs_enforced_ppl(pmap
, new_value
);
14774 pmap_set_vm_map_cs_enforced_internal(pmap
, new_value
);
14778 extern int cs_process_enforcement_enable
;
14780 pmap_get_vm_map_cs_enforced(
14783 if (cs_process_enforcement_enable
) {
14786 return pmap
->pmap_vm_map_cs_enforced
;
14789 MARK_AS_PMAP_TEXT
void
14790 pmap_set_jit_entitled_internal(
14791 __unused pmap_t pmap
)
14797 pmap_set_jit_entitled(
14801 pmap_set_jit_entitled_ppl(pmap
);
14803 pmap_set_jit_entitled_internal(pmap
);
14808 pmap_get_jit_entitled(
14809 __unused pmap_t pmap
)
14814 MARK_AS_PMAP_TEXT
static kern_return_t
14815 pmap_query_page_info_internal(
14817 vm_map_offset_t va
,
14824 pv_entry_t
**pv_h
, *pve_p
;
14826 if (pmap
== PMAP_NULL
|| pmap
== kernel_pmap
) {
14827 pmap_pin_kernel_pages((vm_offset_t
)disp_p
, sizeof(*disp_p
));
14829 pmap_unpin_kernel_pages((vm_offset_t
)disp_p
, sizeof(*disp_p
));
14830 return KERN_INVALID_ARGUMENT
;
14835 VALIDATE_PMAP(pmap
);
14836 pmap_lock_ro(pmap
);
14838 pte
= pmap_pte(pmap
, va
);
14839 if (pte
== PT_ENTRY_NULL
) {
14843 pa
= pte_to_pa(*((volatile pt_entry_t
*)pte
));
14845 if (ARM_PTE_IS_COMPRESSED(*pte
, pte
)) {
14846 disp
|= PMAP_QUERY_PAGE_COMPRESSED
;
14847 if (*pte
& ARM_PTE_COMPRESSED_ALT
) {
14848 disp
|= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT
;
14852 disp
|= PMAP_QUERY_PAGE_PRESENT
;
14853 pai
= (int) pa_index(pa
);
14854 if (!pa_valid(pa
)) {
14858 pv_h
= pai_to_pvh(pai
);
14859 pve_p
= PV_ENTRY_NULL
;
14860 if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
14861 pve_p
= pvh_list(pv_h
);
14862 while (pve_p
!= PV_ENTRY_NULL
&&
14863 pve_get_ptep(pve_p
) != pte
) {
14864 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
14867 if (IS_ALTACCT_PAGE(pai
, pve_p
)) {
14868 disp
|= PMAP_QUERY_PAGE_ALTACCT
;
14869 } else if (IS_REUSABLE_PAGE(pai
)) {
14870 disp
|= PMAP_QUERY_PAGE_REUSABLE
;
14871 } else if (IS_INTERNAL_PAGE(pai
)) {
14872 disp
|= PMAP_QUERY_PAGE_INTERNAL
;
14878 pmap_unlock_ro(pmap
);
14879 pmap_pin_kernel_pages((vm_offset_t
)disp_p
, sizeof(*disp_p
));
14881 pmap_unpin_kernel_pages((vm_offset_t
)disp_p
, sizeof(*disp_p
));
14882 return KERN_SUCCESS
;
14886 pmap_query_page_info(
14888 vm_map_offset_t va
,
14892 return pmap_query_page_info_ppl(pmap
, va
, disp_p
);
14894 return pmap_query_page_info_internal(pmap
, va
, disp_p
);
14898 MARK_AS_PMAP_TEXT kern_return_t
14899 pmap_return_internal(__unused boolean_t do_panic
, __unused boolean_t do_recurse
)
14902 return KERN_SUCCESS
;
14906 pmap_return(boolean_t do_panic
, boolean_t do_recurse
)
14909 return pmap_return_ppl(do_panic
, do_recurse
);
14911 return pmap_return_internal(do_panic
, do_recurse
);
14919 pmap_load_legacy_trust_cache(struct pmap_legacy_trust_cache __unused
*trust_cache
,
14920 const vm_size_t __unused trust_cache_len
)
14923 return KERN_NOT_SUPPORTED
;
14927 pmap_load_image4_trust_cache(struct pmap_image4_trust_cache __unused
*trust_cache
,
14928 const vm_size_t __unused trust_cache_len
,
14929 uint8_t const * __unused img4_manifest
,
14930 const vm_size_t __unused img4_manifest_buffer_len
,
14931 const vm_size_t __unused img4_manifest_actual_len
,
14932 bool __unused dry_run
)
14935 return PMAP_TC_UNKNOWN_FORMAT
;
14946 pmap_lockdown_image4_slab(__unused vm_offset_t slab
, __unused vm_size_t slab_len
, __unused
uint64_t flags
)
14952 pmap_claim_reserved_ppl_page(void)
14959 pmap_free_reserved_ppl_page(void __unused
*kva
)
14965 MARK_AS_PMAP_TEXT
static bool
14966 pmap_is_trust_cache_loaded_internal(const uuid_t uuid
)
14968 bool found
= false;
14970 pmap_simple_lock(&pmap_loaded_trust_caches_lock
);
14972 for (struct pmap_image4_trust_cache
const *c
= pmap_image4_trust_caches
; c
!= NULL
; c
= c
->next
) {
14973 if (bcmp(uuid
, c
->module->uuid
, sizeof(uuid_t
)) == 0) {
14979 #ifdef PLATFORM_BridgeOS
14980 for (struct pmap_legacy_trust_cache
const *c
= pmap_legacy_trust_caches
; c
!= NULL
; c
= c
->next
) {
14981 if (bcmp(uuid
, c
->uuid
, sizeof(uuid_t
)) == 0) {
14989 pmap_simple_unlock(&pmap_loaded_trust_caches_lock
);
14994 pmap_is_trust_cache_loaded(const uuid_t uuid
)
14997 return pmap_is_trust_cache_loaded_ppl(uuid
);
14999 return pmap_is_trust_cache_loaded_internal(uuid
);
15003 MARK_AS_PMAP_TEXT
static bool
15004 pmap_lookup_in_loaded_trust_caches_internal(const uint8_t cdhash
[CS_CDHASH_LEN
])
15006 struct pmap_image4_trust_cache
const *cache
= NULL
;
15007 #ifdef PLATFORM_BridgeOS
15008 struct pmap_legacy_trust_cache
const *legacy
= NULL
;
15011 pmap_simple_lock(&pmap_loaded_trust_caches_lock
);
15013 for (cache
= pmap_image4_trust_caches
; cache
!= NULL
; cache
= cache
->next
) {
15014 uint8_t hash_type
= 0, flags
= 0;
15016 if (lookup_in_trust_cache_module(cache
->module, cdhash
, &hash_type
, &flags
)) {
15021 #ifdef PLATFORM_BridgeOS
15022 for (legacy
= pmap_legacy_trust_caches
; legacy
!= NULL
; legacy
= legacy
->next
) {
15023 for (uint32_t i
= 0; i
< legacy
->num_hashes
; i
++) {
15024 if (bcmp(legacy
->hashes
[i
], cdhash
, CS_CDHASH_LEN
) == 0) {
15032 pmap_simple_unlock(&pmap_loaded_trust_caches_lock
);
15034 if (cache
!= NULL
) {
15036 #ifdef PLATFORM_BridgeOS
15037 } else if (legacy
!= NULL
) {
15046 pmap_lookup_in_loaded_trust_caches(const uint8_t cdhash
[CS_CDHASH_LEN
])
15049 return pmap_lookup_in_loaded_trust_caches_ppl(cdhash
);
15051 return pmap_lookup_in_loaded_trust_caches_internal(cdhash
);
15055 MARK_AS_PMAP_TEXT
static uint32_t
15056 pmap_lookup_in_static_trust_cache_internal(const uint8_t cdhash
[CS_CDHASH_LEN
])
15058 // Awkward indirection, because the PPL macros currently force their functions to be static.
15059 return lookup_in_static_trust_cache(cdhash
);
15063 pmap_lookup_in_static_trust_cache(const uint8_t cdhash
[CS_CDHASH_LEN
])
15066 return pmap_lookup_in_static_trust_cache_ppl(cdhash
);
15068 return pmap_lookup_in_static_trust_cache_internal(cdhash
);
15072 MARK_AS_PMAP_TEXT
static void
15073 pmap_set_compilation_service_cdhash_internal(const uint8_t cdhash
[CS_CDHASH_LEN
])
15075 pmap_simple_lock(&pmap_compilation_service_cdhash_lock
);
15076 memcpy(pmap_compilation_service_cdhash
, cdhash
, CS_CDHASH_LEN
);
15077 pmap_simple_unlock(&pmap_compilation_service_cdhash_lock
);
15079 pmap_cs_log("Added Compilation Service CDHash through the PPL: 0x%02X 0x%02X 0x%02X 0x%02X", cdhash
[0], cdhash
[1], cdhash
[2], cdhash
[4]);
15082 MARK_AS_PMAP_TEXT
static bool
15083 pmap_match_compilation_service_cdhash_internal(const uint8_t cdhash
[CS_CDHASH_LEN
])
15085 bool match
= false;
15087 pmap_simple_lock(&pmap_compilation_service_cdhash_lock
);
15088 if (bcmp(pmap_compilation_service_cdhash
, cdhash
, CS_CDHASH_LEN
) == 0) {
15091 pmap_simple_unlock(&pmap_compilation_service_cdhash_lock
);
15094 pmap_cs_log("Matched Compilation Service CDHash through the PPL");
15101 pmap_set_compilation_service_cdhash(const uint8_t cdhash
[CS_CDHASH_LEN
])
15104 pmap_set_compilation_service_cdhash_ppl(cdhash
);
15106 pmap_set_compilation_service_cdhash_internal(cdhash
);
15111 pmap_match_compilation_service_cdhash(const uint8_t cdhash
[CS_CDHASH_LEN
])
15114 return pmap_match_compilation_service_cdhash_ppl(cdhash
);
15116 return pmap_match_compilation_service_cdhash_internal(cdhash
);
MARK_AS_PMAP_TEXT static void
pmap_footprint_suspend_internal(
	vm_map_t map,
	boolean_t suspend)
{
#if DEVELOPMENT || DEBUG
	if (suspend) {
		current_thread()->pmap_footprint_suspended = TRUE;
		map->pmap->footprint_was_suspended = TRUE;
	} else {
		current_thread()->pmap_footprint_suspended = FALSE;
	}
#else /* DEVELOPMENT || DEBUG */
	(void) map;
	(void) suspend;
#endif /* DEVELOPMENT || DEBUG */
}

void
pmap_footprint_suspend(
	vm_map_t map,
	boolean_t suspend)
{
#if XNU_MONITOR
	pmap_footprint_suspend_ppl(map, suspend);
#else
	pmap_footprint_suspend_internal(map, suspend);
#endif
}
15150 #if defined(__arm64__) && (DEVELOPMENT || DEBUG)
15152 struct page_table_dump_header
{
15154 uint64_t num_entries
;
15159 static kern_return_t
15160 pmap_dump_page_tables_recurse(pmap_t pmap
,
15161 const tt_entry_t
*ttp
,
15162 unsigned int cur_level
,
15163 unsigned int level_mask
,
15167 size_t *bytes_copied
)
15169 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
15170 uint64_t num_entries
= pt_attr_page_size(pt_attr
) / sizeof(*ttp
);
15172 uint64_t size
= pt_attr
->pta_level_info
[cur_level
].size
;
15173 uint64_t valid_mask
= pt_attr
->pta_level_info
[cur_level
].valid_mask
;
15174 uint64_t type_mask
= pt_attr
->pta_level_info
[cur_level
].type_mask
;
15175 uint64_t type_block
= pt_attr
->pta_level_info
[cur_level
].type_block
;
15177 void *bufp
= (uint8_t*)buf_start
+ *bytes_copied
;
15179 if (cur_level
== pt_attr_root_level(pt_attr
)) {
15180 num_entries
= pmap_root_alloc_size(pmap
) / sizeof(tt_entry_t
);
15183 uint64_t tt_size
= num_entries
* sizeof(tt_entry_t
);
15184 const tt_entry_t
*tt_end
= &ttp
[num_entries
];
15186 if (((vm_offset_t
)buf_end
- (vm_offset_t
)bufp
) < (tt_size
+ sizeof(struct page_table_dump_header
))) {
15187 return KERN_INSUFFICIENT_BUFFER_SIZE
;
15190 if (level_mask
& (1U << cur_level
)) {
15191 struct page_table_dump_header
*header
= (struct page_table_dump_header
*)bufp
;
15192 header
->pa
= ml_static_vtop((vm_offset_t
)ttp
);
15193 header
->num_entries
= num_entries
;
15194 header
->start_va
= start_va
;
15195 header
->end_va
= start_va
+ (num_entries
* size
);
15197 bcopy(ttp
, (uint8_t*)bufp
+ sizeof(*header
), tt_size
);
15198 *bytes_copied
= *bytes_copied
+ sizeof(*header
) + tt_size
;
15200 uint64_t current_va
= start_va
;
15202 for (const tt_entry_t
*ttep
= ttp
; ttep
< tt_end
; ttep
++, current_va
+= size
) {
15203 tt_entry_t tte
= *ttep
;
15205 if (!(tte
& valid_mask
)) {
15209 if ((tte
& type_mask
) == type_block
) {
15212 if (cur_level
>= pt_attr_leaf_level(pt_attr
)) {
15213 panic("%s: corrupt entry %#llx at %p, "
15214 "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
15215 __FUNCTION__
, tte
, ttep
,
15216 ttp
, cur_level
, bufp
, buf_end
);
15219 const tt_entry_t
*next_tt
= (const tt_entry_t
*)phystokv(tte
& ARM_TTE_TABLE_MASK
);
15221 kern_return_t recurse_result
= pmap_dump_page_tables_recurse(pmap
, next_tt
, cur_level
+ 1,
15222 level_mask
, current_va
, buf_start
, buf_end
, bytes_copied
);
15224 if (recurse_result
!= KERN_SUCCESS
) {
15225 return recurse_result
;
15230 return KERN_SUCCESS
;
15234 pmap_dump_page_tables(pmap_t pmap
, void *bufp
, void *buf_end
, unsigned int level_mask
, size_t *bytes_copied
)
15237 panic("pmap_dump_page_tables must only be called from kernel debugger context");
15239 return pmap_dump_page_tables_recurse(pmap
, pmap
->tte
, pt_attr_root_level(pmap_get_pt_attr(pmap
)),
15240 level_mask
, pmap
->min
, bufp
, buf_end
, bytes_copied
);
15243 #else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
15246 pmap_dump_page_tables(pmap_t pmap __unused
, void *bufp __unused
, void *buf_end __unused
,
15247 unsigned int level_mask __unused
, size_t *bytes_copied __unused
)
15249 return KERN_NOT_SUPPORTED
;
15251 #endif /* !defined(__arm64__) */
15254 #ifdef CONFIG_XNUPOST
15256 static volatile bool pmap_test_took_fault
= false;
15259 pmap_test_fault_handler(arm_saved_state_t
* state
)
15261 bool retval
= false;
15262 uint32_t esr
= get_saved_state_esr(state
);
15263 esr_exception_class_t
class = ESR_EC(esr
);
15264 fault_status_t fsc
= ISS_IA_FSC(ESR_ISS(esr
));
15266 if ((class == ESR_EC_DABORT_EL1
) &&
15267 ((fsc
== FSC_PERMISSION_FAULT_L3
) || (fsc
== FSC_ACCESS_FLAG_FAULT_L3
))) {
15268 pmap_test_took_fault
= true;
15269 /* return to the instruction immediately after the call to NX page */
15270 set_saved_state_pc(state
, get_saved_state_pc(state
) + 4);
15278 pmap_test_access(pmap_t pmap
, vm_map_address_t va
, bool should_fault
, bool is_write
)
15281 * We're switching pmaps without using the normal thread mechanism;
15282 * disable interrupts and preemption to avoid any unexpected memory
15285 uint64_t old_int_state
= pmap_interrupts_disable();
15286 pmap_t old_pmap
= current_pmap();
15287 mp_disable_preemption();
15290 pmap_test_took_fault
= false;
15292 /* Disable PAN; pmap shouldn't be the kernel pmap. */
15293 #if __ARM_PAN_AVAILABLE__
15294 __builtin_arm_wsr("pan", 0);
15295 #endif /* __ARM_PAN_AVAILABLE__ */
15296 ml_expect_fault_begin(pmap_test_fault_handler
, va
);
15299 *((volatile uint64_t*)(va
)) = 0xdec0de;
15301 volatile uint64_t tmp
= *((volatile uint64_t*)(va
));
15305 /* Save the fault bool, and undo the gross stuff we did. */
15306 bool took_fault
= pmap_test_took_fault
;
15307 ml_expect_fault_end();
15308 #if __ARM_PAN_AVAILABLE__
15309 __builtin_arm_wsr("pan", 1);
15310 #endif /* __ARM_PAN_AVAILABLE__ */
15312 pmap_switch(old_pmap
);
15313 mp_enable_preemption();
15314 pmap_interrupts_restore(old_int_state
);
15315 bool retval
= (took_fault
== should_fault
);
15320 pmap_test_read(pmap_t pmap
, vm_map_address_t va
, bool should_fault
)
15322 bool retval
= pmap_test_access(pmap
, va
, should_fault
, false);
15326 "pmap=%p, va=%p, should_fault=%u",
15327 __func__
, should_fault
? "did not fault" : "faulted",
15328 pmap
, (void*)va
, (unsigned)should_fault
);
15335 pmap_test_write(pmap_t pmap
, vm_map_address_t va
, bool should_fault
)
15337 bool retval
= pmap_test_access(pmap
, va
, should_fault
, true);
15341 "pmap=%p, va=%p, should_fault=%u",
15342 __func__
, should_fault
? "did not fault" : "faulted",
15343 pmap
, (void*)va
, (unsigned)should_fault
);
15350 pmap_test_check_refmod(pmap_paddr_t pa
, unsigned int should_be_set
)
15352 unsigned int should_be_clear
= (~should_be_set
) & (VM_MEM_REFERENCED
| VM_MEM_MODIFIED
);
15353 unsigned int bits
= pmap_get_refmod((ppnum_t
)atop(pa
));
15355 bool retval
= (((bits
& should_be_set
) == should_be_set
) && ((bits
& should_be_clear
) == 0));
15358 T_FAIL("%s: bits=%u, "
15359 "pa=%p, should_be_set=%u",
15361 (void*)pa
, should_be_set
);
15367 static __attribute__((noinline
)) bool
15368 pmap_test_read_write(pmap_t pmap
, vm_map_address_t va
, bool allow_read
, bool allow_write
)
15370 bool retval
= (pmap_test_read(pmap
, va
, !allow_read
) | pmap_test_write(pmap
, va
, !allow_write
));
15375 pmap_test_test_config(unsigned int flags
)
15377 T_LOG("running pmap_test_test_config flags=0x%X", flags
);
15378 unsigned int map_count
= 0;
15379 unsigned long page_ratio
= 0;
15380 pmap_t pmap
= pmap_create_options(NULL
, 0, flags
);
15383 panic("Failed to allocate pmap");
15386 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
15387 uintptr_t native_page_size
= pt_attr_page_size(native_pt_attr
);
15388 uintptr_t pmap_page_size
= pt_attr_page_size(pt_attr
);
15389 uintptr_t pmap_twig_size
= pt_attr_twig_size(pt_attr
);
15391 if (pmap_page_size
<= native_page_size
) {
15392 page_ratio
= native_page_size
/ pmap_page_size
;
15395 * We claim to support a page_ratio of less than 1, which is
15396 * not currently supported by the pmap layer; panic.
15398 panic("%s: page_ratio < 1, native_page_size=%lu, pmap_page_size=%lu"
15400 __func__
, native_page_size
, pmap_page_size
,
15404 if (PAGE_RATIO
> 1) {
15406 * The kernel is deliberately pretending to have 16KB pages.
15407 * The pmap layer has code that supports this, so pretend the
15408 * page size is larger than it is.
15410 pmap_page_size
= PAGE_SIZE
;
15411 native_page_size
= PAGE_SIZE
;
15415 * Get two pages from the VM; one to be mapped wired, and one to be
15418 vm_page_t unwired_vm_page
= vm_page_grab();
15419 vm_page_t wired_vm_page
= vm_page_grab();
15421 if ((unwired_vm_page
== VM_PAGE_NULL
) || (wired_vm_page
== VM_PAGE_NULL
)) {
15422 panic("Failed to grab VM pages");
15425 ppnum_t pn
= VM_PAGE_GET_PHYS_PAGE(unwired_vm_page
);
15426 ppnum_t wired_pn
= VM_PAGE_GET_PHYS_PAGE(wired_vm_page
);
15428 pmap_paddr_t pa
= ptoa(pn
);
15429 pmap_paddr_t wired_pa
= ptoa(wired_pn
);
15432 * We'll start mappings at the second twig TT. This keeps us from only
15433 * using the first entry in each TT, which would trivially be address
15434 * 0; one of the things we will need to test is retrieving the VA for
15437 vm_map_address_t va_base
= pmap_twig_size
;
15438 vm_map_address_t wired_va_base
= ((2 * pmap_twig_size
) - pmap_page_size
);
15440 if (wired_va_base
< (va_base
+ (page_ratio
* pmap_page_size
))) {
15442 * Not exactly a functional failure, but this test relies on
15443 * there being a spare PTE slot we can use to pin the TT.
15445 panic("Cannot pin translation table");
15449 * Create the wired mapping; this will prevent the pmap layer from
15450 * reclaiming our test TTs, which would interfere with this test
15451 * ("interfere" -> "make it panic").
15453 pmap_enter_addr(pmap
, wired_va_base
, wired_pa
, VM_PROT_READ
, VM_PROT_READ
, 0, true);
15456 * Create read-only mappings of the nonwired page; if the pmap does
15457 * not use the same page size as the kernel, create multiple mappings
15458 * so that the kernel page is fully mapped.
15460 for (map_count
= 0; map_count
< page_ratio
; map_count
++) {
15461 pmap_enter_addr(pmap
, va_base
+ (pmap_page_size
* map_count
), pa
+ (pmap_page_size
* (map_count
)), VM_PROT_READ
, VM_PROT_READ
, 0, false);
15464 /* Validate that all the PTEs have the expected PA and VA. */
15465 for (map_count
= 0; map_count
< page_ratio
; map_count
++) {
15466 pt_entry_t
* ptep
= pmap_pte(pmap
, va_base
+ (pmap_page_size
* map_count
));
15468 if (pte_to_pa(*ptep
) != (pa
+ (pmap_page_size
* map_count
))) {
15469 T_FAIL("Unexpected pa=%p, expected %p, map_count=%u",
15470 (void*)pte_to_pa(*ptep
), (void*)(pa
+ (pmap_page_size
* map_count
)), map_count
);
15473 if (ptep_get_va(ptep
) != (va_base
+ (pmap_page_size
* map_count
))) {
15474 T_FAIL("Unexpected va=%p, expected %p, map_count=%u",
15475 (void*)ptep_get_va(ptep
), (void*)(va_base
+ (pmap_page_size
* map_count
)), map_count
);
15479 T_LOG("Validate that reads to our mapping do not fault.");
15480 pmap_test_read(pmap
, va_base
, false);
15482 T_LOG("Validate that writes to our mapping fault.");
15483 pmap_test_write(pmap
, va_base
, true);
15485 T_LOG("Make the first mapping writable.");
15486 pmap_enter_addr(pmap
, va_base
, pa
, VM_PROT_READ
| VM_PROT_WRITE
, VM_PROT_READ
| VM_PROT_WRITE
, 0, false);
15488 T_LOG("Validate that writes to our mapping do not fault.");
15489 pmap_test_write(pmap
, va_base
, false);
15492 T_LOG("Make the first mapping XO.");
15493 pmap_enter_addr(pmap
, va_base
, pa
, VM_PROT_EXECUTE
, VM_PROT_EXECUTE
, 0, false);
15495 T_LOG("Validate that reads to our mapping do not fault.");
15496 pmap_test_read(pmap
, va_base
, false);
15498 T_LOG("Validate that writes to our mapping fault.");
15499 pmap_test_write(pmap
, va_base
, true);
15503 * For page ratios of greater than 1: validate that writes to the other
15504 * mappings still fault. Remove the mappings afterwards (we're done
15505 * with page ratio testing).
15507 for (map_count
= 1; map_count
< page_ratio
; map_count
++) {
15508 pmap_test_write(pmap
, va_base
+ (pmap_page_size
* map_count
), true);
15509 pmap_remove(pmap
, va_base
+ (pmap_page_size
* map_count
), va_base
+ (pmap_page_size
* map_count
) + pmap_page_size
);
15512 T_LOG("Mark the page unreferenced and unmodified.");
15513 pmap_clear_refmod(pn
, VM_MEM_MODIFIED
| VM_MEM_REFERENCED
);
15514 pmap_test_check_refmod(pa
, 0);
	/*
	 * Begin testing the ref/mod state machine. Re-enter the mapping with
	 * different protection/fault_type settings, and confirm that the
	 * ref/mod state matches our expectations at each step.
	 */
	T_LOG("!ref/!mod: read, no fault. Expect ref/!mod");
	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_NONE, 0, false);
	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);

	T_LOG("!ref/!mod: read, read fault. Expect ref/!mod");
	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);

	T_LOG("!ref/!mod: rw, read fault. Expect ref/!mod");
	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, false);
	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);

	T_LOG("ref/!mod: rw, read fault. Expect ref/!mod");
	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ, 0, false);
	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);

	T_LOG("!ref/!mod: rw, rw fault. Expect ref/mod");
	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
	pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
	pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
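	/*
	 * Net effect of the cases above: any access marks the page
	 * VM_MEM_REFERENCED, but VM_MEM_MODIFIED is only set once a write
	 * fault is taken, regardless of the protections on the mapping.
	 */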
	/*
	 * Shared memory testing; we'll have two mappings; one read-only,
	 * one read-write.
	 */
	vm_map_address_t rw_base = va_base;
	vm_map_address_t ro_base = va_base + pmap_page_size;

	pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
	pmap_enter_addr(pmap, ro_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
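	/*
	 * Both mappings target the same physical page (pa), so the per-page
	 * ref/mod state and pmap_page_protect() calls below affect them both.
	 */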
	/*
	 * Test that we take faults as expected for unreferenced/unmodified
	 * pages. Also test the arm_fast_fault interface, to ensure that
	 * mapping permissions change as expected.
	 */
	T_LOG("!ref/!mod: expect no access");
	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
	pmap_test_read_write(pmap, ro_base, false, false);
	pmap_test_read_write(pmap, rw_base, false, false);
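	/*
	 * Note: the last two arguments to pmap_test_read_write() are the
	 * expected accessibility of the mapping: reads allowed, then writes
	 * allowed.
	 */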
	T_LOG("Read fault; expect !ref/!mod -> ref/!mod, read access");
	arm_fast_fault(pmap, rw_base, VM_PROT_READ, false, false);
	pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
	pmap_test_read_write(pmap, ro_base, true, false);
	pmap_test_read_write(pmap, rw_base, true, false);

	T_LOG("Write fault; expect ref/!mod -> ref/mod, read and write access");
	arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
	pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
	pmap_test_read_write(pmap, ro_base, true, false);
	pmap_test_read_write(pmap, rw_base, true, true);

	T_LOG("Write fault; expect !ref/!mod -> ref/mod, read and write access");
	pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
	arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
	pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
	pmap_test_read_write(pmap, ro_base, true, false);
	pmap_test_read_write(pmap, rw_base, true, true);
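	/*
	 * arm_fast_fault() is handed the faulting VA, but the ref/mod update
	 * is per physical page, which is why the ro_base mapping becomes
	 * readable after only the rw_base mapping has taken a fault.
	 */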
	T_LOG("RW protect both mappings; should not change protections.");
	pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
	pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
	pmap_test_read_write(pmap, ro_base, true, false);
	pmap_test_read_write(pmap, rw_base, true, true);

	T_LOG("Read protect both mappings; RW mapping should become RO.");
	pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ);
	pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ);
	pmap_test_read_write(pmap, ro_base, true, false);
	pmap_test_read_write(pmap, rw_base, true, false);

	T_LOG("RW protect the page; mappings should not change protections.");
	pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
	pmap_page_protect(pn, VM_PROT_ALL);
	pmap_test_read_write(pmap, ro_base, true, false);
	pmap_test_read_write(pmap, rw_base, true, true);

	T_LOG("Read protect the page; RW mapping should become RO.");
	pmap_page_protect(pn, VM_PROT_READ);
	pmap_test_read_write(pmap, ro_base, true, false);
	pmap_test_read_write(pmap, rw_base, true, false);
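	/*
	 * pmap_protect() acts on a VA range within one pmap, while
	 * pmap_page_protect() acts on the physical page and therefore on
	 * every mapping of it; in both cases the call can only reduce access,
	 * never grant more than was originally entered.
	 */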
	T_LOG("Validate that disconnect removes all known mappings of the page.");
	pmap_disconnect(pn);
	if (!pmap_verify_free(pn)) {
		T_FAIL("Page still has mappings");
	}

	T_LOG("Remove the wired mapping, so we can tear down the test map.");
	pmap_remove(pmap, wired_va_base, wired_va_base + pmap_page_size);
	pmap_destroy(pmap);

	T_LOG("Release the pages back to the VM.");
	vm_page_lock_queues();
	vm_page_free(unwired_vm_page);
	vm_page_free(wired_vm_page);
	vm_page_unlock_queues();

	T_LOG("Testing successful!");
	return KERN_SUCCESS;
}
#endif /* __arm64__ */
kern_return_t
pmap_test(void)
{
	T_LOG("Starting pmap_tests");
#if defined(__arm64__)
	unsigned int flags = 0;
	flags |= PMAP_CREATE_64BIT;

#if __ARM_MIXED_PAGE_SIZE__
	T_LOG("Testing VM_PAGE_SIZE_4KB");
	pmap_test_test_config(flags | PMAP_CREATE_FORCE_4K_PAGES);
	T_LOG("Testing VM_PAGE_SIZE_16KB");
	pmap_test_test_config(flags);
#else /* __ARM_MIXED_PAGE_SIZE__ */
	pmap_test_test_config(flags);
#endif /* __ARM_MIXED_PAGE_SIZE__ */

#endif /* __arm64__ */
	T_PASS("completed pmap_test successfully");
	return KERN_SUCCESS;
}
#endif /* CONFIG_XNUPOST */
/*
 * The following function should never make it to RELEASE code, since
 * it provides a way to get the PPL to modify text pages.
 */
#if DEVELOPMENT || DEBUG

#define ARM_UNDEFINED_INSN       0xe7f000f0
#define ARM_UNDEFINED_INSN_THUMB 0xde00
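/*
 * Both values are permanently-undefined encodings (A32 UDF #0 and Thumb
 * UDF #0), so executing the corrupted text reliably takes an undefined
 * instruction exception.
 */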
/**
 * Forcibly overwrite executable text with an illegal instruction.
 *
 * @note Only used for xnu unit testing.
 *
 * @param pa The physical address to corrupt.
 *
 * @return KERN_SUCCESS on success.
 */
kern_return_t
pmap_test_text_corruption(pmap_paddr_t pa)
{
#if XNU_MONITOR
	return pmap_test_text_corruption_ppl(pa);
#else /* XNU_MONITOR */
	return pmap_test_text_corruption_internal(pa);
#endif /* XNU_MONITOR */
}
MARK_AS_PMAP_TEXT kern_return_t
pmap_test_text_corruption_internal(pmap_paddr_t pa)
{
	vm_offset_t va = phystokv(pa);
	unsigned int pai = pa_index(pa);

	assert(pa_valid(pa));

	pv_entry_t **pv_h = pai_to_pvh(pai);
	assert(!pvh_test_type(pv_h, PVH_TYPE_NULL));
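	/*
	 * Pages with executable mappings are mapped read-only in the physical
	 * aperture, so the access permissions are temporarily relaxed below to
	 * allow the write and then restored afterwards.
	 */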
#if defined(PVH_FLAG_EXEC)
	const bool need_ap_twiddle = pvh_get_flags(pv_h) & PVH_FLAG_EXEC;

	if (need_ap_twiddle) {
		pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
	}
#endif /* defined(PVH_FLAG_EXEC) */
	/*
	 * The low bit in an instruction address indicates a THUMB instruction
	 */
	if (va & 1) {
		va &= ~(vm_offset_t)1;
		*(uint16_t *)va = ARM_UNDEFINED_INSN_THUMB;
	} else {
		*(uint32_t *)va = ARM_UNDEFINED_INSN;
	}

#if defined(PVH_FLAG_EXEC)
	if (need_ap_twiddle) {
		pmap_set_ptov_ap(pai, AP_RONA, FALSE);
	}
#endif /* defined(PVH_FLAG_EXEC) */
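	/*
	 * Invalidate the instruction cache to the Point of Unification so the
	 * CPU fetches the newly written undefined instruction rather than a
	 * stale cached copy.
	 */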
	InvalidatePoU_IcacheRegion(va, sizeof(uint32_t));

	return KERN_SUCCESS;
}

#endif /* DEVELOPMENT || DEBUG */