/*
 * Copyright (c) 2011-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_assert.h>
#include <mach_ldebug.h>

#include <mach/shared_region.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/vm_map.h>
#include <mach/machine/vm_param.h>
#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/bits.h>
#include <kern/thread.h>
#include <kern/sched.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>

#include <kern/trustcache.h>

#include <os/overflow.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#include <libkern/img4/interface.h>
#include <libkern/section_keywords.h>

#include <machine/atomic.h>
#include <machine/thread.h>
#include <machine/lowglobals.h>

#include <arm/caches_internal.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/cpu_capabilities.h>
#include <arm/cpu_number.h>
#include <arm/machine_cpu.h>
#include <arm/misc_protos.h>
#if (__ARM_VMSA__ > 7)
#include <arm64/proc_reg.h>
#include <pexpert/arm64/boot.h>

#include <arm64/pgtrace.h>
#if CONFIG_PGTRACE_NONKEXT
#include <arm64/pgtrace_decoder.h>
#endif // CONFIG_PGTRACE_NONKEXT
#endif /* (__ARM_VMSA__ > 7) */

#include <pexpert/device_tree.h>

#include <san/kasan.h>
#include <sys/cdefs.h>

#if defined(HAS_APPLE_PAC)
#include <ptrauth.h>
#endif
#define PMAP_TT_L0_LEVEL        0x0
#define PMAP_TT_L1_LEVEL        0x1
#define PMAP_TT_L2_LEVEL        0x2
#define PMAP_TT_L3_LEVEL        0x3
#if (__ARM_VMSA__ == 7)
#define PMAP_TT_MAX_LEVEL       PMAP_TT_L2_LEVEL
#else
#define PMAP_TT_MAX_LEVEL       PMAP_TT_L3_LEVEL
#endif
#define PMAP_TT_LEAF_LEVEL      PMAP_TT_MAX_LEVEL
#define PMAP_TT_TWIG_LEVEL      (PMAP_TT_MAX_LEVEL - 1)
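
/*
 * Terminology used below: the "leaf" level is the deepest translation table
 * level, whose entries (PTEs) map individual pages; the "twig" level is its
 * immediate parent, whose entries point at leaf page tables.
 */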
static bool alloc_asid(pmap_t pmap);
static void free_asid(pmap_t pmap);
static void flush_mmu_tlb_region_asid_async(vm_offset_t va, unsigned length, pmap_t pmap);
static void flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap);
static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
static pt_entry_t wimg_to_pte(unsigned int wimg);
struct page_table_ops {
    bool (*alloc_id)(pmap_t pmap);
    void (*free_id)(pmap_t pmap);
    void (*flush_tlb_region_async)(vm_offset_t va, unsigned length, pmap_t pmap);
    void (*flush_tlb_tte_async)(vm_offset_t va, pmap_t pmap);
    void (*flush_tlb_async)(pmap_t pmap);
    pt_entry_t (*wimg_to_pte)(unsigned int wimg);
};
static const struct page_table_ops native_pt_ops =
{
    .alloc_id = alloc_asid,
    .free_id = free_asid,
    .flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
    .flush_tlb_tte_async = flush_mmu_tlb_tte_asid_async,
    .flush_tlb_async = flush_mmu_tlb_full_asid_async,
    .wimg_to_pte = wimg_to_pte,
};
#if (__ARM_VMSA__ > 7)
const struct page_table_level_info pmap_table_level_info_16k[] =
{
    [0] = {
        .size       = ARM_16K_TT_L0_SIZE,
        .offmask    = ARM_16K_TT_L0_OFFMASK,
        .shift      = ARM_16K_TT_L0_SHIFT,
        .index_mask = ARM_16K_TT_L0_INDEX_MASK,
        .valid_mask = ARM_TTE_VALID,
        .type_mask  = ARM_TTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_BLOCK
    },
    [1] = {
        .size       = ARM_16K_TT_L1_SIZE,
        .offmask    = ARM_16K_TT_L1_OFFMASK,
        .shift      = ARM_16K_TT_L1_SHIFT,
        .index_mask = ARM_16K_TT_L1_INDEX_MASK,
        .valid_mask = ARM_TTE_VALID,
        .type_mask  = ARM_TTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_BLOCK
    },
    [2] = {
        .size       = ARM_16K_TT_L2_SIZE,
        .offmask    = ARM_16K_TT_L2_OFFMASK,
        .shift      = ARM_16K_TT_L2_SHIFT,
        .index_mask = ARM_16K_TT_L2_INDEX_MASK,
        .valid_mask = ARM_TTE_VALID,
        .type_mask  = ARM_TTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_BLOCK
    },
    [3] = {
        .size       = ARM_16K_TT_L3_SIZE,
        .offmask    = ARM_16K_TT_L3_OFFMASK,
        .shift      = ARM_16K_TT_L3_SHIFT,
        .index_mask = ARM_16K_TT_L3_INDEX_MASK,
        .valid_mask = ARM_PTE_TYPE_VALID,
        .type_mask  = ARM_PTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_L3BLOCK
    }
};
const struct page_table_level_info pmap_table_level_info_4k[] =
{
    [0] = {
        .size       = ARM_4K_TT_L0_SIZE,
        .offmask    = ARM_4K_TT_L0_OFFMASK,
        .shift      = ARM_4K_TT_L0_SHIFT,
        .index_mask = ARM_4K_TT_L0_INDEX_MASK,
        .valid_mask = ARM_TTE_VALID,
        .type_mask  = ARM_TTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_BLOCK
    },
    [1] = {
        .size       = ARM_4K_TT_L1_SIZE,
        .offmask    = ARM_4K_TT_L1_OFFMASK,
        .shift      = ARM_4K_TT_L1_SHIFT,
        .index_mask = ARM_4K_TT_L1_INDEX_MASK,
        .valid_mask = ARM_TTE_VALID,
        .type_mask  = ARM_TTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_BLOCK
    },
    [2] = {
        .size       = ARM_4K_TT_L2_SIZE,
        .offmask    = ARM_4K_TT_L2_OFFMASK,
        .shift      = ARM_4K_TT_L2_SHIFT,
        .index_mask = ARM_4K_TT_L2_INDEX_MASK,
        .valid_mask = ARM_TTE_VALID,
        .type_mask  = ARM_TTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_BLOCK
    },
    [3] = {
        .size       = ARM_4K_TT_L3_SIZE,
        .offmask    = ARM_4K_TT_L3_OFFMASK,
        .shift      = ARM_4K_TT_L3_SHIFT,
        .index_mask = ARM_4K_TT_L3_INDEX_MASK,
        .valid_mask = ARM_PTE_TYPE_VALID,
        .type_mask  = ARM_PTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_L3BLOCK
    }
};
struct page_table_attr {
    const struct page_table_level_info * const pta_level_info;
    const struct page_table_ops * const pta_ops;
    const uintptr_t ap_ro;
    const uintptr_t ap_rw;
    const uintptr_t ap_rona;
    const uintptr_t ap_rwna;
    const uintptr_t ap_xn;
    const uintptr_t ap_x;
    const unsigned int pta_root_level;
    const unsigned int pta_max_level;
};
const struct page_table_attr pmap_pt_attr_4k = {
    .pta_level_info = pmap_table_level_info_4k,
    .pta_root_level = PMAP_TT_L1_LEVEL,
    .pta_max_level  = PMAP_TT_L3_LEVEL,
    .pta_ops = &native_pt_ops,
    .ap_ro = ARM_PTE_AP(AP_RORO),
    .ap_rw = ARM_PTE_AP(AP_RWRW),
    .ap_rona = ARM_PTE_AP(AP_RONA),
    .ap_rwna = ARM_PTE_AP(AP_RWNA),
    .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
    .ap_x = ARM_PTE_PNX,
};
const struct page_table_attr pmap_pt_attr_16k = {
    .pta_level_info = pmap_table_level_info_16k,
    .pta_root_level = PMAP_TT_L1_LEVEL,
    .pta_max_level  = PMAP_TT_L3_LEVEL,
    .pta_ops = &native_pt_ops,
    .ap_ro = ARM_PTE_AP(AP_RORO),
    .ap_rw = ARM_PTE_AP(AP_RWRW),
    .ap_rona = ARM_PTE_AP(AP_RONA),
    .ap_rwna = ARM_PTE_AP(AP_RWNA),
    .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
    .ap_x = ARM_PTE_PNX,
};

#if __ARM_16K_PG__
const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
#else /* !__ARM_16K_PG__ */
const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
#endif /* !__ARM_16K_PG__ */
#else /* (__ARM_VMSA__ > 7) */
/*
 * We don't support pmap parameterization for VMSA7, so use an opaque
 * page_table_attr structure.
 */
const struct page_table_attr * const native_pt_attr = NULL;
#endif /* (__ARM_VMSA__ > 7) */

typedef struct page_table_attr pt_attr_t;
/* Macro for getting pmap attributes; not a function for const propagation. */
#if ARM_PARAMETERIZED_PMAP
/* The page table attributes are linked to the pmap */
#define pmap_get_pt_attr(pmap) ((pmap)->pmap_pt_attr)
#define pmap_get_pt_ops(pmap) ((pmap)->pmap_pt_attr->pta_ops)
#else /* !ARM_PARAMETERIZED_PMAP */
/* The page table attributes are fixed (to allow for const propagation) */
#define pmap_get_pt_attr(pmap) (native_pt_attr)
#define pmap_get_pt_ops(pmap) (&native_pt_ops)
#endif /* !ARM_PARAMETERIZED_PMAP */
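
/*
 * Typical usage (illustrative only): callers fetch the attribute block once
 * and then query it through the accessors defined below, e.g.
 *
 *    const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
 *    uint64_t twig_size = pt_attr_twig_size(pt_attr);
 *
 * In the non-parameterized configuration the accessors reduce to compile-time
 * constants.
 */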
283 #if (__ARM_VMSA__ > 7)
284 static inline uint64_t
285 pt_attr_ln_size(const pt_attr_t
* const pt_attr
, unsigned int level
)
287 return pt_attr
->pta_level_info
[level
].size
;
290 __unused
static inline uint64_t
291 pt_attr_ln_shift(const pt_attr_t
* const pt_attr
, unsigned int level
)
293 return pt_attr
->pta_level_info
[level
].shift
;
296 __unused
static inline uint64_t
297 pt_attr_ln_offmask(const pt_attr_t
* const pt_attr
, unsigned int level
)
299 return pt_attr
->pta_level_info
[level
].offmask
;
302 static inline unsigned int
303 pt_attr_twig_level(const pt_attr_t
* const pt_attr
)
305 return pt_attr
->pta_max_level
- 1;
308 static inline unsigned int
309 pt_attr_root_level(const pt_attr_t
* const pt_attr
)
311 return pt_attr
->pta_root_level
;
314 static __unused
inline uint64_t
315 pt_attr_leaf_size(const pt_attr_t
* const pt_attr
)
317 return pt_attr
->pta_level_info
[pt_attr
->pta_max_level
].size
;
320 static __unused
inline uint64_t
321 pt_attr_leaf_offmask(const pt_attr_t
* const pt_attr
)
323 return pt_attr
->pta_level_info
[pt_attr
->pta_max_level
].offmask
;
326 static inline uint64_t
327 pt_attr_leaf_shift(const pt_attr_t
* const pt_attr
)
329 return pt_attr
->pta_level_info
[pt_attr
->pta_max_level
].shift
;
332 static __unused
inline uint64_t
333 pt_attr_leaf_index_mask(const pt_attr_t
* const pt_attr
)
335 return pt_attr
->pta_level_info
[pt_attr
->pta_max_level
].index_mask
;
338 static inline uint64_t
339 pt_attr_twig_size(const pt_attr_t
* const pt_attr
)
341 return pt_attr
->pta_level_info
[pt_attr
->pta_max_level
- 1].size
;
static inline uint64_t
pt_attr_twig_offmask(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].offmask;
}

static inline uint64_t
pt_attr_twig_shift(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].shift;
}

static __unused inline uint64_t
pt_attr_twig_index_mask(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].index_mask;
}

static inline uint64_t
pt_attr_leaf_table_size(const pt_attr_t * const pt_attr)
{
    return pt_attr_twig_size(pt_attr);
}

static inline uint64_t
pt_attr_leaf_table_offmask(const pt_attr_t * const pt_attr)
{
    return pt_attr_twig_offmask(pt_attr);
}

static inline uintptr_t
pt_attr_leaf_rw(const pt_attr_t * const pt_attr)
{
    return pt_attr->ap_rw;
}

static inline uintptr_t
pt_attr_leaf_ro(const pt_attr_t * const pt_attr)
{
    return pt_attr->ap_ro;
}

static inline uintptr_t
pt_attr_leaf_rona(const pt_attr_t * const pt_attr)
{
    return pt_attr->ap_rona;
}

static inline uintptr_t
pt_attr_leaf_rwna(const pt_attr_t * const pt_attr)
{
    return pt_attr->ap_rwna;
}

static inline uintptr_t
pt_attr_leaf_xn(const pt_attr_t * const pt_attr)
{
    return pt_attr->ap_xn;
}

static inline uintptr_t
pt_attr_leaf_x(const pt_attr_t * const pt_attr)
{
    return pt_attr->ap_x;
}
#else /* (__ARM_VMSA__ > 7) */

static inline unsigned int
pt_attr_twig_level(__unused const pt_attr_t * const pt_attr)
{
    return PMAP_TT_L1_LEVEL;
}

static inline uint64_t
pt_attr_twig_size(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_TWIG_SIZE;
}

static inline uint64_t
pt_attr_twig_offmask(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_TWIG_OFFMASK;
}

static inline uint64_t
pt_attr_twig_shift(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_TWIG_SHIFT;
}

static __unused inline uint64_t
pt_attr_twig_index_mask(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_TWIG_INDEX_MASK;
}

__unused static inline uint64_t
pt_attr_leaf_size(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_LEAF_SIZE;
}

__unused static inline uint64_t
pt_attr_leaf_offmask(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_LEAF_OFFMASK;
}

static inline uint64_t
pt_attr_leaf_shift(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_LEAF_SHIFT;
}

static __unused inline uint64_t
pt_attr_leaf_index_mask(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_LEAF_INDEX_MASK;
}

static inline uint64_t
pt_attr_leaf_table_size(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_L1_PT_SIZE;
}

static inline uint64_t
pt_attr_leaf_table_offmask(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_L1_PT_OFFMASK;
}

static inline uintptr_t
pt_attr_leaf_rw(__unused const pt_attr_t * const pt_attr)
{
    return ARM_PTE_AP(AP_RWRW);
}

static inline uintptr_t
pt_attr_leaf_ro(__unused const pt_attr_t * const pt_attr)
{
    return ARM_PTE_AP(AP_RORO);
}

static inline uintptr_t
pt_attr_leaf_rona(__unused const pt_attr_t * const pt_attr)
{
    return ARM_PTE_AP(AP_RONA);
}

static inline uintptr_t
pt_attr_leaf_rwna(__unused const pt_attr_t * const pt_attr)
{
    return ARM_PTE_AP(AP_RWNA);
}

static inline uintptr_t
pt_attr_leaf_xn(__unused const pt_attr_t * const pt_attr)
{
    return ARM_PTE_NX;
}

#endif /* (__ARM_VMSA__ > 7) */
static inline void
pmap_sync_tlb(bool strong __unused)
{
    sync_tlb_flush();
}

int vm_footprint_suspend_allowed = 1;
#if MACH_ASSERT
extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

int pmap_stats_assert = 1;
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)                \
	MACRO_BEGIN                                             \
	if (pmap_stats_assert && (pmap)->pmap_stats_assert)     \
	        assertf(cond, fmt, ##__VA_ARGS__);              \
	MACRO_END
#else /* MACH_ASSERT */
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
#endif /* MACH_ASSERT */

#if DEVELOPMENT || DEBUG
#define PMAP_FOOTPRINT_SUSPENDED(pmap) \
	(current_thread()->pmap_footprint_suspended)
#else /* DEVELOPMENT || DEBUG */
#define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
#endif /* DEVELOPMENT || DEBUG */
#if XNU_MONITOR
/*
 * PPL External References.
 */
extern vm_offset_t   segPPLDATAB;
extern unsigned long segSizePPLDATA;
extern vm_offset_t   segPPLTEXTB;
extern unsigned long segSizePPLTEXT;
#if __APRR_SUPPORTED__
extern vm_offset_t   segPPLTRAMPB;
extern unsigned long segSizePPLTRAMP;
extern void ppl_trampoline_start;
extern void ppl_trampoline_end;
#endif
extern vm_offset_t   segPPLDATACONSTB;
extern unsigned long segSizePPLDATACONST;
/*
 * PPL Global Variables
 */

#if (DEVELOPMENT || DEBUG)
/* Indicates if the PPL will enforce mapping policies; set by -unsafe_kernel_text */
SECURITY_READ_ONLY_LATE(boolean_t) pmap_ppl_disable = FALSE;
#else
const boolean_t pmap_ppl_disable = FALSE;
#endif

/* Indicates if the PPL has started applying APRR. */
boolean_t pmap_ppl_locked_down MARK_AS_PMAP_DATA = FALSE;

/*
 * The PPL cannot invoke the kernel in order to allocate memory, so we must
 * maintain a list of free pages that the PPL owns. The kernel can give the PPL
 * additional pages.
 */
decl_simple_lock_data(, pmap_ppl_free_page_lock MARK_AS_PMAP_DATA);
void ** pmap_ppl_free_page_list MARK_AS_PMAP_DATA = NULL;
uint64_t pmap_ppl_free_page_count MARK_AS_PMAP_DATA = 0;
uint64_t pmap_ppl_pages_returned_to_kernel_count_total = 0;
struct pmap_cpu_data_array_entry pmap_cpu_data_array[MAX_CPUS] MARK_AS_PMAP_DATA;

#ifdef CPU_CLUSTER_OFFSETS
const uint64_t pmap_cluster_offsets[] = CPU_CLUSTER_OFFSETS;
_Static_assert((sizeof(pmap_cluster_offsets) / sizeof(pmap_cluster_offsets[0])) == __ARM_CLUSTER_COUNT__,
    "pmap_cluster_offsets[] count does not match __ARM_CLUSTER_COUNT__");
#endif
extern void *pmap_stacks_start;
extern void *pmap_stacks_end;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_start_pa = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_end_pa = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_start = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_end = 0;

/* Allocation data/locks for pmap structures. */
decl_simple_lock_data(, pmap_free_list_lock MARK_AS_PMAP_DATA);
SECURITY_READ_ONLY_LATE(unsigned long) pmap_array_count = 0;
SECURITY_READ_ONLY_LATE(void *) pmap_array_begin = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_array_end = NULL;
SECURITY_READ_ONLY_LATE(pmap_t) pmap_array = NULL;
pmap_t pmap_free_list MARK_AS_PMAP_DATA = NULL;
/* Allocation data/locks/structs for task ledger structures. */
#define PMAP_LEDGER_DATA_BYTES \
	(((sizeof(task_ledgers) / sizeof(int)) * sizeof(struct ledger_entry)) + sizeof(struct ledger))

/*
 * Maximum number of ledgers allowed are maximum number of tasks
 * allowed on system plus some more i.e. ~10% of total tasks = 200.
 */
#define MAX_PMAP_LEDGERS (MAX_ASID + 200)

typedef struct pmap_ledger_data {
    char pld_data[PMAP_LEDGER_DATA_BYTES];
} pmap_ledger_data_t;
typedef struct pmap_ledger {
    union {
        struct pmap_ledger_data ple_data;
        struct pmap_ledger *next;
    };

    struct pmap_ledger **back_ptr;
} pmap_ledger_t;

SECURITY_READ_ONLY_LATE(bool) pmap_ledger_alloc_initialized = false;
decl_simple_lock_data(, pmap_ledger_lock MARK_AS_PMAP_DATA);
SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_begin = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_end = NULL;
SECURITY_READ_ONLY_LATE(os_refcnt_t *) pmap_ledger_refcnt = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_begin = NULL;
SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_end = NULL;
SECURITY_READ_ONLY_LATE(pmap_ledger_t * *) pmap_ledger_ptr_array = NULL;
uint64_t pmap_ledger_ptr_array_free_index MARK_AS_PMAP_DATA = 0;
pmap_ledger_t * pmap_ledger_free_list MARK_AS_PMAP_DATA = NULL;
#define pmap_ledger_debit(p, e, a) ledger_debit_nocheck((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit_nocheck((p)->ledger, e, a)

static inline void
pmap_check_ledger_fields(ledger_t ledger)
{
    if (ledger == NULL) {
        return;
    }

    thread_t cur_thread = current_thread();
    ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting);
    ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting_compressed);
    ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal);
    ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal_compressed);
    ledger_check_new_balance(cur_thread, ledger, task_ledgers.page_table);
    ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_footprint);
    ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_mem);
    ledger_check_new_balance(cur_thread, ledger, task_ledgers.tkm_private);
    ledger_check_new_balance(cur_thread, ledger, task_ledgers.wired_mem);
}

#define pmap_ledger_check_balance(p) pmap_check_ledger_fields((p)->ledger)
#else /* XNU_MONITOR */

#define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)

#endif /* !XNU_MONITOR */

#if DEVELOPMENT || DEBUG
int panic_on_unsigned_execute = 0;
#endif /* DEVELOPMENT || DEBUG */
/* Virtual memory region for early allocation */
#if (__ARM_VMSA__ == 7)
#define VREGION1_HIGH_WINDOW    (0)
#else
#define VREGION1_HIGH_WINDOW    (PE_EARLY_BOOT_VA)
#endif
#define VREGION1_START          ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
#define VREGION1_SIZE           (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))

extern unsigned int not_in_kdp;

extern vm_offset_t first_avail;

extern pmap_paddr_t avail_start;
extern pmap_paddr_t avail_end;

extern vm_offset_t     virtual_space_start;     /* Next available kernel VA */
extern vm_offset_t     virtual_space_end;       /* End of kernel address space */
extern vm_offset_t     static_memory_end;

extern int maxproc, hard_maxproc;
#if (__ARM_VMSA__ > 7)
/* The number of address bits one TTBR can cover. */
#define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)

/*
 * The bounds on our TTBRs. These are for sanity checking that
 * an address is accessible by a TTBR before we attempt to map it.
 */
#define ARM64_TTBR0_MIN_ADDR (0ULL)
#define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
#define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
#define ARM64_TTBR1_MAX_ADDR (~0ULL)

/* The level of the root of a page table. */
const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));

/* The number of entries in the root TT of a page table. */
const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
#else
const uint64_t arm64_root_pgtable_level = 0;
const uint64_t arm64_root_pgtable_num_ttes = 0;
#endif
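
/*
 * Worked example of the (__ARM_VMSA__ > 7) expressions above, using
 * illustrative values rather than any particular device's configuration:
 * with a 4K translation granule (ARM_PGSHIFT == 12), 8-byte TTEs
 * (TTE_SHIFT == 3) and T0SZ_BOOT == 25, PGTABLE_ADDR_BITS is 39, so
 * (39 - 1 - 12) / 9 == 2 and the root level is 3 - 2 == 1, while
 * (39 - 1 - 12) % 9 == 8 gives 2 << 8 == 512 root entries; i.e. translation
 * starts at L1 with a single 512-entry table.
 */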
struct pmap                     kernel_pmap_store MARK_AS_PMAP_DATA;
SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;

struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));       /* store pt pages */
vm_object_t     pmap_object = &pmap_object_store;

static struct zone *pmap_zone;  /* zone of pmap structures */

decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA);
decl_simple_lock_data(, tt1_lock MARK_AS_PMAP_DATA);
unsigned int    pmap_stamp MARK_AS_PMAP_DATA;
queue_head_t    map_pmap_list MARK_AS_PMAP_DATA;

decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA);
queue_head_t    pt_page_list MARK_AS_PMAP_DATA; /* pt page ptd entries list */

decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA);
typedef struct page_free_entry {
    struct page_free_entry  *next;
} page_free_entry_t;

#define PAGE_FREE_ENTRY_NULL    ((page_free_entry_t *) 0)

page_free_entry_t       *pmap_pages_reclaim_list MARK_AS_PMAP_DATA;     /* Reclaimed pt page list */
unsigned int            pmap_pages_request_count MARK_AS_PMAP_DATA;     /* Pending requests to reclaim pt page */
unsigned long long      pmap_pages_request_acum MARK_AS_PMAP_DATA;

typedef struct tt_free_entry {
    struct tt_free_entry    *next;
} tt_free_entry_t;

#define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)

tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_PAGE_SIZE_TT_MAX   4
tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_two_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_two_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_TWO_PAGE_SIZE_TT_MAX       4
tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_tt_max MARK_AS_PMAP_DATA;

#define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)
boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
boolean_t pmap_gc_allowed_by_time_throttle = TRUE;

unsigned int    inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0;        /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0;        /* leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0;         /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int    inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0;      /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0;      /* leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int    inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0;       /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int    inuse_pmap_pages_count = 0;     /* debugging */
SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte  = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;

SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte  = 0;     /* set by arm_vm_init() - keep out of bss */
SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0;     /* set by arm_vm_init() - phys tte addr */

#if DEVELOPMENT || DEBUG
int nx_enabled = 1;                                     /* enable no-execute protection */
int allow_data_exec  = 0;                               /* No apps may execute data */
int allow_stack_exec = 0;                               /* No apps may execute from the stack */
unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
#else /* DEVELOPMENT || DEBUG */
const int nx_enabled = 1;                               /* enable no-execute protection */
const int allow_data_exec  = 0;                         /* No apps may execute data */
const int allow_stack_exec = 0;                         /* No apps may execute from the stack */
#endif /* DEVELOPMENT || DEBUG */
/*
 * pv_entry_t - structure to track the active mappings for a given page
 */
typedef struct pv_entry {
    struct pv_entry *pve_next;          /* next alias */
    pt_entry_t      *pve_ptep;          /* page table entry */

#if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
/* For the newer ARMv7k ABI where 64-bit types are 64-bit aligned, but pointers
 * are 32-bit:
 * Since pt_desc is 64-bit aligned and we cast often from pv_entry to
 * pt_desc.
 */
} __attribute__ ((aligned(8))) pv_entry_t;
#else
} pv_entry_t;
#endif

#define PV_ENTRY_NULL   ((pv_entry_t *) 0)
/*
 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
 * as a marker for pages mapped through an "alternate accounting" mapping.
 * These macros set, clear and test for this marker and extract the actual
 * value of the "pve_next" pointer.
 */
#define PVE_NEXT_ALTACCT        ((uintptr_t) 0x1)
#define PVE_NEXT_SET_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
	    PVE_NEXT_ALTACCT)
#define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
	    ~PVE_NEXT_ALTACCT)
#define PVE_NEXT_IS_ALTACCT(pve_next)   \
	((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
#define PVE_NEXT_PTR(pve_next) \
	((struct pv_entry *)(((uintptr_t) (pve_next)) & \
	    ~PVE_NEXT_ALTACCT))
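
/*
 * Illustration: pv_entry allocations are at least pointer-aligned, so bit 0 of
 * a stored pve_next pointer is normally zero and can carry the "alternate
 * accounting" marker without disturbing the pointer value; PVE_NEXT_PTR()
 * strips the marker before the pointer is dereferenced.
 */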
#if MACH_ASSERT
static void pmap_check_ledgers(pmap_t pmap);
#else
static void
pmap_check_ledgers(__unused pmap_t pmap)
{
}
#endif /* MACH_ASSERT */

SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table;          /* array of pv entry pointers */

pv_entry_t              *pv_free_list MARK_AS_PMAP_DATA;
pv_entry_t              *pv_kern_free_list MARK_AS_PMAP_DATA;
decl_simple_lock_data(, pv_free_list_lock MARK_AS_PMAP_DATA);
decl_simple_lock_data(, pv_kern_free_list_lock MARK_AS_PMAP_DATA);

decl_simple_lock_data(, phys_backup_lock);
/*
 * pt_desc - structure to keep info on page assigned to page tables
 */
#if (__ARM_VMSA__ == 7)
#define PT_INDEX_MAX    1
#else
#if (ARM_PGSHIFT == 14)
#define PT_INDEX_MAX    1
#else
#define PT_INDEX_MAX    4
#endif
#endif

#define PT_DESC_REFCOUNT                0x4000U
#define PT_DESC_IOMMU_REFCOUNT          0x8000U

typedef struct pt_desc {
    queue_chain_t           pt_page;
    struct pmap             *pmap;
    /*
     * Locate this struct towards the end of the pt_desc; our long term
     * goal is to make this a VLA to avoid wasting memory if we don't need
     * it.
     */
    struct {
        /*
         * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
         * For leaf pagetables, should reflect the number of non-empty PTEs
         * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
         */
        unsigned short      refcnt;
        /*
         * For non-leaf pagetables, should be 0
         * For leaf pagetables, should reflect the number of wired entries
         * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
         */
        unsigned short      wiredcnt;
        vm_offset_t         va;
    } ptd_info[PT_INDEX_MAX];
} pt_desc_t;
#define PTD_ENTRY_NULL  ((pt_desc_t *) 0)

SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;

pt_desc_t               *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
unsigned int    ptd_free_count MARK_AS_PMAP_DATA = 0;
decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA);
/*
 * physical page attribute
 */
typedef u_int16_t pp_attr_t;

#define PP_ATTR_WIMG_MASK               0x003F
#define PP_ATTR_WIMG(x)                 ((x) & PP_ATTR_WIMG_MASK)

#define PP_ATTR_REFERENCED              0x0040
#define PP_ATTR_MODIFIED                0x0080

#define PP_ATTR_INTERNAL                0x0100
#define PP_ATTR_REUSABLE                0x0200
#define PP_ATTR_ALTACCT                 0x0400
#define PP_ATTR_NOENCRYPT               0x0800

#define PP_ATTR_REFFAULT                0x1000
#define PP_ATTR_MODFAULT                0x2000

/*
 * Denotes that a page is owned by the PPL. This is modified/checked with the
 * PVH lock held, to avoid ownership related races. This does not need to be a
 * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
 * put it.
 */
#define PP_ATTR_MONITOR                 0x4000

/*
 * Denotes that a page *cannot* be owned by the PPL. This is required in order
 * to temporarily 'pin' kernel pages that are used to store PPL output parameters.
 * Otherwise a malicious or buggy caller could pass PPL-owned memory for these
 * parameters and in so doing stage a write gadget against the PPL.
 */
#define PP_ATTR_NO_MONITOR              0x8000

/*
 * All of the bits owned by the PPL; kernel requests to set or clear these bits
 * are illegal.
 */
#define PP_ATTR_PPL_OWNED_BITS          (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
SECURITY_READ_ONLY_LATE(pp_attr_t*)     pp_attr_table;

typedef struct pmap_io_range {
    uint64_t addr;
    uint64_t len;
    #define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31) // Strong DSB required for pages in this range
    #define PMAP_IO_RANGE_CARVEOUT (1UL << 30) // Corresponds to memory carved out by bootloader
    uint32_t wimg; // lower 16 bits treated as pp_attr_t, upper 16 bits contain additional mapping flags
    uint32_t signature; // 4CC
} __attribute__((packed)) pmap_io_range_t;

SECURITY_READ_ONLY_LATE(pmap_io_range_t*)       io_attr_table;

SECURITY_READ_ONLY_LATE(pmap_paddr_t)   vm_first_phys = (pmap_paddr_t) 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t)   vm_last_phys = (pmap_paddr_t) 0;

SECURITY_READ_ONLY_LATE(unsigned int)   num_io_rgns = 0;
SECURITY_READ_ONLY_LATE(boolean_t)      pmap_initialized = FALSE;       /* Has pmap_init completed? */

SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;

SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default  = 0x0;
#if defined(__arm64__)
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
#endif

#define PMAP_MAX_SW_ASID ((MAX_ASID + MAX_HW_ASID - 1) / MAX_HW_ASID)
_Static_assert(PMAP_MAX_SW_ASID <= (UINT8_MAX + 1),
    "VASID bits can't be represented by an 8-bit integer");

decl_simple_lock_data(, asid_lock MARK_AS_PMAP_DATA);
static bitmap_t asid_bitmap[BITMAP_LEN(MAX_ASID)] MARK_AS_PMAP_DATA;

#if (__ARM_VMSA__ > 7)
SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
#endif

/*
 * We define our target as 8 pages; enough for 2 page table pages, a PTD page,
 * and a PV page; in essence, twice as many pages as may be necessary to satisfy
 * a single pmap_enter request.
 */
#define PMAP_MIN_FREE_PPL_PAGES 8
#define pa_index(pa) \
	(atop((pa) - vm_first_phys))

#define pai_to_pvh(pai) \
	(&pv_head_table[pai])

#define pa_valid(x) \
	((x) >= vm_first_phys && (x) < vm_last_phys)
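
/*
 * The usual lookup chain for a managed physical address "pa": pa_valid(pa)
 * checks that it falls within the attribute-tracked range, and pa_index(pa)
 * converts it to a page index that is used for both pp_attr_table[] and
 * (via pai_to_pvh()) pv_head_table[].
 */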
/* PTE Define Macros */

#define pte_is_wired(pte) \
	(((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)

#define pte_set_wired(ptep, wired)                                                      \
	do {                                                                            \
	        SInt16  *ptd_wiredcnt_ptr;                                              \
	        ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->ptd_info[ARM_PT_DESC_INDEX(ptep)].wiredcnt); \
	        if (wired) {                                                            \
	                *ptep |= ARM_PTE_WIRED;                                         \
	                OSAddAtomic16(1, ptd_wiredcnt_ptr);                             \
	        } else {                                                                \
	                *ptep &= ~ARM_PTE_WIRED;                                        \
	                OSAddAtomic16(-1, ptd_wiredcnt_ptr);                            \
	        }                                                                       \
	} while (0)

#define pte_was_writeable(pte) \
	(((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)

#define pte_set_was_writeable(pte, was_writeable)       \
	do {                                            \
	        if ((was_writeable)) {                  \
	                (pte) |= ARM_PTE_WRITEABLE;     \
	        } else {                                \
	                (pte) &= ~ARM_PTE_WRITEABLE;    \
	        }                                       \
	} while (0)
/* PVE Define Macros */

#define pve_next(pve) \
	((pve)->pve_next)

#define pve_link_field(pve) \
	(&pve_next(pve))

#define pve_link(pp, e) \
	((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))

#define pve_unlink(pp, e) \
	(pve_next(pp) = pve_next(e))

/* bits held in the ptep pointer field */

#define pve_get_ptep(pve) \
	((pve)->pve_ptep)

#define pve_set_ptep(pve, ptep_new)                     \
	do {                                            \
	        (pve)->pve_ptep = (ptep_new);           \
	} while (0)
1065 /* mask for page descriptor index */
1066 #define ARM_TT_PT_INDEX_MASK ARM_PGMASK
1068 #if (__ARM_VMSA__ == 7)
1069 #define ARM_PT_DESC_INDEX_MASK 0x00000
1070 #define ARM_PT_DESC_INDEX_SHIFT 0
1073 * Shift value used for reconstructing the virtual address for a PTE.
1075 #define ARM_TT_PT_ADDR_SHIFT (10U)
1077 #define ptep_get_va(ptep) \
1078 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
1080 #define ptep_get_pmap(ptep) \
1081 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
1085 #if (ARM_PGSHIFT == 12)
1086 #define ARM_PT_DESC_INDEX_MASK ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
1087 #define ARM_PT_DESC_INDEX_SHIFT ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
1089 * Shift value used for reconstructing the virtual address for a PTE.
1091 #define ARM_TT_PT_ADDR_SHIFT (9ULL)
1094 #define ARM_PT_DESC_INDEX_MASK (0x00000)
1095 #define ARM_PT_DESC_INDEX_SHIFT (0)
1097 * Shift value used for reconstructing the virtual address for a PTE.
1099 #define ARM_TT_PT_ADDR_SHIFT (11ULL)
1103 #define ARM_PT_DESC_INDEX(ptep) \
1104 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
1106 #define ptep_get_va(ptep) \
1107 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
1109 #define ptep_get_pmap(ptep) \
1110 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
1114 #define ARM_PT_DESC_INDEX(ptep) \
1115 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
1117 #define ptep_get_ptd(ptep) \
1118 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
/* PVH Define Macros */

/* pvhead type */
#define PVH_TYPE_NULL        0x0UL
#define PVH_TYPE_PVEP        0x1UL
#define PVH_TYPE_PTEP        0x2UL
#define PVH_TYPE_PTDP        0x3UL

#define PVH_TYPE_MASK        (0x3UL)

#ifdef __arm64__

/* All flags listed below are stored in the PV head pointer unless otherwise noted */
#define PVH_FLAG_IOMMU       0x4UL /* Stored in each PTE, or in PV head for single-PTE PV heads */
#define PVH_FLAG_IOMMU_TABLE (1ULL << 63) /* Stored in each PTE, or in PV head for single-PTE PV heads */
#define PVH_FLAG_CPU         (1ULL << 62)
#define PVH_LOCK_BIT         61
#define PVH_FLAG_LOCK        (1ULL << PVH_LOCK_BIT)
#define PVH_FLAG_EXEC        (1ULL << 60)
#define PVH_FLAG_LOCKDOWN    (1ULL << 59)
#define PVH_HIGH_FLAGS       (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN)

#else /* !__arm64__ */

#define PVH_LOCK_BIT         31
#define PVH_FLAG_LOCK        (1UL << PVH_LOCK_BIT)
#define PVH_HIGH_FLAGS       PVH_FLAG_LOCK

#endif

#define PVH_LIST_MASK   (~PVH_TYPE_MASK)
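
/*
 * A PV head is a single word: the low two bits (PVH_TYPE_MASK) encode what the
 * remaining bits refer to (nothing, a pv_entry list, a lone PTE, or a pt_desc),
 * while the high bits carry the PVH_FLAG_* values defined above. The accessors
 * below mask these type and flag bits in or out as appropriate.
 */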
#define pvh_test_type(h, b) \
	((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))

#define pvh_ptep(h) \
	((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))

#define pvh_list(h) \
	((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))

#define pvh_get_flags(h) \
	(*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)

#define pvh_set_flags(h, f)                                                             \
	do {                                                                            \
	        os_atomic_store((vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
	            relaxed);                                                           \
	} while (0)

#define pvh_update_head(h, e, t)                                                        \
	do {                                                                            \
	        assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK);                            \
	        os_atomic_store((vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
	            relaxed);                                                           \
	} while (0)

#define pvh_update_head_unlocked(h, e, t)                                               \
	do {                                                                            \
	        assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK));                         \
	        *(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK;        \
	} while (0)

#define pvh_add(h, e)                                                   \
	do {                                                            \
	        assert(!pvh_test_type((h), PVH_TYPE_PTEP));             \
	        pve_next(e) = pvh_list(h);                              \
	        pvh_update_head((h), (e), PVH_TYPE_PVEP);               \
	} while (0)

#define pvh_remove(h, p, e)                                                     \
	do {                                                                    \
	        assert(!PVE_NEXT_IS_ALTACCT(pve_next((e))));                    \
	        if ((p) == (h)) {                                               \
	                if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) {     \
	                        pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
	                } else {                                                \
	                        pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
	                }                                                       \
	        } else {                                                        \
	                /*                                                      \
	                 * preserve the "alternate accounting" bit              \
	                 * when updating "p" (the previous entry's              \
	                 * "pve_next").                                         \
	                 */                                                     \
	                boolean_t       __is_altacct;                           \
	                __is_altacct = PVE_NEXT_IS_ALTACCT(*(p));               \
	                *(p) = PVE_NEXT_PTR(pve_next((e)));                     \
	                if (__is_altacct) {                                     \
	                        PVE_NEXT_SET_ALTACCT((p));                      \
	                } else {                                                \
	                        PVE_NEXT_CLR_ALTACCT((p));                      \
	                }                                                       \
	        }                                                               \
	} while (0)
/* PPATTR Define Macros */

#define ppattr_set_bits(h, b)                                                           \
	do {                                                                            \
	        while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
	} while (0)

#define ppattr_clear_bits(h, b)                                                         \
	do {                                                                            \
	        while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
	} while (0)

#define ppattr_test_bits(h, b) \
	((*(pp_attr_t *)(h) & (b)) == (b))

#define pa_set_bits(x, b)                                               \
	do {                                                            \
	        if (pa_valid(x))                                        \
	                ppattr_set_bits(&pp_attr_table[pa_index(x)],    \
	                    (b));                                       \
	} while (0)

#define pa_test_bits(x, b) \
	(pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
	    (b)) : FALSE)

#define pa_clear_bits(x, b)                                             \
	do {                                                            \
	        if (pa_valid(x))                                        \
	                ppattr_clear_bits(&pp_attr_table[pa_index(x)],  \
	                    (b));                                       \
	} while (0)

#define pa_set_modify(x) \
	pa_set_bits(x, PP_ATTR_MODIFIED)

#define pa_clear_modify(x) \
	pa_clear_bits(x, PP_ATTR_MODIFIED)

#define pa_set_reference(x) \
	pa_set_bits(x, PP_ATTR_REFERENCED)

#define pa_clear_reference(x) \
	pa_clear_bits(x, PP_ATTR_REFERENCED)

#define pa_set_monitor(x) \
	pa_set_bits((x), PP_ATTR_MONITOR)

#define pa_clear_monitor(x) \
	pa_clear_bits((x), PP_ATTR_MONITOR)

#define pa_test_monitor(x) \
	pa_test_bits((x), PP_ATTR_MONITOR)

#define pa_set_no_monitor(x) \
	pa_set_bits((x), PP_ATTR_NO_MONITOR)

#define pa_clear_no_monitor(x) \
	pa_clear_bits((x), PP_ATTR_NO_MONITOR)

#define pa_test_no_monitor(x) \
	pa_test_bits((x), PP_ATTR_NO_MONITOR)
#define IS_INTERNAL_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
#define SET_INTERNAL_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
#define CLR_INTERNAL_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)

#define IS_REUSABLE_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
#define SET_REUSABLE_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
#define CLR_REUSABLE_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)

#define IS_ALTACCT_PAGE(pai, pve_p)                                     \
	(((pve_p) == NULL)                                              \
	 ? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT)       \
	 : PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
#define SET_ALTACCT_PAGE(pai, pve_p)                                    \
	if ((pve_p) == NULL) {                                          \
	        ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT);  \
	} else {                                                        \
	        PVE_NEXT_SET_ALTACCT(&pve_next((pve_p)));               \
	}
#define CLR_ALTACCT_PAGE(pai, pve_p)                                    \
	if ((pve_p) == NULL) {                                          \
	        ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
	} else {                                                        \
	        PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p)));               \
	}

#define IS_REFFAULT_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
#define SET_REFFAULT_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
#define CLR_REFFAULT_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)

#define IS_MODFAULT_PAGE(pai) \
	ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
#define SET_MODFAULT_PAGE(pai) \
	ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
#define CLR_MODFAULT_PAGE(pai) \
	ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)

#define tte_get_ptd(tte) \
	((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))
#if (__ARM_VMSA__ == 7)

#define tte_index(pmap, pt_attr, addr) \
	ttenum((addr))

#define pte_index(pmap, pt_attr, addr) \
	ptenum((addr))

#else

#define ttn_index(pmap, pt_attr, addr, pt_level) \
	(((addr) & (pt_attr)->pta_level_info[(pt_level)].index_mask) >> (pt_attr)->pta_level_info[(pt_level)].shift)

#define tt0_index(pmap, pt_attr, addr) \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L0_LEVEL)

#define tt1_index(pmap, pt_attr, addr) \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L1_LEVEL)

#define tt2_index(pmap, pt_attr, addr) \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L2_LEVEL)

#define tt3_index(pmap, pt_attr, addr) \
	ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L3_LEVEL)

#define tte_index(pmap, pt_attr, addr) \
	tt2_index((pmap), (pt_attr), (addr))

#define pte_index(pmap, pt_attr, addr) \
	tt3_index((pmap), (pt_attr), (addr))

#endif
/*
 *	Lock on pmap system
 */

lck_grp_t pmap_lck_grp;

#define PMAP_LOCK_INIT(pmap) {                                  \
	simple_lock_init(&(pmap)->lock, 0);                     \
}

#define PMAP_LOCK(pmap) {                                       \
	pmap_simple_lock(&(pmap)->lock);                        \
}

#define PMAP_UNLOCK(pmap) {                                     \
	pmap_simple_unlock(&(pmap)->lock);                      \
}

#if MACH_ASSERT
#define PMAP_ASSERT_LOCKED(pmap) {                              \
	simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED);    \
}
#else
#define PMAP_ASSERT_LOCKED(pmap)
#endif

#if defined(__arm64__)
#define PVH_LOCK_WORD 1 /* Assumes little-endian */
#else
#define PVH_LOCK_WORD 0
#endif

#define ASSERT_PVH_LOCKED(index)                                                \
	do {                                                                    \
	        assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK);    \
	} while (0)

#define LOCK_PVH(index)                                                         \
	do {                                                                    \
	        pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
	} while (0)

#define UNLOCK_PVH(index)                                                       \
	do {                                                                    \
	        ASSERT_PVH_LOCKED(index);                                       \
	        pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
	} while (0)
#define PMAP_UPDATE_TLBS(pmap, s, e, strong) {                                          \
	pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (unsigned)(e - s), pmap);      \
	pmap_sync_tlb(strong);                                                          \
}

#define FLUSH_PTE_RANGE(spte, epte) \
	__builtin_arm_dmb(DMB_ISH);

#define FLUSH_PTE(pte_p) \
	__builtin_arm_dmb(DMB_ISH);

#define FLUSH_PTE_STRONG(pte_p) \
	__builtin_arm_dsb(DSB_ISH);

#define FLUSH_PTE_RANGE_STRONG(spte, epte) \
	__builtin_arm_dsb(DSB_ISH);
#define WRITE_PTE_FAST(pte_p, pte_entry)                                        \
	__unreachable_ok_push                                                   \
	if (TEST_PAGE_RATIO_4) {                                                \
	        if (((unsigned)(pte_p)) & 0x1f) {                               \
	                panic("%s: WRITE_PTE_FAST is unaligned, "               \
	                      "pte_p=%p, pte_entry=%p",                         \
	                      __FUNCTION__,                                     \
	                      pte_p, (void*)pte_entry);                         \
	        }                                                               \
	        if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
	                *(pte_p) = (pte_entry);                                 \
	                *((pte_p)+1) = (pte_entry);                             \
	                *((pte_p)+2) = (pte_entry);                             \
	                *((pte_p)+3) = (pte_entry);                             \
	        } else {                                                        \
	                *(pte_p) = (pte_entry);                                 \
	                *((pte_p)+1) = (pte_entry) | 0x1000;                    \
	                *((pte_p)+2) = (pte_entry) | 0x2000;                    \
	                *((pte_p)+3) = (pte_entry) | 0x3000;                    \
	        }                                                               \
	} else {                                                                \
	        *(pte_p) = (pte_entry);                                         \
	}                                                                       \
	__unreachable_ok_pop

#define WRITE_PTE(pte_p, pte_entry)                                             \
	WRITE_PTE_FAST(pte_p, pte_entry);                                       \
	FLUSH_PTE(pte_p);

#define WRITE_PTE_STRONG(pte_p, pte_entry)                                      \
	WRITE_PTE_FAST(pte_p, pte_entry);                                       \
	FLUSH_PTE_STRONG(pte_p);
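
/*
 * WRITE_PTE pairs the store with the DMB-based FLUSH_PTE, which orders it
 * against other memory accesses; WRITE_PTE_STRONG instead uses the DSB-based
 * FLUSH_PTE_STRONG, typically for paths that require the PTE update to have
 * completed (e.g. before issuing TLB maintenance) rather than merely be
 * ordered.
 */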
/*
 * Other useful macros.
 */
#define current_pmap() \
	(vm_map_pmap(current_thread()->map))

#if XNU_MONITOR
/*
 * PPL-related macros.
 */
#define ARRAY_ELEM_PTR_IS_VALID(_ptr_, _elem_size_, _array_begin_, _array_end_) \
	(((_ptr_) >= (typeof(_ptr_))_array_begin_) &&                           \
	 ((_ptr_) < (typeof(_ptr_))_array_end_) &&                              \
	 !((((void *)(_ptr_)) - ((void *)_array_begin_)) % (_elem_size_)))

#define PMAP_PTR_IS_VALID(x) ARRAY_ELEM_PTR_IS_VALID(x, sizeof(struct pmap), pmap_array_begin, pmap_array_end)

#define USER_PMAP_IS_VALID(x) (PMAP_PTR_IS_VALID(x) && (os_atomic_load(&(x)->ref_count, relaxed) > 0))

#define VALIDATE_USER_PMAP(x)                                           \
	if (__improbable(!USER_PMAP_IS_VALID(x)))                       \
	        panic("%s: invalid pmap %p", __func__, (x));

#define VALIDATE_PMAP(x)                                                \
	if (__improbable(((x) != kernel_pmap) && !USER_PMAP_IS_VALID(x))) \
	        panic("%s: invalid pmap %p", __func__, (x));

#define VALIDATE_LEDGER_PTR(x) \
	if (__improbable(!ARRAY_ELEM_PTR_IS_VALID(x, sizeof(void *), pmap_ledger_ptr_array_begin, pmap_ledger_ptr_array_end))) \
	        panic("%s: invalid ledger ptr %p", __func__, (x));

#define ARRAY_ELEM_INDEX(x, _elem_size_, _array_begin_) ((uint64_t)((((void *)(x)) - (_array_begin_)) / (_elem_size_)))
static inline void
pmap_ledger_validate(void * ledger)
{
    uint64_t array_index;
    pmap_ledger_t ** ledger_ptr_array_ptr = ((pmap_ledger_t*)ledger)->back_ptr;
    VALIDATE_LEDGER_PTR(ledger_ptr_array_ptr);
    array_index = ARRAY_ELEM_INDEX(ledger_ptr_array_ptr, sizeof(pmap_ledger_t *), pmap_ledger_ptr_array_begin);

    if (array_index >= MAX_PMAP_LEDGERS) {
        panic("%s: ledger %p array index invalid, index was %#llx", __func__, ledger, array_index);
    }

    pmap_ledger_t *ledger_ptr = *ledger_ptr_array_ptr;

    if (__improbable(ledger_ptr != ledger)) {
        panic("%s: ledger pointer mismatch, %p != %p", __func__, ledger, ledger_ptr);
    }
}
#else /* XNU_MONITOR */

#define VALIDATE_USER_PMAP(x)
#define VALIDATE_PMAP(x)
#define VALIDATE_LEDGER(x)

#endif /* XNU_MONITOR */
#if DEVELOPMENT || DEBUG

/*
 * Trace levels are controlled by a bitmask in which each
 * level can be enabled/disabled by the (1<<level) position
 * in the boot arg
 * Level 1: pmap lifecycle (create/destroy/switch)
 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
 * Level 3: internal state management (tte/attributes/fast-fault)
 */

SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;

#define PMAP_TRACE(level, ...)                                          \
	if (__improbable((1 << (level)) & pmap_trace_mask)) {           \
	        KDBG_RELEASE(__VA_ARGS__);                              \
	}
#else /* DEVELOPMENT || DEBUG */

#define PMAP_TRACE(level, ...)

#endif /* DEVELOPMENT || DEBUG */
/*
 * Internal function prototypes (forward declarations).
 */

static void pv_init(
	void);

static boolean_t pv_alloc(
	pmap_t pmap,
	unsigned int pai,
	pv_entry_t **pvepp);

static void pv_free(
	pv_entry_t *pvep);

static void pv_list_free(
	pv_entry_t *pvehp,
	pv_entry_t *pvetp,
	unsigned int cnt);

static void ptd_bootstrap(
	pt_desc_t *ptdp, unsigned int ptd_cnt);

static inline pt_desc_t *ptd_alloc_unlinked(bool reclaim);

static pt_desc_t *ptd_alloc(pmap_t pmap, bool reclaim);

static void ptd_deallocate(pt_desc_t *ptdp);

static void ptd_init(
	pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t *pte_p);

static void pmap_zone_init(
	void);

static void pmap_set_reference(
	ppnum_t pn);

ppnum_t pmap_vtophys(
	pmap_t pmap, addr64_t va);

void pmap_switch_user_ttb(
	pmap_t pmap);
static kern_return_t pmap_expand(
	pmap_t, vm_map_address_t, unsigned int options, unsigned int level);

static int pmap_remove_range(
	pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);

static int pmap_remove_range_options(
	pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, bool *, int);

static tt_entry_t *pmap_tt1_allocate(
	pmap_t, vm_size_t, unsigned int);

#define PMAP_TT_ALLOCATE_NOWAIT         0x1

static void pmap_tt1_deallocate(
	pmap_t, tt_entry_t *, vm_size_t, unsigned int);

#define PMAP_TT_DEALLOCATE_NOBLOCK      0x1

static kern_return_t pmap_tt_allocate(
	pmap_t, tt_entry_t **, unsigned int, unsigned int);

#define PMAP_TT_ALLOCATE_NOWAIT         0x1

static void pmap_tte_deallocate(
	pmap_t, tt_entry_t *, unsigned int);

#ifdef __ARM64_PMAP_SUBPAGE_L1__
#if (__ARM_VMSA__ <= 7)
#error This is not supported for old-style page tables
#endif /* (__ARM_VMSA__ <= 7) */
#define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
#else /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
#if (__ARM_VMSA__ <= 7)
#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
#else /* (__ARM_VMSA__ > 7) */
#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
#endif /* (__ARM_VMSA__ > 7) */
#endif /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */

const unsigned int arm_hardware_page_size = ARM_PGBYTES;
const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;

#define PMAP_TT_DEALLOCATE_NOBLOCK      0x1
#if (__ARM_VMSA__ > 7)

static inline tt_entry_t *pmap_tt1e(
	pmap_t, vm_map_address_t);

static inline tt_entry_t *pmap_tt2e(
	pmap_t, vm_map_address_t);

static inline pt_entry_t *pmap_tt3e(
	pmap_t, vm_map_address_t);

static inline pt_entry_t *pmap_ttne(
	pmap_t, unsigned int, vm_map_address_t);

static void pmap_unmap_sharedpage(
	pmap_t pmap);

static boolean_t
pmap_is_64bit(pmap_t);

#endif /* (__ARM_VMSA__ > 7) */

static inline tt_entry_t *pmap_tte(
	pmap_t, vm_map_address_t);

static inline pt_entry_t *pmap_pte(
	pmap_t, vm_map_address_t);

static void pmap_update_cache_attributes_locked(
	ppnum_t, unsigned);

boolean_t arm_clear_fast_fault(
	ppnum_t ppnum,
	vm_prot_t fault_type);

static pmap_paddr_t pmap_pages_reclaim(
	void);

static kern_return_t pmap_pages_alloc(
	pmap_paddr_t    *pa,
	unsigned        size,
	unsigned        option);

#define PMAP_PAGES_ALLOCATE_NOWAIT              0x1
#define PMAP_PAGES_RECLAIM_NOWAIT               0x2

static void pmap_pages_free(
	pmap_paddr_t    pa,
	unsigned        size);

static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);

static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);

static void pmap_trim_self(pmap_t pmap);
static void pmap_trim_subord(pmap_t subord);

#if __APRR_SUPPORTED__
static uint64_t pte_to_xprr_perm(pt_entry_t pte);
static pt_entry_t xprr_perm_to_pte(uint64_t perm);
#endif /* __APRR_SUPPORTED__*/

static pmap_paddr_t pmap_alloc_page_for_kern(void);
static void pmap_alloc_page_for_ppl(void);
/*
 * This macro generates prototypes for the *_internal functions, which
 * represent the PPL interface. When the PPL is enabled, this will also
 * generate prototypes for the PPL entrypoints (*_ppl), as well as generating
 * the entrypoints.
 */
#if XNU_MONITOR
#define GEN_ASM_NAME(__function_name) _##__function_name##_ppl

#define PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
	static __return_type __function_name##_internal __function_args; \
	extern __return_type __function_name##_ppl __function_args; \
	__asm__ (".text \n" \
	         ".align 2 \n" \
	         ".globl " #__assembly_function_name "\n" \
	         #__assembly_function_name ":\n" \
	         "mov x15, " #__function_index "\n" \
	         "b _aprr_ppl_enter\n")

#define PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
	PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name)

#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
	PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, GEN_ASM_NAME(__function_name))
#else /* XNU_MONITOR */
#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
	static __return_type __function_name##_internal __function_args
#endif /* XNU_MONITOR */
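
/*
 * For illustration (hypothetical function name): an invocation such as
 *
 *	PMAP_SUPPORT_PROTOTYPES(
 *		kern_return_t,
 *		foo, (pmap_t pmap), FOO_INDEX);
 *
 * always declares foo_internal(); when XNU_MONITOR is enabled it additionally
 * declares foo_ppl() and emits a small assembly stub that loads FOO_INDEX into
 * x15 and branches to the PPL entry trampoline.
 */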
PMAP_SUPPORT_PROTOTYPES(
	arm_fast_fault, (pmap_t pmap,
	vm_map_address_t va,
	vm_prot_t fault_type,
	bool from_user), ARM_FAST_FAULT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	arm_force_fast_fault, (ppnum_t ppnum,
	vm_prot_t allow_mode,
	int options), ARM_FORCE_FAST_FAULT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	mapping_replenish, (uint32_t kern_target_count, uint32_t user_target_count), MAPPING_REPLENISH_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_batch_set_cache_attributes, (ppnum_t pn,
	unsigned int cacheattr,
	unsigned int page_cnt,
	unsigned int page_index,
	unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_change_wiring, (pmap_t pmap,
	boolean_t wired), PMAP_CHANGE_WIRING_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_create_options, (ledger_t ledger,
	unsigned int flags), PMAP_CREATE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_enter_options, (pmap_t pmap,
	vm_prot_t fault_type,
	unsigned int options), PMAP_ENTER_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_extract, (pmap_t pmap,
	vm_map_address_t va), PMAP_EXTRACT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_find_phys, (pmap_t pmap,
	addr64_t va), PMAP_FIND_PHYS_INDEX);

#if (__ARM_VMSA__ > 7)
PMAP_SUPPORT_PROTOTYPES(
	pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
#endif

PMAP_SUPPORT_PROTOTYPES(
	pmap_is_empty, (pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_map_cpu_windows_copy, (ppnum_t pn,
	unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_nest, (pmap_t grand,
	uint64_t size), PMAP_NEST_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_page_protect_options, (ppnum_t ppnum,
	unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_protect_options, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	unsigned int options,
	void *args), PMAP_PROTECT_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_query_page_info, (pmap_t pmap,
	int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_query_resident, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	mach_vm_size_t * compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_remove_options, (pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options), PMAP_REMOVE_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_return, (boolean_t do_panic,
	boolean_t do_recurse), PMAP_RETURN_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_set_cache_attributes, (ppnum_t pn,
	unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_update_compressor_page, (ppnum_t pn,
	unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);

#if MACH_ASSERT || XNU_MONITOR
PMAP_SUPPORT_PROTOTYPES(
	pmap_set_process, (pmap_t pmap,
	char *procname), PMAP_SET_PROCESS_INDEX);
#endif

PMAP_SUPPORT_PROTOTYPES(
	pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_unnest_options, (pmap_t grand,
	unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_cpu_data_init, (unsigned int cpu_number), PMAP_CPU_DATA_INIT_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	phys_attribute_set, (ppnum_t pn,
	unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_mark_page_as_ppl_page, (pmap_paddr_t pa), PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	phys_attribute_clear, (ppnum_t pn,
	void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_release_ppl_pages_to_kernel, (void), PMAP_RELEASE_PAGES_TO_KERNEL_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);

PMAP_SUPPORT_PROTOTYPES(
	pmap_trim, (pmap_t grand,
	uint64_t size), PMAP_TRIM_INDEX);

#if HAS_APPLE_PAC && XNU_MONITOR
PMAP_SUPPORT_PROTOTYPES(
	pmap_sign_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator), PMAP_SIGN_USER_PTR);
PMAP_SUPPORT_PROTOTYPES(
	pmap_auth_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator), PMAP_AUTH_USER_PTR);
#endif /* HAS_APPLE_PAC && XNU_MONITOR */
static void pmap_mark_page_as_ppl_page(pmap_paddr_t pa);

void pmap_footprint_suspend(vm_map_t map, boolean_t suspend);

PMAP_SUPPORT_PROTOTYPES(
	pmap_footprint_suspend, (vm_map_t map, boolean_t suspend), PMAP_FOOTPRINT_SUSPEND_INDEX);
PMAP_SUPPORT_PROTOTYPES(
	pmap_ledger_alloc_init, (size_t), PMAP_LEDGER_ALLOC_INIT_INDEX);
PMAP_SUPPORT_PROTOTYPES(
	pmap_ledger_alloc, (void), PMAP_LEDGER_ALLOC_INDEX);
PMAP_SUPPORT_PROTOTYPES(
	pmap_ledger_free, (ledger_t), PMAP_LEDGER_FREE_INDEX);
boolean_t pgtrace_enabled = 0;

typedef struct {
	queue_chain_t   chain;

	/*
	 * pmap     - pmap for below addresses
	 * ova      - original va page address
	 * cva      - clone va addresses for pre, target and post pages
	 * cva_spte - clone saved ptes
	 * range    - trace range in this map
	 * cloned   - has been cloned or not
	 */
	pmap_t          pmap;
	vm_map_offset_t ova;
	vm_map_offset_t cva[3];
	pt_entry_t      cva_spte[3];
} pmap_pgtrace_map_t;

static void pmap_pgtrace_init(void);
static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
#if (__ARM_VMSA__ > 7)
/*
 * The low global vector page is mapped at a fixed alias.
 * Since the page size is 16k for H8 and newer we map the globals to a 16k
 * aligned address. Readers of the globals (e.g. lldb, panic server) need
 * to check both addresses anyway for backward compatibility. So for now
 * we leave H6 and H7 where they were.
 */
#if (ARM_PGSHIFT == 14)
#define LOWGLOBAL_ALIAS         (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
#else
#define LOWGLOBAL_ALIAS         (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
#endif
#else
#define LOWGLOBAL_ALIAS         (0xFFFF1000)
#endif

long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;

int pt_fake_zone_index = -1;            /* index of pmap fake zone */
/*
 * Table of function pointers used for PPL dispatch.
 */
const void * const ppl_handler_table[PMAP_COUNT] = {
	[ARM_FAST_FAULT_INDEX] = arm_fast_fault_internal,
	[ARM_FORCE_FAST_FAULT_INDEX] = arm_force_fast_fault_internal,
	[MAPPING_FREE_PRIME_INDEX] = mapping_free_prime_internal,
	[MAPPING_REPLENISH_INDEX] = mapping_replenish_internal,
	[PHYS_ATTRIBUTE_CLEAR_INDEX] = phys_attribute_clear_internal,
	[PHYS_ATTRIBUTE_SET_INDEX] = phys_attribute_set_internal,
	[PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX] = pmap_batch_set_cache_attributes_internal,
	[PMAP_CHANGE_WIRING_INDEX] = pmap_change_wiring_internal,
	[PMAP_CREATE_INDEX] = pmap_create_options_internal,
	[PMAP_DESTROY_INDEX] = pmap_destroy_internal,
	[PMAP_ENTER_OPTIONS_INDEX] = pmap_enter_options_internal,
	[PMAP_EXTRACT_INDEX] = pmap_extract_internal,
	[PMAP_FIND_PHYS_INDEX] = pmap_find_phys_internal,
	[PMAP_INSERT_SHAREDPAGE_INDEX] = pmap_insert_sharedpage_internal,
	[PMAP_IS_EMPTY_INDEX] = pmap_is_empty_internal,
	[PMAP_MAP_CPU_WINDOWS_COPY_INDEX] = pmap_map_cpu_windows_copy_internal,
	[PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX] = pmap_mark_page_as_ppl_page_internal,
	[PMAP_NEST_INDEX] = pmap_nest_internal,
	[PMAP_PAGE_PROTECT_OPTIONS_INDEX] = pmap_page_protect_options_internal,
	[PMAP_PROTECT_OPTIONS_INDEX] = pmap_protect_options_internal,
	[PMAP_QUERY_PAGE_INFO_INDEX] = pmap_query_page_info_internal,
	[PMAP_QUERY_RESIDENT_INDEX] = pmap_query_resident_internal,
	[PMAP_REFERENCE_INDEX] = pmap_reference_internal,
	[PMAP_REMOVE_OPTIONS_INDEX] = pmap_remove_options_internal,
	[PMAP_RETURN_INDEX] = pmap_return_internal,
	[PMAP_SET_CACHE_ATTRIBUTES_INDEX] = pmap_set_cache_attributes_internal,
	[PMAP_UPDATE_COMPRESSOR_PAGE_INDEX] = pmap_update_compressor_page_internal,
	[PMAP_SET_NESTED_INDEX] = pmap_set_nested_internal,
	[PMAP_SET_PROCESS_INDEX] = pmap_set_process_internal,
	[PMAP_SWITCH_INDEX] = pmap_switch_internal,
	[PMAP_SWITCH_USER_TTB_INDEX] = pmap_switch_user_ttb_internal,
	[PMAP_CLEAR_USER_TTB_INDEX] = pmap_clear_user_ttb_internal,
	[PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX] = pmap_unmap_cpu_windows_copy_internal,
	[PMAP_UNNEST_OPTIONS_INDEX] = pmap_unnest_options_internal,
	[PMAP_FOOTPRINT_SUSPEND_INDEX] = pmap_footprint_suspend_internal,
	[PMAP_CPU_DATA_INIT_INDEX] = pmap_cpu_data_init_internal,
	[PMAP_RELEASE_PAGES_TO_KERNEL_INDEX] = pmap_release_ppl_pages_to_kernel_internal,
	[PMAP_SET_JIT_ENTITLED_INDEX] = pmap_set_jit_entitled_internal,
	[PMAP_TRIM_INDEX] = pmap_trim_internal,
	[PMAP_LEDGER_ALLOC_INIT_INDEX] = pmap_ledger_alloc_init_internal,
	[PMAP_LEDGER_ALLOC_INDEX] = pmap_ledger_alloc_internal,
	[PMAP_LEDGER_FREE_INDEX] = pmap_ledger_free_internal,
#if HAS_APPLE_PAC && XNU_MONITOR
	[PMAP_SIGN_USER_PTR] = pmap_sign_user_ptr_internal,
	[PMAP_AUTH_USER_PTR] = pmap_auth_user_ptr_internal,
#endif /* HAS_APPLE_PAC && XNU_MONITOR */
};
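
/*
 * Illustrative sketch only (not the actual PPL trampoline): a designated-
 * initializer table such as ppl_handler_table lets a single entry point route
 * a request by index. The names example_handler_table/example_dispatch below
 * are hypothetical and exist purely to show the dispatch pattern; the real
 * entry path also switches stacks and privilege state before the call.
 */
#if 0
typedef int (*example_handler_t)(int arg);

static int
example_noop(int arg)
{
	return arg;
}

static const example_handler_t example_handler_table[] = {
	[0] = example_noop,
};

static int
example_dispatch(unsigned int index, int arg)
{
	/* Bounds-check the index before jumping through the table. */
	if (index >= (sizeof(example_handler_table) / sizeof(example_handler_table[0]))) {
		return -1;
	}
	return example_handler_table[index](arg);
}
#endif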
static uint64_t
pmap_get_ppl_cpu_id(void)
{
	uint64_t mpidr_el1_value = 0;

	/* We identify the CPU based on the constant bits of MPIDR_EL1. */
	MRS(mpidr_el1_value, "MPIDR_EL1");

#ifdef CPU_CLUSTER_OFFSETS
	uint64_t cluster_id = (mpidr_el1_value & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT;
	assert(cluster_id < (sizeof(pmap_cluster_offsets) / sizeof(pmap_cluster_offsets[0])));

	/* For multi-cluster configurations, AFF0 reflects the core number within the cluster. */
	mpidr_el1_value = (mpidr_el1_value & MPIDR_AFF0_MASK) + pmap_cluster_offsets[cluster_id];
#else
	/*
	 * AFF2 is not constant (it can change for e-core versus p-core on H9),
	 * so mask off everything but AFF0.
	 */
	mpidr_el1_value &= MPIDR_AFF0_MASK;
#endif

	if (mpidr_el1_value > MAX_CPUS) {
		panic("%s: mpidr_el1_value=%#llx > MAX_CPUS=%#x",
		    __FUNCTION__, mpidr_el1_value, MAX_CPUS);
	}

	return mpidr_el1_value;
}
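
/*
 * Worked example (register value hypothetical), assuming the AFF0/AFF1 layout
 * used above: with MPIDR_EL1 = 0x103, AFF1 = 0x1 selects the cluster and
 * AFF0 = 0x3 is the core within that cluster, so the returned CPU id is
 * pmap_cluster_offsets[1] + 3 on a multi-cluster configuration, or simply 3
 * when CPU_CLUSTER_OFFSETS is not defined.
 */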
/*
 * Allocates and initializes a per-CPU data structure for the pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_cpu_data_init_internal(unsigned int cpu_number)
{
	pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();

#if XNU_MONITOR
	/* Verify cacheline-aligned */
	assert(((vm_offset_t)pmap_cpu_data & ((1 << L2_CLINE) - 1)) == 0);
	if (pmap_cpu_data->cpu_number != PMAP_INVALID_CPU_NUM) {
		panic("%s: pmap_cpu_data->cpu_number=%u, cpu_number=%u",
		    __FUNCTION__, pmap_cpu_data->cpu_number, cpu_number);
	}
#endif
	pmap_cpu_data->cpu_number = cpu_number;
}

void
pmap_cpu_data_init(void)
{
#if XNU_MONITOR
	pmap_cpu_data_init_ppl(cpu_number());
#else
	pmap_cpu_data_init_internal(cpu_number());
#endif
}
static void
pmap_cpu_data_array_init(void)
{
	unsigned int i = 0;
	pmap_paddr_t ppl_cpu_save_area_cur = 0;
	pt_entry_t template, *pte_p;
	vm_offset_t stack_va = (vm_offset_t)pmap_stacks_start + ARM_PGBYTES;

	assert((pmap_stacks_start != NULL) && (pmap_stacks_end != NULL));
	pmap_stacks_start_pa = avail_start;

	for (i = 0; i < MAX_CPUS; i++) {
		for (vm_offset_t cur_va = stack_va; cur_va < (stack_va + PPL_STACK_SIZE); cur_va += ARM_PGBYTES) {
			assert(cur_va < (vm_offset_t)pmap_stacks_end);
			pte_p = pmap_pte(kernel_pmap, cur_va);
			assert(*pte_p == ARM_PTE_EMPTY);
			template = pa_to_pte(avail_start) | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE |
			    ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) | xprr_perm_to_pte(XPRR_PPL_RW_PERM);
#if __ARM_KERNEL_PROTECT__
			template |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
			WRITE_PTE(pte_p, template);
			__builtin_arm_isb(ISB_SY);
			avail_start += ARM_PGBYTES;
		}

		kasan_map_shadow(stack_va, PPL_STACK_SIZE, false);

		pmap_cpu_data_array[i].cpu_data.cpu_id = i;
		pmap_cpu_data_array[i].cpu_data.cpu_number = PMAP_INVALID_CPU_NUM;
		pmap_cpu_data_array[i].cpu_data.ppl_state = PPL_STATE_KERNEL;
		pmap_cpu_data_array[i].cpu_data.ppl_stack = (void*)(stack_va + PPL_STACK_SIZE);
		stack_va += (PPL_STACK_SIZE + ARM_PGBYTES);
	}
	pmap_stacks_end_pa = avail_start;

	ppl_cpu_save_area_start = avail_start;
	ppl_cpu_save_area_end = ppl_cpu_save_area_start;
	ppl_cpu_save_area_cur = ppl_cpu_save_area_start;

	for (i = 0; i < MAX_CPUS; i++) {
		while ((ppl_cpu_save_area_end - ppl_cpu_save_area_cur) < sizeof(arm_context_t)) {
			avail_start += PAGE_SIZE;
			ppl_cpu_save_area_end = avail_start;
		}

		pmap_cpu_data_array[i].cpu_data.save_area = (arm_context_t *)phystokv(ppl_cpu_save_area_cur);
		ppl_cpu_save_area_cur += sizeof(arm_context_t);
	}

	pmap_cpu_data_init();
}
pmap_cpu_data_t *
pmap_get_cpu_data(void)
{
	pmap_cpu_data_t * pmap_cpu_data = NULL;

#if XNU_MONITOR
	uint64_t cpu_id = 0;

	cpu_id = pmap_get_ppl_cpu_id();
	pmap_cpu_data = &pmap_cpu_data_array[cpu_id].cpu_data;

	if (pmap_cpu_data->cpu_id != cpu_id) {
		panic("%s: CPU ID mismatch, cpu_id=0x%#llx, pmap_cpu_data->cpu_id=%#llx",
		    __FUNCTION__, cpu_id, pmap_cpu_data->cpu_id);
	}
#else
	pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
#endif

	return pmap_cpu_data;
}
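
/*
 * Minimal usage sketch (hypothetical helper, not part of the pmap API):
 * callers fetch their per-CPU pmap state and read or update it without extra
 * locking, because the structure returned above is private to the executing
 * CPU. Only the cpu_number field referenced above is assumed here.
 */
#if 0
static unsigned int
example_current_pmap_cpu_number(void)
{
	pmap_cpu_data_t *cpu_data = pmap_get_cpu_data();

	/* Per-CPU data; no lock needed on the local CPU. */
	return cpu_data->cpu_number;
}
#endif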
/*
 * pmap_set_range_xprr_perm takes a range (specified using start and end) that
 * falls within the physical aperture. All mappings within this range have
 * their protections changed from those specified by expected_perm to those
 * specified by new_perm.
 */
static void
pmap_set_range_xprr_perm(vm_address_t start,
    vm_address_t end,
    unsigned int expected_perm,
    unsigned int new_perm)
{
#if (__ARM_VMSA__ == 7)
#error This function is not supported on older ARM hardware
#else
	pmap_t pmap = kernel_pmap;

	vm_address_t va = start;
	vm_address_t tte_start = 0;
	vm_address_t tte_end = 0;

	tt_entry_t *tte_p = NULL;
	tt_entry_t tte = 0;
	pt_entry_t *pte_p = NULL;
	pt_entry_t *cpte_p = NULL;
	pt_entry_t *bpte_p = NULL;
	pt_entry_t *epte_p = NULL;
	pt_entry_t cpte = 0;
	pt_entry_t template = 0;

	/*
	 * Validate our arguments; any invalid argument will be grounds for a
	 * panic.
	 */
	if ((start | end) % ARM_PGBYTES) {
		panic("%s: start or end not page aligned, start=%p, end=%p, new_perm=%u, expected_perm=%u",
		    __FUNCTION__, (void *)start, (void *)end, new_perm, expected_perm);
	}
	if (start > end) {
		panic("%s: start > end, start=%p, end=%p, new_perm=%u, expected_perm=%u",
		    __FUNCTION__, (void *)start, (void *)end, new_perm, expected_perm);
	}
	if (start < gVirtBase) {
		panic("%s: start is before physical aperture, start=%p, end=%p, new_perm=%u, expected_perm=%u",
		    __FUNCTION__, (void *)start, (void *)end, new_perm, expected_perm);
	}
	if (end > static_memory_end) {
		panic("%s: end is after physical aperture, start=%p, end=%p, new_perm=%u, expected_perm=%u",
		    __FUNCTION__, (void *)start, (void *)end, new_perm, expected_perm);
	}
	if ((new_perm > XPRR_MAX_PERM) || (expected_perm > XPRR_MAX_PERM)) {
		panic("%s: invalid XPRR index, start=%p, end=%p, new_perm=%u, expected_perm=%u",
		    __FUNCTION__, (void *)start, (void *)end, new_perm, expected_perm);
	}

	/*
	 * Walk over the PTEs for the given range, and set the protections on
	 * those PTEs.
	 */
	while (va < end) {
		tte_start = va;
		tte_end = ((va + pt_attr_twig_size(native_pt_attr)) & ~pt_attr_twig_offmask(native_pt_attr));

		if (tte_end > end) {
			tte_end = end;
		}

		tte_p = pmap_tte(pmap, va);

		/*
		 * The physical aperture should not have holes.
		 * The physical aperture should be contiguous.
		 * Do not make eye contact with the physical aperture.
		 */
		if (tte_p == NULL) {
			panic("%s: physical aperture tte is NULL, start=%p, end=%p, new_perm=%u, expected_perm=%u",
			    __FUNCTION__, (void *)start, (void *)end, new_perm, expected_perm);
		}

		tte = *tte_p;

		if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
			/*
			 * Walk over the given L3 page table page and update the
			 * PTEs.
			 */
			pte_p = (pt_entry_t *)ttetokv(tte);
			bpte_p = &pte_p[ptenum(va)];
			epte_p = bpte_p + ((tte_end - va) >> pt_attr_leaf_shift(native_pt_attr));

			for (cpte_p = bpte_p; cpte_p < epte_p;
			    cpte_p += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
				int pai = (int)pa_index(pte_to_pa(*cpte_p));
				LOCK_PVH(pai);
				cpte = *cpte_p;

				/*
				 * Every valid PTE involved should not have the
				 * hint bit set and should have the expected
				 * APRR index.
				 */
				if ((cpte & ARM_PTE_TYPE_MASK) == ARM_PTE_TYPE_FAULT) {
					panic("%s: physical aperture PTE is invalid, va=%p, start=%p, end=%p, new_perm=%u, expected_perm=%u",
					    __FUNCTION__, (void *)va, (void *)start, (void *)end, new_perm, expected_perm);
				}
				if (cpte & ARM_PTE_HINT_MASK) {
					panic("%s: physical aperture PTE has hint bit set, va=%p, cpte=0x%llx, start=%p, end=%p, new_perm=%u, expected_perm=%u",
					    __FUNCTION__, (void *)va, cpte, (void *)start, (void *)end, new_perm, expected_perm);
				}
				if (pte_to_xprr_perm(cpte) != expected_perm) {
					panic("%s: perm=%llu does not match expected_perm, cpte=0x%llx, start=%p, end=%p, new_perm=%u, expected_perm=%u",
					    __FUNCTION__, pte_to_xprr_perm(cpte), cpte,
					    (void *)start, (void *)end, new_perm, expected_perm);
				}

				template = cpte;
				template &= ~ARM_PTE_XPRR_MASK;
				template |= xprr_perm_to_pte(new_perm);

				WRITE_PTE_STRONG(cpte_p, template);
				UNLOCK_PVH(pai);
			}
		} else {
			panic("%s: tte=0x%llx is not a table type entry, start=%p, end=%p, new_perm=%u, expected_perm=%u",
			    __FUNCTION__, tte, (void *)start, (void *)end, new_perm, expected_perm);
		}

		va = tte_end;
	}

	PMAP_UPDATE_TLBS(pmap, start, end, false);
#endif /* (__ARM_VMSA__ == 7) */
}

/*
 * A convenience function for setting protections on a single page.
 */
static inline void
pmap_set_xprr_perm(vm_address_t page_kva,
    unsigned int expected_perm,
    unsigned int new_perm)
{
	pmap_set_range_xprr_perm(page_kva, page_kva + PAGE_SIZE, expected_perm, new_perm);
}
#endif /* XNU_MONITOR */
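
/*
 * Usage sketch (hypothetical call site, not part of this file's API): flipping
 * a single physical-aperture page from kernel-RW to PPL-RW around a hand-off,
 * which is the same pattern the page-marking routines below use.
 */
#if 0
static void
example_lend_page_to_ppl(vm_address_t page_kva)
{
	/* Hand the page to the PPL ... */
	pmap_set_xprr_perm(page_kva, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);

	/* ... and, once the PPL releases it, take it back. */
	pmap_set_xprr_perm(page_kva, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
}
#endif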
static pmap_paddr_t
pmap_pages_reclaim(void)
{
	boolean_t found_page;
	unsigned i;
	pt_desc_t *ptdp;

	/*
	 * pmap_pages_reclaim() returns a page by freeing an active pt page.
	 * To be eligible, a pt page must be assigned to a user pmap, have no
	 * wired pte entries, and contain at least one valid pte entry.
	 *
	 * In a loop, check for a page in the reclaimed pt page list.
	 * If one is present, unlink that page and return the physical page address.
	 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
	 * If found, invoke pmap_remove_range() on its pmap and address range, then
	 * deallocate that pt page. This will end up adding the pt page to the
	 * reclaimed pt page list.
	 * If no eligible page is found in the pt page list, panic.
	 */

	pmap_simple_lock(&pmap_pages_lock);
	pmap_pages_request_count++;
	pmap_pages_request_acum++;

	while (1) {
		if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
			page_free_entry_t *page_entry;

			page_entry = pmap_pages_reclaim_list;
			pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
			pmap_simple_unlock(&pmap_pages_lock);

			return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
		}

		pmap_simple_unlock(&pmap_pages_lock);

		pmap_simple_lock(&pt_pages_lock);
		ptdp = (pt_desc_t *)queue_first(&pt_page_list);
		found_page = FALSE;

		while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
			if ((ptdp->pmap->nested == FALSE)
			    && (pmap_simple_lock_try(&ptdp->pmap->lock))) {
				assert(ptdp->pmap != kernel_pmap);
				unsigned refcnt_acc = 0;
				unsigned wiredcnt_acc = 0;

				for (i = 0; i < PT_INDEX_MAX; i++) {
					if (ptdp->ptd_info[i].refcnt == PT_DESC_REFCOUNT) {
						/* Do not attempt to free a page that contains an L2 table */
						refcnt_acc = 0;
						break;
					}
					refcnt_acc += ptdp->ptd_info[i].refcnt;
					wiredcnt_acc += ptdp->ptd_info[i].wiredcnt;
				}
				if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
					found_page = TRUE;
					/* Leave ptdp->pmap locked here. We're about to reclaim
					 * a tt page from it, so we don't want anyone else messing
					 * with it while we do that. */
					break;
				}
				pmap_simple_unlock(&ptdp->pmap->lock);
			}
			ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
		}
		if (!found_page) {
			panic("%s: No eligible page in pt_page_list", __FUNCTION__);
		} else {
			int remove_count = 0;
			bool need_strong_sync = false;
			vm_map_address_t va;
			pmap_t pmap;
			pt_entry_t *bpte, *epte;
			pt_entry_t *pte_p;
			tt_entry_t *tte_p;
			uint32_t rmv_spte = 0;

			pmap_simple_unlock(&pt_pages_lock);
			pmap = ptdp->pmap;
			PMAP_ASSERT_LOCKED(pmap); // pmap lock should be held from loop above

			__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

			for (i = 0; i < PT_INDEX_MAX; i++) {
				va = ptdp->ptd_info[i].va;

				/* If the VA is bogus, this may represent an unallocated region
				 * or one which is in transition (already being freed or expanded).
				 * Don't try to remove mappings here. */
				if (va == (vm_offset_t)-1) {
					continue;
				}

				tte_p = pmap_tte(pmap, va);
				if ((tte_p != (tt_entry_t *) NULL)
				    && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
					pte_p = (pt_entry_t *) ttetokv(*tte_p);
					bpte = &pte_p[pte_index(pmap, pt_attr, va)];
					epte = bpte + PAGE_SIZE / sizeof(pt_entry_t);
					/*
					 * Use PMAP_OPTIONS_REMOVE to clear any
					 * "compressed" markers and update the
					 * "compressed" counter in pmap->stats.
					 * This means that we lose accounting for
					 * any compressed pages in this range
					 * but the alternative is to not be able
					 * to account for their future decompression,
					 * which could cause the counter to drift
					 * further and further.
					 */
					remove_count += pmap_remove_range_options(
						pmap, va, bpte, epte,
						&rmv_spte, &need_strong_sync, PMAP_OPTIONS_REMOVE);
					if (ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt != 0) {
						panic("%s: ptdp %p, count %d", __FUNCTION__, ptdp,
						    ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
					}

					pmap_tte_deallocate(pmap, tte_p, PMAP_TT_TWIG_LEVEL);

					if (remove_count > 0) {
						pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, (unsigned int)pt_attr_leaf_table_size(pt_attr), pmap);
					} else {
						pmap_get_pt_ops(pmap)->flush_tlb_tte_async(va, pmap);
					}
				}
			}
			// Undo the lock we grabbed when we found ptdp above
			pmap_simple_unlock(&pmap->lock);
			pmap_sync_tlb(need_strong_sync);
		}
		pmap_simple_lock(&pmap_pages_lock);
	}
}
/*
 * Return a PPL page to the free list.
 */
static void
pmap_give_free_ppl_page(pmap_paddr_t paddr)
{
	assert((paddr & ARM_PGMASK) == 0);
	void ** new_head = (void **)phystokv(paddr);
	pmap_simple_lock(&pmap_ppl_free_page_lock);

	void * cur_head = pmap_ppl_free_page_list;
	*new_head = cur_head;
	pmap_ppl_free_page_list = new_head;
	pmap_ppl_free_page_count++;

	pmap_simple_unlock(&pmap_ppl_free_page_lock);
}

/*
 * Get a PPL page from the free list.
 */
static pmap_paddr_t
pmap_get_free_ppl_page(void)
{
	pmap_paddr_t result = 0;

	pmap_simple_lock(&pmap_ppl_free_page_lock);

	if (pmap_ppl_free_page_list != NULL) {
		void ** new_head = NULL;
		new_head = *((void**)pmap_ppl_free_page_list);
		result = kvtophys((vm_offset_t)pmap_ppl_free_page_list);
		pmap_ppl_free_page_list = new_head;
		pmap_ppl_free_page_count--;
	}

	pmap_simple_unlock(&pmap_ppl_free_page_lock);
	assert((result & ARM_PGMASK) == 0);

	return result;
}
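
/*
 * The PPL free-page list above stores the "next" pointer inside the free page
 * itself, through its physical-aperture mapping. A minimal, self-contained
 * sketch of that intrusive-stack idea follows; all names are hypothetical and
 * the real routines additionally translate between physical and virtual
 * addresses and hold pmap_ppl_free_page_lock.
 */
#if 0
static void *example_free_list = NULL;

static void
example_push(void *page_kva)
{
	/* Link the stack through the first word of the page itself. */
	*(void **)page_kva = example_free_list;
	example_free_list = page_kva;
}

static void *
example_pop(void)
{
	void *page_kva = example_free_list;

	if (page_kva != NULL) {
		example_free_list = *(void **)page_kva;
	}
	return page_kva;
}
#endif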
/*
 * pmap_mark_page_as_ppl_page claims a page on behalf of the PPL by marking it
 * as PPL-owned and only allowing the PPL to write to it.
 */
MARK_AS_PMAP_TEXT static void
pmap_mark_page_as_ppl_page_internal(pmap_paddr_t pa)
{
	vm_offset_t kva = 0;
	unsigned int pai = 0;
	pp_attr_t attr;

	/*
	 * Mark each page that we allocate as belonging to the monitor, as we
	 * intend to use it for monitor-y stuff (page tables, table pages, that
	 * sort of thing).
	 */
	assert(!TEST_PAGE_RATIO_4);

	if (!pa_valid(pa)) {
		panic("%s: bad address, pa=%p", __FUNCTION__, (void *)pa);
	}

	pai = (unsigned int)pa_index(pa);
	LOCK_PVH(pai);

	/* A page that the PPL already owns can't be given to the PPL. */
	if (pa_test_monitor(pa)) {
		panic("%s: page already belongs to PPL, pa=%p", __FUNCTION__, (void *)pa);
	}
	/* The page cannot be mapped outside of the physical aperture. */
	if (!pmap_verify_free((ppnum_t)atop(pa))) {
		panic("%s: page is not free, pa=%p", __FUNCTION__, (void *)pa);
	}

	do {
		attr = pp_attr_table[pai];
		if (attr & PP_ATTR_NO_MONITOR) {
			panic("%s: page excluded from PPL, pa=%p", __FUNCTION__, (void *)pa);
		}
	} while (!OSCompareAndSwap16(attr, attr | PP_ATTR_MONITOR, &pp_attr_table[pai]));

	UNLOCK_PVH(pai);

	kva = phystokv(pa);
	pmap_set_xprr_perm(kva, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
	bzero((void *)(kva & ~PAGE_MASK), PAGE_SIZE);

	pmap_give_free_ppl_page(pa);
}

static void
pmap_mark_page_as_ppl_page(pmap_paddr_t pa)
{
	pmap_mark_page_as_ppl_page_ppl(pa);
}

static void
pmap_mark_page_as_kernel_page(pmap_paddr_t pa)
{
	vm_offset_t kva = 0;
	unsigned int pai = 0;

	pai = (unsigned int)pa_index(pa);
	LOCK_PVH(pai);

	if (!pa_test_monitor(pa)) {
		panic("%s: page is not a PPL page, pa=%p", __FUNCTION__, (void *)pa);
	}

	pa_clear_monitor(pa);
	UNLOCK_PVH(pai);

	kva = phystokv(pa);
	pmap_set_xprr_perm(kva, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
}
MARK_AS_PMAP_TEXT static pmap_paddr_t
pmap_release_ppl_pages_to_kernel_internal(void)
{
	pmap_paddr_t pa = 0;

	if (pmap_ppl_free_page_count <= PMAP_MIN_FREE_PPL_PAGES) {
		return 0;
	}

	pa = pmap_get_free_ppl_page();
	if (pa != 0) {
		pmap_mark_page_as_kernel_page(pa);
	}

	return pa;
}

static uint64_t
pmap_release_ppl_pages_to_kernel(void)
{
	pmap_paddr_t pa = 0;
	vm_page_t m = VM_PAGE_NULL;
	vm_page_t local_freeq = VM_PAGE_NULL;
	uint64_t pmap_ppl_pages_returned_to_kernel_count = 0;

	while (pmap_ppl_free_page_count > PMAP_MIN_FREE_PPL_PAGES) {
		pa = pmap_release_ppl_pages_to_kernel_ppl();

		if (pa == 0) {
			break;
		}

		/* If we retrieved a page, add it to the free queue. */
		vm_object_lock(pmap_object);
		m = vm_page_lookup(pmap_object, (pa - gPhysBase));
		assert(m != VM_PAGE_NULL);
		assert(VM_PAGE_WIRED(m));

		m->vmp_snext = local_freeq;
		local_freeq = m;
		pmap_ppl_pages_returned_to_kernel_count++;
		pmap_ppl_pages_returned_to_kernel_count_total++;

		vm_object_unlock(pmap_object);
	}

	if (local_freeq != VM_PAGE_NULL) {
		/* We need to hold the object lock for freeing pages. */
		vm_object_lock(pmap_object);
		vm_page_free_list(local_freeq, TRUE);
		vm_object_unlock(pmap_object);
	}

	return pmap_ppl_pages_returned_to_kernel_count;
}
static kern_return_t
pmap_pages_alloc(
	pmap_paddr_t    *pa,
	unsigned        size,
	unsigned        option)
{
#if XNU_MONITOR
	if (size != PAGE_SIZE) {
		panic("%s: size != PAGE_SIZE, pa=%p, size=%u, option=%u",
		    __FUNCTION__, pa, size, option);
	}

	if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
		*pa = pmap_pages_reclaim();
		return KERN_SUCCESS;
	}

	assert(option & PMAP_PAGES_ALLOCATE_NOWAIT);

	*pa = pmap_get_free_ppl_page();

	if (*pa == 0) {
		return KERN_RESOURCE_SHORTAGE;
	}

	return KERN_SUCCESS;
#else
	vm_page_t m = VM_PAGE_NULL, m_prev;

	if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
		assert(size == PAGE_SIZE);
		*pa = pmap_pages_reclaim();
		return KERN_SUCCESS;
	}
	if (size == PAGE_SIZE) {
		while ((m = vm_page_grab()) == VM_PAGE_NULL) {
			if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}
			VM_PAGE_WAIT();
		}
		vm_page_lock_queues();
		vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
		vm_page_unlock_queues();
	}
	if (size == 2 * PAGE_SIZE) {
		while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
			if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
				return KERN_RESOURCE_SHORTAGE;
			}
			VM_PAGE_WAIT();
		}
	}

	*pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));

	vm_object_lock(pmap_object);
	while (m != VM_PAGE_NULL) {
		vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
		m_prev = m;
		m = NEXT_PAGE(m_prev);
		*(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
	}
	vm_object_unlock(pmap_object);

	OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
	OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);

	return KERN_SUCCESS;
#endif
}

static pmap_paddr_t
pmap_alloc_page_for_kern(void)
{
	pmap_paddr_t paddr = 0;
	vm_page_t m, m_prev;

	while ((m = vm_page_grab()) == VM_PAGE_NULL) {
		VM_PAGE_WAIT();
	}

	vm_page_lock_queues();
	vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
	vm_page_unlock_queues();

	paddr = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));

	if (paddr == 0) {
		panic("%s: paddr is 0", __FUNCTION__);
	}

	vm_object_lock(pmap_object);

	while (m != VM_PAGE_NULL) {
		vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
		m_prev = m;
		m = NEXT_PAGE(m_prev);
		*(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
	}

	vm_object_unlock(pmap_object);

	OSAddAtomic(1, &inuse_pmap_pages_count);
	OSAddAtomic64(1, &alloc_pmap_pages_count);

	return paddr;
}

static void
pmap_alloc_page_for_ppl(void)
{
	pmap_mark_page_as_ppl_page(pmap_alloc_page_for_kern());
}
static pmap_t
pmap_alloc_pmap(void)
{
	pmap_t pmap = PMAP_NULL;

	pmap_simple_lock(&pmap_free_list_lock);

	if (pmap_free_list != PMAP_NULL) {
		pmap = pmap_free_list;
		pmap_free_list = *((pmap_t *)pmap);

		if (!PMAP_PTR_IS_VALID(pmap)) {
			panic("%s: allocated pmap is not valid, pmap=%p",
			    __FUNCTION__, pmap);
		}
	}

	pmap_simple_unlock(&pmap_free_list_lock);

	return pmap;
}

static void
pmap_free_pmap(pmap_t pmap)
{
	if (!PMAP_PTR_IS_VALID(pmap)) {
		panic("%s: pmap is not valid, pmap=%p", __FUNCTION__, pmap);
	}

	pmap_simple_lock(&pmap_free_list_lock);
	*((pmap_t *)pmap) = pmap_free_list;
	pmap_free_list = pmap;
	pmap_simple_unlock(&pmap_free_list_lock);
}

static void
pmap_bootstrap_pmap_free_list(void)
{
	pmap_t cur_head = PMAP_NULL;
	unsigned long i = 0;

	simple_lock_init(&pmap_free_list_lock, 0);

	for (i = 0; i < pmap_array_count; i++) {
		*((pmap_t *)(&pmap_array[i])) = cur_head;
		cur_head = &pmap_array[i];
	}

	pmap_free_list = cur_head;
}
static void
pmap_pages_free(
	pmap_paddr_t    pa,
	unsigned        size)
{
	pmap_simple_lock(&pmap_pages_lock);

	if (pmap_pages_request_count != 0) {
		page_free_entry_t *page_entry;

		pmap_pages_request_count--;
		page_entry = (page_free_entry_t *)phystokv(pa);
		page_entry->next = pmap_pages_reclaim_list;
		pmap_pages_reclaim_list = page_entry;
		pmap_simple_unlock(&pmap_pages_lock);

		return;
	}

	pmap_simple_unlock(&pmap_pages_lock);

#if XNU_MONITOR
	pmap_give_free_ppl_page(pa);
#else
	vm_page_t m;
	pmap_paddr_t pa_max;

	OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);

	for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
		vm_object_lock(pmap_object);
		m = vm_page_lookup(pmap_object, (pa - gPhysBase));
		assert(m != VM_PAGE_NULL);
		assert(VM_PAGE_WIRED(m));
		vm_page_lock_queues();
		vm_page_free(m);
		vm_page_unlock_queues();
		vm_object_unlock(pmap_object);
	}
#endif
}

static inline void
PMAP_ZINFO_PALLOC(
	pmap_t pmap, int bytes)
{
	pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
}

static inline void
PMAP_ZINFO_PFREE(
	pmap_t pmap, int bytes)
{
	pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
}

static inline void
pmap_tt_ledger_credit(
	pmap_t          pmap,
	vm_size_t       size)
{
	if (pmap != kernel_pmap) {
		pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
		pmap_ledger_credit(pmap, task_ledgers.page_table, size);
	}
}

static inline void
pmap_tt_ledger_debit(
	pmap_t          pmap,
	vm_size_t       size)
{
	if (pmap != kernel_pmap) {
		pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
		pmap_ledger_debit(pmap, task_ledgers.page_table, size);
	}
}
static bool
alloc_asid(pmap_t pmap)
{
	int vasid;
	uint16_t hw_asid;

	pmap_simple_lock(&asid_lock);
	vasid = bitmap_first(&asid_bitmap[0], MAX_ASID);
	if (vasid < 0) {
		pmap_simple_unlock(&asid_lock);
		return false;
	}
	assert(vasid < MAX_ASID);
	bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
	pmap_simple_unlock(&asid_lock);
	// bitmap_first() returns highest-order bits first, but a 0-based scheme works
	// slightly better with the collision detection scheme used by pmap_switch_internal().
	vasid = MAX_ASID - 1 - vasid;
	hw_asid = vasid % MAX_HW_ASID;
	pmap->sw_asid = vasid / MAX_HW_ASID;
	hw_asid += 1;  // Account for ASID 0, which is reserved for the kernel
#if __ARM_KERNEL_PROTECT__
	hw_asid <<= 1;  // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
#endif
	pmap->hw_asid = hw_asid;
	return true;
}

static void
free_asid(pmap_t pmap)
{
	unsigned int vasid;
	uint16_t hw_asid = pmap->hw_asid;
	assert(hw_asid != 0); // Should not try to free kernel ASID

#if __ARM_KERNEL_PROTECT__
	hw_asid >>= 1;
#endif
	hw_asid -= 1;

	vasid = ((unsigned int)pmap->sw_asid * MAX_HW_ASID) + hw_asid;
	vasid = MAX_ASID - 1 - vasid;

	pmap_simple_lock(&asid_lock);
	assert(!bitmap_test(&asid_bitmap[0], vasid));
	bitmap_set(&asid_bitmap[0], vasid);
	pmap_simple_unlock(&asid_lock);
}
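
/*
 * Worked example of the vasid split above (all values hypothetical): assuming
 * MAX_HW_ASID = 256, vasid = 300 yields hw_asid = 300 % 256 = 44 and
 * sw_asid = 300 / 256 = 1; the subsequent +1 skips hardware ASID 0, which is
 * reserved for the kernel, so the pmap is assigned hardware ASID 45.
 * free_asid() simply inverts the same arithmetic before setting the bit back
 * in asid_bitmap.
 */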
#ifndef PMAP_PV_LOAD_FACTOR
#define PMAP_PV_LOAD_FACTOR            1
#endif

#define PV_LOW_WATER_MARK_DEFAULT      (0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_ALLOC_CHUNK_INITIAL         (0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_KERN_ALLOC_CHUNK_INITIAL    (0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_ALLOC_INITIAL_TARGET        (PV_ALLOC_CHUNK_INITIAL * 5)
#define PV_KERN_ALLOC_INITIAL_TARGET   (PV_KERN_ALLOC_CHUNK_INITIAL)

uint32_t pv_free_count MARK_AS_PMAP_DATA = 0;
uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
uint32_t pv_kern_free_count MARK_AS_PMAP_DATA = 0;

uint32_t pv_low_water_mark MARK_AS_PMAP_DATA;
uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA;
uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA;
uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA;

thread_t mapping_replenish_thread;
event_t mapping_replenish_event;
volatile uint32_t mappingrecurse = 0;

unsigned pmap_mapping_thread_wakeups;
unsigned pmap_reserve_replenish_stat MARK_AS_PMAP_DATA;
unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;

static void
pv_init(void)
{
	simple_lock_init(&pv_free_list_lock, 0);
	simple_lock_init(&pv_kern_free_list_lock, 0);
	pv_free_list = PV_ENTRY_NULL;
	pv_free_count = 0x0U;
	pv_kern_free_list = PV_ENTRY_NULL;
	pv_kern_free_count = 0x0U;
}
static inline void PV_ALLOC(pv_entry_t **pv_ep);
static inline void PV_KERN_ALLOC(pv_entry_t **pv_e);
static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target);

static boolean_t
pv_alloc(
	pmap_t pmap,
	unsigned int pai,
	pv_entry_t **pvepp)
{
	if (pmap != NULL) {
		PMAP_ASSERT_LOCKED(pmap);
	}
	ASSERT_PVH_LOCKED(pai);
	PV_ALLOC(pvepp);
	if (PV_ENTRY_NULL == *pvepp) {
		if ((pmap == NULL) || (kernel_pmap == pmap)) {
			PV_KERN_ALLOC(pvepp);

			if (PV_ENTRY_NULL == *pvepp) {
				pv_entry_t      *pv_e;
				pv_entry_t      *pv_eh;
				pv_entry_t      *pv_et;
				int             pv_cnt;
				unsigned        j;
				pmap_paddr_t    pa;
				kern_return_t   ret;

				UNLOCK_PVH(pai);
				if (pmap != NULL) {
					PMAP_UNLOCK(pmap);
				}

				ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);

				if (ret == KERN_RESOURCE_SHORTAGE) {
					ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
				}

				if (ret != KERN_SUCCESS) {
					panic("%s: failed to alloc page for kernel, ret=%d, "
					    "pmap=%p, pai=%u, pvepp=%p",
					    __FUNCTION__, ret, pmap, pai, pvepp);
				}

				pv_e = (pv_entry_t *)phystokv(pa);
				pv_cnt = 0;
				pv_eh = pv_et = PV_ENTRY_NULL;
				*pvepp = pv_e;
				pv_e++;

				for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
					pv_e->pve_next = pv_eh;
					pv_eh = pv_e;

					if (pv_et == PV_ENTRY_NULL) {
						pv_et = pv_e;
					}
					pv_cnt++;
					pv_e++;
				}

				PV_FREE_LIST(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);

				if (pmap != NULL) {
					PMAP_LOCK(pmap);
				}
				LOCK_PVH(pai);
				return FALSE;
			}
		} else {
			pv_entry_t      *pv_e;
			pv_entry_t      *pv_eh;
			pv_entry_t      *pv_et;
			int             pv_cnt;
			unsigned        j;
			pmap_paddr_t    pa;
			kern_return_t   ret;

			UNLOCK_PVH(pai);
			PMAP_UNLOCK(pmap);

#if XNU_MONITOR
			/*
			 * The PPL has no guarantee that its allocation
			 * will succeed, so steal pages if necessary to
			 * ensure that we can free up a PV allocation.
			 */
			ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);

			if (ret == KERN_RESOURCE_SHORTAGE) {
				ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
			}
#else
			ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
#endif

			if (ret != KERN_SUCCESS) {
				panic("%s: failed to alloc page, ret=%d, "
				    "pmap=%p, pai=%u, pvepp=%p",
				    __FUNCTION__, ret, pmap, pai, pvepp);
			}

			pv_e = (pv_entry_t *)phystokv(pa);
			pv_cnt = 0;
			pv_eh = pv_et = PV_ENTRY_NULL;
			*pvepp = pv_e;
			pv_e++;

			for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
				pv_e->pve_next = pv_eh;
				pv_eh = pv_e;

				if (pv_et == PV_ENTRY_NULL) {
					pv_et = pv_e;
				}
				pv_cnt++;
				pv_e++;
			}

			PV_FREE_LIST(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);

			PMAP_LOCK(pmap);
			LOCK_PVH(pai);
			return FALSE;
		}
	}
	assert(PV_ENTRY_NULL != *pvepp);
	return TRUE;
}

static void
pv_free(pv_entry_t *pvep)
{
	PV_FREE_LIST(pvep, pvep, 1, pv_kern_low_water_mark);
}

static void
pv_list_free(pv_entry_t *pvehp, pv_entry_t *pvetp, unsigned int cnt)
{
	PV_FREE_LIST(pvehp, pvetp, cnt, pv_kern_low_water_mark);
}
static inline void
pv_water_mark_check(void)
{
	if (__improbable((pv_free_count < pv_low_water_mark) || (pv_kern_free_count < pv_kern_low_water_mark))) {
		if (!mappingrecurse && os_atomic_cmpxchg(&mappingrecurse, 0, 1, acq_rel)) {
			thread_wakeup(&mapping_replenish_event);
		}
	}
}
static inline void
PV_ALLOC(pv_entry_t **pv_ep)
{
	assert(*pv_ep == PV_ENTRY_NULL);
	if (pv_kern_free_count < pv_kern_low_water_mark) {
		/*
		 * If the kernel reserved pool is low, let non-kernel mappings wait for a page
		 * from the VM.
		 */
		return;
	}

	pmap_simple_lock(&pv_free_list_lock);

	if ((*pv_ep = pv_free_list) != 0) {
		pv_free_list = (pv_entry_t *)(*pv_ep)->pve_next;
		(*pv_ep)->pve_next = PV_ENTRY_NULL;
		pv_free_count--;
	}

	pmap_simple_unlock(&pv_free_list_lock);
}

static inline void
PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target)
{
	bool use_kernel_list = false;
	pmap_simple_lock(&pv_kern_free_list_lock);
	if (pv_kern_free_count < kern_target) {
		pv_et->pve_next = pv_kern_free_list;
		pv_kern_free_list = pv_eh;
		pv_kern_free_count += pv_cnt;
		use_kernel_list = true;
	}
	pmap_simple_unlock(&pv_kern_free_list_lock);

	if (!use_kernel_list) {
		pmap_simple_lock(&pv_free_list_lock);
		pv_et->pve_next = (pv_entry_t *)pv_free_list;
		pv_free_list = pv_eh;
		pv_free_count += pv_cnt;
		pmap_simple_unlock(&pv_free_list_lock);
	}
}

static inline void
PV_KERN_ALLOC(pv_entry_t **pv_e)
{
	assert(*pv_e == PV_ENTRY_NULL);
	pmap_simple_lock(&pv_kern_free_list_lock);

	if ((*pv_e = pv_kern_free_list) != 0) {
		pv_kern_free_list = (pv_entry_t *)(*pv_e)->pve_next;
		(*pv_e)->pve_next = PV_ENTRY_NULL;
		pv_kern_free_count--;
		pmap_kern_reserve_alloc_stat++;
	}

	pmap_simple_unlock(&pv_kern_free_list_lock);
}
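
/*
 * Sketch of the two-tier policy implemented by PV_FREE_LIST and PV_KERN_ALLOC:
 * returned entries top up the kernel-reserved list first, until it reaches its
 * target, and spill over to the general list after that. The names and the
 * use of plain counters below are hypothetical; the real lists also link the
 * pv_entry_t chains and take the corresponding simple locks.
 */
#if 0
static int example_kern_free, example_free;

static void
example_free_entries(int cnt, int kern_target)
{
	if (example_kern_free < kern_target) {
		example_kern_free += cnt;       /* refill the reserved pool first */
	} else {
		example_free += cnt;            /* otherwise feed the general pool */
	}
}
#endif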
/*
 * Creates a target number of free pv_entry_t objects for the kernel free list
 * and the general free list.
 */
MARK_AS_PMAP_TEXT static kern_return_t
mapping_free_prime_internal(void)
{
	SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_called = FALSE;
	SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_done = FALSE;

	if (mapping_free_prime_internal_done) {
		return KERN_FAILURE;
	}

	if (!mapping_free_prime_internal_called) {
		mapping_free_prime_internal_called = TRUE;

		pv_low_water_mark = PV_LOW_WATER_MARK_DEFAULT;

		/* Alterable via sysctl */
		pv_kern_low_water_mark = PV_KERN_LOW_WATER_MARK_DEFAULT;

		pv_kern_alloc_chunk = PV_KERN_ALLOC_CHUNK_INITIAL;
		pv_alloc_chunk = PV_ALLOC_CHUNK_INITIAL;
	}

	return mapping_replenish_internal(PV_KERN_ALLOC_INITIAL_TARGET, PV_ALLOC_INITIAL_TARGET);
}

void
mapping_free_prime(void)
{
	kern_return_t kr = KERN_FAILURE;

#if XNU_MONITOR
	unsigned int i = 0;

	/*
	 * Allocate the needed PPL pages up front, to minimize the chance that
	 * we will need to call into the PPL multiple times.
	 */
	for (i = 0; i < PV_ALLOC_INITIAL_TARGET; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
		pmap_alloc_page_for_ppl();
	}

	for (i = 0; i < PV_KERN_ALLOC_INITIAL_TARGET; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
		pmap_alloc_page_for_ppl();
	}

	while ((kr = mapping_free_prime_ppl()) == KERN_RESOURCE_SHORTAGE) {
		pmap_alloc_page_for_ppl();
	}
#else
	kr = mapping_free_prime_internal();
#endif

	if (kr != KERN_SUCCESS) {
		panic("%s: failed, kr=%d", __FUNCTION__, kr);
	}
}
void mapping_replenish(void);

void
mapping_adjust(void)
{
	kern_return_t mres;

	mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
	if (mres != KERN_SUCCESS) {
		panic("%s: mapping_replenish thread creation failed", __FUNCTION__);
	}
	thread_deallocate(mapping_replenish_thread);
}
/*
 * Fills the kernel and general PV free lists back up to their low watermarks.
 */
MARK_AS_PMAP_TEXT static kern_return_t
mapping_replenish_internal(uint32_t kern_target_count, uint32_t user_target_count)
{
	pv_entry_t      *pv_e;
	pv_entry_t      *pv_eh;
	pv_entry_t      *pv_et;
	int             pv_cnt;
	unsigned        j;
	pmap_paddr_t    pa;
	kern_return_t   ret = KERN_SUCCESS;

	while ((pv_free_count < user_target_count) || (pv_kern_free_count < kern_target_count)) {
		pv_cnt = 0;
		pv_eh = pv_et = PV_ENTRY_NULL;

#if XNU_MONITOR
		if ((ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT)) != KERN_SUCCESS) {
			return ret;
		}
#else
		ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
		assert(ret == KERN_SUCCESS);
#endif

		pv_e = (pv_entry_t *)phystokv(pa);

		for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
			pv_e->pve_next = pv_eh;
			pv_eh = pv_e;

			if (pv_et == PV_ENTRY_NULL) {
				pv_et = pv_e;
			}
			pv_cnt++;
			pv_e++;
		}
		pmap_reserve_replenish_stat += pv_cnt;
		PV_FREE_LIST(pv_eh, pv_et, pv_cnt, kern_target_count);
	}

	return ret;
}

/*
 * Continuation function that keeps the PV free lists from running out of free
 * entries.
 */
__attribute__((noreturn))
void
mapping_replenish(void)
{
	kern_return_t kr;

	/* We qualify for VM privileges...*/
	current_thread()->options |= TH_OPT_VMPRIV;

	for (;;) {
#if XNU_MONITOR
		while ((kr = mapping_replenish_ppl(pv_kern_low_water_mark, pv_low_water_mark)) == KERN_RESOURCE_SHORTAGE) {
			pmap_alloc_page_for_ppl();
		}
#else
		kr = mapping_replenish_internal(pv_kern_low_water_mark, pv_low_water_mark);
#endif

		if (kr != KERN_SUCCESS) {
			panic("%s: failed, kr=%d", __FUNCTION__, kr);
		}

		/* Check if the kernel pool has been depleted since the
		 * first pass, to reduce refill latency.
		 */
		if (pv_kern_free_count < pv_kern_low_water_mark) {
			continue;
		}
		/* Block sans continuation to avoid yielding kernel stack */
		assert_wait(&mapping_replenish_event, THREAD_UNINT);
		mappingrecurse = 0;
		thread_block(THREAD_CONTINUE_NULL);
		pmap_mapping_thread_wakeups++;
	}
}
static void
ptd_bootstrap(
	pt_desc_t *ptdp,
	unsigned int ptd_cnt)
{
	simple_lock_init(&ptd_free_list_lock, 0);
	while (ptd_cnt != 0) {
		(*(void **)ptdp) = (void *)ptd_free_list;
		ptd_free_list = ptdp;
		ptdp++;
		ptd_cnt--;
		ptd_free_count++;
	}
	ptd_preboot = FALSE;
}
static pt_desc_t *
ptd_alloc_unlinked(bool reclaim)
{
	pt_desc_t       *ptdp;
	unsigned        i;

	if (!ptd_preboot) {
		pmap_simple_lock(&ptd_free_list_lock);
	}

	if (ptd_free_count == 0) {
		unsigned int    ptd_cnt;
		pt_desc_t       *ptdp_next;

		if (ptd_preboot) {
			ptdp = (pt_desc_t *)avail_start;
			avail_start += ARM_PGBYTES;
			ptdp_next = ptdp;
			ptd_cnt = ARM_PGBYTES / sizeof(pt_desc_t);
		} else {
			pmap_paddr_t    pa;
			kern_return_t   ret;

			pmap_simple_unlock(&ptd_free_list_lock);

			if (pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
				if (reclaim) {
					ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
					assert(ret == KERN_SUCCESS);
				} else {
					return NULL;
				}
			}
			ptdp = (pt_desc_t *)phystokv(pa);

			pmap_simple_lock(&ptd_free_list_lock);
			ptdp_next = ptdp;
			ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
		}

		while (ptd_cnt != 0) {
			(*(void **)ptdp_next) = (void *)ptd_free_list;
			ptd_free_list = ptdp_next;
			ptdp_next++;
			ptd_cnt--;
			ptd_free_count++;
		}
	}

	if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
		ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
		ptd_free_count--;
	} else {
		panic("%s: out of ptd entry", __FUNCTION__);
	}

	if (!ptd_preboot) {
		pmap_simple_unlock(&ptd_free_list_lock);
	}

	ptdp->pt_page.next = NULL;
	ptdp->pt_page.prev = NULL;
	ptdp->pmap = NULL;

	for (i = 0; i < PT_INDEX_MAX; i++) {
		ptdp->ptd_info[i].va = (vm_offset_t)-1;
		ptdp->ptd_info[i].refcnt = 0;
		ptdp->ptd_info[i].wiredcnt = 0;
	}

	return ptdp;
}
static inline pt_desc_t *
ptd_alloc(pmap_t pmap, bool reclaim)
{
	pt_desc_t *ptdp = ptd_alloc_unlinked(reclaim);

	if (ptdp == NULL) {
		return NULL;
	}

	ptdp->pmap = pmap;
	if (pmap != kernel_pmap) {
		/* We should never try to reclaim kernel pagetable pages in
		 * pmap_pages_reclaim(), so don't enter them into the list. */
		pmap_simple_lock(&pt_pages_lock);
		queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
		pmap_simple_unlock(&pt_pages_lock);
	}

	pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
	return ptdp;
}

static void
ptd_deallocate(pt_desc_t *ptdp)
{
	pmap_t pmap = ptdp->pmap;

	if (ptd_preboot) {
		panic("%s: early boot, ptdp=%p", __FUNCTION__, ptdp);
	}

	if (ptdp->pt_page.next != NULL) {
		pmap_simple_lock(&pt_pages_lock);
		queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
		pmap_simple_unlock(&pt_pages_lock);
	}
	pmap_simple_lock(&ptd_free_list_lock);
	(*(void **)ptdp) = (void *)ptd_free_list;
	ptd_free_list = (pt_desc_t *)ptdp;
	ptd_free_count++;
	pmap_simple_unlock(&ptd_free_list_lock);
	if (pmap != NULL) {
		pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
	}
}
static void
ptd_init(
	pt_desc_t *ptdp,
	pmap_t pmap,
	vm_map_address_t va,
	unsigned int level,
	pt_entry_t *pte_p)
{
	if (ptdp->pmap != pmap) {
		panic("%s: pmap mismatch, "
		    "ptdp=%p, pmap=%p, va=%p, level=%u, pte_p=%p",
		    __FUNCTION__,
		    ptdp, pmap, (void*)va, level, pte_p);
	}

#if (__ARM_VMSA__ == 7)
	ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(ARM_TT_L1_PT_OFFMASK);
#else
	assert(level > pt_attr_root_level(pmap_get_pt_attr(pmap)));
	ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(pt_attr_ln_offmask(pmap_get_pt_attr(pmap), level - 1));
#endif
	if (level < PMAP_TT_MAX_LEVEL) {
		ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt = PT_DESC_REFCOUNT;
	}
}

boolean_t
pmap_valid_address(
	pmap_paddr_t addr)
{
	return pa_valid(addr);
}
#if (__ARM_VMSA__ == 7)

/*
 * Given an offset and a map, compute the address of the
 * corresponding translation table entry.
 */
static inline tt_entry_t *
pmap_tte(pmap_t pmap,
    vm_map_address_t addr)
{
	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	if (!(tte_index(pmap, pt_attr, addr) < pmap->tte_index_max)) {
		return (tt_entry_t *)NULL;
	}
	return &pmap->tte[tte_index(pmap, pt_attr, addr)];
}

/*
 * Given an offset and a map, compute the address of the
 * pte. If the address is invalid with respect to the map
 * then PT_ENTRY_NULL is returned (and the map may need to grow).
 *
 * This is only used internally.
 */
static inline pt_entry_t *
pmap_pte(pmap_t pmap,
    vm_map_address_t addr)
{
	pt_entry_t      *ptp;
	tt_entry_t      *ttp;
	tt_entry_t      tte;

	ttp = pmap_tte(pmap, addr);
	if (ttp == (tt_entry_t *)NULL) {
		return PT_ENTRY_NULL;
	}
	tte = *ttp;
	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
		panic("%s: Attempt to demote L1 block, tte=0x%lx, pmap=%p, addr=%p",
		    __FUNCTION__, (unsigned long)tte, pmap, (void *)addr);
	}
	if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
		return PT_ENTRY_NULL;
	}
	ptp = (pt_entry_t *) ttetokv(tte) + ptenum(addr);
	return ptp;
}

__unused static inline tt_entry_t *
pmap_ttne(pmap_t pmap,
    unsigned int target_level,
    vm_map_address_t addr)
{
	tt_entry_t * ret_ttep = NULL;

	switch (target_level) {
	case PMAP_TT_L1_LEVEL:
		ret_ttep = pmap_tte(pmap, addr);
		break;
	case PMAP_TT_L2_LEVEL:
		ret_ttep = (tt_entry_t *)pmap_pte(pmap, addr);
		break;
	default:
		panic("%s: bad level, "
		    "pmap=%p, target_level=%u, addr=%p",
		    __FUNCTION__,
		    pmap, target_level, (void *)addr);
	}

	return ret_ttep;
}

#else

static inline tt_entry_t *
pmap_ttne(pmap_t pmap,
    unsigned int target_level,
    vm_map_address_t addr)
{
	tt_entry_t * ttp = NULL;
	tt_entry_t * ttep = NULL;
	tt_entry_t   tte = ARM_TTE_EMPTY;
	unsigned int cur_level;

	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	ttp = pmap->tte;

	assert(target_level <= pt_attr->pta_max_level);

	for (cur_level = pt_attr->pta_root_level; cur_level <= target_level; cur_level++) {
		ttep = &ttp[ttn_index(pmap, pt_attr, addr, cur_level)];

		if (cur_level == target_level) {
			break;
		}

		tte = *ttep;

		if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
			panic("%s: Attempt to demote L%u block, tte=0x%llx, "
			    "pmap=%p, target_level=%u, addr=%p",
			    __FUNCTION__, cur_level, tte,
			    pmap, target_level, (void*)addr);
		}

		if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
			return TT_ENTRY_NULL;
		}

		ttp = (tt_entry_t *)phystokv(tte & ARM_TTE_TABLE_MASK);
	}

	return ttep;
}

/*
 * Given an offset and a map, compute the address of the level 1 translation table entry.
 * If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline tt_entry_t *
pmap_tt1e(pmap_t pmap,
    vm_map_address_t addr)
{
	return pmap_ttne(pmap, PMAP_TT_L1_LEVEL, addr);
}

/*
 * Given an offset and a map, compute the address of the level 2 translation table entry.
 * If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline tt_entry_t *
pmap_tt2e(pmap_t pmap,
    vm_map_address_t addr)
{
	return pmap_ttne(pmap, PMAP_TT_L2_LEVEL, addr);
}

/*
 * Given an offset and a map, compute the address of the level 3 translation table entry.
 * If the translation is invalid then PT_ENTRY_NULL is returned.
 */
static inline pt_entry_t *
pmap_tt3e(pmap_t pmap,
    vm_map_address_t addr)
{
	return (pt_entry_t *)pmap_ttne(pmap, PMAP_TT_L3_LEVEL, addr);
}

static inline tt_entry_t *
pmap_tte(pmap_t pmap,
    vm_map_address_t addr)
{
	return pmap_tt2e(pmap, addr);
}

static inline pt_entry_t *
pmap_pte(pmap_t pmap,
    vm_map_address_t addr)
{
	return pmap_tt3e(pmap, addr);
}

#endif /* (__ARM_VMSA__ == 7) */
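
/*
 * Usage sketch (hypothetical helper, not part of the pmap API): resolving a
 * VA to its leaf PTE value through the level-specific wrappers above,
 * returning 0 when any level of the walk is missing. Assumes only pmap_pte()
 * and PT_ENTRY_NULL as defined in this file.
 */
#if 0
static pt_entry_t
example_leaf_pte(pmap_t pmap, vm_map_address_t va)
{
	pt_entry_t *ptep = pmap_pte(pmap, va);

	return (ptep == PT_ENTRY_NULL) ? 0 : *ptep;
}
#endif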
#if __APRR_SUPPORTED__
/*
 * Indicates whether the given PTE has special restrictions due to the current
 * APRR settings.
 */
static boolean_t
is_pte_aprr_protected(pt_entry_t pte)
{
	uint64_t aprr_el0_value;
	uint64_t aprr_el1_value;
	uint64_t aprr_index;

	MRS(aprr_el0_value, APRR_EL0);
	MRS(aprr_el1_value, APRR_EL1);
	aprr_index = PTE_TO_APRR_INDEX(pte);

	/* Check to see if this mapping had APRR restrictions. */
	if ((APRR_EXTRACT_IDX_ATTR(aprr_el0_value, aprr_index) != APRR_EXTRACT_IDX_ATTR(APRR_EL0_RESET, aprr_index)) ||
	    (APRR_EXTRACT_IDX_ATTR(aprr_el1_value, aprr_index) != APRR_EXTRACT_IDX_ATTR(APRR_EL1_RESET, aprr_index))) {
		return TRUE;
	}

	return FALSE;
}
#endif /* __APRR_SUPPORTED__ */

#if __APRR_SUPPORTED__
static boolean_t
is_pte_xprr_protected(pt_entry_t pte)
{
#if __APRR_SUPPORTED__
	return is_pte_aprr_protected(pte);
#else /* __APRR_SUPPORTED__ */
#error "XPRR configuration error"
#endif /* __APRR_SUPPORTED__ */
}
#endif /* __APRR_SUPPORTED__*/

#if __APRR_SUPPORTED__
static uint64_t
__unused pte_to_xprr_perm(pt_entry_t pte)
{
#if __APRR_SUPPORTED__
	switch (PTE_TO_APRR_INDEX(pte)) {
	case APRR_FIRM_RX_INDEX:  return XPRR_FIRM_RX_PERM;
	case APRR_FIRM_RO_INDEX:  return XPRR_FIRM_RO_PERM;
	case APRR_PPL_RW_INDEX:   return XPRR_PPL_RW_PERM;
	case APRR_KERN_RW_INDEX:  return XPRR_KERN_RW_PERM;
	case APRR_FIRM_RW_INDEX:  return XPRR_FIRM_RW_PERM;
	case APRR_KERN0_RW_INDEX: return XPRR_KERN0_RW_PERM;
	case APRR_USER_JIT_INDEX: return XPRR_USER_JIT_PERM;
	case APRR_USER_RW_INDEX:  return XPRR_USER_RW_PERM;
	case APRR_PPL_RX_INDEX:   return XPRR_PPL_RX_PERM;
	case APRR_KERN_RX_INDEX:  return XPRR_KERN_RX_PERM;
	case APRR_USER_XO_INDEX:  return XPRR_USER_XO_PERM;
	case APRR_KERN_RO_INDEX:  return XPRR_KERN_RO_PERM;
	case APRR_KERN0_RX_INDEX: return XPRR_KERN0_RO_PERM;
	case APRR_KERN0_RO_INDEX: return XPRR_KERN0_RO_PERM;
	case APRR_USER_RX_INDEX:  return XPRR_USER_RX_PERM;
	case APRR_USER_RO_INDEX:  return XPRR_USER_RO_PERM;
	default:                  return XPRR_MAX_PERM;
	}
#else
#error "XPRR configuration error"
#endif /* __APRR_SUPPORTED__ */
}
#endif /* __APRR_SUPPORTED__ */

#if __APRR_SUPPORTED__
static uint64_t
xprr_perm_to_aprr_index(uint64_t perm)
{
	switch (perm) {
	case XPRR_FIRM_RX_PERM:  return APRR_FIRM_RX_INDEX;
	case XPRR_FIRM_RO_PERM:  return APRR_FIRM_RO_INDEX;
	case XPRR_PPL_RW_PERM:   return APRR_PPL_RW_INDEX;
	case XPRR_KERN_RW_PERM:  return APRR_KERN_RW_INDEX;
	case XPRR_FIRM_RW_PERM:  return APRR_FIRM_RW_INDEX;
	case XPRR_KERN0_RW_PERM: return APRR_KERN0_RW_INDEX;
	case XPRR_USER_JIT_PERM: return APRR_USER_JIT_INDEX;
	case XPRR_USER_RW_PERM:  return APRR_USER_RW_INDEX;
	case XPRR_PPL_RX_PERM:   return APRR_PPL_RX_INDEX;
	case XPRR_KERN_RX_PERM:  return APRR_KERN_RX_INDEX;
	case XPRR_USER_XO_PERM:  return APRR_USER_XO_INDEX;
	case XPRR_KERN_RO_PERM:  return APRR_KERN_RO_INDEX;
	case XPRR_KERN0_RX_PERM: return APRR_KERN0_RO_INDEX;
	case XPRR_KERN0_RO_PERM: return APRR_KERN0_RO_INDEX;
	case XPRR_USER_RX_PERM:  return APRR_USER_RX_INDEX;
	case XPRR_USER_RO_PERM:  return APRR_USER_RO_INDEX;
	default:                 return APRR_MAX_INDEX;
	}
}
#endif /* __APRR_SUPPORTED__ */

static pt_entry_t
__unused xprr_perm_to_pte(uint64_t perm)
{
#if __APRR_SUPPORTED__
	return APRR_INDEX_TO_PTE(xprr_perm_to_aprr_index(perm));
#else
#error "XPRR configuration error"
#endif /* __APRR_SUPPORTED__*/
}
/*
 * Map memory at initialization. The physical addresses being
 * mapped are not managed and are never unmapped.
 *
 * For now, VM is already on; we only need to map the
 * specified memory.
 */
vm_map_address_t
pmap_map(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot,
	unsigned int flags)
{
	kern_return_t   kr;
	vm_size_t       ps;

	ps = PAGE_SIZE;
	while (start < end) {
		kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
		    prot, VM_PROT_NONE, flags, FALSE);

		if (kr != KERN_SUCCESS) {
			panic("%s: failed pmap_enter, "
			    "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
			    __FUNCTION__,
			    (void *) virt, (void *) start, (void *) end, prot, flags);
		}

		virt += ps;
		start += ps;
	}
	return virt;
}
vm_map_address_t
pmap_map_bd_with_options(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot,
	int32_t options)
{
	pt_entry_t      tmplate;
	pt_entry_t      *ptep;
	vm_map_address_t vaddr;
	vm_offset_t     paddr;
	pt_entry_t      mem_attr;

	switch (options & PMAP_MAP_BD_MASK) {
	case PMAP_MAP_BD_WCOMB:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
#if (__ARM_VMSA__ > 7)
		mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		mem_attr |= ARM_PTE_SH;
#endif
		break;
	case PMAP_MAP_BD_POSTED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
		break;
	case PMAP_MAP_BD_POSTED_REORDERED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
		break;
	case PMAP_MAP_BD_POSTED_COMBINED_REORDERED:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
		break;
	default:
		mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
		break;
	}

	tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
	    mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
#if __ARM_KERNEL_PROTECT__
	tmplate |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	vaddr = virt;
	paddr = start;
	while (paddr < end) {
		ptep = pmap_pte(kernel_pmap, vaddr);
		if (ptep == PT_ENTRY_NULL) {
			panic("%s: no PTE for vaddr=%p, "
			    "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
			    __FUNCTION__, (void*)vaddr,
			    (void*)virt, (void*)start, (void*)end, prot, options);
		}

		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		WRITE_PTE_STRONG(ptep, tmplate);

		pte_increment_pa(tmplate);
		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	if (end >= start) {
		flush_mmu_tlb_region(virt, (unsigned)(end - start));
	}

	return vaddr;
}
/*
 * Back-door routine for mapping kernel VM at initialization.
 * Useful for mapping memory outside the range
 * [vm_first_phys, vm_last_phys] (i.e., devices).
 * Otherwise like pmap_map.
 */
vm_map_address_t
pmap_map_bd(
	vm_map_address_t virt,
	vm_offset_t start,
	vm_offset_t end,
	vm_prot_t prot)
{
	pt_entry_t      tmplate;
	pt_entry_t      *ptep;
	vm_map_address_t vaddr;
	vm_offset_t     paddr;

	/* not cacheable and not buffered */
	tmplate = pa_to_pte(start)
	    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
	    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
	    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
#if __ARM_KERNEL_PROTECT__
	tmplate |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	vaddr = virt;
	paddr = start;
	while (paddr < end) {
		ptep = pmap_pte(kernel_pmap, vaddr);
		if (ptep == PT_ENTRY_NULL) {
			panic("pmap_map_bd");
		}
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		WRITE_PTE_STRONG(ptep, tmplate);

		pte_increment_pa(tmplate);
		vaddr += PAGE_SIZE;
		paddr += PAGE_SIZE;
	}

	if (end >= start) {
		flush_mmu_tlb_region(virt, (unsigned)(end - start));
	}

	return vaddr;
}
/*
 * Back-door routine for mapping kernel VM at initialization.
 * Useful for mapping specific physical addresses in early
 * boot (i.e., before kernel_map is initialized).
 *
 * Maps are in the VM_HIGH_KERNEL_WINDOW area.
 */
vm_map_address_t
pmap_map_high_window_bd(
	vm_offset_t pa_start,
	vm_size_t len,
	vm_prot_t prot)
{
	pt_entry_t              *ptep, pte;
#if (__ARM_VMSA__ == 7)
	vm_map_address_t        va_start = VM_HIGH_KERNEL_WINDOW;
	vm_map_address_t        va_max = VM_MAX_KERNEL_ADDRESS;
#else
	vm_map_address_t        va_start = VREGION1_START;
	vm_map_address_t        va_max = VREGION1_START + VREGION1_SIZE;
#endif
	vm_map_address_t        va_end;
	vm_map_address_t        va;
	vm_size_t               offset;

	offset = pa_start & PAGE_MASK;
	pa_start -= offset;
	len += offset;

	if (len > (va_max - va_start)) {
		panic("%s: area too large, "
		    "pa_start=%p, len=%p, prot=0x%x",
		    __FUNCTION__,
		    (void*)pa_start, (void*)len, prot);
	}

scan:
	for (; va_start < va_max; va_start += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va_start);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		if (*ptep == ARM_PTE_TYPE_FAULT) {
			break;
		}
	}
	if (va_start > va_max) {
		panic("%s: insufficient pages, "
		    "pa_start=%p, len=%p, prot=0x%x",
		    __FUNCTION__,
		    (void*)pa_start, (void*)len, prot);
	}

	for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va_end);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		if (*ptep != ARM_PTE_TYPE_FAULT) {
			va_start = va_end + PAGE_SIZE;
			goto scan;
		}
	}

	for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
		ptep = pmap_pte(kernel_pmap, va);
		pte = pa_to_pte(pa_start)
		    | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
		    | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
		    | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
#if __ARM_KERNEL_PROTECT__
		pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
		WRITE_PTE_STRONG(ptep, pte);
	}
	PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len, false);

	kasan_notify_address(va_start, len);

	return va_start;
}
#define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))

static vm_size_t
pmap_compute_io_rgns(void)
{
	DTEntry         entry;
	pmap_io_range_t *ranges;
	uint64_t        rgn_end;
	void            *prop = NULL;
	int             err;
	unsigned int    prop_size;

	err = DTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	if (kSuccess != DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
		return 0;
	}

	ranges = prop;
	for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
		if (ranges[i].addr & PAGE_MASK) {
			panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
		}
		if (ranges[i].len & PAGE_MASK) {
			panic("pmap I/O region %u length 0x%llx is not page-aligned", i, ranges[i].len);
		}
		if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end)) {
			panic("pmap I/O region %u addr 0x%llx length 0x%llx wraps around", i, ranges[i].addr, ranges[i].len);
		}
		if (((ranges[i].addr <= gPhysBase) && (rgn_end > gPhysBase)) ||
		    ((ranges[i].addr < avail_end) && (rgn_end >= avail_end)) ||
		    ((ranges[i].addr > gPhysBase) && (rgn_end < avail_end))) {
			panic("pmap I/O region %u addr 0x%llx length 0x%llx overlaps physical memory", i, ranges[i].addr, ranges[i].len);
		}

		++num_io_rgns;
	}

	return num_io_rgns * sizeof(*ranges);
}
/*
 * return < 0 for a < b
 *          0 for a == b
 *        > 0 for a > b
 */
typedef int (*cmpfunc_t)(const void *a, const void *b);

extern void
qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

static int
cmp_io_rgns(const void *a, const void *b)
{
	const pmap_io_range_t *range_a = a;
	const pmap_io_range_t *range_b = b;
	if ((range_b->addr + range_b->len) <= range_a->addr) {
		return 1;
	} else if ((range_a->addr + range_a->len) <= range_b->addr) {
		return -1;
	} else {
		return 0;
	}
}

static void
pmap_load_io_rgns(void)
{
	DTEntry         entry;
	pmap_io_range_t *ranges;
	void            *prop = NULL;
	int             err;
	unsigned int    prop_size;

	if (num_io_rgns == 0) {
		return;
	}

	err = DTLookupEntry(NULL, "/defaults", &entry);
	assert(err == kSuccess);

	err = DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
	assert(err == kSuccess);

	ranges = prop;
	for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
		io_attr_table[i] = ranges[i];
	}

	qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
}
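
/*
 * Example of the comparator contract above (addresses hypothetical): for
 * disjoint ranges A = [0x1000, 0x2000) and B = [0x3000, 0x4000),
 * cmp_io_rgns(&A, &B) < 0 and cmp_io_rgns(&B, &A) > 0, while overlapping
 * ranges compare equal. That ordering is all the qsort() call above requires
 * to leave io_attr_table sorted by starting address.
 */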
/*
 * pmap_get_arm64_prot
 *
 * return effective armv8 VMSA block protections including
 * table AP/PXN/XN overrides of a pmap entry
 */
uint64_t
pmap_get_arm64_prot(
	pmap_t pmap,
	vm_offset_t addr)
{
	tt_entry_t      tte = 0;
	unsigned int    level = 0;
	uint64_t        tte_type = 0;
	uint64_t        effective_prot_bits = 0;
	uint64_t        aggregate_tte = 0;
	uint64_t        table_ap_bits = 0, table_xn = 0, table_pxn = 0;
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
		tte = *pmap_ttne(pmap, level, addr);

		if (!(tte & ARM_TTE_VALID)) {
			return 0;
		}

		tte_type = tte & ARM_TTE_TYPE_MASK;

		if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
		    (level == pt_attr->pta_max_level)) {
			/* Block or page mapping; both have the same protection bit layout. */
			break;
		} else if (tte_type == ARM_TTE_TYPE_TABLE) {
			/* All of the table bits we care about are overrides, so just OR them together. */
			aggregate_tte |= tte;
		}
	}

	table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
	table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
	table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);

	/* Start with the PTE bits. */
	effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);

	/* Table AP bits mask out block/page AP bits */
	effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));

	/* XN/PXN bits can be OR'd in. */
	effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
	effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);

	return effective_prot_bits;
}
#endif /* __arm64__ */
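
/*
 * Worked example for the aggregation above (bit values hypothetical): if an
 * intermediate table entry carries ARM_TTE_TABLE_XN while the leaf block or
 * page entry itself is executable, the returned protection includes
 * ARM_PTE_NX, i.e. the more restrictive of the table override and the leaf
 * permission wins. The same applies to the table AP and PXN overrides.
 */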
/*
 *    Bootstrap the system enough to run with virtual memory.
 *
 *    The early VM initialization code has already allocated
 *    the first CPU's translation table and made entries for
 *    all the one-to-one mappings to be found there.
 *
 *    We must set up the kernel pmap structures, the
 *    physical-to-virtual translation lookup tables for the
 *    physical memory to be managed (between avail_start and
 *    avail_end).
 *
 *    Map the kernel's code and data, and allocate the system page table.
 *    Page_size must already be set.
 *
 *    Parameters:
 *    first_avail    first available physical page -
 *                   after kernel page tables
 *    avail_start    PA of first managed physical page
 *    avail_end      PA of last managed physical page
 */

void
pmap_bootstrap(
    vm_offset_t vstart)
{
    pmap_paddr_t    pmap_struct_start;
    vm_size_t       pv_head_size;
    vm_size_t       ptd_root_table_size;
    vm_size_t       pp_attr_table_size;
    vm_size_t       io_attr_table_size;
    unsigned int    npages;
    vm_map_offset_t maxoffset;

    lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);

#if XNU_MONITOR

#if DEVELOPMENT || DEBUG
    PE_parse_boot_argn("-unsafe_kernel_text", &pmap_ppl_disable, sizeof(pmap_ppl_disable));
#endif

    simple_lock_init(&pmap_ppl_free_page_lock, 0);

#if __APRR_SUPPORTED__
    if (((uintptr_t)(&ppl_trampoline_start)) % PAGE_SIZE) {
        panic("%s: ppl_trampoline_start is not page aligned, "
            "ppl_trampoline_start=%p",
            __FUNCTION__,
            &ppl_trampoline_start);
    }

    if (((uintptr_t)(&ppl_trampoline_end)) % PAGE_SIZE) {
        panic("%s: ppl_trampoline_end is not page aligned, "
            "ppl_trampoline_end=%p",
            __FUNCTION__,
            &ppl_trampoline_end);
    }
#endif /* __APRR_SUPPORTED__ */
#endif /* XNU_MONITOR */

#if DEVELOPMENT || DEBUG
    if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
        kprintf("Kernel traces for pmap operations enabled\n");
    }
#endif

    /*
     *    Initialize the kernel pmap.
     */
    pmap_stamp = 1;
#if ARM_PARAMETERIZED_PMAP
    kernel_pmap->pmap_pt_attr = native_pt_attr;
#endif /* ARM_PARAMETERIZED_PMAP */
#if HAS_APPLE_PAC
    kernel_pmap->disable_jop = 0;
#endif /* HAS_APPLE_PAC */
    kernel_pmap->tte = cpu_tte;
    kernel_pmap->ttep = cpu_ttep;
#if (__ARM_VMSA__ > 7)
    kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
#else
    kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
#endif
    kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
    os_atomic_init(&kernel_pmap->ref_count, 1);
    kernel_pmap->gc_status = 0;
    kernel_pmap->nx_enabled = TRUE;
#ifdef __arm64__
    kernel_pmap->is_64bit = TRUE;
#else
    kernel_pmap->is_64bit = FALSE;
#endif
    kernel_pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);

    kernel_pmap->nested_region_grand_addr = 0x0ULL;
    kernel_pmap->nested_region_subord_addr = 0x0ULL;
    kernel_pmap->nested_region_size = 0x0ULL;
    kernel_pmap->nested_region_asid_bitmap = NULL;
    kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;

#if (__ARM_VMSA__ == 7)
    kernel_pmap->tte_index_max = 4 * NTTES;
#endif
    kernel_pmap->hw_asid = 0;
    kernel_pmap->sw_asid = 0;

    PMAP_LOCK_INIT(kernel_pmap);
    memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));

    /* allocate space for and initialize the bookkeeping structures */
    io_attr_table_size = pmap_compute_io_rgns();
    npages = (unsigned int)atop(mem_size);
    pp_attr_table_size = npages * sizeof(pp_attr_t);
    pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
    // allocate enough initial PTDs to map twice the available physical memory
    ptd_root_table_size = sizeof(pt_desc_t) * (mem_size / ((PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES)) * 2;
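    /*
     * Sizing note (annotation, not part of the original source): one pt_desc_t
     * tracks one page-table page, and a page-table page of
     * (PAGE_SIZE / sizeof(pt_entry_t)) entries maps
     * (PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES bytes.  With 16K pages and
     * 8-byte PTEs that is 2048 * 16K = 32MB per leaf table, so the "* 2" above
     * reserves roughly one descriptor per 16MB of physical memory.
     */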
    pmap_struct_start = avail_start;

    pp_attr_table = (pp_attr_t *) phystokv(avail_start);
    avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
    io_attr_table = (pmap_io_range_t *) phystokv(avail_start);
    avail_start = PMAP_ALIGN(avail_start + io_attr_table_size, __alignof(pv_entry_t *));
    pv_head_table = (pv_entry_t **) phystokv(avail_start);
    avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
    ptd_root_table = (pt_desc_t *)phystokv(avail_start);
    avail_start = round_page(avail_start + ptd_root_table_size);

    memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);

    pmap_load_io_rgns();
    ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));
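    /*
     * Layout note (annotation, not part of the original source): the block
     * above carves four bookkeeping tables out of physical memory starting at
     * avail_start, in order: pp_attr_table, io_attr_table, pv_head_table and
     * ptd_root_table, each aligned for its element type.  The region is then
     * zeroed and avail_start is advanced past it, so the VM layer never hands
     * these pages out as free memory.
     */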
#if XNU_MONITOR
    pmap_array_begin = (void *)phystokv(avail_start);
    pmap_array = pmap_array_begin;
    avail_start += round_page(MAX_ASID * sizeof(struct pmap));
    pmap_array_end = (void *)phystokv(avail_start);

    pmap_array_count = ((pmap_array_end - pmap_array_begin) / sizeof(struct pmap));

    pmap_bootstrap_pmap_free_list();

    pmap_ledger_ptr_array_begin = (void *)phystokv(avail_start);
    pmap_ledger_ptr_array = pmap_ledger_ptr_array_begin;
    avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(void*));
    pmap_ledger_ptr_array_end = (void *)phystokv(avail_start);

    pmap_ledger_refcnt_begin = (void *)phystokv(avail_start);
    pmap_ledger_refcnt = pmap_ledger_refcnt_begin;
    avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(os_refcnt_t));
    pmap_ledger_refcnt_end = (void *)phystokv(avail_start);

    simple_lock_init(&pmap_ledger_lock, 0);
#endif /* XNU_MONITOR */

    pmap_cpu_data_array_init();
    vm_first_phys = gPhysBase;
    vm_last_phys = trunc_page(avail_end);

    simple_lock_init(&pmaps_lock, 0);
    simple_lock_init(&asid_lock, 0);
    simple_lock_init(&tt1_lock, 0);
    queue_init(&map_pmap_list);
    queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
    free_page_size_tt_list = TT_FREE_ENTRY_NULL;
    free_page_size_tt_count = 0;
    free_page_size_tt_max = 0;
    free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
    free_two_page_size_tt_count = 0;
    free_two_page_size_tt_max = 0;
    free_tt_list = TT_FREE_ENTRY_NULL;
    free_tt_count = 0;
    free_tt_max = 0;

    simple_lock_init(&pt_pages_lock, 0);
    queue_init(&pt_page_list);

    simple_lock_init(&pmap_pages_lock, 0);
    pmap_pages_request_count = 0;
    pmap_pages_request_acum = 0;
    pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;

    virtual_space_start = vstart;
    virtual_space_end = VM_MAX_KERNEL_ADDRESS;

    bitmap_full(&asid_bitmap[0], MAX_ASID);
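    /*
     * Annotation (not part of the original source): bitmap_full() sets every
     * bit in asid_bitmap, presumably marking all MAX_ASID address space
     * identifiers as available; the ASID allocation and free paths later
     * clear and set bits as pmaps are created and destroyed.
     */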
    if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
        maxoffset = trunc_page(maxoffset);
        if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
            && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
            arm_pmap_max_offset_default = maxoffset;
        }
    }
#if defined(__arm64__)
    if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
        maxoffset = trunc_page(maxoffset);
        if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
            && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
            arm64_pmap_max_offset_default = maxoffset;
        }
    }
#endif

#if DEVELOPMENT || DEBUG
    PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute, sizeof(panic_on_unsigned_execute));
#endif /* DEVELOPMENT || DEBUG */

    pmap_nesting_size_min = ARM_NESTING_SIZE_MIN;
    pmap_nesting_size_max = ARM_NESTING_SIZE_MAX;

    simple_lock_init(&phys_backup_lock, 0);

#if MACH_ASSERT
    PE_parse_boot_argn("pmap_stats_assert",
        &pmap_stats_assert,
        sizeof(pmap_stats_assert));
    PE_parse_boot_argn("vm_footprint_suspend_allowed",
        &vm_footprint_suspend_allowed,
        sizeof(vm_footprint_suspend_allowed));
#endif /* MACH_ASSERT */

#if KASAN
    /* Shadow the CPU copy windows, as they fall outside of the physical aperture */
    kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
#endif /* KASAN */
}
#if XNU_MONITOR

static inline void
pa_set_range_monitor(pmap_paddr_t start_pa, pmap_paddr_t end_pa)
{
    pmap_paddr_t cur_pa;
    for (cur_pa = start_pa; cur_pa < end_pa; cur_pa += ARM_PGBYTES) {
        assert(pa_valid(cur_pa));
        pa_set_monitor(cur_pa);
    }
}

static void
pa_set_range_xprr_perm(pmap_paddr_t start_pa,
    pmap_paddr_t end_pa,
    unsigned int expected_perm,
    unsigned int new_perm)
{
    vm_offset_t start_va = phystokv(start_pa);
    vm_offset_t end_va = start_va + (end_pa - start_pa);

    pa_set_range_monitor(start_pa, end_pa);
    pmap_set_range_xprr_perm(start_va, end_va, expected_perm, new_perm);
}
static void
pmap_static_allocations_done(void)
{
    pmap_paddr_t monitor_start_pa;
    pmap_paddr_t monitor_end_pa;

    /*
     * We allocate memory for bootstrap starting at topOfKernelData (which
     * is at the end of the device tree and ramdisk data, if applicable).
     * We use avail_start as a pointer to the first address that has not
     * been reserved for bootstrap, so we know which pages to give to the
     * virtual memory layer.
     *
     * These bootstrap allocations will be used primarily for page tables.
     * If we wish to secure the page tables, we need to start by marking
     * these bootstrap allocations as pages that we want to protect.
     */
    monitor_start_pa = BootArgs->topOfKernelData;
    monitor_end_pa = BootArgs->topOfKernelData + BOOTSTRAP_TABLE_SIZE;

    /*
     * The bootstrap page tables are mapped RO at bootstrap.
     *
     * Note that this function call requests switching XPRR permissions from
     * XPRR_KERN_RO_PERM to XPRR_KERN_RO_PERM.  Whilst this may seem redundant,
     * pa_set_range_xprr_perm() does other things too, such as calling
     * pa_set_range_monitor() on the requested address range and performing a number
     * of integrity checks on the PTEs.  We should still
     * call this function for all PPL-owned memory, regardless of whether
     * permissions are required to be changed or not.
     */
    pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);

    monitor_start_pa = BootArgs->topOfKernelData + BOOTSTRAP_TABLE_SIZE;
    monitor_end_pa = avail_start;

    /* The other bootstrap allocations are mapped RW at bootstrap. */
    pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);

    /*
     * The RO page tables are mapped RW at bootstrap and remain RW after the call
     * to pa_set_range_xprr_perm().  We do this, as opposed to using XPRR_PPL_RW_PERM,
     * to work around a functional issue on H11 devices where CTRR shifts the APRR
     * lookup table index to USER_XO before APRR is applied, hence causing the hardware
     * to believe we are dealing with a user XO page upon performing a translation.
     *
     * Note that this workaround does not pose a security risk, because the RO
     * page tables still remain read-only, due to KTRR/CTRR, and further protecting
     * them at the APRR level would be unnecessary.
     */
    monitor_start_pa = kvtophys((vm_offset_t)&ropagetable_begin);
    monitor_end_pa = monitor_start_pa + ((vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin);
    pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_KERN_RW_PERM);

    monitor_start_pa = kvtophys(segPPLDATAB);
    monitor_end_pa = monitor_start_pa + segSizePPLDATA;

    /* PPL data is RW for the PPL, RO for the kernel. */
    pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);

    monitor_start_pa = kvtophys(segPPLTEXTB);
    monitor_end_pa = monitor_start_pa + segSizePPLTEXT;

    /* PPL text is RX for the PPL, RO for the kernel. */
    pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);

#if __APRR_SUPPORTED__
    monitor_start_pa = kvtophys(segPPLTRAMPB);
    monitor_end_pa = monitor_start_pa + segSizePPLTRAMP;

    /*
     * The PPLTRAMP pages will be a mix of PPL RX/kernel RO and
     * PPL RX/kernel RX.  However, all of these pages belong to the PPL.
     */
    pa_set_range_monitor(monitor_start_pa, monitor_end_pa);
#endif

    /*
     * In order to support DTrace, the save areas for the PPL must be
     * writable.  This is due to the fact that DTrace will try to update
     * register state.
     */
    if (pmap_ppl_disable) {
        vm_offset_t monitor_start_va = phystokv(ppl_cpu_save_area_start);
        vm_offset_t monitor_end_va = monitor_start_va + (ppl_cpu_save_area_end - ppl_cpu_save_area_start);

        pmap_set_range_xprr_perm(monitor_start_va, monitor_end_va, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
    }

#if __APRR_SUPPORTED__
    /* The trampoline must also be specially protected. */
    pmap_set_range_xprr_perm((vm_offset_t)&ppl_trampoline_start, (vm_offset_t)&ppl_trampoline_end, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
#endif

    if (segSizePPLDATACONST > 0) {
        monitor_start_pa = kvtophys(segPPLDATACONSTB);
        monitor_end_pa = monitor_start_pa + segSizePPLDATACONST;

        pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
    }

    /*
     * Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
     * precaution.  The real RW mappings are at a different location with guard pages.
     */
    pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_KERN_RO_PERM);
}
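/*
 * Summary (annotation, not part of the original source) of the permission
 * transitions requested above:
 *   bootstrap page tables         KERN_RO -> KERN_RO (monitor tagging and checks only)
 *   other bootstrap allocations   KERN_RW -> PPL_RW
 *   ropagetable region            KERN_RW -> KERN_RW (H11 CTRR/APRR workaround)
 *   PPL data segment              KERN_RW -> PPL_RW
 *   PPL text segment              KERN_RX -> PPL_RX
 *   PPL DATA_CONST (if present)   KERN_RO -> KERN_RO
 *   PPL stack physical aperture   PPL_RW  -> KERN_RO
 */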
MARK_AS_PMAP_TEXT static void
pmap_lockdown_ppl(void)
{
    /* Mark the PPL as being locked down. */

#if __APRR_SUPPORTED__
    pmap_ppl_locked_down = TRUE;
    /* Force a trap into the PPL to update APRR_EL1. */
    pmap_return(FALSE, FALSE);
#else
#error "XPRR configuration error"
#endif /* __APRR_SUPPORTED__ */
}
#endif /* XNU_MONITOR */
void
pmap_virtual_space(
    vm_offset_t *startp,
    vm_offset_t *endp)
{
    *startp = virtual_space_start;
    *endp = virtual_space_end;
}
boolean_t
pmap_virtual_region(
    unsigned int region_select,
    vm_map_offset_t *startp,
    vm_map_size_t *size)
{
    boolean_t ret = FALSE;
#if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
    if (region_select == 0) {
        /*
         * In this config, the bootstrap mappings should occupy their own L2
         * TTs, as they should be immutable after boot.  Having the associated
         * TTEs and PTEs in their own pages allows us to lock down those pages,
         * while allowing the rest of the kernel address range to be remapped.
         */
#if (__ARM_VMSA__ > 7)
        *startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
#else
#error Unsupported configuration
#endif
        *size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
        ret = TRUE;
    }
#else
#if (__ARM_VMSA__ > 7)
    unsigned long low_global_vr_mask = 0;
    vm_map_size_t low_global_vr_size = 0;
#endif

    if (region_select == 0) {
#if (__ARM_VMSA__ == 7)
        *startp = gVirtBase & 0xFFC00000;
        *size = ((virtual_space_start - (gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
#else
        /* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
        if (!TEST_PAGE_SIZE_4K) {
            *startp = gVirtBase & 0xFFFFFFFFFE000000;
            *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
        } else {
            *startp = gVirtBase & 0xFFFFFFFFFF800000;
            *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
        }
#endif
        ret = TRUE;
    }

    if (region_select == 1) {
        *startp = VREGION1_START;
        *size = VREGION1_SIZE;
        ret = TRUE;
    }
#if (__ARM_VMSA__ > 7)
    /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
    if (!TEST_PAGE_SIZE_4K) {
        low_global_vr_mask = 0xFFFFFFFFFE000000;
        low_global_vr_size = 0x2000000;
    } else {
        low_global_vr_mask = 0xFFFFFFFFFF800000;
        low_global_vr_size = 0x800000;
    }

    if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
        *startp = LOW_GLOBAL_BASE_ADDRESS;
        *size = low_global_vr_size;
        ret = TRUE;
    }

    if (region_select == 3) {
        /* In this config, we allow the bootstrap mappings to occupy the same
         * page table pages as the heap.
         */
        *startp = VM_MIN_KERNEL_ADDRESS;
        *size = LOW_GLOBAL_BASE_ADDRESS - *startp;
        ret = TRUE;
    }
#endif
#endif
    return ret;
}

unsigned int
pmap_free_pages(
    void)
{
    return (unsigned int)atop(avail_end - first_avail);
}
boolean_t
pmap_next_page_hi(
    ppnum_t *pnum,
    __unused boolean_t might_free)
{
    return pmap_next_page(pnum);
}

boolean_t
pmap_next_page(
    ppnum_t *pnum)
{
    if (first_avail != avail_end) {
        *pnum = (ppnum_t)atop(first_avail);
        first_avail += PAGE_SIZE;
        return TRUE;
    }
    return FALSE;
}
/*
 *    Initialize the pmap module.
 *    Called by vm_init, to initialize any structures that the pmap
 *    system needs to map virtual memory.
 */
void
pmap_init(
    void)
{
    /*
     *    Protect page zero in the kernel map.
     *    (can be overruled by permanent translation
     *    table entries at page zero - see arm_vm_init).
     */
    vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);

    pmap_initialized = TRUE;

    pmap_zone_init();

    /*
     *    Initialize the pmap object (for tracking the vm_page_t
     *    structures for pages we allocate to be page tables in
     *    pmap_expand()).
     */
    _vm_object_allocate(mem_size, pmap_object);
    pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;

    pv_init();

    /*
     * The values of [hard_]maxproc may have been scaled, make sure
     * they are still less than the value of MAX_ASID.
     */
    if (maxproc > MAX_ASID) {
        maxproc = MAX_ASID;
    }
    if (hard_maxproc > MAX_ASID) {
        hard_maxproc = MAX_ASID;
    }

#if CONFIG_PGTRACE
    pmap_pgtrace_init();
#endif
}
boolean_t
pmap_verify_free(
    ppnum_t ppnum)
{
    pv_entry_t **pv_h;
    int pai;
    pmap_paddr_t phys = ptoa(ppnum);

    assert(phys != vm_page_fictitious_addr);

    if (!pa_valid(phys)) {
        return FALSE;
    }

    pai = (int)pa_index(phys);
    pv_h = pai_to_pvh(pai);

    return pvh_test_type(pv_h, PVH_TYPE_NULL);
}

#if MACH_ASSERT
void
pmap_assert_free(ppnum_t ppnum)
{
    assertf(pmap_verify_free(ppnum), "page = 0x%x", ppnum);
    (void)ppnum;
}
#endif
/*
 *    Initialize zones used by pmap.
 */
static void
pmap_zone_init(
    void)
{
    /*
     *    Create the zone of physical maps
     *    and the physical-to-virtual entries.
     */
    pmap_zone = zinit((vm_size_t) sizeof(struct pmap), (vm_size_t) sizeof(struct pmap) * 256,
        PAGE_SIZE, "pmap");
}
#if XNU_MONITOR
MARK_AS_PMAP_TEXT static void
pmap_ledger_alloc_init_internal(size_t size)
{
    pmap_simple_lock(&pmap_ledger_lock);

    if (pmap_ledger_alloc_initialized) {
        panic("%s: already initialized, "
            "size=%lu",
            __func__,
            size);
    }

    if (size != sizeof(pmap_ledger_data_t)) {
        panic("%s: size mismatch, expected %lu, "
            "size=%lu",
            __func__, PMAP_LEDGER_DATA_BYTES,
            size);
    }

    pmap_ledger_alloc_initialized = true;

    pmap_simple_unlock(&pmap_ledger_lock);
}

MARK_AS_PMAP_TEXT static ledger_t
pmap_ledger_alloc_internal(void)
{
    pmap_paddr_t paddr;
    uint64_t vaddr, vstart, vend;
    uint64_t index;

    ledger_t new_ledger;
    uint64_t array_index;

    pmap_simple_lock(&pmap_ledger_lock);
    if (pmap_ledger_free_list == NULL) {
        paddr = pmap_get_free_ppl_page();

        if (!paddr) {
            pmap_simple_unlock(&pmap_ledger_lock);
            return NULL;
        }

        vstart = phystokv(paddr);
        vend = vstart + PAGE_SIZE;

        for (vaddr = vstart; (vaddr < vend) && ((vaddr + sizeof(pmap_ledger_t)) <= vend); vaddr += sizeof(pmap_ledger_t)) {
            pmap_ledger_t *free_ledger;

            index = pmap_ledger_ptr_array_free_index++;

            if (index >= MAX_PMAP_LEDGERS) {
                panic("%s: pmap_ledger_ptr_array is full, index=%llu",
                    __func__, index);
            }

            free_ledger = (pmap_ledger_t *)vaddr;

            pmap_ledger_ptr_array[index] = free_ledger;
            free_ledger->back_ptr = &pmap_ledger_ptr_array[index];

            free_ledger->next = pmap_ledger_free_list;
            pmap_ledger_free_list = free_ledger;
        }

        pa_set_range_xprr_perm(paddr, paddr + PAGE_SIZE, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
    }

    new_ledger = (ledger_t)pmap_ledger_free_list;
    pmap_ledger_free_list = pmap_ledger_free_list->next;

    array_index = pmap_ledger_validate(new_ledger);
    os_ref_init(&pmap_ledger_refcnt[array_index], NULL);

    pmap_simple_unlock(&pmap_ledger_lock);

    return new_ledger;
}
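/*
 * Annotation (not part of the original source): when the free list is empty,
 * the loop above slices one PPL page into PAGE_SIZE / sizeof(pmap_ledger_t)
 * ledger slots, records each slot in pmap_ledger_ptr_array, threads the slots
 * onto pmap_ledger_free_list, and only then hands one out.
 */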
MARK_AS_PMAP_TEXT static void
pmap_ledger_free_internal(ledger_t ledger)
{
    pmap_ledger_t *free_ledger;

    free_ledger = (pmap_ledger_t *)ledger;

    pmap_simple_lock(&pmap_ledger_lock);
    uint64_t array_index = pmap_ledger_validate(ledger);

    if (os_ref_release(&pmap_ledger_refcnt[array_index]) != 0) {
        panic("%s: ledger still referenced, "
            "ledger=%p",
            __func__,
            ledger);
    }

    free_ledger->next = pmap_ledger_free_list;
    pmap_ledger_free_list = free_ledger;
    pmap_simple_unlock(&pmap_ledger_lock);
}
static void
pmap_ledger_retain(ledger_t ledger)
{
    pmap_simple_lock(&pmap_ledger_lock);
    uint64_t array_index = pmap_ledger_validate(ledger);
    os_ref_retain(&pmap_ledger_refcnt[array_index]);
    pmap_simple_unlock(&pmap_ledger_lock);
}

static void
pmap_ledger_release(ledger_t ledger)
{
    pmap_simple_lock(&pmap_ledger_lock);
    uint64_t array_index = pmap_ledger_validate(ledger);
    os_ref_release_live(&pmap_ledger_refcnt[array_index]);
    pmap_simple_unlock(&pmap_ledger_lock);
}
void
pmap_ledger_alloc_init(size_t size)
{
    pmap_ledger_alloc_init_ppl(size);
}

ledger_t
pmap_ledger_alloc(void)
{
    ledger_t retval = NULL;

    while ((retval = pmap_ledger_alloc_ppl()) == NULL) {
        pmap_alloc_page_for_ppl();
    }

    return retval;
}

void
pmap_ledger_free(ledger_t ledger)
{
    pmap_ledger_free_ppl(ledger);
}
#else /* XNU_MONITOR */
void
pmap_ledger_alloc_init(size_t size)
{
    panic("%s: unsupported, "
        "size=%lu",
        __func__, size);
}

ledger_t
pmap_ledger_alloc(void)
{
    panic("%s: unsupported",
        __func__);

    return NULL;
}

void
pmap_ledger_free(ledger_t ledger)
{
    panic("%s: unsupported, "
        "ledger=%p",
        __func__, ledger);
}
#endif /* XNU_MONITOR */
/*
 *    Create and return a physical map.
 *
 *    If the size specified for the map
 *    is zero, the map is an actual physical
 *    map, and may be referenced by the
 *    hardware.
 *
 *    If the size specified is non-zero,
 *    the map will be used in software only, and
 *    is bounded by that size.
 */
MARK_AS_PMAP_TEXT static pmap_t
pmap_create_options_internal(
    ledger_t ledger,
    vm_map_size_t size,
    unsigned int flags)
{
    unsigned i;
    unsigned tte_index_max;
    pmap_t p;
    bool is_64bit = flags & PMAP_CREATE_64BIT;
#if defined(HAS_APPLE_PAC)
    bool disable_jop = flags & PMAP_CREATE_DISABLE_JOP;
#endif /* defined(HAS_APPLE_PAC) */

    /*
     *    A software use-only map doesn't even need a pmap.
     */
    if (size != 0) {
        return PMAP_NULL;
    }

#if XNU_MONITOR
    if ((p = pmap_alloc_pmap()) == PMAP_NULL) {
        return PMAP_NULL;
    }

    if (ledger) {
        pmap_ledger_validate(ledger);
        pmap_ledger_retain(ledger);
    }
#else
    /*
     *    Allocate a pmap struct from the pmap_zone.  Then allocate
     *    the translation table of the right size for the pmap.
     */
    if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
        return PMAP_NULL;
    }
#endif

    p->ledger = ledger;

    if (flags & PMAP_CREATE_64BIT) {
        p->min = MACH_VM_MIN_ADDRESS;
        p->max = MACH_VM_MAX_ADDRESS;
    } else {
        p->min = VM_MIN_ADDRESS;
        p->max = VM_MAX_ADDRESS;
    }

#if defined(HAS_APPLE_PAC)
    p->disable_jop = disable_jop;
#endif /* defined(HAS_APPLE_PAC) */

    p->nested_region_true_start = 0;
    p->nested_region_true_end = ~0;

    os_atomic_init(&p->ref_count, 1);
    p->gc_status = 0;
    p->stamp = os_atomic_inc(&pmap_stamp, relaxed);
    p->nx_enabled = TRUE;
    p->is_64bit = is_64bit;
    p->nested = FALSE;
    p->nested_pmap = PMAP_NULL;

#if ARM_PARAMETERIZED_PMAP
    p->pmap_pt_attr = native_pt_attr;
#endif /* ARM_PARAMETERIZED_PMAP */

    if (!pmap_get_pt_ops(p)->alloc_id(p)) {
        goto id_alloc_fail;
    }

    PMAP_LOCK_INIT(p);
    memset((void *) &p->stats, 0, sizeof(p->stats));

    p->tt_entry_free = (tt_entry_t *)0;
    tte_index_max = PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t);

#if (__ARM_VMSA__ == 7)
    p->tte_index_max = tte_index_max;
#endif

#if XNU_MONITOR
    p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, PMAP_TT_ALLOCATE_NOWAIT);
#else
    p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
#endif
    if (!(p->tte)) {
        goto tt1_alloc_fail;
    }

    p->ttep = ml_static_vtop((vm_offset_t)p->tte);
    PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);

    /* nullify the translation table */
    for (i = 0; i < tte_index_max; i++) {
        p->tte[i] = ARM_TTE_TYPE_FAULT;
    }

    FLUSH_PTE_RANGE(p->tte, p->tte + tte_index_max);

    /*
     *  initialize the rest of the structure
     */
    p->nested_region_grand_addr = 0x0ULL;
    p->nested_region_subord_addr = 0x0ULL;
    p->nested_region_size = 0x0ULL;
    p->nested_region_asid_bitmap = NULL;
    p->nested_region_asid_bitmap_size = 0x0UL;

    p->nested_has_no_bounds_ref = false;
    p->nested_no_bounds_refcnt = 0;
    p->nested_bounds_set = false;

#if MACH_ASSERT
    p->pmap_stats_assert = TRUE;
    p->pmap_pid = 0;
    strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
#endif /* MACH_ASSERT */
#if DEVELOPMENT || DEBUG
    p->footprint_was_suspended = FALSE;
#endif /* DEVELOPMENT || DEBUG */

    pmap_simple_lock(&pmaps_lock);
    queue_enter(&map_pmap_list, p, pmap_t, pmaps);
    pmap_simple_unlock(&pmaps_lock);

    return p;

tt1_alloc_fail:
    pmap_get_pt_ops(p)->free_id(p);
id_alloc_fail:
#if XNU_MONITOR
    pmap_free_pmap(p);

    if (ledger) {
        pmap_ledger_release(ledger);
    }
#else
    zfree(pmap_zone, p);
#endif
    return PMAP_NULL;
}
pmap_t
pmap_create_options(
    ledger_t ledger,
    vm_map_size_t size,
    unsigned int flags)
{
    pmap_t pmap;

    PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);

    ledger_reference(ledger);

#if XNU_MONITOR
    /*
     * TODO: It should be valid for pmap_create_options_internal to fail; we could
     * be out of ASIDs.
     */
    while ((pmap = pmap_create_options_ppl(ledger, size, flags)) == PMAP_NULL) {
        pmap_alloc_page_for_ppl();
    }
#else
    pmap = pmap_create_options_internal(ledger, size, flags);
#endif

    if (pmap == PMAP_NULL) {
        ledger_dereference(ledger);
        return PMAP_NULL;
    }

    PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);

    return pmap;
}
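/*
 * Usage sketch (annotation, not part of the original source): a typical caller
 * pairs pmap_create_options() with pmap_destroy(), e.g.
 *
 *    pmap_t p = pmap_create_options(ledger, 0, PMAP_CREATE_64BIT);
 *    if (p != PMAP_NULL) {
 *        ...
 *        pmap_destroy(p);
 *    }
 *
 * A non-zero size requests a software use-only map, which this implementation
 * does not back with a pmap (see pmap_create_options_internal() above).
 */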
/*
 * This symbol remains in place when the PPL is enabled so that the dispatch
 * table does not change from development to release configurations.
 */
#if MACH_ASSERT || XNU_MONITOR
MARK_AS_PMAP_TEXT static void
pmap_set_process_internal(
    __unused pmap_t pmap,
    __unused int pid,
    __unused char *procname)
{
#if MACH_ASSERT
    if (pmap == NULL) {
        return;
    }

    VALIDATE_PMAP(pmap);

    pmap->pmap_pid = pid;
    strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
    if (pmap_ledgers_panic_leeway) {
        /*
         * Some processes somehow trigger some issues that make
         * the pmap stats and ledgers go off track, causing
         * some assertion failures and ledger panics.
         * Turn off the sanity checks if we allow some ledger leeway
         * because of that.  We'll still do a final check in
         * pmap_check_ledgers() for discrepancies larger than the
         * allowed leeway after the address space has been fully
         * cleaned up.
         */
        pmap->pmap_stats_assert = FALSE;
        ledger_disable_panic_on_negative(pmap->ledger,
            task_ledgers.phys_footprint);
        ledger_disable_panic_on_negative(pmap->ledger,
            task_ledgers.internal);
        ledger_disable_panic_on_negative(pmap->ledger,
            task_ledgers.internal_compressed);
        ledger_disable_panic_on_negative(pmap->ledger,
            task_ledgers.iokit_mapped);
        ledger_disable_panic_on_negative(pmap->ledger,
            task_ledgers.alternate_accounting);
        ledger_disable_panic_on_negative(pmap->ledger,
            task_ledgers.alternate_accounting_compressed);
    }
#endif /* MACH_ASSERT */
}
#endif /* MACH_ASSERT || XNU_MONITOR */

#if MACH_ASSERT
void
pmap_set_process(
    pmap_t pmap,
    int pid,
    char *procname)
{
#if XNU_MONITOR
    pmap_set_process_ppl(pmap, pid, procname);
#else
    pmap_set_process_internal(pmap, pid, procname);
#endif
}
#endif /* MACH_ASSERT */
/*
 * We maintain stats and ledgers so that a task's physical footprint is:
 * phys_footprint = ((internal - alternate_accounting)
 *                   + (internal_compressed - alternate_accounting_compressed)
 *                   + iokit_mapped
 *                   + purgeable_nonvolatile
 *                   + purgeable_nonvolatile_compressed
 *                   + page_table)
 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
 */
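/*
 * Worked example (annotation, not part of the original source): with
 * internal=10, alternate_accounting=2, internal_compressed=4,
 * alternate_accounting_compressed=1, iokit_mapped=0, purgeable_nonvolatile=3,
 * purgeable_nonvolatile_compressed=0 and page_table=1 (all in pages), the
 * formula above gives phys_footprint = (10 - 2) + (4 - 1) + 0 + 3 + 0 + 1 = 15
 * pages.
 */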
/*
 *    Retire the given physical map from service.
 *    Should only be called if the map contains
 *    no valid mappings.
 */
MARK_AS_PMAP_TEXT static void
pmap_destroy_internal(
    pmap_t pmap)
{
    if (pmap == PMAP_NULL) {
        return;
    }

    VALIDATE_PMAP(pmap);

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    int32_t ref_count = os_atomic_dec(&pmap->ref_count, relaxed);
    if (ref_count > 0) {
        return;
    } else if (ref_count < 0) {
        panic("pmap %p: refcount underflow", pmap);
    } else if (pmap == kernel_pmap) {
        panic("pmap %p: attempt to destroy kernel pmap", pmap);
    }

#if (__ARM_VMSA__ > 7)
    pmap_unmap_sharedpage(pmap);
#endif /* (__ARM_VMSA__ > 7) */

    pmap_simple_lock(&pmaps_lock);
    while (pmap->gc_status & PMAP_GC_INFLIGHT) {
        pmap->gc_status |= PMAP_GC_WAIT;
        assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
        pmap_simple_unlock(&pmaps_lock);
        (void) thread_block(THREAD_CONTINUE_NULL);
        pmap_simple_lock(&pmaps_lock);
    }
    queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
    pmap_simple_unlock(&pmaps_lock);

    pmap_trim_self(pmap);

    /*
     *    Free the memory maps, then the
     *    pmap structure.
     */
#if (__ARM_VMSA__ == 7)
    unsigned int i = 0;
    tt_entry_t *ttep;

    PMAP_LOCK(pmap);
    for (i = 0; i < pmap->tte_index_max; i++) {
        ttep = &pmap->tte[i];
        if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
            pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
        }
    }
    PMAP_UNLOCK(pmap);
#else /* (__ARM_VMSA__ == 7) */
    tt_entry_t *ttep;
    vm_map_address_t c;
    unsigned int level;

    for (level = pt_attr->pta_max_level - 1; level >= pt_attr->pta_root_level; level--) {
        for (c = pmap->min; c < pmap->max; c += pt_attr_ln_size(pt_attr, level)) {
            ttep = pmap_ttne(pmap, level, c);

            if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
                PMAP_LOCK(pmap);
                pmap_tte_deallocate(pmap, ttep, level);
                PMAP_UNLOCK(pmap);
            }
        }
    }
#endif /* (__ARM_VMSA__ == 7) */

    if (pmap->tte) {
#if (__ARM_VMSA__ == 7)
        pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
        pmap->tte_index_max = 0;
#else /* (__ARM_VMSA__ == 7) */
        pmap_tt1_deallocate(pmap, pmap->tte, PMAP_ROOT_ALLOC_SIZE, 0);
#endif /* (__ARM_VMSA__ == 7) */
        pmap->tte = (tt_entry_t *) NULL;
        pmap->ttep = 0;
    }

    assert((tt_free_entry_t *)pmap->tt_entry_free == NULL);

    pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
    sync_tlb_flush();

    /* return its asid to the pool */
    pmap_get_pt_ops(pmap)->free_id(pmap);
    pmap_check_ledgers(pmap);

    if (pmap->nested_region_asid_bitmap) {
#if XNU_MONITOR
        pmap_pages_free(kvtophys((vm_offset_t)(pmap->nested_region_asid_bitmap)), PAGE_SIZE);
#else
        kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
#endif
    }

#if XNU_MONITOR
    if (pmap->ledger) {
        pmap_ledger_release(pmap->ledger);
    }

    pmap_free_pmap(pmap);
#else
    zfree(pmap_zone, pmap);
#endif
}

void
pmap_destroy(
    pmap_t pmap)
{
    ledger_t ledger;

    PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);

    ledger = pmap->ledger;

#if XNU_MONITOR
    pmap_destroy_ppl(pmap);

    pmap_check_ledger_fields(ledger);
#else
    pmap_destroy_internal(pmap);
#endif

    ledger_dereference(ledger);

    PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
}
/*
 *    Add a reference to the specified pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_reference_internal(
    pmap_t pmap)
{
    if (pmap != PMAP_NULL) {
        VALIDATE_PMAP(pmap);
        os_atomic_inc(&pmap->ref_count, relaxed);
    }
}

void
pmap_reference(
    pmap_t pmap)
{
#if XNU_MONITOR
    pmap_reference_ppl(pmap);
#else
    pmap_reference_internal(pmap);
#endif
}
static tt_entry_t *
pmap_tt1_allocate(
    pmap_t pmap,
    vm_size_t size,
    unsigned option)
{
    tt_entry_t *tt1 = NULL;
    tt_free_entry_t *tt1_free;
    pmap_paddr_t pa;
    vm_address_t va;
    vm_address_t va_end;
    kern_return_t ret;

    pmap_simple_lock(&tt1_lock);
    if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
        free_page_size_tt_count--;
        tt1 = (tt_entry_t *)free_page_size_tt_list;
        free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
    } else if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
        free_two_page_size_tt_count--;
        tt1 = (tt_entry_t *)free_two_page_size_tt_list;
        free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
    } else if ((size < PAGE_SIZE) && (free_tt_count != 0)) {
        free_tt_count--;
        tt1 = (tt_entry_t *)free_tt_list;
        free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
    }

    pmap_simple_unlock(&tt1_lock);

    if (tt1 != NULL) {
        pmap_tt_ledger_credit(pmap, size);
        return (tt_entry_t *)tt1;
    }

    ret = pmap_pages_alloc(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));

    if (ret == KERN_RESOURCE_SHORTAGE) {
        return (tt_entry_t *)0;
    }

    if (size < PAGE_SIZE) {
        va = phystokv(pa) + size;
        tt_free_entry_t *local_free_list = (tt_free_entry_t *)va;
        tt_free_entry_t *next_free = NULL;
        for (va_end = phystokv(pa) + PAGE_SIZE; va < va_end; va = va + size) {
            tt1_free = (tt_free_entry_t *)va;
            tt1_free->next = next_free;
            next_free = tt1_free;
        }
        pmap_simple_lock(&tt1_lock);
        local_free_list->next = free_tt_list;
        free_tt_list = next_free;
        free_tt_count += ((PAGE_SIZE / size) - 1);
        if (free_tt_count > free_tt_max) {
            free_tt_max = free_tt_count;
        }
        pmap_simple_unlock(&tt1_lock);
    }

    /* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
     * Depending on the device, this can vary between 512b and 16K. */
    OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
    OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
    pmap_tt_ledger_credit(pmap, size);

    return (tt_entry_t *) phystokv(pa);
}
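/*
 * Accounting example (annotation, not part of the original source): with a 4K
 * kernel page size and a 512-byte PMAP_ROOT_ALLOC_SIZE, allocating one root
 * table of size 512 adds 1 to the tteroot counters, while allocating a whole
 * page adds PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE = 8 units, matching the
 * "units of PMAP_ROOT_ALLOC_SIZE" convention described above.
 */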
static void
pmap_tt1_deallocate(
    pmap_t pmap,
    tt_entry_t *tt,
    vm_size_t size,
    unsigned option)
{
    tt_free_entry_t *tt_entry;

    tt_entry = (tt_free_entry_t *)tt;

    pmap_simple_lock(&tt1_lock);

    if (size < PAGE_SIZE) {
        free_tt_count++;
        if (free_tt_count > free_tt_max) {
            free_tt_max = free_tt_count;
        }
        tt_entry->next = free_tt_list;
        free_tt_list = tt_entry;
    }

    if (size == PAGE_SIZE) {
        free_page_size_tt_count++;
        if (free_page_size_tt_count > free_page_size_tt_max) {
            free_page_size_tt_max = free_page_size_tt_count;
        }
        tt_entry->next = free_page_size_tt_list;
        free_page_size_tt_list = tt_entry;
    }

    if (size == 2 * PAGE_SIZE) {
        free_two_page_size_tt_count++;
        if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
            free_two_page_size_tt_max = free_two_page_size_tt_count;
        }
        tt_entry->next = free_two_page_size_tt_list;
        free_two_page_size_tt_list = tt_entry;
    }

    if (option & PMAP_TT_DEALLOCATE_NOBLOCK) {
        pmap_simple_unlock(&tt1_lock);
        pmap_tt_ledger_debit(pmap, size);
        return;
    }

    while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
        free_page_size_tt_count--;
        tt = (tt_entry_t *)free_page_size_tt_list;
        free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;

        pmap_simple_unlock(&tt1_lock);

        pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);

        OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));

        pmap_simple_lock(&tt1_lock);
    }

    while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
        free_two_page_size_tt_count--;
        tt = (tt_entry_t *)free_two_page_size_tt_list;
        free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;

        pmap_simple_unlock(&tt1_lock);

        pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);

        OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));

        pmap_simple_lock(&tt1_lock);
    }
    pmap_simple_unlock(&tt1_lock);
    pmap_tt_ledger_debit(pmap, size);
}
static kern_return_t
pmap_tt_allocate(
    pmap_t pmap,
    tt_entry_t **ttp,
    unsigned int level,
    unsigned int options)
{
    pmap_paddr_t pa;
    *ttp = NULL;

    PMAP_LOCK(pmap);
    if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
        tt_free_entry_t *tt_free_next;

        tt_free_next = ((tt_free_entry_t *)pmap->tt_entry_free)->next;
        *ttp = (tt_entry_t *)pmap->tt_entry_free;
        pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
    }
    PMAP_UNLOCK(pmap);

    if (*ttp == NULL) {
        pt_desc_t *ptdp;

        /*
         *  Allocate a VM page for the level x page table entries.
         */
        while (pmap_pages_alloc(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
            if (options & PMAP_OPTIONS_NOWAIT) {
                return KERN_RESOURCE_SHORTAGE;
            }
            VM_PAGE_WAIT();
        }

        while ((ptdp = ptd_alloc(pmap, false)) == NULL) {
            if (options & PMAP_OPTIONS_NOWAIT) {
                pmap_pages_free(pa, PAGE_SIZE);
                return KERN_RESOURCE_SHORTAGE;
            }
            VM_PAGE_WAIT();
        }

        if (level < PMAP_TT_MAX_LEVEL) {
            OSAddAtomic64(1, &alloc_ttepages_count);
            OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
        } else {
            OSAddAtomic64(1, &alloc_ptepages_count);
            OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
        }

        pmap_tt_ledger_credit(pmap, PAGE_SIZE);

        PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);

        pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);

        __unreachable_ok_push
        if (TEST_PAGE_RATIO_4) {
            vm_address_t va;
            vm_address_t va_end;

            PMAP_LOCK(pmap);

            for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + ARM_PGBYTES; va < va_end; va = va + ARM_PGBYTES) {
                ((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
                pmap->tt_entry_free = (tt_entry_t *)va;
            }
            PMAP_UNLOCK(pmap);
        }
        __unreachable_ok_pop

        *ttp = (tt_entry_t *)phystokv(pa);
    }

    return KERN_SUCCESS;
}
static void
pmap_tt_deallocate(
    pmap_t pmap,
    tt_entry_t *ttp,
    unsigned int level)
{
    pt_desc_t *ptdp;
    unsigned pt_acc_cnt;
    unsigned i, max_pt_index = PAGE_RATIO;
    vm_offset_t free_page = 0;

    PMAP_LOCK(pmap);

    ptdp = ptep_get_ptd((vm_offset_t)ttp);

    ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].va = (vm_offset_t)-1;

    if ((level < PMAP_TT_MAX_LEVEL) && (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt == PT_DESC_REFCOUNT)) {
        ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
    }

    if (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt != 0) {
        panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt);
    }

    ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;

    for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
        pt_acc_cnt += ptdp->ptd_info[i].refcnt;
    }

    if (pt_acc_cnt == 0) {
        tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
        unsigned pt_free_entry_cnt = 1;

        while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
            tt_free_entry_t *tt_free_list_next;

            tt_free_list_next = tt_free_list->next;
            if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
                pt_free_entry_cnt++;
            }
            tt_free_list = tt_free_list_next;
        }
        if (pt_free_entry_cnt == max_pt_index) {
            tt_free_entry_t *tt_free_list_cur;

            free_page = (vm_offset_t)ttp & ~PAGE_MASK;
            tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
            tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;

            while (tt_free_list_cur) {
                tt_free_entry_t *tt_free_list_next;

                tt_free_list_next = tt_free_list_cur->next;
                if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
                    tt_free_list->next = tt_free_list_next->next;
                } else {
                    tt_free_list = tt_free_list_next;
                }
                tt_free_list_cur = tt_free_list_next;
            }
        } else {
            ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
            pmap->tt_entry_free = ttp;
        }
    } else {
        ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
        pmap->tt_entry_free = ttp;
    }

    PMAP_UNLOCK(pmap);

    if (free_page != 0) {
        ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
        *(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
        pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
        if (level < PMAP_TT_MAX_LEVEL) {
            OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
        } else {
            OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
        }
        PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
        pmap_tt_ledger_debit(pmap, PAGE_SIZE);
    }
}
static void
pmap_tte_remove(
    pmap_t pmap,
    tt_entry_t *ttep,
    unsigned int level)
{
    tt_entry_t tte = *ttep;

    if (tte == 0) {
        panic("pmap_tte_deallocate(): null tt_entry ttep==%p\n", ttep);
    }

    if (((level + 1) == PMAP_TT_MAX_LEVEL) && (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt != 0)) {
        panic("pmap_tte_deallocate(): pmap=%p ttep=%p ptd=%p refcnt=0x%x \n", pmap, ttep,
            tte_get_ptd(tte), (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt));
    }

#if (__ARM_VMSA__ == 7)
    {
        tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
        unsigned i;

        for (i = 0; i < 4; i++, ttep_4M++) {
            *ttep_4M = (tt_entry_t) 0;
        }
        FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
    }
#else
    *ttep = (tt_entry_t) 0;
    FLUSH_PTE_STRONG(ttep);
#endif
}
static void
pmap_tte_deallocate(
    pmap_t pmap,
    tt_entry_t *ttep,
    unsigned int level)
{
    pmap_paddr_t pa;
    tt_entry_t tte;

    PMAP_ASSERT_LOCKED(pmap);

    tte = *ttep;

#if MACH_ASSERT
    if (tte_get_ptd(tte)->pmap != pmap) {
        panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
            tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
    }
#endif

    pmap_tte_remove(pmap, ttep, level);

    if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
#if MACH_ASSERT
        pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~ARM_PGMASK));
        unsigned i;

        for (i = 0; i < (ARM_PGBYTES / sizeof(*pte_p)); i++, pte_p++) {
            if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
                panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
                    (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
            } else if (((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT) {
                panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
                    (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
            }
        }
#endif
        PMAP_UNLOCK(pmap);

        /* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
         * aligned on 1K boundaries.  We clear the surrounding "chunk" of 4 TTEs above. */
        pa = tte_to_pa(tte) & ~ARM_PGMASK;
        pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
        PMAP_LOCK(pmap);
    }
}
/*
 *    Remove a range of hardware page-table entries.
 *    The entries given are the first (inclusive)
 *    and last (exclusive) entries for the VM pages.
 *    The virtual address is the va for the first pte.
 *
 *    The pmap must be locked.
 *    If the pmap is not the kernel pmap, the range must lie
 *    entirely within one pte-page.  This is NOT checked.
 *    Assumes that the pte-page exists.
 *
 *    Returns the number of PTEs changed, and sets *rmv_cnt
 *    to the number of SPTEs changed.
 */
static int
pmap_remove_range(
    pmap_t pmap,
    vm_map_address_t va,
    pt_entry_t *bpte,
    pt_entry_t *epte,
    uint32_t *rmv_cnt)
{
    bool need_strong_sync = false;
    int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
        &need_strong_sync, PMAP_OPTIONS_REMOVE);
    if (num_changed > 0) {
        PMAP_UPDATE_TLBS(pmap, va, va + (PAGE_SIZE * (epte - bpte)), need_strong_sync);
    }
    return num_changed;
}
#ifdef PVH_FLAG_EXEC

/*
 * Update the access protection bits of the physical aperture mapping for a page.
 * This is useful, for example, in guaranteeing that a verified executable page
 * has no writable mappings anywhere in the system, including the physical
 * aperture.  flush_tlb_async can be set to true to avoid unnecessary TLB
 * synchronization overhead in cases where the call to this function is
 * guaranteed to be followed by other TLB operations.
 */
static void
pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
{
#if __ARM_PTE_PHYSMAP__
    ASSERT_PVH_LOCKED(pai);
    vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
    pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);

    pt_entry_t tmplate = *pte_p;
    if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
        return;
    }
    tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
#if (__ARM_VMSA__ > 7)
    if (tmplate & ARM_PTE_HINT_MASK) {
        panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
            __func__, pte_p, (void *)kva, tmplate);
    }
#endif
    WRITE_PTE_STRONG(pte_p, tmplate);
    flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
    if (!flush_tlb_async) {
        sync_tlb_flush();
    }
#endif
}

#endif /* defined(PVH_FLAG_EXEC) */
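/*
 * Usage note (annotation, not part of the original source): the removal path
 * below restores the physical aperture mapping to AP_RWNA once the last
 * mapping of an executable page goes away; the corresponding mapping paths
 * are expected to drop the aperture mapping to read-only when a page first
 * gains an executable mapping.
 */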
static void
pmap_remove_pv(
    pmap_t pmap,
    pt_entry_t *cpte,
    int pai,
    int *num_internal,
    int *num_alt_internal,
    int *num_reusable,
    int *num_external)
{
    pv_entry_t **pv_h, **pve_pp;
    pv_entry_t *pve_p;

    ASSERT_PVH_LOCKED(pai);
    pv_h = pai_to_pvh(pai);
    vm_offset_t pvh_flags = pvh_get_flags(pv_h);

#if XNU_MONITOR
    if (pvh_flags & PVH_FLAG_LOCKDOWN) {
        panic("%d is locked down (%#lx), cannot remove", pai, pvh_flags);
    }
#endif

    if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
        if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0)) {
            panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
        }
        if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
            assert(IS_INTERNAL_PAGE(pai));
            (*num_internal)++;
            (*num_alt_internal)++;
            CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
        } else if (IS_INTERNAL_PAGE(pai)) {
            if (IS_REUSABLE_PAGE(pai)) {
                (*num_reusable)++;
            } else {
                (*num_internal)++;
            }
        } else {
            (*num_external)++;
        }
        pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
    } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
        pve_pp = pv_h;
        pve_p = pvh_list(pv_h);

        while (pve_p != PV_ENTRY_NULL &&
            (pve_get_ptep(pve_p) != cpte)) {
            pve_pp = pve_link_field(pve_p);
            pve_p = PVE_NEXT_PTR(pve_next(pve_p));
        }

        if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0)) {
            panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
        }

#if MACH_ASSERT
        if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
            pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
            while (check_pve_p != PV_ENTRY_NULL) {
                if (pve_get_ptep(check_pve_p) == cpte) {
                    panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
                        __func__, cpte, pmap, pv_h, pve_p, pai);
                }
                check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
            }
        }
#endif

        if (IS_ALTACCT_PAGE(pai, pve_p)) {
            assert(IS_INTERNAL_PAGE(pai));
            (*num_internal)++;
            (*num_alt_internal)++;
            CLR_ALTACCT_PAGE(pai, pve_p);
        } else if (IS_INTERNAL_PAGE(pai)) {
            if (IS_REUSABLE_PAGE(pai)) {
                (*num_reusable)++;
            } else {
                (*num_internal)++;
            }
        } else {
            (*num_external)++;
        }

        pvh_remove(pv_h, pve_pp, pve_p);
        pv_free(pve_p);
        if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
            pvh_set_flags(pv_h, pvh_flags);
        }
    } else {
        panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
            __func__, *pv_h, cpte, pmap, pv_h, pai);
    }

#ifdef PVH_FLAG_EXEC
    if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
        pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
    }
#endif
}
MARK_AS_PMAP_TEXT static int
pmap_remove_range_options(
    pmap_t pmap,
    vm_map_address_t va,
    pt_entry_t *bpte,
    pt_entry_t *epte,
    uint32_t *rmv_cnt,
    bool *need_strong_sync __unused,
    int options)
{
    pt_entry_t *cpte;
    pmap_paddr_t pa;
    int num_removed, num_unwired;
    int num_pte_changed;
    int pai = 0;
    int num_external, num_internal, num_reusable;
    int num_alt_internal;
    uint64_t num_compressed, num_alt_compressed;

    PMAP_ASSERT_LOCKED(pmap);

    /* Remove range and update statistics */
    num_removed = 0;
    num_unwired = 0;
    num_pte_changed = 0;
    num_external = 0;
    num_internal = 0;
    num_reusable = 0;
    num_compressed = 0;
    num_alt_internal = 0;
    num_alt_compressed = 0;

    for (cpte = bpte; cpte < epte;
        cpte += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
        pt_entry_t spte;
        boolean_t managed = FALSE;

        spte = *cpte;

#if CONFIG_PGTRACE
        if (pgtrace_enabled) {
            pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
        }
#endif

        while (!managed) {
            if (pmap != kernel_pmap &&
                (options & PMAP_OPTIONS_REMOVE) &&
                (ARM_PTE_IS_COMPRESSED(spte, cpte))) {
                /*
                 * "pmap" must be locked at this point,
                 * so this should not race with another
                 * pmap_remove_range() or pmap_enter().
                 */

                /* one less "compressed"... */
                num_compressed++;
                if (spte & ARM_PTE_COMPRESSED_ALT) {
                    /* ... but it used to be "ALTACCT" */
                    num_alt_compressed++;
                }

                WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
                /*
                 * "refcnt" also accounts for
                 * our "compressed" markers,
                 * so let's update it here.
                 */
                if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
                    panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
                }
                spte = *cpte;
            }
            /*
             * It may be possible for the pte to transition from managed
             * to unmanaged in this timeframe; for now, elide the assert.
             * We should break out as a consequence of checking pa_valid.
             */
            //assert(!ARM_PTE_IS_COMPRESSED(spte));
            pa = pte_to_pa(spte);
            if (!pa_valid(pa)) {
#if XNU_MONITOR || HAS_MILD_DSB
                unsigned int cacheattr = pmap_cache_attributes((ppnum_t)atop(pa));
#endif
#if XNU_MONITOR
                if (!pmap_ppl_disable && (cacheattr & PP_ATTR_MONITOR)) {
                    panic("%s: attempt to remove mapping of PPL-protected I/O address 0x%llx", __func__, (uint64_t)pa);
                }
#endif
                break;
            }
            pai = (int)pa_index(pa);
            LOCK_PVH(pai);
            spte = *cpte;
            pa = pte_to_pa(spte);
            if (pai == (int)pa_index(pa)) {
                managed = TRUE;
                break; // Leave pai locked as we will unlock it after we free the PV entry
            }
            UNLOCK_PVH(pai);
        }

        if (ARM_PTE_IS_COMPRESSED(*cpte, cpte)) {
            /*
             * There used to be a valid mapping here but it
             * has already been removed when the page was
             * sent to the VM compressor, so nothing left to
             * remove now...
             */
            continue;
        }

        /* remove the translation, do not flush the TLB */
        if (*cpte != ARM_PTE_TYPE_FAULT) {
            assertf(!ARM_PTE_IS_COMPRESSED(*cpte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
            assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
#if MACH_ASSERT
            if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
                panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
                    cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
            }
#endif
            WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
            num_pte_changed++;
        }

        if ((spte != ARM_PTE_TYPE_FAULT) &&
            (pmap != kernel_pmap)) {
            assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
            assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
            if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
                panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
            }
            if (rmv_cnt) {
                (*rmv_cnt)++;
            }
        }

        if (pte_is_wired(spte)) {
            pte_set_wired(cpte, 0);
            num_unwired++;
        }
        /*
         * if not managed, we're done
         */
        if (!managed) {
            continue;
        }
        /*
         * find and remove the mapping from the chain for this
         * physical address.
         */

        pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);

        num_removed++;
        UNLOCK_PVH(pai);
    }

    /*
     *    Update the counts
     */
    OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
    pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));

    if (pmap != kernel_pmap) {
        /* sanity checks... */
#if MACH_ASSERT
        if (pmap->stats.internal < num_internal) {
            if ((!pmap_stats_assert ||
                !pmap->pmap_stats_assert)) {
                printf("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
                    pmap->pmap_pid,
                    pmap->pmap_procname,
                    pmap,
                    (uint64_t) va,
                    bpte,
                    epte,
                    options,
                    num_internal,
                    num_removed,
                    num_unwired,
                    num_external,
                    num_reusable,
                    num_compressed,
                    num_alt_internal,
                    num_alt_compressed,
                    num_pte_changed,
                    pmap->stats.internal,
                    pmap->stats.reusable);
            } else {
                panic("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
                    pmap->pmap_pid,
                    pmap->pmap_procname,
                    pmap,
                    (uint64_t) va,
                    bpte,
                    epte,
                    options,
                    num_internal,
                    num_removed,
                    num_unwired,
                    num_external,
                    num_reusable,
                    num_compressed,
                    num_alt_internal,
                    num_alt_compressed,
                    num_pte_changed,
                    pmap->stats.internal,
                    pmap->stats.reusable);
            }
        }
#endif /* MACH_ASSERT */
        PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
            pmap,
            "pmap=%p num_external=%d stats.external=%d",
            pmap, num_external, pmap->stats.external);
        PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
            pmap,
            "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
            pmap,
            num_internal, pmap->stats.internal,
            num_reusable, pmap->stats.reusable);
        PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
            pmap,
            "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
            pmap,
            num_internal, pmap->stats.internal,
            num_reusable, pmap->stats.reusable);
        PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
            pmap,
            "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
            pmap, num_compressed, num_alt_compressed,
            pmap->stats.compressed);

        /* update pmap stats... */
        OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
        if (num_external) {
            OSAddAtomic(-num_external, &pmap->stats.external);
        }
        if (num_internal) {
            OSAddAtomic(-num_internal, &pmap->stats.internal);
        }
        if (num_reusable) {
            OSAddAtomic(-num_reusable, &pmap->stats.reusable);
        }
        if (num_compressed) {
            OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
        }
        /* ... and ledgers */
        pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
        pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
        pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
        pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
        pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
        /* make needed adjustments to phys_footprint */
        pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
            machine_ptob((num_internal -
            num_alt_internal +
            num_compressed -
            num_alt_compressed)));
    }

    /* flush the ptable entries we have written */
    if (num_pte_changed > 0) {
        FLUSH_PTE_RANGE_STRONG(bpte, epte);
    }

    return num_pte_changed;
}
/*
 *    Remove the given range of addresses
 *    from the specified map.
 *
 *    It is assumed that the start and end are properly
 *    rounded to the hardware page size.
 */
void
pmap_remove(
    pmap_t pmap,
    vm_map_address_t start,
    vm_map_address_t end)
{
    pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
}
MARK_AS_PMAP_TEXT static int
pmap_remove_options_internal(
    pmap_t pmap,
    vm_map_address_t start,
    vm_map_address_t end,
    int options)
{
    int remove_count = 0;
    pt_entry_t *bpte, *epte;
    pt_entry_t *pte_p;
    tt_entry_t *tte_p;
    uint32_t rmv_spte = 0;
    bool need_strong_sync = false;
    bool flush_tte = false;

    if (__improbable(end < start)) {
        panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
    }

    VALIDATE_PMAP(pmap);

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    PMAP_LOCK(pmap);

    tte_p = pmap_tte(pmap, start);

    if (tte_p == (tt_entry_t *) NULL) {
        goto done;
    }

    if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
        pte_p = (pt_entry_t *) ttetokv(*tte_p);
        bpte = &pte_p[ptenum(start)];
        epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));

        remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
            &rmv_spte, &need_strong_sync, options);

        if (rmv_spte && (ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
            (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
            pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
            flush_tte = true;
        }
    }

done:
    PMAP_UNLOCK(pmap);

    if (remove_count > 0) {
        PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
    } else if (flush_tte > 0) {
        pmap_get_pt_ops(pmap)->flush_tlb_tte_async(start, pmap);
        sync_tlb_flush();
    }
    return remove_count;
}
void
pmap_remove_options(
    pmap_t pmap,
    vm_map_address_t start,
    vm_map_address_t end,
    int options)
{
    int remove_count = 0;
    vm_map_address_t va;

    if (pmap == PMAP_NULL) {
        return;
    }

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
        VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
        VM_KERNEL_ADDRHIDE(end));

#if MACH_ASSERT
    if ((start | end) & PAGE_MASK) {
        panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
            pmap, (uint64_t)start, (uint64_t)end);
    }
    if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
        panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
            pmap, (uint64_t)start, (uint64_t)end);
    }
#endif

    /*
     *      Invalidate the translation buffer first
     */
    va = start;
    while (va < end) {
        vm_map_address_t l;

        l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
        if (l > end) {
            l = end;
        }

#if XNU_MONITOR
        remove_count += pmap_remove_options_ppl(pmap, va, l, options);

        pmap_ledger_check_balance(pmap);
#else
        remove_count += pmap_remove_options_internal(pmap, va, l, options);
#endif

        va = l;
    }

    PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
}
/*
 *    Remove phys addr if mapped in specified map
 */
void
pmap_remove_some_phys(
    __unused pmap_t map,
    __unused ppnum_t pn)
{
    /* Implement to support working set code */
}
6570 #if __ARM_USER_PROTECT__
6571 if (pmap
->tte_index_max
== NTTES
) {
6572 thread
->machine
.uptw_ttc
= 2;
6574 thread
->machine
.uptw_ttc
= 1;
6576 thread
->machine
.uptw_ttb
= ((unsigned int) pmap
->ttep
) | TTBR_SETUP
;
6577 thread
->machine
.asid
= pmap
->hw_asid
;
6582 pmap_flush_core_tlb_asid(pmap_t pmap
)
6584 #if (__ARM_VMSA__ == 7)
6585 flush_core_tlb_asid(pmap
->hw_asid
);
6587 flush_core_tlb_asid(((uint64_t) pmap
->hw_asid
) << TLBI_ASID_SHIFT
);
MARK_AS_PMAP_TEXT static void
pmap_switch_internal(
    pmap_t pmap)
{
    VALIDATE_PMAP(pmap);
    pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
    uint16_t asid_index = pmap->hw_asid;
    boolean_t do_asid_flush = FALSE;

#if __ARM_KERNEL_PROTECT__
    asid_index >>= 1;
#endif

#if (__ARM_VMSA__ > 7)
    pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
#endif

#if MAX_ASID > MAX_HW_ASID
    if (asid_index > 0) {
        asid_index -= 1;
        assert(asid_index < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));

        /* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
        uint8_t asid_high_bits = pmap->sw_asid;
        uint8_t last_asid_high_bits = cpu_data_ptr->cpu_asid_high_bits[asid_index];

        if (asid_high_bits != last_asid_high_bits) {
            /*
             * If the virtual ASID of the new pmap does not match the virtual ASID
             * last seen on this CPU for the physical ASID (that was a mouthful),
             * then this switch runs the risk of aliasing.  We need to flush the
             * TLB for this physical ASID in this case.
             */
            cpu_data_ptr->cpu_asid_high_bits[asid_index] = asid_high_bits;
            do_asid_flush = TRUE;
        }
    }
#endif /* MAX_ASID > MAX_HW_ASID */
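    /*
     * Worked example (annotation, not part of the original source): with 8-bit
     * hardware ASIDs and a larger software ASID space, two pmaps can share
     * hw_asid while differing in sw_asid.  If this CPU last ran pmap A
     * (sw_asid 1) under hardware ASID 42 and now switches to pmap B
     * (sw_asid 2) that also carries hardware ASID 42, the comparison above
     * detects the mismatch and sets do_asid_flush so the stale ASID-42
     * entries are flushed before reuse.
     */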
6631 pmap_switch_user_ttb_internal(pmap
);
6633 #if (__ARM_VMSA__ > 7)
6634 /* If we're switching to a different nested pmap (i.e. shared region), we'll need
6635 * to flush the userspace mappings for that region. Those mappings are global
6636 * and will not be protected by the ASID. It should also be cheaper to flush the
6637 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
6638 if ((pmap
!= kernel_pmap
) && (last_nested_pmap
!= NULL
) && (pmap
->nested_pmap
!= last_nested_pmap
)) {
6642 if (do_asid_flush
) {
6643 pmap_flush_core_tlb_asid(pmap
);
6644 #if DEVELOPMENT || DEBUG
6645 os_atomic_inc(&pmap_asid_flushes
, relaxed
);
6654 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH
) | DBG_FUNC_START
, VM_KERNEL_ADDRHIDE(pmap
), PMAP_VASID(pmap
), pmap
->hw_asid
);
6656 pmap_switch_ppl(pmap
);
6658 pmap_switch_internal(pmap
);
6660 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH
) | DBG_FUNC_END
);
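/*
 * Illustrative sketch (not part of the original implementation): the
 * MAX_ASID > MAX_HW_ASID path above detects aliasing by comparing the
 * "virtual" high bits of the incoming pmap's software ASID against the
 * high bits last seen on this CPU for the same hardware ASID slot.  A
 * minimal standalone model of that check, with hypothetical names and a
 * single (non-per-CPU) table, would look like:
 *
 *	static uint8_t last_high_bits[MAX_HW_ASID];	// per-CPU in the real code
 *
 *	static bool
 *	asid_switch_needs_flush(uint16_t hw_asid, uint8_t sw_asid_high_bits)
 *	{
 *		if (last_high_bits[hw_asid] != sw_asid_high_bits) {
 *			// Same hardware ASID, different software ASID: stale
 *			// TLB entries tagged with this hardware ASID could
 *			// alias, so they must be invalidated.
 *			last_high_bits[hw_asid] = sw_asid_high_bits;
 *			return true;
 *		}
 *		return false;
 *	}
 */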
void
pmap_page_protect(
	ppnum_t ppnum,
	vm_prot_t prot)
{
	pmap_page_protect_options(ppnum, prot, 0, NULL);
}
6672 * Routine: pmap_page_protect_options
6675 * Lower the permission for all mappings to a given
6678 MARK_AS_PMAP_TEXT
static void
6679 pmap_page_protect_options_internal(
6682 unsigned int options
)
6684 pmap_paddr_t phys
= ptoa(ppnum
);
6686 pv_entry_t
**pve_pp
;
6691 pv_entry_t
*new_pve_p
;
6692 pt_entry_t
*new_pte_p
;
6693 vm_offset_t pvh_flags
;
6697 boolean_t tlb_flush_needed
= FALSE
;
6698 unsigned int pvh_cnt
= 0;
6700 assert(ppnum
!= vm_page_fictitious_addr
);
6702 /* Only work with managed pages. */
6703 if (!pa_valid(phys
)) {
6708 * Determine the new protection.
6712 return; /* nothing to do */
6714 case VM_PROT_READ
| VM_PROT_EXECUTE
:
6722 pai
= (int)pa_index(phys
);
6724 pv_h
= pai_to_pvh(pai
);
6725 pvh_flags
= pvh_get_flags(pv_h
);
6728 if (remove
&& (pvh_flags
& PVH_FLAG_LOCKDOWN
)) {
6729 panic("%d is locked down (%#llx), cannot remove", pai
, pvh_get_flags(pv_h
));
6733 pte_p
= PT_ENTRY_NULL
;
6734 pve_p
= PV_ENTRY_NULL
;
6736 pveh_p
= PV_ENTRY_NULL
;
6737 pvet_p
= PV_ENTRY_NULL
;
6738 new_pve_p
= PV_ENTRY_NULL
;
6739 new_pte_p
= PT_ENTRY_NULL
;
6740 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
6741 pte_p
= pvh_ptep(pv_h
);
6742 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
6743 pve_p
= pvh_list(pv_h
);
6747 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
6748 vm_map_address_t va
;
6751 boolean_t update
= FALSE
;
6753 if (pve_p
!= PV_ENTRY_NULL
) {
6754 pte_p
= pve_get_ptep(pve_p
);
6757 #ifdef PVH_FLAG_IOMMU
6758 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
6760 if (pvh_flags
& PVH_FLAG_LOCKDOWN
) {
6761 panic("pmap_page_protect: ppnum 0x%x locked down, cannot be owned by iommu 0x%llx, pve_p=%p",
6762 ppnum
, (uint64_t)pte_p
& ~PVH_FLAG_IOMMU
, pve_p
);
6766 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
6767 panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
6768 ppnum
, (uint64_t)pte_p
& ~PVH_FLAG_IOMMU
, pve_p
);
6770 if (pve_p
!= PV_ENTRY_NULL
) {
6771 pv_entry_t
*temp_pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
6772 pvh_remove(pv_h
, pve_pp
, pve_p
);
6773 pveh_p
= pvh_list(pv_h
);
6774 pve_next(pve_p
) = new_pve_p
;
6783 goto protect_skip_pve
;
6786 pmap
= ptep_get_pmap(pte_p
);
6787 va
= ptep_get_va(pte_p
);
6789 if (pte_p
== PT_ENTRY_NULL
) {
6790 panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
6791 pmap
, prot
, options
, pv_h
, pveh_p
, pve_p
, (uint64_t)va
, ppnum
);
6792 } else if ((pmap
== NULL
) || (atop(pte_to_pa(*pte_p
)) != ppnum
)) {
6794 if (kern_feature_override(KF_PMAPV_OVRD
) == FALSE
) {
6795 pv_entry_t
*check_pve_p
= pveh_p
;
6796 while (check_pve_p
!= PV_ENTRY_NULL
) {
6797 if ((check_pve_p
!= pve_p
) && (pve_get_ptep(check_pve_p
) == pte_p
)) {
6798 panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
6799 pte_p
, pmap
, prot
, options
, pv_h
, pveh_p
, pve_p
, (uint64_t)*pte_p
, (uint64_t)va
, ppnum
);
6801 check_pve_p
= PVE_NEXT_PTR(pve_next(check_pve_p
));
6805 panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
6806 pte_p
, pmap
, prot
, options
, pv_h
, pveh_p
, pve_p
, (uint64_t)*pte_p
, (uint64_t)va
, ppnum
);
6809 #if DEVELOPMENT || DEBUG
6810 if ((prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !pmap
->nx_enabled
)
6812 if ((prot
& VM_PROT_EXECUTE
))
6814 { set_NX
= FALSE
;} else {
6818 /* Remove the mapping if new protection is NONE */
6820 boolean_t is_altacct
= FALSE
;
6822 if (IS_ALTACCT_PAGE(pai
, pve_p
)) {
6828 if (pte_is_wired(*pte_p
)) {
6829 pte_set_wired(pte_p
, 0);
6830 if (pmap
!= kernel_pmap
) {
6831 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
6832 OSAddAtomic(-1, (SInt32
*) &pmap
->stats
.wired_count
);
6836 if (*pte_p
!= ARM_PTE_TYPE_FAULT
&&
6837 pmap
!= kernel_pmap
&&
6838 (options
& PMAP_OPTIONS_COMPRESSOR
) &&
6839 IS_INTERNAL_PAGE(pai
)) {
6840 assert(!ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
));
6841 /* mark this PTE as having been "compressed" */
6842 tmplate
= ARM_PTE_COMPRESSED
;
6844 tmplate
|= ARM_PTE_COMPRESSED_ALT
;
6848 tmplate
= ARM_PTE_TYPE_FAULT
;
6851 if ((*pte_p
!= ARM_PTE_TYPE_FAULT
) &&
6852 tmplate
== ARM_PTE_TYPE_FAULT
&&
6853 (pmap
!= kernel_pmap
)) {
6854 if (OSAddAtomic16(-1, (SInt16
*) &(ptep_get_ptd(pte_p
)->ptd_info
[ARM_PT_DESC_INDEX(pte_p
)].refcnt
)) <= 0) {
6855 panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p
), pte_p
);
6859 if (*pte_p
!= tmplate
) {
6860 WRITE_PTE_STRONG(pte_p
, tmplate
);
6864 pmap_ledger_debit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
6865 OSAddAtomic(-1, (SInt32
*) &pmap
->stats
.resident_count
);
6869 * We only ever compress internal pages.
6871 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
6872 assert(IS_INTERNAL_PAGE(pai
));
6876 if (pmap
!= kernel_pmap
) {
6877 if (IS_REUSABLE_PAGE(pai
) &&
6878 IS_INTERNAL_PAGE(pai
) &&
6880 PMAP_STATS_ASSERTF(pmap
->stats
.reusable
> 0, pmap
, "stats.reusable %d", pmap
->stats
.reusable
);
6881 OSAddAtomic(-1, &pmap
->stats
.reusable
);
6882 } else if (IS_INTERNAL_PAGE(pai
)) {
6883 PMAP_STATS_ASSERTF(pmap
->stats
.internal
> 0, pmap
, "stats.internal %d", pmap
->stats
.internal
);
6884 OSAddAtomic(-1, &pmap
->stats
.internal
);
6886 PMAP_STATS_ASSERTF(pmap
->stats
.external
> 0, pmap
, "stats.external %d", pmap
->stats
.external
);
6887 OSAddAtomic(-1, &pmap
->stats
.external
);
6889 if ((options
& PMAP_OPTIONS_COMPRESSOR
) &&
6890 IS_INTERNAL_PAGE(pai
)) {
6891 /* adjust "compressed" stats */
6892 OSAddAtomic64(+1, &pmap
->stats
.compressed
);
6893 PMAP_STATS_PEAK(pmap
->stats
.compressed
);
6894 pmap
->stats
.compressed_lifetime
++;
6897 if (IS_ALTACCT_PAGE(pai
, pve_p
)) {
6898 assert(IS_INTERNAL_PAGE(pai
));
6899 pmap_ledger_debit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
6900 pmap_ledger_debit(pmap
, task_ledgers
.alternate_accounting
, PAGE_SIZE
);
6901 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
6902 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, PAGE_SIZE
);
6903 pmap_ledger_credit(pmap
, task_ledgers
.alternate_accounting_compressed
, PAGE_SIZE
);
6907 * Cleanup our marker before
6908 * we free this pv_entry.
6910 CLR_ALTACCT_PAGE(pai
, pve_p
);
6911 } else if (IS_REUSABLE_PAGE(pai
)) {
6912 assert(IS_INTERNAL_PAGE(pai
));
6913 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
6914 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, PAGE_SIZE
);
6915 /* was not in footprint, but is now */
6916 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
6918 } else if (IS_INTERNAL_PAGE(pai
)) {
6919 pmap_ledger_debit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
6922 * Update all stats related to physical footprint, which only
6923 * deals with internal pages.
6925 if (options
& PMAP_OPTIONS_COMPRESSOR
) {
6927 * This removal is only being done so we can send this page to
6928 * the compressor; therefore it mustn't affect total task footprint.
6930 pmap_ledger_credit(pmap
, task_ledgers
.internal_compressed
, PAGE_SIZE
);
6933 * This internal page isn't going to the compressor, so adjust stats to keep
6934 * phys_footprint up to date.
6936 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
6939 /* external page: no impact on ledgers */
6943 if (pve_p
!= PV_ENTRY_NULL
) {
6944 assert(pve_next(pve_p
) == PVE_NEXT_PTR(pve_next(pve_p
)));
6948 const pt_attr_t
*const pt_attr
= pmap_get_pt_attr(pmap
);
6952 if (pmap
== kernel_pmap
) {
6953 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RONA
));
6955 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | pt_attr_leaf_ro(pt_attr
));
6958 pte_set_was_writeable(tmplate
, false);
6960 * While the naive implementation of this would serve to add execute
6961 * permission, this is not how the VM uses this interface, or how
6962 * x86_64 implements it. So ignore requests to add execute permissions.
6965 tmplate
|= pt_attr_leaf_xn(pt_attr
);
6968 #if __APRR_SUPPORTED__
6969 if (__improbable(is_pte_xprr_protected(spte
))) {
6970 panic("pmap_page_protect: modifying an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx ppnum: 0x%x",
6971 pte_p
, pmap
, prot
, options
, pv_h
, pveh_p
, pve_p
, (uint64_t)spte
, (uint64_t)tmplate
, (uint64_t)va
, ppnum
);
6974 if (__improbable(is_pte_xprr_protected(tmplate
))) {
6975 panic("pmap_page_protect: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx ppnum: 0x%x",
6976 pte_p
, pmap
, prot
, options
, pv_h
, pveh_p
, pve_p
, (uint64_t)spte
, (uint64_t)tmplate
, (uint64_t)va
, ppnum
);
6978 #endif /* __APRR_SUPPORTED__*/
6980 if (*pte_p
!= ARM_PTE_TYPE_FAULT
&&
6981 !ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
) &&
6982 *pte_p
!= tmplate
) {
6983 WRITE_PTE_STRONG(pte_p
, tmplate
);
6988 /* Invalidate TLBs for all CPUs using it */
6990 tlb_flush_needed
= TRUE
;
6991 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
, PAGE_SIZE
, pmap
);
6994 #ifdef PVH_FLAG_IOMMU
6997 pte_p
= PT_ENTRY_NULL
;
6999 if (pve_p
!= PV_ENTRY_NULL
) {
7001 assert(pve_next(pve_p
) == PVE_NEXT_PTR(pve_next(pve_p
)));
7003 pve_pp
= pve_link_field(pve_p
);
7004 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
7008 #ifdef PVH_FLAG_EXEC
7009 if (remove
&& (pvh_get_flags(pv_h
) & PVH_FLAG_EXEC
)) {
7010 pmap_set_ptov_ap(pai
, AP_RWNA
, tlb_flush_needed
);
7013 if (tlb_flush_needed
) {
7017 /* if we removed a bunch of entries, take care of them now */
7019 if (new_pve_p
!= PV_ENTRY_NULL
) {
7020 pvh_update_head(pv_h
, new_pve_p
, PVH_TYPE_PVEP
);
7021 pvh_set_flags(pv_h
, pvh_flags
);
7022 } else if (new_pte_p
!= PT_ENTRY_NULL
) {
7023 pvh_update_head(pv_h
, new_pte_p
, PVH_TYPE_PTEP
);
7024 pvh_set_flags(pv_h
, pvh_flags
);
7026 pvh_update_head(pv_h
, PV_ENTRY_NULL
, PVH_TYPE_NULL
);
7032 if (remove
&& (pvet_p
!= PV_ENTRY_NULL
)) {
7033 pv_list_free(pveh_p
, pvet_p
, pvh_cnt
);
void
pmap_page_protect_options(
	ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options,
	__unused void *arg)
{
	pmap_paddr_t phys = ptoa(ppnum);

	assert(ppnum != vm_page_fictitious_addr);

	/* Only work with managed pages. */
	if (!pa_valid(phys)) {
		return;
	}

	/*
	 * Determine the new protection.
	 */
	if (prot == VM_PROT_ALL) {
		return;         /* nothing to do */
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);

#if XNU_MONITOR
	pmap_page_protect_options_ppl(ppnum, prot, options);
#else
	pmap_page_protect_options_internal(ppnum, prot, options);
#endif

	PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
}
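/*
 * Illustrative sketch (not part of the original implementation): a typical
 * VM-layer use of this interface is to downgrade every mapping of a page to
 * read-only before cleaning it, or to sever all mappings entirely by passing
 * VM_PROT_NONE.  Assuming a valid managed page number "pn":
 *
 *	pmap_page_protect_options(pn, VM_PROT_READ, 0, NULL);	// write-protect all mappings
 *	pmap_page_protect_options(pn, VM_PROT_NONE, 0, NULL);	// remove all mappings
 *
 * A request that would only add permission (e.g. VM_PROT_ALL) returns without
 * touching any PTE, as the early-return above shows.
 */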
/*
 * Indicates if the pmap layer enforces some additional restrictions on the
 * given set of protections.
 */
boolean_t
pmap_has_prot_policy(__unused vm_prot_t prot)
{
	return FALSE;
}

/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *	VERY IMPORTANT: Will not increase permissions.
 *	VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
 */
void
pmap_protect(
	pmap_t pmap,
	vm_map_address_t b,
	vm_map_address_t e,
	vm_prot_t prot)
{
	pmap_protect_options(pmap, b, e, prot, 0, NULL);
}
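/*
 * Illustrative sketch (not part of the original implementation): because
 * pmap_protect() may only reduce access, a caller that wants a user range
 * read-only and later writable again must let pmap_enter() re-grant write
 * access rather than "protect upward".  With hypothetical bounds
 * "start"/"end" on a hypothetical "user_pmap":
 *
 *	pmap_protect(user_pmap, start, end, VM_PROT_READ);	// drops write access
 *	pmap_protect(user_pmap, start, end, VM_PROT_READ | VM_PROT_WRITE);
 *		// no-op: this hits the "nothing to do" case; permissions are never raised here
 */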
7097 MARK_AS_PMAP_TEXT
static void
7098 pmap_protect_options_internal(
7100 vm_map_address_t start
,
7101 vm_map_address_t end
,
7103 unsigned int options
,
7104 __unused
void *args
)
7106 const pt_attr_t
*const pt_attr
= pmap_get_pt_attr(pmap
);
7108 pt_entry_t
*bpte_p
, *epte_p
;
7110 boolean_t set_NX
= TRUE
;
7111 #if (__ARM_VMSA__ > 7)
7112 boolean_t set_XO
= FALSE
;
7114 boolean_t should_have_removed
= FALSE
;
7115 bool need_strong_sync
= false;
7117 if (__improbable(end
< start
)) {
7118 panic("%s called with bogus range: %p, %p", __func__
, (void*)start
, (void*)end
);
7121 #if DEVELOPMENT || DEBUG
7122 if (options
& PMAP_OPTIONS_PROTECT_IMMEDIATE
) {
7123 if ((prot
& VM_PROT_ALL
) == VM_PROT_NONE
) {
7124 should_have_removed
= TRUE
;
7129 /* Determine the new protection. */
7131 #if (__ARM_VMSA__ > 7)
7132 case VM_PROT_EXECUTE
:
7137 case VM_PROT_READ
| VM_PROT_EXECUTE
:
7139 case VM_PROT_READ
| VM_PROT_WRITE
:
7141 return; /* nothing to do */
7143 should_have_removed
= TRUE
;
7147 if (should_have_removed
) {
7148 panic("%s: should have been a remove operation, "
7149 "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
7151 pmap
, (void *)start
, (void *)end
, prot
, options
, args
);
7154 #if DEVELOPMENT || DEBUG
7155 if ((prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !pmap
->nx_enabled
)
7157 if ((prot
& VM_PROT_EXECUTE
))
7165 VALIDATE_PMAP(pmap
);
7167 tte_p
= pmap_tte(pmap
, start
);
7169 if ((tte_p
!= (tt_entry_t
*) NULL
) && (*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
7170 bpte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
7171 bpte_p
= &bpte_p
[ptenum(start
)];
7172 epte_p
= bpte_p
+ arm_atop(end
- start
);
7175 for (pte_p
= bpte_p
;
7177 pte_p
+= PAGE_SIZE
/ ARM_PGBYTES
) {
7179 #if DEVELOPMENT || DEBUG
7180 boolean_t force_write
= FALSE
;
7185 if ((spte
== ARM_PTE_TYPE_FAULT
) ||
7186 ARM_PTE_IS_COMPRESSED(spte
, pte_p
)) {
7192 boolean_t managed
= FALSE
;
7196 * It may be possible for the pte to transition from managed
7197 * to unmanaged in this timeframe; for now, elide the assert.
7198 * We should break out as a consequence of checking pa_valid.
7200 // assert(!ARM_PTE_IS_COMPRESSED(spte));
7201 pa
= pte_to_pa(spte
);
7202 if (!pa_valid(pa
)) {
7205 pai
= (int)pa_index(pa
);
7208 pa
= pte_to_pa(spte
);
7209 if (pai
== (int)pa_index(pa
)) {
7211 break; // Leave the PVH locked as we will unlock it after we free the PTE
7216 if ((spte
== ARM_PTE_TYPE_FAULT
) ||
7217 ARM_PTE_IS_COMPRESSED(spte
, pte_p
)) {
7223 if (pmap
== kernel_pmap
) {
7224 #if DEVELOPMENT || DEBUG
7225 if ((options
& PMAP_OPTIONS_PROTECT_IMMEDIATE
) && (prot
& VM_PROT_WRITE
)) {
7227 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RWNA
));
7231 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RONA
));
7234 #if DEVELOPMENT || DEBUG
7235 if ((options
& PMAP_OPTIONS_PROTECT_IMMEDIATE
) && (prot
& VM_PROT_WRITE
)) {
7237 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | pt_attr_leaf_rw(pt_attr
));
7241 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | pt_attr_leaf_ro(pt_attr
));
7246 * XXX Removing "NX" would
7247 * grant "execute" access
7248 * immediately, bypassing any
7249 * checks VM might want to do
7250 * in its soft fault path.
7251 * pmap_protect() and co. are
7252 * not allowed to increase
7253 * access permissions.
7256 tmplate
|= pt_attr_leaf_xn(pt_attr
);
7258 #if (__ARM_VMSA__ > 7)
7259 if (pmap
== kernel_pmap
) {
7260 /* do NOT clear "PNX"! */
7261 tmplate
|= ARM_PTE_NX
;
7263 /* do NOT clear "NX"! */
7264 tmplate
|= pt_attr_leaf_x(pt_attr
);
7266 tmplate
&= ~ARM_PTE_APMASK
;
7267 tmplate
|= pt_attr_leaf_rona(pt_attr
);
7273 #if DEVELOPMENT || DEBUG
7276 * TODO: Run CS/Monitor checks here.
7280 * We are marking the page as writable,
7281 * so we consider it to be modified and
7284 pa_set_bits(pa
, PP_ATTR_REFERENCED
| PP_ATTR_MODIFIED
);
7285 tmplate
|= ARM_PTE_AF
;
7287 if (IS_REFFAULT_PAGE(pai
)) {
7288 CLR_REFFAULT_PAGE(pai
);
7291 if (IS_MODFAULT_PAGE(pai
)) {
7292 CLR_MODFAULT_PAGE(pai
);
7295 } else if (options
& PMAP_OPTIONS_PROTECT_IMMEDIATE
) {
7297 * An immediate request for anything other than
7298 * write should still mark the page as
7299 * referenced if managed.
7302 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
7303 tmplate
|= ARM_PTE_AF
;
7305 if (IS_REFFAULT_PAGE(pai
)) {
7306 CLR_REFFAULT_PAGE(pai
);
7312 /* We do not expect to write fast fault the entry. */
7313 pte_set_was_writeable(tmplate
, false);
7315 #if __APRR_SUPPORTED__
7316 if (__improbable(is_pte_xprr_protected(spte
) && (pte_to_xprr_perm(spte
) != XPRR_USER_JIT_PERM
)
7317 && (pte_to_xprr_perm(spte
) != XPRR_USER_XO_PERM
))) {
7318 /* Only test for PPL protection here, User-JIT mappings may be mutated by this function. */
7319 panic("%s: modifying a PPL mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
7320 __func__
, pte_p
, pmap
, prot
, options
, (uint64_t)spte
, (uint64_t)tmplate
);
7323 if (__improbable(is_pte_xprr_protected(tmplate
) && (pte_to_xprr_perm(tmplate
) != XPRR_USER_XO_PERM
))) {
7324 panic("%s: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
7325 __func__
, pte_p
, pmap
, prot
, options
, (uint64_t)spte
, (uint64_t)tmplate
);
7327 #endif /* __APRR_SUPPORTED__*/
7328 WRITE_PTE_FAST(pte_p
, tmplate
);
7331 ASSERT_PVH_LOCKED(pai
);
7335 FLUSH_PTE_RANGE_STRONG(bpte_p
, epte_p
);
7336 PMAP_UPDATE_TLBS(pmap
, start
, end
, need_strong_sync
);
void
pmap_protect_options(
	pmap_t pmap,
	vm_map_address_t b,
	vm_map_address_t e,
	vm_prot_t prot,
	unsigned int options,
	__unused void *args)
{
	vm_map_address_t l, beg;

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	if ((b | e) & PAGE_MASK) {
		panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
		    pmap, (uint64_t)b, (uint64_t)e);
	}

#if DEVELOPMENT || DEBUG
	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
			pmap_remove_options(pmap, b, e, options);
			return;
		}
	} else
#endif
	{
		/* Determine the new protection. */
		switch (prot) {
		case VM_PROT_EXECUTE:
		case VM_PROT_READ:
		case VM_PROT_READ | VM_PROT_EXECUTE:
			break;
		case VM_PROT_READ | VM_PROT_WRITE:
		case VM_PROT_ALL:
			return;         /* nothing to do */
		default:
			pmap_remove_options(pmap, b, e, options);
			return;
		}
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
	    VM_KERNEL_ADDRHIDE(e));

	beg = b;

	while (beg < e) {
		l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));

		if (l > e) {
			l = e;
		}

#if XNU_MONITOR
		pmap_protect_options_ppl(pmap, beg, l, prot, options, args);
#else
		pmap_protect_options_internal(pmap, beg, l, prot, options, args);
#endif

		beg = l;
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
}
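/*
 * Illustrative sketch (not part of the original implementation): the loop
 * above carves [b, e) into chunks that never cross a twig (next-to-last
 * level) table boundary, so each *_internal call only ever walks one leaf
 * table.  With a hypothetical 32MB twig size (0x2000000, offmask 0x1ffffff):
 *
 *	l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
 *	// request: [0x00104000, 0x02400000)
 *	// pass 1: beg = 0x00104000, l = 0x02000000  (rounded up to the next twig boundary)
 *	// pass 2: beg = 0x02000000, l = 0x04000000  -> clamped down to e = 0x02400000
 */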
/* Map a (possibly) autogenned block */
kern_return_t
pmap_map_block(
	pmap_t pmap,
	addr64_t va,
	ppnum_t pa,
	uint32_t size,
	vm_prot_t prot,
	int attr,
	__unused unsigned int flags)
{
	kern_return_t kr;
	addr64_t original_va = va;
	uint32_t page;

	for (page = 0; page < size; page++) {
		kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);

		if (kr != KERN_SUCCESS) {
			/*
			 * This will panic for now, as it is unclear that
			 * removing the mappings is correct.
			 */
			panic("%s: failed pmap_enter, "
			    "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
			    __FUNCTION__,
			    pmap, va, pa, size, prot, flags);

			pmap_remove(pmap, original_va, va - original_va);
			return kr;
		}

		va += PAGE_SIZE;
		pa++;
	}

	return KERN_SUCCESS;
}
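/*
 * Illustrative sketch (not part of the original implementation): mapping a
 * physically contiguous block.  "size" is in pages, and both the VA and the
 * physical page number advance one page per iteration, so a hypothetical
 * 16-page device aperture could be wired in with:
 *
 *	kern_return_t kr = pmap_map_block(kernel_pmap,
 *	    device_va,		// page-aligned virtual address (hypothetical)
 *	    device_ppnum,	// first physical page number (hypothetical)
 *	    16,			// size in pages
 *	    VM_PROT_READ | VM_PROT_WRITE,
 *	    VM_WIMG_IO,		// cache attribute carried through as "attr"
 *	    0);
 *	assert(kr == KERN_SUCCESS);
 */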
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map eventually (must make
 *	forward progress eventually).
 */
kern_return_t
pmap_enter(
	pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired)
{
	return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
}
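/*
 * Illustrative sketch (not part of the original implementation): a common
 * fault-path call establishes a non-wired mapping and lets the fault type
 * decide whether the page is entered writable immediately or write-protected
 * so that the first store is observed as a modification:
 *
 *	kr = pmap_enter(map->pmap, vaddr, pn,
 *	    VM_PROT_READ | VM_PROT_WRITE,	// protection to grant
 *	    VM_PROT_READ,			// fault was a read: enter read-only for now
 *	    0, FALSE);
 *
 * "map" and "vaddr" here are stand-ins for whatever the caller has in hand;
 * the flags argument carries VM_WIMG_* cache attributes when needed.
 */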
static inline void
pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
{
	if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
		SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
		if (pte & ARM_PTE_WIRED) {
			OSAddAtomic16(1, ptd_wiredcnt_ptr);
			pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
		} else {
			OSAddAtomic16(-1, ptd_wiredcnt_ptr);
			pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
		}
	}

	if (*pte_p != ARM_PTE_TYPE_FAULT &&
	    !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
		WRITE_PTE_STRONG(pte_p, pte);
		PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE, false);
	} else {
		WRITE_PTE(pte_p, pte);
		__builtin_arm_isb(ISB_SY);
	}

	PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + PAGE_SIZE), pte);
}
MARK_AS_PMAP_TEXT static pt_entry_t
wimg_to_pte(unsigned int wimg)
{
	pt_entry_t pte;

	switch (wimg & (VM_WIMG_MASK)) {
	case VM_WIMG_IO:
	case VM_WIMG_RT:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_POSTED:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_POSTED_REORDERED:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_POSTED_COMBINED_REORDERED:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_WCOMB:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_WTHRU:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
		break;
	case VM_WIMG_COPYBACK:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
		break;
	case VM_WIMG_INNERWBACK:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_INNER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
		break;
	default:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
		pte |= ARM_PTE_SH;
#endif
	}

	return pte;
}
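/*
 * Illustrative sketch (not part of the original implementation): callers do
 * not hand a raw flags word to wimg_to_pte(); they first resolve the
 * effective WIMG for the page (honoring VM_WIMG_USE_DEFAULT) and then merge
 * the returned attribute/shareability bits into the PTE template, e.g.:
 *
 *	unsigned int wimg_bits;
 *	if (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT)) {
 *		wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
 *	} else {
 *		wimg_bits = pmap_cache_attributes(pn);
 *	}
 *	pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
 *	pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
 *
 * This mirrors the pattern used later in pmap_enter_options_internal().
 */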
7570 unsigned int options
,
7572 boolean_t
*is_altacct
)
7575 pv_h
= pai_to_pvh(pai
);
7576 boolean_t first_cpu_mapping
;
7578 ASSERT_PVH_LOCKED(pai
);
7580 vm_offset_t pvh_flags
= pvh_get_flags(pv_h
);
7583 if (pvh_flags
& PVH_FLAG_LOCKDOWN
) {
7584 panic("%d is locked down (%#lx), cannot enter", pai
, pvh_flags
);
7589 /* An IOMMU mapping may already be present for a page that hasn't yet
7590 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
7591 * if this is the first CPU mapping. We base internal/reusable
7592 * accounting on the options specified for the first CPU mapping.
7593 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
7594 * as there are *any* mappings of the page. The accounting for a
7595 * page should not need to change until the page is recycled by the
7596 * VM layer, and we assert that there are no mappings when a page
7597 * is recycled. An IOMMU mapping of a freed/recycled page is
7598 * considered a security violation & potential DMA corruption path.*/
7599 first_cpu_mapping
= ((pmap
!= NULL
) && !(pvh_flags
& PVH_FLAG_CPU
));
7600 if (first_cpu_mapping
) {
7601 pvh_flags
|= PVH_FLAG_CPU
;
7604 first_cpu_mapping
= pvh_test_type(pv_h
, PVH_TYPE_NULL
);
7607 if (first_cpu_mapping
) {
7608 if (options
& PMAP_OPTIONS_INTERNAL
) {
7609 SET_INTERNAL_PAGE(pai
);
7611 CLR_INTERNAL_PAGE(pai
);
7613 if ((options
& PMAP_OPTIONS_INTERNAL
) &&
7614 (options
& PMAP_OPTIONS_REUSABLE
)) {
7615 SET_REUSABLE_PAGE(pai
);
7617 CLR_REUSABLE_PAGE(pai
);
7620 if (pvh_test_type(pv_h
, PVH_TYPE_NULL
)) {
7621 pvh_update_head(pv_h
, pte_p
, PVH_TYPE_PTEP
);
7622 if (pmap
!= NULL
&& pmap
!= kernel_pmap
&&
7623 ((options
& PMAP_OPTIONS_ALT_ACCT
) ||
7624 PMAP_FOOTPRINT_SUSPENDED(pmap
)) &&
7625 IS_INTERNAL_PAGE(pai
)) {
7627 * Make a note to ourselves that this mapping is using alternative
7628 * accounting. We'll need this in order to know which ledger to
7629 * debit when the mapping is removed.
7631 * The altacct bit must be set while the pv head is locked. Defer
7632 * the ledger accounting until after we've dropped the lock.
7634 SET_ALTACCT_PAGE(pai
, PV_ENTRY_NULL
);
7637 CLR_ALTACCT_PAGE(pai
, PV_ENTRY_NULL
);
7640 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
7644 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
7646 pte1_p
= pvh_ptep(pv_h
);
7647 pvh_set_flags(pv_h
, pvh_flags
);
7648 if ((*pve_p
== PV_ENTRY_NULL
) && (!pv_alloc(pmap
, pai
, pve_p
))) {
7652 pve_set_ptep(*pve_p
, pte1_p
);
7653 (*pve_p
)->pve_next
= PV_ENTRY_NULL
;
7655 if (IS_ALTACCT_PAGE(pai
, PV_ENTRY_NULL
)) {
7657 * transfer "altacct" from
7658 * pp_attr to this pve
7660 CLR_ALTACCT_PAGE(pai
, PV_ENTRY_NULL
);
7661 SET_ALTACCT_PAGE(pai
, *pve_p
);
7663 pvh_update_head(pv_h
, *pve_p
, PVH_TYPE_PVEP
);
7664 *pve_p
= PV_ENTRY_NULL
;
7665 } else if (!pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
7666 panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
7667 __func__
, *pv_h
, pte_p
, pmap
, pv_h
);
7670 * Set up pv_entry for this new mapping and then
7671 * add it to the list for this physical page.
7673 pvh_set_flags(pv_h
, pvh_flags
);
7674 if ((*pve_p
== PV_ENTRY_NULL
) && (!pv_alloc(pmap
, pai
, pve_p
))) {
7678 pve_set_ptep(*pve_p
, pte_p
);
7679 (*pve_p
)->pve_next
= PV_ENTRY_NULL
;
7681 pvh_add(pv_h
, *pve_p
);
7683 if (pmap
!= NULL
&& pmap
!= kernel_pmap
&&
7684 ((options
& PMAP_OPTIONS_ALT_ACCT
) ||
7685 PMAP_FOOTPRINT_SUSPENDED(pmap
)) &&
7686 IS_INTERNAL_PAGE(pai
)) {
7688 * Make a note to ourselves that this
7689 * mapping is using alternative
7690 * accounting. We'll need this in order
7691 * to know which ledger to debit when
7692 * the mapping is removed.
7694 * The altacct bit must be set while
7695 * the pv head is locked. Defer the
7696 * ledger accounting until after we've
7699 SET_ALTACCT_PAGE(pai
, *pve_p
);
7703 *pve_p
= PV_ENTRY_NULL
;
7706 pvh_set_flags(pv_h
, pvh_flags
);
7711 MARK_AS_PMAP_TEXT
static kern_return_t
7712 pmap_enter_options_internal(
7717 vm_prot_t fault_type
,
7720 unsigned int options
)
7722 pmap_paddr_t pa
= ptoa(pn
);
7728 boolean_t set_XO
= FALSE
;
7729 boolean_t refcnt_updated
;
7730 boolean_t wiredcnt_updated
;
7731 unsigned int wimg_bits
;
7732 boolean_t was_compressed
, was_alt_compressed
;
7733 kern_return_t kr
= KERN_SUCCESS
;
7735 VALIDATE_PMAP(pmap
);
7737 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
7739 if ((v
) & PAGE_MASK
) {
7740 panic("pmap_enter_options() pmap %p v 0x%llx\n",
7744 if ((prot
& VM_PROT_EXECUTE
) && (prot
& VM_PROT_WRITE
) && (pmap
== kernel_pmap
)) {
7745 panic("pmap_enter_options(): WX request on kernel_pmap");
7748 #if DEVELOPMENT || DEBUG
7749 if ((prot
& VM_PROT_EXECUTE
) || !nx_enabled
|| !pmap
->nx_enabled
)
7751 if ((prot
& VM_PROT_EXECUTE
))
7753 { set_NX
= FALSE
;} else {
7757 #if (__ARM_VMSA__ > 7)
7758 if (prot
== VM_PROT_EXECUTE
) {
7763 assert(pn
!= vm_page_fictitious_addr
);
7765 refcnt_updated
= FALSE
;
7766 wiredcnt_updated
= FALSE
;
7767 pve_p
= PV_ENTRY_NULL
;
7768 was_compressed
= FALSE
;
7769 was_alt_compressed
= FALSE
;
7774 * Expand pmap to include this pte. Assume that
7775 * pmap is always expanded to include enough hardware
7776 * pages to map one VM page.
7778 while ((pte_p
= pmap_pte(pmap
, v
)) == PT_ENTRY_NULL
) {
7779 /* Must unlock to expand the pmap. */
7782 kr
= pmap_expand(pmap
, v
, options
, PMAP_TT_MAX_LEVEL
);
7784 if (kr
!= KERN_SUCCESS
) {
7791 if (options
& PMAP_OPTIONS_NOENTER
) {
7793 return KERN_SUCCESS
;
7800 if (ARM_PTE_IS_COMPRESSED(spte
, pte_p
)) {
7802 * "pmap" should be locked at this point, so this should
7803 * not race with another pmap_enter() or pmap_remove_range().
7805 assert(pmap
!= kernel_pmap
);
7807 /* one less "compressed" */
7808 OSAddAtomic64(-1, &pmap
->stats
.compressed
);
7809 pmap_ledger_debit(pmap
, task_ledgers
.internal_compressed
,
7812 was_compressed
= TRUE
;
7813 if (spte
& ARM_PTE_COMPRESSED_ALT
) {
7814 was_alt_compressed
= TRUE
;
7815 pmap_ledger_debit(pmap
, task_ledgers
.alternate_accounting_compressed
, PAGE_SIZE
);
7817 /* was part of the footprint */
7818 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
7821 /* clear "compressed" marker */
7822 /* XXX is it necessary since we're about to overwrite it ? */
7823 WRITE_PTE_FAST(pte_p
, ARM_PTE_TYPE_FAULT
);
7824 spte
= ARM_PTE_TYPE_FAULT
;
7827 * We're replacing a "compressed" marker with a valid PTE,
7828 * so no change for "refcnt".
7830 refcnt_updated
= TRUE
;
7833 if ((spte
!= ARM_PTE_TYPE_FAULT
) && (pte_to_pa(spte
) != pa
)) {
7834 pmap_remove_range(pmap
, v
, pte_p
, pte_p
+ 1, 0);
7837 pte
= pa_to_pte(pa
) | ARM_PTE_TYPE
;
7839 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
7840 * wired memory statistics for user pmaps, but kernel PTEs are assumed
7841 * to be wired in nearly all cases. For VM layer functionality, the wired
7842 * count in vm_page_t is sufficient. */
7843 if (wired
&& pmap
!= kernel_pmap
) {
7844 pte
|= ARM_PTE_WIRED
;
7848 pte
|= pt_attr_leaf_xn(pt_attr
);
7850 #if (__ARM_VMSA__ > 7)
7851 if (pmap
== kernel_pmap
) {
7854 pte
|= pt_attr_leaf_x(pt_attr
);
7859 if (pmap
== kernel_pmap
) {
7860 #if __ARM_KERNEL_PROTECT__
7862 #endif /* __ARM_KERNEL_PROTECT__ */
7863 if (prot
& VM_PROT_WRITE
) {
7864 pte
|= ARM_PTE_AP(AP_RWNA
);
7865 pa_set_bits(pa
, PP_ATTR_MODIFIED
| PP_ATTR_REFERENCED
);
7867 pte
|= ARM_PTE_AP(AP_RONA
);
7868 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
7870 #if (__ARM_VMSA__ == 7)
7871 if ((_COMM_PAGE_BASE_ADDRESS
<= v
) && (v
< _COMM_PAGE_BASE_ADDRESS
+ _COMM_PAGE_AREA_LENGTH
)) {
7872 pte
= (pte
& ~(ARM_PTE_APMASK
)) | ARM_PTE_AP(AP_RORO
);
7876 if (!pmap
->nested
) {
7878 } else if ((pmap
->nested_region_asid_bitmap
)
7879 && (v
>= pmap
->nested_region_subord_addr
)
7880 && (v
< (pmap
->nested_region_subord_addr
+ pmap
->nested_region_size
))) {
7881 unsigned int index
= (unsigned int)((v
- pmap
->nested_region_subord_addr
) >> pt_attr_twig_shift(pt_attr
));
7883 if ((pmap
->nested_region_asid_bitmap
)
7884 && testbit(index
, (int *)pmap
->nested_region_asid_bitmap
)) {
7889 if (pmap
->nested_pmap
!= NULL
) {
7890 vm_map_address_t nest_vaddr
;
7891 pt_entry_t
*nest_pte_p
;
7893 nest_vaddr
= v
- pmap
->nested_region_grand_addr
+ pmap
->nested_region_subord_addr
;
7895 if ((nest_vaddr
>= pmap
->nested_region_subord_addr
)
7896 && (nest_vaddr
< (pmap
->nested_region_subord_addr
+ pmap
->nested_region_size
))
7897 && ((nest_pte_p
= pmap_pte(pmap
->nested_pmap
, nest_vaddr
)) != PT_ENTRY_NULL
)
7898 && (*nest_pte_p
!= ARM_PTE_TYPE_FAULT
)
7899 && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p
, nest_pte_p
))
7900 && (((*nest_pte_p
) & ARM_PTE_NG
) != ARM_PTE_NG
)) {
7901 unsigned int index
= (unsigned int)((v
- pmap
->nested_region_subord_addr
) >> pt_attr_twig_shift(pt_attr
));
7903 if ((pmap
->nested_pmap
->nested_region_asid_bitmap
)
7904 && !testbit(index
, (int *)pmap
->nested_pmap
->nested_region_asid_bitmap
)) {
7905 panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
7906 nest_pte_p
, pmap
, (uint64_t)v
, (uint64_t)*nest_pte_p
);
7911 if (prot
& VM_PROT_WRITE
) {
7912 if (pa_valid(pa
) && (!pa_test_bits(pa
, PP_ATTR_MODIFIED
))) {
7913 if (fault_type
& VM_PROT_WRITE
) {
7915 pte
|= pt_attr_leaf_rwna(pt_attr
);
7917 pte
|= pt_attr_leaf_rw(pt_attr
);
7919 pa_set_bits(pa
, PP_ATTR_REFERENCED
| PP_ATTR_MODIFIED
);
7922 pte
|= pt_attr_leaf_rona(pt_attr
);
7924 pte
|= pt_attr_leaf_ro(pt_attr
);
7926 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
7927 pte_set_was_writeable(pte
, true);
7931 pte
|= pt_attr_leaf_rwna(pt_attr
);
7933 pte
|= pt_attr_leaf_rw(pt_attr
);
7935 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
7939 pte
|= pt_attr_leaf_rona(pt_attr
);
7941 pte
|= pt_attr_leaf_ro(pt_attr
);;
7943 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
7949 volatile uint16_t *refcnt
= NULL
;
7950 volatile uint16_t *wiredcnt
= NULL
;
7951 if (pmap
!= kernel_pmap
) {
7952 refcnt
= &(ptep_get_ptd(pte_p
)->ptd_info
[ARM_PT_DESC_INDEX(pte_p
)].refcnt
);
7953 wiredcnt
= &(ptep_get_ptd(pte_p
)->ptd_info
[ARM_PT_DESC_INDEX(pte_p
)].wiredcnt
);
7954 /* Bump the wired count to keep the PTE page from being reclaimed. We need this because
7955 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
7956 * a new PV entry. */
7957 if (!wiredcnt_updated
) {
7958 OSAddAtomic16(1, (volatile int16_t*)wiredcnt
);
7959 wiredcnt_updated
= TRUE
;
7961 if (!refcnt_updated
) {
7962 OSAddAtomic16(1, (volatile int16_t*)refcnt
);
7963 refcnt_updated
= TRUE
;
7969 boolean_t is_altacct
, is_internal
;
7971 is_internal
= FALSE
;
7974 pai
= (int)pa_index(pa
);
7979 if ((flags
& (VM_WIMG_MASK
| VM_WIMG_USE_DEFAULT
))) {
7980 wimg_bits
= (flags
& (VM_WIMG_MASK
| VM_WIMG_USE_DEFAULT
));
7982 wimg_bits
= pmap_cache_attributes(pn
);
7985 /* We may be retrying this operation after dropping the PVH lock.
7986 * Cache attributes for the physical page may have changed while the lock
7987 * was dropped, so clear any cache attributes we may have previously set
7988 * in the PTE template. */
7989 pte
&= ~(ARM_PTE_ATTRINDXMASK
| ARM_PTE_SHMASK
);
7990 pte
|= pmap_get_pt_ops(pmap
)->wimg_to_pte(wimg_bits
);
7993 /* The regular old kernel is not allowed to remap PPL pages. */
7994 if (pa_test_monitor(pa
)) {
7995 panic("%s: page belongs to PPL, "
7996 "pmap=%p, v=0x%llx, pn=%u, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
7998 pmap
, v
, pn
, prot
, fault_type
, flags
, wired
, options
);
8001 if (pvh_get_flags(pai_to_pvh(pai
)) & PVH_FLAG_LOCKDOWN
) {
8002 panic("%s: page locked down, "
8003 "pmap=%p, v=0x%llx, pn=%u, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
8005 pmap
, v
, pn
, prot
, fault_type
, flags
, wired
, options
);
8010 if (pte
== *pte_p
) {
8012 * This pmap_enter operation has been completed by another thread
8013 * undo refcnt on pt and return
8016 goto Pmap_enter_cleanup
;
8017 } else if (pte_to_pa(*pte_p
) == pa
) {
8018 pmap_enter_pte(pmap
, pte_p
, pte
, v
);
8020 goto Pmap_enter_cleanup
;
8021 } else if (*pte_p
!= ARM_PTE_TYPE_FAULT
) {
8023 * pte has been modified by another thread
8024 * hold refcnt on pt and retry pmap_enter operation
8027 goto Pmap_enter_retry
;
8029 if (!pmap_enter_pv(pmap
, pte_p
, pai
, options
, &pve_p
, &is_altacct
)) {
8030 goto Pmap_enter_loop
;
8033 pmap_enter_pte(pmap
, pte_p
, pte
, v
);
8035 if (pmap
!= kernel_pmap
) {
8036 if (IS_REUSABLE_PAGE(pai
) &&
8038 assert(IS_INTERNAL_PAGE(pai
));
8039 OSAddAtomic(+1, &pmap
->stats
.reusable
);
8040 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
8041 } else if (IS_INTERNAL_PAGE(pai
)) {
8042 OSAddAtomic(+1, &pmap
->stats
.internal
);
8043 PMAP_STATS_PEAK(pmap
->stats
.internal
);
8046 OSAddAtomic(+1, &pmap
->stats
.external
);
8047 PMAP_STATS_PEAK(pmap
->stats
.external
);
8053 if (pmap
!= kernel_pmap
) {
8054 pmap_ledger_credit(pmap
, task_ledgers
.phys_mem
, PAGE_SIZE
);
8058 * Make corresponding adjustments to
8059 * phys_footprint statistics.
8061 pmap_ledger_credit(pmap
, task_ledgers
.internal
, PAGE_SIZE
);
8064 * If this page is internal and
8065 * in an IOKit region, credit
8066 * the task's total count of
8067 * dirty, internal IOKit pages.
8068 * It should *not* count towards
8069 * the task's total physical
8070 * memory footprint, because
8071 * this entire region was
8072 * already billed to the task
8073 * at the time the mapping was
8076 * Put another way, this is
8078 * alternate_accounting++, so
8079 * net effect on phys_footprint
8080 * is 0. That means: don't
8081 * touch phys_footprint here.
8083 pmap_ledger_credit(pmap
, task_ledgers
.alternate_accounting
, PAGE_SIZE
);
8085 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, PAGE_SIZE
);
8090 OSAddAtomic(1, (SInt32
*) &pmap
->stats
.resident_count
);
8091 if (pmap
->stats
.resident_count
> pmap
->stats
.resident_max
) {
8092 pmap
->stats
.resident_max
= pmap
->stats
.resident_count
;
8095 if (prot
& VM_PROT_EXECUTE
) {
8097 goto Pmap_enter_cleanup
;
8100 wimg_bits
= pmap_cache_attributes(pn
);
8101 if ((flags
& (VM_WIMG_MASK
| VM_WIMG_USE_DEFAULT
))) {
8102 wimg_bits
= (wimg_bits
& (~VM_WIMG_MASK
)) | (flags
& (VM_WIMG_MASK
| VM_WIMG_USE_DEFAULT
));
8105 pte
|= pmap_get_pt_ops(pmap
)->wimg_to_pte(wimg_bits
);
8108 if (!pmap_ppl_disable
&& (wimg_bits
& PP_ATTR_MONITOR
)) {
8109 uint64_t xprr_perm
= pte_to_xprr_perm(pte
);
8110 switch (xprr_perm
) {
8111 case XPRR_KERN_RO_PERM
:
8113 case XPRR_KERN_RW_PERM
:
8114 pte
&= ~ARM_PTE_XPRR_MASK
;
8115 pte
|= xprr_perm_to_pte(XPRR_PPL_RW_PERM
);
8118 panic("Unsupported xPRR perm %llu for pte 0x%llx", xprr_perm
, (uint64_t)pte
);
8122 pmap_enter_pte(pmap
, pte_p
, pte
, v
);
8125 goto Pmap_enter_return
;
8129 if (refcnt
!= NULL
) {
8130 assert(refcnt_updated
);
8131 if (OSAddAtomic16(-1, (volatile int16_t*)refcnt
) <= 0) {
8132 panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p
), pte_p
);
8139 if (pgtrace_enabled
) {
8140 // Clone and invalidate original mapping if eligible
8141 for (int i
= 0; i
< PAGE_RATIO
; i
++) {
8142 pmap_pgtrace_enter_clone(pmap
, v
+ ARM_PGBYTES
* i
, 0, 0);
8147 if (pve_p
!= PV_ENTRY_NULL
) {
8151 if (wiredcnt_updated
&& (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt
) <= 0)) {
8152 panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p
), pte_p
);
8166 vm_prot_t fault_type
,
8169 unsigned int options
,
8172 kern_return_t kr
= KERN_FAILURE
;
8174 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_START
,
8175 VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(v
), pn
, prot
);
8178 if (options
& PMAP_OPTIONS_NOWAIT
) {
8179 /* If NOWAIT was requested, just return the result. */
8180 kr
= pmap_enter_options_ppl(pmap
, v
, pn
, prot
, fault_type
, flags
, wired
, options
);
8183 * If NOWAIT was not requested, loop until the enter does not
8184 * fail due to lack of resources.
8186 while ((kr
= pmap_enter_options_ppl(pmap
, v
, pn
, prot
, fault_type
, flags
, wired
, options
| PMAP_OPTIONS_NOWAIT
)) == KERN_RESOURCE_SHORTAGE
) {
8187 pv_water_mark_check();
8188 pmap_alloc_page_for_ppl();
8192 pmap_ledger_check_balance(pmap
);
8194 kr
= pmap_enter_options_internal(pmap
, v
, pn
, prot
, fault_type
, flags
, wired
, options
);
8196 pv_water_mark_check();
8198 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER
) | DBG_FUNC_END
, kr
);
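/*
 * Illustrative sketch (not part of the original implementation): when the
 * caller itself passes PMAP_OPTIONS_NOWAIT, KERN_RESOURCE_SHORTAGE can come
 * back and the retry/back-off policy moves to the caller, e.g.:
 *
 *	kern_return_t kr;
 *	do {
 *		kr = pmap_enter_options(pmap, v, pn, prot, fault_type,
 *		    flags, wired, PMAP_OPTIONS_NOWAIT, NULL);
 *		if (kr == KERN_RESOURCE_SHORTAGE) {
 *			VM_PAGE_WAIT();	// stand-in for the caller's back-off / page-wait policy
 *		}
 *	} while (kr == KERN_RESOURCE_SHORTAGE);
 *
 * Without PMAP_OPTIONS_NOWAIT the routine above keeps retrying itself (and,
 * under the PPL, feeding pages to the monitor) until the enter no longer
 * fails for lack of resources.
 */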
8204 * Routine: pmap_change_wiring
8205 * Function: Change the wiring attribute for a map/virtual-address
8207 * In/out conditions:
8208 * The mapping must already exist in the pmap.
8210 MARK_AS_PMAP_TEXT
static void
8211 pmap_change_wiring_internal(
8219 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
8220 * wired memory statistics for user pmaps, but kernel PTEs are assumed
8221 * to be wired in nearly all cases. For VM layer functionality, the wired
8222 * count in vm_page_t is sufficient. */
8223 if (pmap
== kernel_pmap
) {
8226 VALIDATE_USER_PMAP(pmap
);
8229 pte_p
= pmap_pte(pmap
, v
);
8230 assert(pte_p
!= PT_ENTRY_NULL
);
8231 pa
= pte_to_pa(*pte_p
);
8233 while (pa_valid(pa
)) {
8234 pmap_paddr_t new_pa
;
8236 LOCK_PVH((int)pa_index(pa
));
8237 new_pa
= pte_to_pa(*pte_p
);
8243 UNLOCK_PVH((int)pa_index(pa
));
8247 if (wired
&& !pte_is_wired(*pte_p
)) {
8248 pte_set_wired(pte_p
, wired
);
8249 OSAddAtomic(+1, (SInt32
*) &pmap
->stats
.wired_count
);
8250 pmap_ledger_credit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
8251 } else if (!wired
&& pte_is_wired(*pte_p
)) {
8252 PMAP_STATS_ASSERTF(pmap
->stats
.wired_count
>= 1, pmap
, "stats.wired_count %d", pmap
->stats
.wired_count
);
8253 pte_set_wired(pte_p
, wired
);
8254 OSAddAtomic(-1, (SInt32
*) &pmap
->stats
.wired_count
);
8255 pmap_ledger_debit(pmap
, task_ledgers
.wired_mem
, PAGE_SIZE
);
8259 UNLOCK_PVH((int)pa_index(pa
));
void
pmap_change_wiring(
	pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired)
{
#if XNU_MONITOR
	pmap_change_wiring_ppl(pmap, v, wired);

	pmap_ledger_check_balance(pmap);
#else
	pmap_change_wiring_internal(pmap, v, wired);
#endif
}
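/*
 * Illustrative sketch (not part of the original implementation): the VM's
 * wire/unwire paths drive this per mapping, and the mapping must already
 * exist.  A typical sequence for pinning one user page is:
 *
 *	kr = pmap_enter(user_pmap, vaddr, pn, VM_PROT_READ | VM_PROT_WRITE,
 *	    VM_PROT_NONE, 0, FALSE);			// establish the mapping first
 *	if (kr == KERN_SUCCESS) {
 *		pmap_change_wiring(user_pmap, vaddr, TRUE);	// then pin it
 *	}
 *	...
 *	pmap_change_wiring(user_pmap, vaddr, FALSE);	// later, release the pin
 *
 * "user_pmap", "vaddr", and "pn" are hypothetical names for the caller's state.
 */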
8280 MARK_AS_PMAP_TEXT
static ppnum_t
8281 pmap_find_phys_internal(
8287 VALIDATE_PMAP(pmap
);
8289 if (pmap
!= kernel_pmap
) {
8293 ppn
= pmap_vtophys(pmap
, va
);
8295 if (pmap
!= kernel_pmap
) {
8307 pmap_paddr_t pa
= 0;
8309 if (pmap
== kernel_pmap
) {
8311 } else if ((current_thread()->map
) && (pmap
== vm_map_pmap(current_thread()->map
))) {
8316 return (ppnum_t
)(pa
>> PAGE_SHIFT
);
8321 return pmap_find_phys_ppl(pmap
, va
);
8323 return pmap_find_phys_internal(pmap
, va
);
8326 return pmap_vtophys(pmap
, va
);
8340 pa
= ((pmap_paddr_t
)pmap_vtophys(kernel_pmap
, va
)) << PAGE_SHIFT
;
8342 pa
|= (va
& PAGE_MASK
);
8345 return (pmap_paddr_t
)pa
;
8353 if ((va
< pmap
->min
) || (va
>= pmap
->max
)) {
8357 #if (__ARM_VMSA__ == 7)
8358 tt_entry_t
*tte_p
, tte
;
8362 tte_p
= pmap_tte(pmap
, va
);
8363 if (tte_p
== (tt_entry_t
*) NULL
) {
8368 if ((tte
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
8369 pte_p
= (pt_entry_t
*) ttetokv(tte
) + ptenum(va
);
8370 ppn
= (ppnum_t
) atop(pte_to_pa(*pte_p
) | (va
& ARM_PGMASK
));
8371 #if DEVELOPMENT || DEBUG
8373 ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
)) {
8374 panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
8375 pmap
, va
, pte_p
, (uint64_t) (*pte_p
), ppn
);
8377 #endif /* DEVELOPMENT || DEBUG */
8378 } else if ((tte
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_BLOCK
) {
8379 if ((tte
& ARM_TTE_BLOCK_SUPER
) == ARM_TTE_BLOCK_SUPER
) {
8380 ppn
= (ppnum_t
) atop(suptte_to_pa(tte
) | (va
& ARM_TT_L1_SUPER_OFFMASK
));
8382 ppn
= (ppnum_t
) atop(sectte_to_pa(tte
) | (va
& ARM_TT_L1_BLOCK_OFFMASK
));
8392 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
8394 /* Level 0 currently unused */
8396 /* Get first-level (1GB) entry */
8397 ttp
= pmap_tt1e(pmap
, va
);
8399 if ((tte
& (ARM_TTE_TYPE_MASK
| ARM_TTE_VALID
)) != (ARM_TTE_TYPE_TABLE
| ARM_TTE_VALID
)) {
8403 tte
= ((tt_entry_t
*) phystokv(tte
& ARM_TTE_TABLE_MASK
))[tt2_index(pmap
, pt_attr
, va
)];
8405 if ((tte
& ARM_TTE_VALID
) != (ARM_TTE_VALID
)) {
8409 if ((tte
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_BLOCK
) {
8410 ppn
= (ppnum_t
) atop((tte
& ARM_TTE_BLOCK_L2_MASK
) | (va
& ARM_TT_L2_OFFMASK
));
8413 tte
= ((tt_entry_t
*) phystokv(tte
& ARM_TTE_TABLE_MASK
))[tt3_index(pmap
, pt_attr
, va
)];
8414 ppn
= (ppnum_t
) atop((tte
& ARM_PTE_MASK
) | (va
& ARM_TT_L3_OFFMASK
));
8420 MARK_AS_PMAP_TEXT
static vm_offset_t
8421 pmap_extract_internal(
8423 vm_map_address_t va
)
8425 pmap_paddr_t pa
= 0;
8432 VALIDATE_PMAP(pmap
);
8436 ppn
= pmap_vtophys(pmap
, va
);
8439 pa
= ptoa(ppn
) | ((va
) & PAGE_MASK
);
/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_offset_t
pmap_extract(
	pmap_t pmap,
	vm_map_address_t va)
{
	pmap_paddr_t pa = 0;

	if (pmap == kernel_pmap) {
		pa = mmu_kvtop(va);
	} else if (pmap == vm_map_pmap(current_thread()->map)) {
		pa = mmu_uvtop(va);
	}

	if (pa) {
		return (vm_offset_t)pa;
	}

#if XNU_MONITOR
	return pmap_extract_ppl(pmap, va);
#else
	return pmap_extract_internal(pmap, va);
#endif
}
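/*
 * Illustrative sketch (not part of the original implementation): translating
 * a kernel virtual address to its physical address and page number with the
 * routines above:
 *
 *	vm_offset_t  kva = some_kernel_va;			// hypothetical
 *	vm_offset_t  pa  = pmap_extract(kernel_pmap, kva);	// 0 if unmapped
 *	ppnum_t      pn  = pmap_find_phys(kernel_pmap, kva);	// 0 if unmapped
 *
 *	if (pa != 0) {
 *		assert(pn == (ppnum_t)atop(pa));
 *		// pa keeps the byte offset: (pa & PAGE_MASK) == (kva & PAGE_MASK)
 *	}
 */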
8479 * pmap_init_pte_page - Initialize a page table page.
8486 unsigned int ttlevel
,
8487 boolean_t alloc_ptd
,
8490 pt_desc_t
*ptdp
= NULL
;
8493 pvh
= (vm_offset_t
*)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t
)pte_p
))));
8495 if (pvh_test_type(pvh
, PVH_TYPE_NULL
)) {
8498 * This path should only be invoked from arm_vm_init. If we are emulating 16KB pages
8499 * on 4KB hardware, we may already have allocated a page table descriptor for a
8500 * bootstrap request, so we check for an existing PTD here.
8502 ptdp
= ptd_alloc(pmap
, true);
8503 pvh_update_head_unlocked(pvh
, ptdp
, PVH_TYPE_PTDP
);
8505 panic("pmap_init_pte_page(): pte_p %p", pte_p
);
8507 } else if (pvh_test_type(pvh
, PVH_TYPE_PTDP
)) {
8508 ptdp
= (pt_desc_t
*)(pvh_list(pvh
));
8510 panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p
);
8514 bzero(pte_p
, ARM_PGBYTES
);
8515 // below barrier ensures the page zeroing is visible to PTW before
8516 // it is linked to the PTE of previous level
8517 __builtin_arm_dmb(DMB_ISHST
);
8519 ptd_init(ptdp
, pmap
, va
, ttlevel
, pte_p
);
8523 * Routine: pmap_expand
8525 * Expands a pmap to be able to map the specified virtual address.
8527 * Allocates new memory for the default (COARSE) translation table
8528 * entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
8529 * also allocates space for the corresponding pv entries.
8531 * Nothing should be locked.
8533 static kern_return_t
8537 unsigned int options
,
8540 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
8542 #if (__ARM_VMSA__ == 7)
8548 #if DEVELOPMENT || DEBUG
8550 * We no longer support root level expansion; panic in case something
8551 * still attempts to trigger it.
8553 i
= tte_index(pmap
, pt_attr
, v
);
8555 if (i
>= pmap
->tte_index_max
) {
8556 panic("%s: index out of range, index=%u, max=%u, "
8557 "pmap=%p, addr=%p, options=%u, level=%u",
8558 __func__
, i
, pmap
->tte_index_max
,
8559 pmap
, (void *)v
, options
, level
);
8561 #endif /* DEVELOPMENT || DEBUG */
8564 return KERN_SUCCESS
;
8568 tt_entry_t
*tte_next_p
;
8572 if (pmap_pte(pmap
, v
) != PT_ENTRY_NULL
) {
8574 return KERN_SUCCESS
;
8576 tte_p
= &pmap
->tte
[ttenum(v
& ~ARM_TT_L1_PT_OFFMASK
)];
8577 for (i
= 0, tte_next_p
= tte_p
; i
< 4; i
++) {
8578 if (tte_to_pa(*tte_next_p
)) {
8579 pa
= tte_to_pa(*tte_next_p
);
8584 pa
= pa
& ~PAGE_MASK
;
8586 tte_p
= &pmap
->tte
[ttenum(v
)];
8587 *tte_p
= pa_to_tte(pa
) | (((v
>> ARM_TT_L1_SHIFT
) & 0x3) << 10) | ARM_TTE_TYPE_TABLE
;
8589 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE
), VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(v
& ~ARM_TT_L1_OFFMASK
),
8590 VM_KERNEL_ADDRHIDE((v
& ~ARM_TT_L1_OFFMASK
) + ARM_TT_L1_SIZE
), *tte_p
);
8592 return KERN_SUCCESS
;
8596 v
= v
& ~ARM_TT_L1_PT_OFFMASK
;
8599 while (pmap_pte(pmap
, v
) == PT_ENTRY_NULL
) {
8601 * Allocate a VM page for the level 2 page table entries.
8603 while (pmap_tt_allocate(pmap
, &tt_p
, PMAP_TT_L2_LEVEL
, ((options
& PMAP_TT_ALLOCATE_NOWAIT
)? PMAP_PAGES_ALLOCATE_NOWAIT
: 0)) != KERN_SUCCESS
) {
8604 if (options
& PMAP_OPTIONS_NOWAIT
) {
8605 return KERN_RESOURCE_SHORTAGE
;
8612 * See if someone else expanded us first
8614 if (pmap_pte(pmap
, v
) == PT_ENTRY_NULL
) {
8615 tt_entry_t
*tte_next_p
;
8617 pmap_init_pte_page(pmap
, (pt_entry_t
*) tt_p
, v
, PMAP_TT_L2_LEVEL
, FALSE
, TRUE
);
8618 pa
= kvtophys((vm_offset_t
)tt_p
);
8619 tte_p
= &pmap
->tte
[ttenum(v
)];
8620 for (i
= 0, tte_next_p
= tte_p
; i
< 4; i
++) {
8621 *tte_next_p
= pa_to_tte(pa
) | ARM_TTE_TYPE_TABLE
;
8622 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE
), VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE((v
& ~ARM_TT_L1_PT_OFFMASK
) + (i
* ARM_TT_L1_SIZE
)),
8623 VM_KERNEL_ADDRHIDE((v
& ~ARM_TT_L1_PT_OFFMASK
) + ((i
+ 1) * ARM_TT_L1_SIZE
)), *tte_p
);
8627 FLUSH_PTE_RANGE(tte_p
, tte_p
+ 4);
8630 tt_p
= (tt_entry_t
*)NULL
;
8633 if (tt_p
!= (tt_entry_t
*)NULL
) {
8634 pmap_tt_deallocate(pmap
, tt_p
, PMAP_TT_L2_LEVEL
);
8635 tt_p
= (tt_entry_t
*)NULL
;
8638 return KERN_SUCCESS
;
8641 unsigned int ttlevel
= pt_attr_root_level(pt_attr
);
8646 tt_p
= (tt_entry_t
*)NULL
;
8648 for (; ttlevel
< level
; ttlevel
++) {
8651 if (pmap_ttne(pmap
, ttlevel
+ 1, v
) == PT_ENTRY_NULL
) {
8653 while (pmap_tt_allocate(pmap
, &tt_p
, ttlevel
+ 1, ((options
& PMAP_TT_ALLOCATE_NOWAIT
)? PMAP_PAGES_ALLOCATE_NOWAIT
: 0)) != KERN_SUCCESS
) {
8654 if (options
& PMAP_OPTIONS_NOWAIT
) {
8655 return KERN_RESOURCE_SHORTAGE
;
8658 panic("%s: failed to allocate tt, "
8659 "pmap=%p, v=%p, options=0x%x, level=%u",
8661 pmap
, (void *)v
, options
, level
);
8667 if ((pmap_ttne(pmap
, ttlevel
+ 1, v
) == PT_ENTRY_NULL
)) {
8668 pmap_init_pte_page(pmap
, (pt_entry_t
*) tt_p
, v
, ttlevel
+ 1, FALSE
, TRUE
);
8669 pa
= kvtophys((vm_offset_t
)tt_p
);
8670 tte_p
= pmap_ttne(pmap
, ttlevel
, v
);
8671 *tte_p
= (pa
& ARM_TTE_TABLE_MASK
) | ARM_TTE_TYPE_TABLE
| ARM_TTE_VALID
;
8672 PMAP_TRACE(ttlevel
+ 1, PMAP_CODE(PMAP__TTE
), VM_KERNEL_ADDRHIDE(pmap
), VM_KERNEL_ADDRHIDE(v
& ~pt_attr_ln_offmask(pt_attr
, ttlevel
)),
8673 VM_KERNEL_ADDRHIDE((v
& ~pt_attr_ln_offmask(pt_attr
, ttlevel
)) + pt_attr_ln_size(pt_attr
, ttlevel
)), *tte_p
);
8675 tt_p
= (tt_entry_t
*)NULL
;
8681 if (tt_p
!= (tt_entry_t
*)NULL
) {
8682 pmap_tt_deallocate(pmap
, tt_p
, ttlevel
+ 1);
8683 tt_p
= (tt_entry_t
*)NULL
;
8687 return KERN_SUCCESS
;
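/*
 * Illustrative sketch (not part of the original implementation): callers that
 * cannot block pass PMAP_OPTIONS_NOWAIT and handle the shortage themselves,
 * mirroring the pattern used by pmap_enter_options_internal():
 *
 *	kern_return_t kr = pmap_expand(pmap, v, PMAP_OPTIONS_NOWAIT, PMAP_TT_MAX_LEVEL);
 *	if (kr == KERN_RESOURCE_SHORTAGE) {
 *		// drop locks, let the VM replenish page-table pages, then retry
 *	}
 *
 * Without PMAP_OPTIONS_NOWAIT, pmap_expand() loops internally until a
 * page-table page can be obtained.
 */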
/*
 *	Routine:	pmap_collect
 *	Function:
 *		Garbage collects the physical map system for
 *		pages which are no longer used.
 *		Success need not be guaranteed -- that is, there
 *		may well be pages which are not referenced, but
 *		others may be collected.
 */
void
pmap_collect(pmap_t pmap)
{
	if (pmap == PMAP_NULL) {
		return;
	}

#if 0
	PMAP_LOCK(pmap);
	if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
		/* TODO: Scan for vm page assigned to top level page tables with no reference */
	}
	PMAP_UNLOCK(pmap);
#endif
}
8721 * Pmap garbage collection
8722 * Called by the pageout daemon when pages are scarce.
8731 * We cannot invoke the scheduler from the PPL, so for now we elide the
8732 * GC logic if the PPL is enabled.
8736 pmap_t pmap
, pmap_next
;
8739 if (pmap_gc_allowed
&&
8740 (pmap_gc_allowed_by_time_throttle
||
8742 pmap_gc_forced
= FALSE
;
8743 pmap_gc_allowed_by_time_throttle
= FALSE
;
8744 pmap_simple_lock(&pmaps_lock
);
8745 pmap
= CAST_DOWN_EXPLICIT(pmap_t
, queue_first(&map_pmap_list
));
8746 while (!queue_end(&map_pmap_list
, (queue_entry_t
)pmap
)) {
8747 if (!(pmap
->gc_status
& PMAP_GC_INFLIGHT
)) {
8748 pmap
->gc_status
|= PMAP_GC_INFLIGHT
;
8750 pmap_simple_unlock(&pmaps_lock
);
8754 pmap_simple_lock(&pmaps_lock
);
8755 gc_wait
= (pmap
->gc_status
& PMAP_GC_WAIT
);
8756 pmap
->gc_status
&= ~(PMAP_GC_INFLIGHT
| PMAP_GC_WAIT
);
8757 pmap_next
= CAST_DOWN_EXPLICIT(pmap_t
, queue_next(&pmap
->pmaps
));
8759 if (!queue_end(&map_pmap_list
, (queue_entry_t
)pmap_next
)) {
8760 pmap_next
->gc_status
|= PMAP_GC_INFLIGHT
;
8762 pmap_simple_unlock(&pmaps_lock
);
8763 thread_wakeup((event_t
) &pmap
->gc_status
);
8764 pmap_simple_lock(&pmaps_lock
);
8768 pmap_simple_unlock(&pmaps_lock
);
/*
 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
 */
uint64_t
pmap_release_pages_fast(void)
{
#if XNU_MONITOR
	return pmap_release_ppl_pages_to_kernel();
#else /* XNU_MONITOR */
	return 0;
#endif
}

/*
 * By default, don't attempt pmap GC more frequently
 * than once per minute.
 */
void
compute_pmap_gc_throttle(
	void *arg __unused)
{
	pmap_gc_allowed_by_time_throttle = TRUE;
}
/*
 * pmap_attribute_cache_sync(vm_offset_t pa)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 */
kern_return_t
pmap_attribute_cache_sync(
	ppnum_t pp,
	vm_size_t size,
	__unused vm_machine_attribute_t attribute,
	__unused vm_machine_attribute_val_t * value)
{
	if (size > PAGE_SIZE) {
		panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
	} else {
		cache_sync_page(pp);
	}

	return KERN_SUCCESS;
}

/*
 * pmap_sync_page_data_phys(ppnum_t pp)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 */
void
pmap_sync_page_data_phys(
	ppnum_t pp)
{
	cache_sync_page(pp);
}

/*
 * pmap_sync_page_attributes_phys(ppnum_t pp)
 *
 * Write back and invalidate all cachelines on a physical page.
 */
void
pmap_sync_page_attributes_phys(
	ppnum_t pp)
{
	flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
}
8847 /* temporary workaround */
8856 pte_p
= pmap_pte(map
->pmap
, va
);
8861 return (spte
& ARM_PTE_ATTRINDXMASK
) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT
);
8873 addr
= (unsigned int *) phystokv(ptoa(pn
));
8874 count
= PAGE_SIZE
/ sizeof(unsigned int);
8880 extern void mapping_set_mod(ppnum_t pn
);
8886 pmap_set_modify(pn
);
8889 extern void mapping_set_ref(ppnum_t pn
);
8895 pmap_set_reference(pn
);
8899 * Clear specified attribute bits.
8901 * Try to force an arm_fast_fault() for all mappings of
8902 * the page - to force attributes to be set again at fault time.
8903 * If the forcing succeeds, clear the cached bits at the head.
8904 * Otherwise, something must have been wired, so leave the cached
8907 MARK_AS_PMAP_TEXT
static void
8908 phys_attribute_clear_internal(
8914 pmap_paddr_t pa
= ptoa(pn
);
8915 vm_prot_t allow_mode
= VM_PROT_ALL
;
8918 if (bits
& PP_ATTR_PPL_OWNED_BITS
) {
8919 panic("%s: illegal request, "
8920 "pn=%u, bits=%#x, options=%#x, arg=%p",
8922 pn
, bits
, options
, arg
);
8926 if ((bits
& PP_ATTR_MODIFIED
) &&
8927 (options
& PMAP_OPTIONS_NOFLUSH
) &&
8929 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
8930 "should not clear 'modified' without flushing TLBs\n",
8931 pn
, bits
, options
, arg
);
8934 assert(pn
!= vm_page_fictitious_addr
);
8936 if (options
& PMAP_OPTIONS_CLEAR_WRITE
) {
8937 assert(bits
== PP_ATTR_MODIFIED
);
8939 pmap_page_protect_options_internal(pn
, (VM_PROT_ALL
& ~VM_PROT_WRITE
), 0);
8941 * We short circuit this case; it should not need to
8942 * invoke arm_force_fast_fault, so just clear the modified bit.
8943 * pmap_page_protect has taken care of resetting
8944 * the state so that we'll see the next write as a fault to
8945 * the VM (i.e. we don't want a fast fault).
8947 pa_clear_bits(pa
, bits
);
8950 if (bits
& PP_ATTR_REFERENCED
) {
8951 allow_mode
&= ~(VM_PROT_READ
| VM_PROT_EXECUTE
);
8953 if (bits
& PP_ATTR_MODIFIED
) {
8954 allow_mode
&= ~VM_PROT_WRITE
;
8957 if (bits
== PP_ATTR_NOENCRYPT
) {
8959 * We short circuit this case; it should not need to
8960 * invoke arm_force_fast_fault, so just clear and
8961 * return. On ARM, this bit is just a debugging aid.
8963 pa_clear_bits(pa
, bits
);
8967 if (arm_force_fast_fault_internal(pn
, allow_mode
, options
)) {
8968 pa_clear_bits(pa
, bits
);
8974 phys_attribute_clear(
8981 * Do we really want this tracepoint? It will be extremely chatty.
8982 * Also, should we have a corresponding trace point for the set path?
8984 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_START
, pn
, bits
);
8987 phys_attribute_clear_ppl(pn
, bits
, options
, arg
);
8989 phys_attribute_clear_internal(pn
, bits
, options
, arg
);
8992 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR
) | DBG_FUNC_END
);
8996 * Set specified attribute bits.
8998 * Set cached value in the pv head because we have
8999 * no per-mapping hardware support for referenced and
9002 MARK_AS_PMAP_TEXT
static void
9003 phys_attribute_set_internal(
9007 pmap_paddr_t pa
= ptoa(pn
);
9008 assert(pn
!= vm_page_fictitious_addr
);
9011 if (bits
& PP_ATTR_PPL_OWNED_BITS
) {
9012 panic("%s: illegal request, "
9019 pa_set_bits(pa
, bits
);
9030 phys_attribute_set_ppl(pn
, bits
);
9032 phys_attribute_set_internal(pn
, bits
);
9038 * Check specified attribute bits.
9040 * use the software cached bits (since no hw support).
9043 phys_attribute_test(
9047 pmap_paddr_t pa
= ptoa(pn
);
9048 assert(pn
!= vm_page_fictitious_addr
);
9049 return pa_test_bits(pa
, bits
);
/*
 *	Set the modify/reference bits on the specified physical page.
 */
void
pmap_set_modify(ppnum_t pn)
{
	phys_attribute_set(pn, PP_ATTR_MODIFIED);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(
	ppnum_t pn)
{
	phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
 */
boolean_t
pmap_is_modified(
	ppnum_t pn)
{
	return phys_attribute_test(pn, PP_ATTR_MODIFIED);
}

/*
 *	Set the reference bit on the specified physical page.
 */
void
pmap_set_reference(
	ppnum_t pn)
{
	phys_attribute_set(pn, PP_ATTR_REFERENCED);
}

/*
 *	Clear the reference bits on the specified physical page.
 */
void
pmap_clear_reference(
	ppnum_t pn)
{
	phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
}

/*
 *	pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */
boolean_t
pmap_is_referenced(
	ppnum_t pn)
{
	return phys_attribute_test(pn, PP_ATTR_REFERENCED);
}

/*
 * pmap_get_refmod(phys)
 *  returns the referenced and modified bits of the specified
 *  physical page.
 */
unsigned int
pmap_get_refmod(
	ppnum_t pn)
{
	return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
	       | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
}
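/*
 * Illustrative sketch (not part of the original implementation): the pageout
 * path combines these accessors, for example when deciding how to treat a
 * page that is about to be disconnected:
 *
 *	unsigned int refmod = pmap_get_refmod(pn);
 *	if (refmod & VM_MEM_MODIFIED) {
 *		// dirty: contents must be preserved (cleaned or compressed)
 *	}
 *	if (refmod & VM_MEM_REFERENCED) {
 *		// recently used: a candidate for staying resident
 *	}
 *
 * pmap_disconnect_options() below makes a similar pmap_is_modified() check
 * internally when PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED is passed.
 */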
/*
 * pmap_clear_refmod(phys, mask)
 *  clears the referenced and modified bits as specified by the mask
 *  of the specified physical page.
 */
void
pmap_clear_refmod_options(
	ppnum_t pn,
	unsigned int mask,
	unsigned int options,
	void *arg)
{
	unsigned int bits;

	bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
	    ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
	phys_attribute_clear(pn, bits, options, arg);
}

void
pmap_clear_refmod(
	ppnum_t pn,
	unsigned int mask)
{
	pmap_clear_refmod_options(pn, mask, 0, NULL);
}
unsigned int
pmap_disconnect_options(
	ppnum_t pn,
	unsigned int options,
	void *arg)
{
	if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
		/*
		 * On ARM, the "modified" bit is managed by software, so
		 * we know up-front if the physical page is "modified",
		 * without having to scan all the PTEs pointing to it.
		 * The caller should have made the VM page "busy" so no one
		 * should be able to establish any new mapping and "modify"
		 * the page behind us.
		 */
		if (pmap_is_modified(pn)) {
			/*
			 * The page has been modified and will be sent to
			 * the VM compressor.
			 */
			options |= PMAP_OPTIONS_COMPRESSOR;
		} else {
			/*
			 * The page hasn't been modified and will be freed
			 * instead of compressed.
			 */
		}
	}

	/* disconnect the page */
	pmap_page_protect_options(pn, 0, options, arg);

	/* return ref/chg status */
	return pmap_get_refmod(pn);
}

/*
 *	Routine:	pmap_disconnect
 *
 *	Function:
 *		Disconnect all mappings for this page and return reference and change status
 *		in generic format.
 */
unsigned int
pmap_disconnect(
	ppnum_t pn)
{
	pmap_page_protect(pn, 0);       /* disconnect the page */
	return pmap_get_refmod(pn);     /* return ref/chg status */
}
boolean_t
pmap_has_managed_page(ppnum_t first, ppnum_t last)
{
	if (ptoa(first) >= vm_last_phys) {
		return FALSE;
	}
	if (ptoa(last) < vm_first_phys) {
		return FALSE;
	}

	return TRUE;
}
/*
 * The state maintained by the noencrypt functions is used as a
 * debugging aid on ARM.  This incurs some overhead on the part
 * of the caller.  A special case check in phys_attribute_clear
 * (the most expensive path) currently minimizes this overhead,
 * but stubbing these functions out on RELEASE kernels yields
 * further wins.
 */
boolean_t
pmap_is_noencrypt(ppnum_t pn)
{
#if DEVELOPMENT || DEBUG
	boolean_t result = FALSE;

	if (!pa_valid(ptoa(pn))) {
		return FALSE;
	}

	result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));

	return result;
#else
#pragma unused(pn)
	return FALSE;
#endif
}

void
pmap_set_noencrypt(ppnum_t pn)
{
#if DEVELOPMENT || DEBUG
	if (!pa_valid(ptoa(pn))) {
		return;
	}

	phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
#else
#pragma unused(pn)
#endif
}

void
pmap_clear_noencrypt(
	ppnum_t pn)
{
#if DEVELOPMENT || DEBUG
	if (!pa_valid(ptoa(pn))) {
		return;
	}

	phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
#else
#pragma unused(pn)
#endif
}

#if XNU_MONITOR
boolean_t
pmap_is_monitor(ppnum_t pn)
{
	assert(pa_valid(ptoa(pn)));
	return phys_attribute_test(pn, PP_ATTR_MONITOR);
}
#endif

void
pmap_lock_phys_page(ppnum_t pn)
{
	int pai;
	pmap_paddr_t phys = ptoa(pn);

	if (pa_valid(phys)) {
		pai = (int)pa_index(phys);
		LOCK_PVH(pai);
	} else {
		simple_lock(&phys_backup_lock, LCK_GRP_NULL);
	}
}

void
pmap_unlock_phys_page(ppnum_t pn)
{
	int pai;
	pmap_paddr_t phys = ptoa(pn);

	if (pa_valid(phys)) {
		pai = (int)pa_index(phys);
		UNLOCK_PVH(pai);
	} else {
		simple_unlock(&phys_backup_lock);
	}
}
MARK_AS_PMAP_TEXT static void
pmap_switch_user_ttb_internal(
	pmap_t pmap)
{
	VALIDATE_PMAP(pmap);
	pmap_cpu_data_t *cpu_data_ptr;
	cpu_data_ptr = pmap_get_cpu_data();

#if     (__ARM_VMSA__ == 7)
	cpu_data_ptr->cpu_user_pmap = pmap;
	cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;

#if     MACH_ASSERT && __ARM_USER_PROTECT__
	{
		unsigned int ttbr0_val, ttbr1_val;
		__asm__ volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
		__asm__ volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
		if (ttbr0_val != ttbr1_val) {
			panic("Misaligned ttbr0  %08X\n", ttbr0_val);
		}
	}
#endif
	if (pmap->tte_index_max == NTTES) {
		/* Setting TTBCR.N for TTBR0 TTBR1 boundary at  0x40000000 */
		__asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(2));
		__builtin_arm_isb(ISB_SY);
#if !__ARM_USER_PROTECT__
		set_mmu_ttb(pmap->ttep);
#endif
	} else {
#if !__ARM_USER_PROTECT__
		set_mmu_ttb(pmap->ttep);
#endif
		/* Setting TTBCR.N for TTBR0 TTBR1 boundary at  0x80000000 */
		__asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(1));
		__builtin_arm_isb(ISB_SY);
#if     MACH_ASSERT && __ARM_USER_PROTECT__
		if (pmap->ttep & 0x1000) {
			panic("Misaligned ttbr0  %08X\n", pmap->ttep);
		}
#endif
	}

#if !__ARM_USER_PROTECT__
	set_context_id(pmap->hw_asid);
#endif

#else /* (__ARM_VMSA__ == 7) */

	if (pmap != kernel_pmap) {
		cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
	}

	if (pmap == kernel_pmap) {
		pmap_clear_user_ttb_internal();
	} else {
		set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->hw_asid) << TTBR_ASID_SHIFT));
	}

#if defined(HAS_APPLE_PAC) && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__)
	if (!(BootArgs->bootFlags & kBootFlagsDisableJOP) && !(BootArgs->bootFlags & kBootFlagsDisableUserJOP)) {
		uint64_t sctlr = __builtin_arm_rsr64("SCTLR_EL1");
		bool jop_enabled = sctlr & SCTLR_JOP_KEYS_ENABLED;
		if (!jop_enabled && !pmap->disable_jop) {
			// turn on JOP
			sctlr |= SCTLR_JOP_KEYS_ENABLED;
			__builtin_arm_wsr64("SCTLR_EL1", sctlr);
			// no ISB necessary because this won't take effect until eret returns to EL0
		} else if (jop_enabled && pmap->disable_jop) {
			// turn off JOP
			sctlr &= ~SCTLR_JOP_KEYS_ENABLED;
			__builtin_arm_wsr64("SCTLR_EL1", sctlr);
		}
	}
#endif /* HAS_APPLE_PAC && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__) */
#endif /* (__ARM_VMSA__ == 7) */
}

void
pmap_switch_user_ttb(
	pmap_t pmap)
{
	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
#if XNU_MONITOR
	pmap_switch_user_ttb_ppl(pmap);
#else
	pmap_switch_user_ttb_internal(pmap);
#endif
	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
}
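
/*
 * Illustrative sketch (not part of the build): the expected calling pattern
 * when the machine layer switches address spaces. The thread/map accessors
 * shown are assumptions for the example, not definitions from this file.
 *
 *	// on context switch into a user thread:
 *	pmap_t new_pmap = new_thread->map->pmap;   // hypothetical accessors
 *	pmap_switch_user_ttb(new_pmap);            // installs TTBR0/ASID (or asks the PPL to)
 *
 *	// when entering a kernel-only context:
 *	pmap_clear_user_ttb();                     // points TTBR0 at the invalid/kernel table
 */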
MARK_AS_PMAP_TEXT static void
pmap_clear_user_ttb_internal(void)
{
#if (__ARM_VMSA__ > 7)
	set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
#else
	set_mmu_ttb(kernel_pmap->ttep);
#endif
}

void
pmap_clear_user_ttb(void)
{
#if XNU_MONITOR
	pmap_clear_user_ttb_ppl();
#else
	pmap_clear_user_ttb_internal();
#endif
}
9442 * Routine: arm_force_fast_fault
9445 * Force all mappings for this page to fault according
9446 * to the access modes allowed, so we can gather ref/modify
9449 MARK_AS_PMAP_TEXT
static boolean_t
9450 arm_force_fast_fault_internal(
9452 vm_prot_t allow_mode
,
9455 pmap_paddr_t phys
= ptoa(ppnum
);
9461 boolean_t is_reusable
, is_internal
;
9462 boolean_t tlb_flush_needed
= FALSE
;
9463 boolean_t ref_fault
;
9464 boolean_t mod_fault
;
9466 assert(ppnum
!= vm_page_fictitious_addr
);
9468 if (!pa_valid(phys
)) {
9469 return FALSE
; /* Not a managed page. */
9475 pai
= (int)pa_index(phys
);
9477 pv_h
= pai_to_pvh(pai
);
9479 pte_p
= PT_ENTRY_NULL
;
9480 pve_p
= PV_ENTRY_NULL
;
9481 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
9482 pte_p
= pvh_ptep(pv_h
);
9483 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
9484 pve_p
= pvh_list(pv_h
);
9487 is_reusable
= IS_REUSABLE_PAGE(pai
);
9488 is_internal
= IS_INTERNAL_PAGE(pai
);
9490 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
9491 vm_map_address_t va
;
9495 boolean_t update_pte
;
9497 if (pve_p
!= PV_ENTRY_NULL
) {
9498 pte_p
= pve_get_ptep(pve_p
);
9501 if (pte_p
== PT_ENTRY_NULL
) {
9502 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p
, ppnum
);
9504 #ifdef PVH_FLAG_IOMMU
9505 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
9509 if (*pte_p
== ARM_PTE_EMPTY
) {
9510 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
9512 if (ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
)) {
9513 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
9516 pmap
= ptep_get_pmap(pte_p
);
9517 va
= ptep_get_va(pte_p
);
9519 assert(va
>= pmap
->min
&& va
< pmap
->max
);
9521 if (pte_is_wired(*pte_p
) || pmap
== kernel_pmap
) {
9530 if ((allow_mode
& VM_PROT_READ
) != VM_PROT_READ
) {
9531 /* read protection sets the pte to fault */
9532 tmplate
= tmplate
& ~ARM_PTE_AF
;
9536 if ((allow_mode
& VM_PROT_WRITE
) != VM_PROT_WRITE
) {
9537 /* take away write permission if set */
9538 if (pmap
== kernel_pmap
) {
9539 if ((tmplate
& ARM_PTE_APMASK
) == ARM_PTE_AP(AP_RWNA
)) {
9540 tmplate
= ((tmplate
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RONA
));
9541 pte_set_was_writeable(tmplate
, true);
9546 if ((tmplate
& ARM_PTE_APMASK
) == ARM_PTE_AP(AP_RWRW
)) {
9547 tmplate
= ((tmplate
& ~ARM_PTE_APMASK
) | pt_attr_leaf_ro(pmap_get_pt_attr(pmap
)));
9548 pte_set_was_writeable(tmplate
, true);
9555 #if MACH_ASSERT && XNU_MONITOR
9556 if (is_pte_xprr_protected(spte
)) {
9557 if (pte_to_xprr_perm(spte
) != pte_to_xprr_perm(tmplate
)) {
9558 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
9559 "ppnum=0x%x, options=0x%x, allow_mode=0x%x",
9560 __FUNCTION__
, pte_p
, pmap
, pv_h
, pve_p
, (unsigned long long)spte
, (unsigned long long)tmplate
, (unsigned long long)va
,
9561 ppnum
, options
, allow_mode
);
9564 #endif /* MACH_ASSERT && XNU_MONITOR */
9567 if (*pte_p
!= ARM_PTE_TYPE_FAULT
&&
9568 !ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
)) {
9569 WRITE_PTE_STRONG(pte_p
, tmplate
);
9570 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
, PAGE_SIZE
, pmap
);
9571 tlb_flush_needed
= TRUE
;
9573 WRITE_PTE(pte_p
, tmplate
);
9574 __builtin_arm_isb(ISB_SY
);
9578 /* update pmap stats and ledgers */
9579 if (IS_ALTACCT_PAGE(pai
, pve_p
)) {
9581 * We do not track "reusable" status for
9582 * "alternate accounting" mappings.
9584 } else if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
9587 pmap
!= kernel_pmap
) {
9588 /* one less "reusable" */
9589 PMAP_STATS_ASSERTF(pmap
->stats
.reusable
> 0, pmap
, "stats.reusable %d", pmap
->stats
.reusable
);
9590 OSAddAtomic(-1, &pmap
->stats
.reusable
);
9591 /* one more "internal" */
9592 OSAddAtomic(+1, &pmap
->stats
.internal
);
9593 PMAP_STATS_PEAK(pmap
->stats
.internal
);
9594 PMAP_STATS_ASSERTF(pmap
->stats
.internal
> 0, pmap
, "stats.internal %d", pmap
->stats
.internal
);
9595 pmap_ledger_credit(pmap
, task_ledgers
.internal
, machine_ptob(1));
9596 assert(!IS_ALTACCT_PAGE(pai
, pve_p
));
9597 assert(IS_INTERNAL_PAGE(pai
));
9598 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, machine_ptob(1));
9601 * Avoid the cost of another trap to handle the fast
9602 * fault when we next write to this page: let's just
9603 * handle that now since we already have all the
9604 * necessary information.
9607 arm_clear_fast_fault(ppnum
, VM_PROT_WRITE
);
9609 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
9612 pmap
!= kernel_pmap
) {
9613 /* one more "reusable" */
9614 OSAddAtomic(+1, &pmap
->stats
.reusable
);
9615 PMAP_STATS_PEAK(pmap
->stats
.reusable
);
9616 PMAP_STATS_ASSERTF(pmap
->stats
.reusable
> 0, pmap
, "stats.reusable %d", pmap
->stats
.reusable
);
9617 /* one less "internal" */
9618 PMAP_STATS_ASSERTF(pmap
->stats
.internal
> 0, pmap
, "stats.internal %d", pmap
->stats
.internal
);
9619 OSAddAtomic(-1, &pmap
->stats
.internal
);
9620 pmap_ledger_debit(pmap
, task_ledgers
.internal
, machine_ptob(1));
9621 assert(!IS_ALTACCT_PAGE(pai
, pve_p
));
9622 assert(IS_INTERNAL_PAGE(pai
));
9623 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, machine_ptob(1));
9626 #ifdef PVH_FLAG_IOMMU
9629 pte_p
= PT_ENTRY_NULL
;
9630 if (pve_p
!= PV_ENTRY_NULL
) {
9631 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
9635 if (tlb_flush_needed
) {
9639 /* update global "reusable" status for this page */
9641 if ((options
& PMAP_OPTIONS_CLEAR_REUSABLE
) &&
9643 CLR_REUSABLE_PAGE(pai
);
9644 } else if ((options
& PMAP_OPTIONS_SET_REUSABLE
) &&
9646 SET_REUSABLE_PAGE(pai
);
9651 SET_MODFAULT_PAGE(pai
);
9654 SET_REFFAULT_PAGE(pai
);
boolean_t
arm_force_fast_fault(
	ppnum_t ppnum,
	vm_prot_t allow_mode,
	int options,
	__unused void *arg)
{
	pmap_paddr_t phys = ptoa(ppnum);

	assert(ppnum != vm_page_fictitious_addr);

	if (!pa_valid(phys)) {
		return FALSE;   /* Not a managed page. */
	}

#if XNU_MONITOR
	return arm_force_fast_fault_ppl(ppnum, allow_mode, options);
#else
	return arm_force_fast_fault_internal(ppnum, allow_mode, options);
#endif
}
9684 * Routine: arm_clear_fast_fault
9687 * Clear pending force fault for all mappings for this page based on
9688 * the observed fault type, update ref/modify bits.
9691 arm_clear_fast_fault(
9693 vm_prot_t fault_type
)
9695 pmap_paddr_t pa
= ptoa(ppnum
);
9700 boolean_t tlb_flush_needed
= FALSE
;
9703 assert(ppnum
!= vm_page_fictitious_addr
);
9705 if (!pa_valid(pa
)) {
9706 return FALSE
; /* Not a managed page. */
9710 pai
= (int)pa_index(pa
);
9711 ASSERT_PVH_LOCKED(pai
);
9712 pv_h
= pai_to_pvh(pai
);
9714 pte_p
= PT_ENTRY_NULL
;
9715 pve_p
= PV_ENTRY_NULL
;
9716 if (pvh_test_type(pv_h
, PVH_TYPE_PTEP
)) {
9717 pte_p
= pvh_ptep(pv_h
);
9718 } else if (pvh_test_type(pv_h
, PVH_TYPE_PVEP
)) {
9719 pve_p
= pvh_list(pv_h
);
9722 while ((pve_p
!= PV_ENTRY_NULL
) || (pte_p
!= PT_ENTRY_NULL
)) {
9723 vm_map_address_t va
;
9728 if (pve_p
!= PV_ENTRY_NULL
) {
9729 pte_p
= pve_get_ptep(pve_p
);
9732 if (pte_p
== PT_ENTRY_NULL
) {
9733 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p
, ppnum
);
9735 #ifdef PVH_FLAG_IOMMU
9736 if ((vm_offset_t
)pte_p
& PVH_FLAG_IOMMU
) {
9740 if (*pte_p
== ARM_PTE_EMPTY
) {
9741 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p
, ppnum
);
9744 pmap
= ptep_get_pmap(pte_p
);
9745 va
= ptep_get_va(pte_p
);
9747 assert(va
>= pmap
->min
&& va
< pmap
->max
);
9752 if ((fault_type
& VM_PROT_WRITE
) && (pte_was_writeable(spte
))) {
9754 if (pmap
== kernel_pmap
) {
9755 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | ARM_PTE_AP(AP_RWNA
));
9757 tmplate
= ((spte
& ~ARM_PTE_APMASK
) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap
)));
9761 tmplate
|= ARM_PTE_AF
;
9763 pte_set_was_writeable(tmplate
, false);
9764 pa_set_bits(pa
, PP_ATTR_REFERENCED
| PP_ATTR_MODIFIED
);
9765 } else if ((fault_type
& VM_PROT_READ
) && ((spte
& ARM_PTE_AF
) != ARM_PTE_AF
)) {
9766 tmplate
= spte
| ARM_PTE_AF
;
9769 pa_set_bits(pa
, PP_ATTR_REFERENCED
);
9773 #if MACH_ASSERT && XNU_MONITOR
9774 if (is_pte_xprr_protected(spte
)) {
9775 if (pte_to_xprr_perm(spte
) != pte_to_xprr_perm(tmplate
)) {
9776 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
9777 "ppnum=0x%x, fault_type=0x%x",
9778 __FUNCTION__
, pte_p
, pmap
, pv_h
, pve_p
, (unsigned long long)spte
, (unsigned long long)tmplate
, (unsigned long long)va
,
9782 #endif /* MACH_ASSERT && XNU_MONITOR */
9784 if (spte
!= tmplate
) {
9785 if (spte
!= ARM_PTE_TYPE_FAULT
) {
9786 WRITE_PTE_STRONG(pte_p
, tmplate
);
9787 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
, PAGE_SIZE
, pmap
);
9788 tlb_flush_needed
= TRUE
;
9790 WRITE_PTE(pte_p
, tmplate
);
9791 __builtin_arm_isb(ISB_SY
);
9796 #ifdef PVH_FLAG_IOMMU
9799 pte_p
= PT_ENTRY_NULL
;
9800 if (pve_p
!= PV_ENTRY_NULL
) {
9801 pve_p
= PVE_NEXT_PTR(pve_next(pve_p
));
9804 if (tlb_flush_needed
) {
/*
 * Determine if the fault was induced by software tracking of
 * modify/reference bits.  If so, re-enable the mapping (and set
 * the appropriate bits).
 *
 * Returns KERN_SUCCESS if the fault was induced and was
 * successfully handled.
 *
 * Returns KERN_FAILURE if the fault was not induced and
 * the function was unable to deal with it.
 *
 * Returns KERN_PROTECTION_FAILURE if the pmap layer explictly
 * disallows this type of access.
 */
MARK_AS_PMAP_TEXT static kern_return_t
arm_fast_fault_internal(
	pmap_t pmap,
	vm_map_address_t va,
	vm_prot_t fault_type,
	__unused bool was_af_fault,
	__unused bool from_user)
{
	kern_return_t result = KERN_FAILURE;
	pt_entry_t *ptep;
	pt_entry_t spte = ARM_PTE_TYPE_FAULT;
	int pai;
	pmap_paddr_t pa;

	VALIDATE_PMAP(pmap);

	PMAP_LOCK(pmap);

	/*
	 * If the entry doesn't exist, is completely invalid, or is already
	 * valid, we can't fix it here.
	 */

	ptep = pmap_pte(pmap, va);
	if (ptep != PT_ENTRY_NULL) {
		while (true) {
			spte = *ptep;

			pa = pte_to_pa(spte);

			if ((spte == ARM_PTE_TYPE_FAULT) ||
			    ARM_PTE_IS_COMPRESSED(spte, ptep)) {
				PMAP_UNLOCK(pmap);
				return result;
			}

			if (!pa_valid(pa)) {
				PMAP_UNLOCK(pmap);
				if (pmap_cache_attributes((ppnum_t)atop(pa)) & PP_ATTR_MONITOR) {
					return KERN_PROTECTION_FAILURE;
				}
				return result;
			}
			pai = (int)pa_index(pa);
			LOCK_PVH(pai);
#if __APRR_SUPPORTED__
			if (*ptep == spte) {
				/*
				 * Double-check the spte value, as we care
				 * about the AF bit.
				 */
				break;
			}
			UNLOCK_PVH(pai);
#else /* !(__APRR_SUPPORTED__*/
			break;
#endif /* !(__APRR_SUPPORTED__*/
		}
	} else {
		PMAP_UNLOCK(pmap);
		return result;
	}

#if __APRR_SUPPORTED__
	/* Check to see if this mapping had APRR restrictions. */
	if (is_pte_xprr_protected(spte)) {
		/*
		 * We have faulted on an XPRR managed mapping; decide if the access should be
		 * reattempted or if it should cause an exception. Now that all JIT entitled
		 * task threads always have MPRR enabled we're only here because of
		 * an AF fault or an actual permission fault. AF faults will have result
		 * changed to KERN_SUCCESS below upon arm_clear_fast_fault return.
		 */
		if (was_af_fault && (spte & ARM_PTE_AF)) {
			result = KERN_SUCCESS;
			goto out;
		} else {
			result = KERN_PROTECTION_FAILURE;
		}
	}
#endif /* __APRR_SUPPORTED__*/

	if ((IS_REFFAULT_PAGE(pai)) ||
	    ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
		/*
		 * An attempted access will always clear ref/mod fault state, as
		 * appropriate for the fault type.  arm_clear_fast_fault will
		 * update the associated PTEs for the page as appropriate; if
		 * any PTEs are updated, we redrive the access.  If the mapping
		 * does not actually allow for the attempted access, the
		 * following fault will (hopefully) fail to update any PTEs, and
		 * thus cause arm_fast_fault to decide that it failed to handle
		 * the access.
		 */
		if (IS_REFFAULT_PAGE(pai)) {
			CLR_REFFAULT_PAGE(pai);
		}
		if ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)) {
			CLR_MODFAULT_PAGE(pai);
		}

		if (arm_clear_fast_fault((ppnum_t)atop(pa), fault_type)) {
			/*
			 * Should this preserve KERN_PROTECTION_FAILURE?  The
			 * cost of not doing so is a another fault in a case
			 * that should already result in an exception.
			 */
			result = KERN_SUCCESS;
		}
	}

#if __APRR_SUPPORTED__
out:
#endif /* __APRR_SUPPORTED__*/
	UNLOCK_PVH(pai);
	PMAP_UNLOCK(pmap);
	return result;
}
kern_return_t
arm_fast_fault(
	pmap_t pmap,
	vm_map_address_t va,
	vm_prot_t fault_type,
	bool was_af_fault,
	__unused bool from_user)
{
	kern_return_t result = KERN_FAILURE;

	if (va < pmap->min || va >= pmap->max) {
		return result;
	}

	PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
	    from_user);

#if (__ARM_VMSA__ == 7)
	if (pmap != kernel_pmap) {
		pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
		pmap_t cur_pmap;
		pmap_t cur_user_pmap;

		cur_pmap = current_pmap();
		cur_user_pmap = cpu_data_ptr->cpu_user_pmap;

		if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
			if (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp) {
				pmap_set_pmap(pmap, current_thread());
				result = KERN_SUCCESS;
				goto done;
			}
		}
	}
#endif

#if XNU_MONITOR
	result = arm_fast_fault_ppl(pmap, va, fault_type, was_af_fault, from_user);
#else
	result = arm_fast_fault_internal(pmap, va, fault_type, was_af_fault, from_user);
#endif

#if (__ARM_VMSA__ == 7)
done:
#endif

	PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);

	return result;
}
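
/*
 * Illustrative sketch (not part of the build): how a data-abort handler is
 * expected to consult arm_fast_fault() before escalating to the full VM fault
 * path. The handler-local variable names are hypothetical.
 *
 *	kern_return_t kr = arm_fast_fault(fault_map->pmap, fault_addr,
 *	    fault_type, was_af_fault, TRUE);
 *	if (kr == KERN_SUCCESS) {
 *		return;            // ref/mod fast fault absorbed, retry the access
 *	}
 *	// otherwise fall through to vm_fault() with the original fault_type
 */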
void
pmap_copy_page(
	ppnum_t psrc,
	ppnum_t pdst)
{
	bcopy_phys((addr64_t) (ptoa(psrc)),
	    (addr64_t) (ptoa(pdst)),
	    PAGE_SIZE);
}

/*
 *	pmap_copy_page copies the specified (machine independent) pages.
 */
void
pmap_copy_part_page(
	ppnum_t psrc,
	vm_offset_t src_offset,
	ppnum_t pdst,
	vm_offset_t dst_offset,
	vm_size_t len)
{
	bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
	    (addr64_t) (ptoa(pdst) + dst_offset),
	    len);
}

/*
 *	pmap_zero_page zeros the specified (machine independent) page.
 */
void
pmap_zero_page(
	ppnum_t pn)
{
	assert(pn != vm_page_fictitious_addr);
	bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
}

/*
 *	pmap_zero_part_page
 *	zeros the specified (machine independent) part of a page.
 */
void
pmap_zero_part_page(
	ppnum_t pn,
	vm_offset_t offset,
	vm_size_t len)
{
	assert(pn != vm_page_fictitious_addr);
	assert(offset + len <= PAGE_SIZE);
	bzero_phys((addr64_t) (ptoa(pn) + offset), len);
}

/*
 * nop in current arm implementation
 */
void
inval_copy_windows(
	__unused thread_t t)
{
}
void
pmap_map_globals(
	void)
{
	pt_entry_t *ptep, pte;

	ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
	assert(ptep != PT_ENTRY_NULL);
	assert(*ptep == ARM_PTE_EMPTY);

	pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
#if __ARM_KERNEL_PROTECT__
	pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
	pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
#if     (__ARM_VMSA__ > 7)
	pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
	pte |= ARM_PTE_SH;
#endif
	*ptep = pte;
	FLUSH_PTE_RANGE(ptep, (ptep + 1));
	PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE, false);
}

vm_offset_t
pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
{
	if (__improbable(index >= CPUWINDOWS_MAX)) {
		panic("%s: invalid index %u", __func__, index);
	}
	return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
}
MARK_AS_PMAP_TEXT static unsigned int
pmap_map_cpu_windows_copy_internal(
	ppnum_t pn,
	vm_prot_t prot,
	unsigned int wimg_bits)
{
	pt_entry_t *ptep = NULL, pte;
	pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
	unsigned int cpu_num;
	unsigned int i;
	vm_offset_t cpu_copywindow_vaddr = 0;
	bool need_strong_sync = false;

#if XNU_MONITOR || HAS_MILD_DSB
	unsigned int cacheattr = (!pa_valid(ptoa(pn)) ? pmap_cache_attributes(pn) : 0);
	need_strong_sync = ((cacheattr & PMAP_IO_RANGE_STRONG_SYNC) != 0);
#endif

#if XNU_MONITOR
#ifdef  __ARM_COHERENT_IO__
	if (pa_valid(ptoa(pn)) && !pmap_ppl_disable) {
		panic("%s: attempted to map a managed page, "
		    "pn=%u, prot=0x%x, wimg_bits=0x%x",
		    __FUNCTION__,
		    pn, prot, wimg_bits);
	}
	if (!pmap_ppl_disable && (cacheattr & PP_ATTR_MONITOR)) {
		panic("%s: attempt to map PPL-protected I/O address 0x%llx", __func__, (uint64_t)ptoa(pn));
	}
#else /* __ARM_COHERENT_IO__ */
#error CPU copy windows are not properly supported with both the PPL and incoherent IO
#endif /* __ARM_COHERENT_IO__ */
#endif /* XNU_MONITOR */
	cpu_num = pmap_cpu_data->cpu_number;

	for (i = 0; i < CPUWINDOWS_MAX; i++) {
		cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
		ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
		assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
		if (*ptep == ARM_PTE_TYPE_FAULT) {
			break;
		}
	}
	if (i == CPUWINDOWS_MAX) {
		panic("pmap_map_cpu_windows_copy: out of window\n");
	}

	pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
#if __ARM_KERNEL_PROTECT__
	pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

	pte |= wimg_to_pte(wimg_bits);

	if (prot & VM_PROT_WRITE) {
		pte |= ARM_PTE_AP(AP_RWNA);
	} else {
		pte |= ARM_PTE_AP(AP_RONA);
	}

	WRITE_PTE_FAST(ptep, pte);
	/*
	 * Invalidate tlb. Cover nested cpu_copywindow_vaddr usage with the interrupted context
	 * in pmap_unmap_cpu_windows_copy() after clearing the pte and before tlb invalidate.
	 */
	FLUSH_PTE_STRONG(ptep);
	PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[i]);
	pmap_cpu_data->copywindow_strong_sync[i] = need_strong_sync;

	return i;
}

unsigned int
pmap_map_cpu_windows_copy(
	ppnum_t pn,
	vm_prot_t prot,
	unsigned int wimg_bits)
{
#if XNU_MONITOR
	return pmap_map_cpu_windows_copy_ppl(pn, prot, wimg_bits);
#else
	return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
#endif
}
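
/*
 * Illustrative sketch (not part of the build): typical pairing of the per-CPU
 * copy window calls to touch physical memory without a permanent kernel
 * mapping. The destination buffer and the chosen wimg value are assumptions
 * for the example; preemption is assumed to be disabled around the sequence.
 *
 *	unsigned int index = pmap_map_cpu_windows_copy(pn, VM_PROT_READ, VM_WIMG_DEFAULT);
 *	vm_offset_t win = pmap_cpu_windows_copy_addr(cpu_number(), index);
 *	bcopy((const void *)win, dst_buf, PAGE_SIZE);   // 'dst_buf' is hypothetical
 *	pmap_unmap_cpu_windows_copy(index);
 */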
MARK_AS_PMAP_TEXT static void
pmap_unmap_cpu_windows_copy_internal(
	unsigned int index)
{
	pt_entry_t *ptep;
	unsigned int cpu_num;
	vm_offset_t cpu_copywindow_vaddr = 0;
	pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();

	cpu_num = pmap_cpu_data->cpu_number;

	cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
	/* Issue full-system DSB to ensure prior operations on the per-CPU window
	 * (which are likely to have been on I/O memory) are complete before
	 * tearing down the mapping. */
	__builtin_arm_dsb(DSB_SY);
	ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
	WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
	PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index]);
}

void
pmap_unmap_cpu_windows_copy(
	unsigned int index)
{
#if XNU_MONITOR
	return pmap_unmap_cpu_windows_copy_ppl(index);
#else
	return pmap_unmap_cpu_windows_copy_internal(index);
#endif
}
/*
 * Indicate that a pmap is intended to be used as a nested pmap
 * within one or more larger address spaces.  This must be set
 * before pmap_nest() is called with this pmap as the 'subordinate'.
 */
MARK_AS_PMAP_TEXT static void
pmap_set_nested_internal(
	pmap_t pmap)
{
	VALIDATE_PMAP(pmap);
	pmap->nested = TRUE;
}

void
pmap_set_nested(
	pmap_t pmap)
{
#if XNU_MONITOR
	pmap_set_nested_ppl(pmap);
#else
	pmap_set_nested_internal(pmap);
#endif
}
/*
 * pmap_trim_range(pmap, start, end)
 *
 * pmap  = pmap to operate on
 * start = start of the range
 * end   = end of the range
 *
 * Attempts to deallocate TTEs for the given range in the nested range.
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_range(
	pmap_t pmap,
	addr64_t start,
	addr64_t end)
{
	addr64_t cur;
	addr64_t nested_region_start;
	addr64_t nested_region_end;
	addr64_t adjusted_start;
	addr64_t adjusted_end;
	addr64_t adjust_offmask;
	tt_entry_t * tte_p;
	pt_entry_t * pte_p;
	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	if (__improbable(end < start)) {
		panic("%s: invalid address range, "
		    "pmap=%p, start=%p, end=%p",
		    __func__,
		    pmap, (void*)start, (void*)end);
	}

	nested_region_start = pmap->nested ? pmap->nested_region_subord_addr : pmap->nested_region_subord_addr;
	nested_region_end = nested_region_start + pmap->nested_region_size;

	if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
		panic("%s: range outside nested region %p-%p, "
		    "pmap=%p, start=%p, end=%p",
		    __func__, (void *)nested_region_start, (void *)nested_region_end,
		    pmap, (void*)start, (void*)end);
	}

	/* Contract the range to TT page boundaries. */
	adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
	adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
	adjusted_end = end & ~adjust_offmask;
	bool modified = false;

	/* Iterate over the range, trying to remove TTEs. */
	for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
		PMAP_LOCK(pmap);

		tte_p = pmap_tte(pmap, cur);

		if (tte_p == (tt_entry_t *) NULL) {
			goto done;
		}

		if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
			pte_p = (pt_entry_t *) ttetokv(*tte_p);

			if ((ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
			    (pmap != kernel_pmap)) {
				if (pmap->nested == TRUE) {
					/* Deallocate for the nested map. */
					pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
				} else {
					/* Just remove for the parent map. */
					pmap_tte_remove(pmap, tte_p, pt_attr_twig_level(pt_attr));
				}

				pmap_get_pt_ops(pmap)->flush_tlb_tte_async(cur, pmap);
				modified = true;
			}
		}

done:
		PMAP_UNLOCK(pmap);
	}

	if (modified) {
		sync_tlb_flush();
	}

#if (__ARM_VMSA__ > 7)
	/* Remove empty L2 TTs. */
	adjusted_start = ((start + ARM_TT_L1_OFFMASK) & ~ARM_TT_L1_OFFMASK);
	adjusted_end = end & ~ARM_TT_L1_OFFMASK;

	for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += ARM_TT_L1_SIZE) {
		/* For each L1 entry in our range... */
		PMAP_LOCK(pmap);

		bool remove_tt1e = true;
		tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
		tt_entry_t * tt2e_start;
		tt_entry_t * tt2e_end;
		tt_entry_t * tt2e_p;
		tt_entry_t tt1e;

		if (tt1e_p == NULL) {
			PMAP_UNLOCK(pmap);
			continue;
		}

		tt1e = *tt1e_p;

		if (tt1e == ARM_TTE_TYPE_FAULT) {
			PMAP_UNLOCK(pmap);
			continue;
		}

		tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
		tt2e_end = &tt2e_start[TTE_PGENTRIES];

		for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
			if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
				/*
				 * If any TTEs are populated, don't remove the
				 * L1 TT.
				 */
				remove_tt1e = false;
			}
		}

		if (remove_tt1e) {
			pmap_tte_deallocate(pmap, tt1e_p, PMAP_TT_L1_LEVEL);
			PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE, false);
		}

		PMAP_UNLOCK(pmap);
	}
#endif /* (__ARM_VMSA__ > 7) */
}
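
/*
 * Illustrative sketch (not part of the build): the boundary contraction used
 * by pmap_trim_range() above. With a 16K-page configuration where one leaf
 * table covers 32MB (offmask 0x1FFFFFF), only whole twig-level regions inside
 * [start, end) are candidates for removal:
 *
 *	adjusted_start = (start + adjust_offmask) & ~adjust_offmask;  // round up
 *	adjusted_end   = end & ~adjust_offmask;                       // round down
 *
 * Example values (for illustration only): start = 0x181000000 and
 * end = 0x185F00000 give adjusted_start = 0x182000000 and
 * adjusted_end = 0x184000000, so partially-covered tables at either edge are
 * left in place.
 */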
/*
 * pmap_trim_internal(grand, subord, vstart, nstart, size)
 *
 * grand  = pmap subord is nested in
 * subord = nested pmap
 * vstart = start of the used range in grand
 * nstart = start of the used range in nstart
 * size   = size of the used range
 *
 * Attempts to trim the shared region page tables down to only cover the given
 * range in subord and grand.
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_internal(
	pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	addr64_t nstart,
	uint64_t size)
{
	addr64_t vend, nend;
	addr64_t adjust_offmask;

	if (__improbable(os_add_overflow(vstart, size, &vend))) {
		panic("%s: grand addr wraps around, "
		    "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
		    __func__, grand, subord, (void*)vstart, (void*)nstart, size);
	}

	if (__improbable(os_add_overflow(nstart, size, &nend))) {
		panic("%s: nested addr wraps around, "
		    "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
		    __func__, grand, subord, (void*)vstart, (void*)nstart, size);
	}

	VALIDATE_PMAP(grand);
	VALIDATE_PMAP(subord);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);

	PMAP_LOCK(subord);

	if (!subord->nested) {
		panic("%s: subord is not nestable, "
		    "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
		    __func__, grand, subord, (void*)vstart, (void*)nstart, size);
	}

	if (grand->nested) {
		panic("%s: grand is nestable, "
		    "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
		    __func__, grand, subord, (void*)vstart, (void*)nstart, size);
	}

	if (grand->nested_pmap != subord) {
		panic("%s: grand->nested != subord, "
		    "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
		    __func__, grand, subord, (void*)vstart, (void*)nstart, size);
	}

	if ((vstart < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
		panic("%s: grand range not in nested region, "
		    "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
		    __func__, grand, subord, (void*)vstart, (void*)nstart, size);
	}

	if ((nstart < grand->nested_region_grand_addr) || (nend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
		panic("%s: subord range not in nested region, "
		    "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
		    __func__, grand, subord, (void*)vstart, (void*)nstart, size);
	}

	if (!grand->nested_has_no_bounds_ref) {
		assert(subord->nested_bounds_set);

		if (!grand->nested_bounds_set) {
			/* Inherit the bounds from subord. */
			grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
			grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
			grand->nested_bounds_set = true;
		}

		PMAP_UNLOCK(subord);
		return;
	}

	if ((!subord->nested_bounds_set) && size) {
		adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);

		subord->nested_region_true_start = nstart;
		subord->nested_region_true_end = nend;
		subord->nested_region_true_start &= ~adjust_offmask;

		if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
			panic("%s: padded true end wraps around, "
			    "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
			    __func__, grand, subord, (void*)vstart, (void*)nstart, size);
		}

		subord->nested_region_true_end &= ~adjust_offmask;
		subord->nested_bounds_set = true;
	}

	if (subord->nested_bounds_set) {
		/* Inherit the bounds from subord. */
		grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
		grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
		grand->nested_bounds_set = true;

		/* If we know the bounds, we can trim the pmap. */
		grand->nested_has_no_bounds_ref = false;
		PMAP_UNLOCK(subord);
	} else {
		/* Don't trim if we don't know the bounds. */
		PMAP_UNLOCK(subord);
		return;
	}

	/* Trim grand to only cover the given range. */
	pmap_trim_range(grand, grand->nested_region_grand_addr, grand->nested_region_true_start);
	pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_grand_addr + grand->nested_region_size));

	/* Try to trim subord. */
	pmap_trim_subord(subord);
}
MARK_AS_PMAP_TEXT static void
pmap_trim_self(pmap_t pmap)
{
	if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
		/* If we have a no bounds ref, we need to drop it. */
		PMAP_LOCK(pmap->nested_pmap);
		pmap->nested_has_no_bounds_ref = false;
		boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
		vm_map_offset_t nested_region_true_start = (pmap->nested_pmap->nested_region_true_start - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
		vm_map_offset_t nested_region_true_end = (pmap->nested_pmap->nested_region_true_end - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
		PMAP_UNLOCK(pmap->nested_pmap);

		if (nested_bounds_set) {
			pmap_trim_range(pmap, pmap->nested_region_grand_addr, nested_region_true_start);
			pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_grand_addr + pmap->nested_region_size));
		}
		/*
		 * Try trimming the nested pmap, in case we had the
		 * last reference.
		 */
		pmap_trim_subord(pmap->nested_pmap);
	}
}

/*
 * pmap_trim_subord(grand, subord)
 *
 * grand  = pmap that we have nested subord in
 * subord = nested pmap we are attempting to trim
 *
 * Trims subord if possible
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_subord(pmap_t subord)
{
	bool contract_subord = false;

	PMAP_LOCK(subord);

	subord->nested_no_bounds_refcnt--;

	if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
		/* If this was the last no bounds reference, trim subord. */
		contract_subord = true;
	}

	PMAP_UNLOCK(subord);

	if (contract_subord) {
		pmap_trim_range(subord, subord->nested_region_subord_addr, subord->nested_region_true_start);
		pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_subord_addr + subord->nested_region_size);
	}
}

void
pmap_trim(
	pmap_t grand,
	pmap_t subord,
	addr64_t vstart,
	addr64_t nstart,
	uint64_t size)
{
#if XNU_MONITOR
	pmap_trim_ppl(grand, subord, vstart, nstart, size);

	pmap_ledger_check_balance(grand);
	pmap_ledger_check_balance(subord);
#else
	pmap_trim_internal(grand, subord, vstart, nstart, size);
#endif
}
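
/*
 * Illustrative sketch (not part of the build): once the shared-region layout
 * is known, the VM layer is expected to narrow both pmaps with a call such as
 *
 *	pmap_trim(task_pmap, shared_region_pmap,
 *	    used_start_in_grand, used_start_in_subord, used_size);
 *
 * The argument names above are hypothetical placeholders; the call simply
 * trims everything outside the [true_start, true_end) window computed in
 * pmap_trim_internal().
 */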
#if HAS_APPLE_PAC && XNU_MONITOR
static void *
pmap_sign_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator)
{
	void *res = NULL;
	boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);

	ml_set_kernelkey_enabled(FALSE);
	switch (key) {
	case ptrauth_key_asia:
		res = ptrauth_sign_unauthenticated(value, ptrauth_key_asia, discriminator);
		break;
	case ptrauth_key_asda:
		res = ptrauth_sign_unauthenticated(value, ptrauth_key_asda, discriminator);
		break;
	default:
		panic("attempt to sign user pointer without process independent key");
	}
	ml_set_kernelkey_enabled(TRUE);

	ml_set_interrupts_enabled(current_intr_state);

	return res;
}

void *
pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t discriminator)
{
	return pmap_sign_user_ptr_internal(value, key, discriminator);
}

static void *
pmap_auth_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator)
{
	if ((key != ptrauth_key_asia) && (key != ptrauth_key_asda)) {
		panic("attempt to auth user pointer without process independent key");
	}

	void *res = NULL;
	boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);

	ml_set_kernelkey_enabled(FALSE);
	res = ml_auth_ptr_unchecked(value, key, discriminator);
	ml_set_kernelkey_enabled(TRUE);

	ml_set_interrupts_enabled(current_intr_state);

	return res;
}

void *
pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t discriminator)
{
	return pmap_auth_user_ptr_internal(value, key, discriminator);
}
#endif /* HAS_APPLE_PAC && XNU_MONITOR */
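
/*
 * Illustrative sketch (not part of the build): signing and later
 * re-authenticating a user pointer with a process-independent key. The
 * discriminator shown is a hypothetical example value.
 *
 *	uint64_t disc = ptrauth_string_discriminator("example.handler");
 *	void *signed_ptr = pmap_sign_user_ptr(raw_ptr, ptrauth_key_asia, disc);
 *	...
 *	void *checked_ptr = pmap_auth_user_ptr(signed_ptr, ptrauth_key_asia, disc);
 *
 * Any key other than ptrauth_key_asia / ptrauth_key_asda panics, since only
 * the process-independent A keys are handled by these routines.
 */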
10630 * kern_return_t pmap_nest(grand, subord, vstart, size)
10632 * grand = the pmap that we will nest subord into
10633 * subord = the pmap that goes into the grand
10634 * vstart = start of range in pmap to be inserted
10635 * nstart = start of range in pmap nested pmap
10636 * size = Size of nest area (up to 16TB)
10638 * Inserts a pmap into another. This is used to implement shared segments.
10642 MARK_AS_PMAP_TEXT
static kern_return_t
10643 pmap_nest_internal(
10650 kern_return_t kr
= KERN_FAILURE
;
10651 vm_map_offset_t vaddr
, nvaddr
;
10652 tt_entry_t
*stte_p
;
10653 tt_entry_t
*gtte_p
;
10655 unsigned int num_tte
;
10656 unsigned int nested_region_asid_bitmap_size
;
10657 unsigned int* nested_region_asid_bitmap
;
10658 int expand_options
= 0;
10660 addr64_t vend
, nend
;
10661 if (__improbable(os_add_overflow(vstart
, size
, &vend
))) {
10662 panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__
, grand
, vstart
, size
);
10664 if (__improbable(os_add_overflow(nstart
, size
, &nend
))) {
10665 panic("%s: %p nested addr wraps around: 0x%llx + 0x%llx", __func__
, subord
, nstart
, size
);
10668 VALIDATE_PMAP(grand
);
10669 VALIDATE_PMAP(subord
);
10671 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(grand
);
10672 assert(pmap_get_pt_attr(subord
) == pt_attr
);
10675 expand_options
|= PMAP_TT_ALLOCATE_NOWAIT
;
10678 if (((size
| vstart
| nstart
) & (pt_attr_leaf_table_offmask(pt_attr
))) != 0x0ULL
) {
10679 panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx\n", grand
, vstart
, nstart
, size
);
10682 if (!subord
->nested
) {
10683 panic("%s: subordinate pmap %p is not nestable", __func__
, subord
);
10686 if ((grand
->nested_pmap
!= PMAP_NULL
) && (grand
->nested_pmap
!= subord
)) {
10687 panic("pmap_nest() pmap %p has a nested pmap\n", grand
);
10690 if (subord
->nested_region_asid_bitmap
== NULL
) {
10691 nested_region_asid_bitmap_size
= (unsigned int)(size
>> pt_attr_twig_shift(pt_attr
)) / (sizeof(unsigned int) * NBBY
);
10694 pmap_paddr_t pa
= 0;
10696 if ((nested_region_asid_bitmap_size
* sizeof(unsigned int)) > PAGE_SIZE
) {
10697 panic("%s: nested_region_asid_bitmap_size=%u will not fit in a page, "
10698 "grand=%p, subord=%p, vstart=0x%llx, nstart=0x%llx, size=%llx",
10700 nested_region_asid_bitmap_size
,
10701 grand
, subord
, vstart
, nstart
, size
);
10704 kr
= pmap_pages_alloc(&pa
, PAGE_SIZE
, PMAP_PAGES_ALLOCATE_NOWAIT
);
10706 if (kr
!= KERN_SUCCESS
) {
10712 nested_region_asid_bitmap
= (unsigned int *)phystokv(pa
);
10714 nested_region_asid_bitmap
= kalloc(nested_region_asid_bitmap_size
* sizeof(unsigned int));
10716 bzero(nested_region_asid_bitmap
, nested_region_asid_bitmap_size
* sizeof(unsigned int));
10719 if (subord
->nested_region_asid_bitmap
== NULL
) {
10720 subord
->nested_region_asid_bitmap
= nested_region_asid_bitmap
;
10721 subord
->nested_region_asid_bitmap_size
= nested_region_asid_bitmap_size
;
10722 subord
->nested_region_subord_addr
= nstart
;
10723 subord
->nested_region_size
= (mach_vm_offset_t
) size
;
10724 nested_region_asid_bitmap
= NULL
;
10726 PMAP_UNLOCK(subord
);
10727 if (nested_region_asid_bitmap
!= NULL
) {
10729 pmap_pages_free(kvtophys((vm_offset_t
)nested_region_asid_bitmap
), PAGE_SIZE
);
10731 kfree(nested_region_asid_bitmap
, nested_region_asid_bitmap_size
* sizeof(unsigned int));
10735 if ((subord
->nested_region_subord_addr
+ subord
->nested_region_size
) < nend
) {
10737 unsigned int new_nested_region_asid_bitmap_size
;
10738 unsigned int* new_nested_region_asid_bitmap
;
10740 nested_region_asid_bitmap
= NULL
;
10741 nested_region_asid_bitmap_size
= 0;
10742 new_size
= nend
- subord
->nested_region_subord_addr
;
10744 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
10745 new_nested_region_asid_bitmap_size
= (unsigned int)((new_size
>> pt_attr_twig_shift(pt_attr
)) / (sizeof(unsigned int) * NBBY
)) + 1;
10748 pmap_paddr_t pa
= 0;
10750 if ((new_nested_region_asid_bitmap_size
* sizeof(unsigned int)) > PAGE_SIZE
) {
10751 panic("%s: new_nested_region_asid_bitmap_size=%u will not fit in a page, "
10752 "grand=%p, subord=%p, vstart=0x%llx, nstart=0x%llx, size=%llx",
10754 new_nested_region_asid_bitmap_size
,
10755 grand
, subord
, vstart
, nstart
, size
);
10758 kr
= pmap_pages_alloc(&pa
, PAGE_SIZE
, PMAP_PAGES_ALLOCATE_NOWAIT
);
10760 if (kr
!= KERN_SUCCESS
) {
10766 new_nested_region_asid_bitmap
= (unsigned int *)phystokv(pa
);
10768 new_nested_region_asid_bitmap
= kalloc(new_nested_region_asid_bitmap_size
* sizeof(unsigned int));
10771 if (subord
->nested_region_size
< new_size
) {
10772 bzero(new_nested_region_asid_bitmap
, new_nested_region_asid_bitmap_size
* sizeof(unsigned int));
10773 bcopy(subord
->nested_region_asid_bitmap
, new_nested_region_asid_bitmap
, subord
->nested_region_asid_bitmap_size
);
10774 nested_region_asid_bitmap_size
= subord
->nested_region_asid_bitmap_size
;
10775 nested_region_asid_bitmap
= subord
->nested_region_asid_bitmap
;
10776 subord
->nested_region_asid_bitmap
= new_nested_region_asid_bitmap
;
10777 subord
->nested_region_asid_bitmap_size
= new_nested_region_asid_bitmap_size
;
10778 subord
->nested_region_size
= new_size
;
10779 new_nested_region_asid_bitmap
= NULL
;
10781 PMAP_UNLOCK(subord
);
10782 if (nested_region_asid_bitmap
!= NULL
)
10784 {pmap_pages_free(kvtophys((vm_offset_t
)nested_region_asid_bitmap
), PAGE_SIZE
);}
10786 { kfree(nested_region_asid_bitmap
, nested_region_asid_bitmap_size
* sizeof(unsigned int));}
10788 if (new_nested_region_asid_bitmap
!= NULL
)
10790 {pmap_pages_free(kvtophys((vm_offset_t
)new_nested_region_asid_bitmap
), PAGE_SIZE
);}
10792 { kfree(new_nested_region_asid_bitmap
, new_nested_region_asid_bitmap_size
* sizeof(unsigned int));}
10797 if (grand
->nested_pmap
== PMAP_NULL
) {
10798 grand
->nested_pmap
= subord
;
10800 if (!subord
->nested_bounds_set
) {
10802 * We are nesting without the shared regions bounds
10803 * being known. We'll have to trim the pmap later.
10805 grand
->nested_has_no_bounds_ref
= true;
10806 subord
->nested_no_bounds_refcnt
++;
10809 grand
->nested_region_grand_addr
= vstart
;
10810 grand
->nested_region_subord_addr
= nstart
;
10811 grand
->nested_region_size
= (mach_vm_offset_t
) size
;
10813 if ((grand
->nested_region_grand_addr
> vstart
)) {
10814 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand
);
10815 } else if ((grand
->nested_region_grand_addr
+ grand
->nested_region_size
) < vend
) {
10816 grand
->nested_region_size
= (mach_vm_offset_t
)(vstart
- grand
->nested_region_grand_addr
+ size
);
10820 #if (__ARM_VMSA__ == 7)
10821 nvaddr
= (vm_map_offset_t
) nstart
;
10822 vaddr
= (vm_map_offset_t
) vstart
;
10823 num_tte
= size
>> ARM_TT_L1_SHIFT
;
10825 for (i
= 0; i
< num_tte
; i
++) {
10826 if (((subord
->nested_region_true_start
) > nvaddr
) || ((subord
->nested_region_true_end
) <= nvaddr
)) {
10830 stte_p
= pmap_tte(subord
, nvaddr
);
10831 if ((stte_p
== (tt_entry_t
*)NULL
) || (((*stte_p
) & ARM_TTE_TYPE_MASK
) != ARM_TTE_TYPE_TABLE
)) {
10832 PMAP_UNLOCK(subord
);
10833 kr
= pmap_expand(subord
, nvaddr
, expand_options
, PMAP_TT_L2_LEVEL
);
10835 if (kr
!= KERN_SUCCESS
) {
10842 PMAP_UNLOCK(subord
);
10844 stte_p
= pmap_tte(grand
, vaddr
);
10845 if (stte_p
== (tt_entry_t
*)NULL
) {
10846 PMAP_UNLOCK(grand
);
10847 kr
= pmap_expand(grand
, vaddr
, expand_options
, PMAP_TT_L1_LEVEL
);
10849 if (kr
!= KERN_SUCCESS
) {
10854 PMAP_UNLOCK(grand
);
10860 nvaddr
+= ARM_TT_L1_SIZE
;
10861 vaddr
+= ARM_TT_L1_SIZE
;
10865 nvaddr
= (vm_map_offset_t
) nstart
;
10866 num_tte
= (unsigned int)(size
>> pt_attr_twig_shift(pt_attr
));
10868 for (i
= 0; i
< num_tte
; i
++) {
10869 if (((subord
->nested_region_true_start
) > nvaddr
) || ((subord
->nested_region_true_end
) <= nvaddr
)) {
10873 stte_p
= pmap_tte(subord
, nvaddr
);
10874 if (stte_p
== PT_ENTRY_NULL
|| *stte_p
== ARM_TTE_EMPTY
) {
10875 PMAP_UNLOCK(subord
);
10876 kr
= pmap_expand(subord
, nvaddr
, expand_options
, PMAP_TT_LEAF_LEVEL
);
10878 if (kr
!= KERN_SUCCESS
) {
10886 nvaddr
+= pt_attr_twig_size(pt_attr
);
10889 PMAP_UNLOCK(subord
);
10892 * copy tte's from subord pmap into grand pmap
10896 nvaddr
= (vm_map_offset_t
) nstart
;
10897 vaddr
= (vm_map_offset_t
) vstart
;
10900 #if (__ARM_VMSA__ == 7)
10901 for (i
= 0; i
< num_tte
; i
++) {
10902 if (((subord
->nested_region_true_start
) > nvaddr
) || ((subord
->nested_region_true_end
) <= nvaddr
)) {
10906 stte_p
= pmap_tte(subord
, nvaddr
);
10907 gtte_p
= pmap_tte(grand
, vaddr
);
10911 nvaddr
+= ARM_TT_L1_SIZE
;
10912 vaddr
+= ARM_TT_L1_SIZE
;
10915 for (i
= 0; i
< num_tte
; i
++) {
10916 if (((subord
->nested_region_true_start
) > nvaddr
) || ((subord
->nested_region_true_end
) <= nvaddr
)) {
10920 stte_p
= pmap_tte(subord
, nvaddr
);
10921 gtte_p
= pmap_tte(grand
, vaddr
);
10922 if (gtte_p
== PT_ENTRY_NULL
) {
10923 PMAP_UNLOCK(grand
);
10924 kr
= pmap_expand(grand
, vaddr
, expand_options
, PMAP_TT_TWIG_LEVEL
);
10927 if (kr
!= KERN_SUCCESS
) {
10931 gtte_p
= pmap_tt2e(grand
, vaddr
);
10936 vaddr
+= pt_attr_twig_size(pt_attr
);
10937 nvaddr
+= pt_attr_twig_size(pt_attr
);
10944 stte_p
= pmap_tte(grand
, vstart
);
10945 FLUSH_PTE_RANGE_STRONG(stte_p
, stte_p
+ num_tte
);
10947 #if (__ARM_VMSA__ > 7)
10949 * check for overflow on LP64 arch
10951 assert((size
& 0xFFFFFFFF00000000ULL
) == 0);
10953 PMAP_UPDATE_TLBS(grand
, vstart
, vend
, false);
10955 PMAP_UNLOCK(grand
);
10967 kern_return_t kr
= KERN_FAILURE
;
10969 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST
) | DBG_FUNC_START
,
10970 VM_KERNEL_ADDRHIDE(grand
), VM_KERNEL_ADDRHIDE(subord
),
10971 VM_KERNEL_ADDRHIDE(vstart
));
10974 while ((kr
= pmap_nest_ppl(grand
, subord
, vstart
, nstart
, size
)) == KERN_RESOURCE_SHORTAGE
) {
10975 pmap_alloc_page_for_ppl();
10978 pmap_ledger_check_balance(grand
);
10979 pmap_ledger_check_balance(subord
);
10981 kr
= pmap_nest_internal(grand
, subord
, vstart
, nstart
, size
);
10984 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST
) | DBG_FUNC_END
, kr
);
10990 * kern_return_t pmap_unnest(grand, vaddr)
10992 * grand = the pmap that will have the virtual range unnested
10993 * vaddr = start of range in pmap to be unnested
10994 * size = size of range in pmap to be unnested
11004 return pmap_unnest_options(grand
, vaddr
, size
, 0);
11007 MARK_AS_PMAP_TEXT
static kern_return_t
11008 pmap_unnest_options_internal(
11012 unsigned int option
)
11014 vm_map_offset_t start
;
11015 vm_map_offset_t addr
;
11017 unsigned int current_index
;
11018 unsigned int start_index
;
11019 unsigned int max_index
;
11020 unsigned int num_tte
;
11024 if (__improbable(os_add_overflow(vaddr
, size
, &vend
))) {
11025 panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__
, grand
, vaddr
, size
);
11028 VALIDATE_PMAP(grand
);
11030 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(grand
);
11032 if (((size
| vaddr
) & pt_attr_twig_offmask(pt_attr
)) != 0x0ULL
) {
11033 panic("pmap_unnest(): unaligned request");
11036 if ((option
& PMAP_UNNEST_CLEAN
) == 0) {
11037 if (grand
->nested_pmap
== NULL
) {
11038 panic("%s: %p has no nested pmap", __func__
, grand
);
11041 if ((vaddr
< grand
->nested_region_grand_addr
) || (vend
> (grand
->nested_region_grand_addr
+ grand
->nested_region_size
))) {
11042 panic("%s: %p: unnest request to region not-fully-nested region [%p, %p)", __func__
, grand
, (void*)vaddr
, (void*)vend
);
11045 PMAP_LOCK(grand
->nested_pmap
);
11047 start
= vaddr
- grand
->nested_region_grand_addr
+ grand
->nested_region_subord_addr
;
11048 start_index
= (unsigned int)((vaddr
- grand
->nested_region_grand_addr
) >> pt_attr_twig_shift(pt_attr
));
11049 max_index
= (unsigned int)(start_index
+ (size
>> pt_attr_twig_shift(pt_attr
)));
11050 num_tte
= (unsigned int)(size
>> pt_attr_twig_shift(pt_attr
));
11052 for (current_index
= start_index
, addr
= start
; current_index
< max_index
; current_index
++, addr
+= pt_attr_twig_size(pt_attr
)) {
11053 pt_entry_t
*bpte
, *epte
, *cpte
;
11055 if (addr
< grand
->nested_pmap
->nested_region_true_start
) {
11056 /* We haven't reached the interesting range. */
11060 if (addr
>= grand
->nested_pmap
->nested_region_true_end
) {
11061 /* We're done with the interesting range. */
11065 bpte
= pmap_pte(grand
->nested_pmap
, addr
);
11066 epte
= bpte
+ (pt_attr_leaf_index_mask(pt_attr
) >> pt_attr_leaf_shift(pt_attr
));
11068 if (!testbit(current_index
, (int *)grand
->nested_pmap
->nested_region_asid_bitmap
)) {
11069 setbit(current_index
, (int *)grand
->nested_pmap
->nested_region_asid_bitmap
);
11071 for (cpte
= bpte
; cpte
<= epte
; cpte
++) {
11074 boolean_t managed
= FALSE
;
11077 if ((*cpte
!= ARM_PTE_TYPE_FAULT
)
11078 && (!ARM_PTE_IS_COMPRESSED(*cpte
, cpte
))) {
11081 pa
= pte_to_pa(spte
);
11082 if (!pa_valid(pa
)) {
11085 pai
= (int)pa_index(pa
);
11088 pa
= pte_to_pa(spte
);
11089 if (pai
== (int)pa_index(pa
)) {
11091 break; // Leave the PVH locked as we'll unlock it after we update the PTE
11096 if (((spte
& ARM_PTE_NG
) != ARM_PTE_NG
)) {
11097 WRITE_PTE_FAST(cpte
, (spte
| ARM_PTE_NG
));
11101 ASSERT_PVH_LOCKED(pai
);
11108 FLUSH_PTE_RANGE_STRONG(bpte
, epte
);
11109 flush_mmu_tlb_region_asid_async(start
, (unsigned)size
, grand
->nested_pmap
);
11114 PMAP_UNLOCK(grand
->nested_pmap
);
11120 * invalidate all pdes for segment at vaddr in pmap grand
11125 num_tte
= (unsigned int)(size
>> pt_attr_twig_shift(pt_attr
));
11127 for (i
= 0; i
< num_tte
; i
++, addr
+= pt_attr_twig_size(pt_attr
)) {
11128 if (addr
< grand
->nested_pmap
->nested_region_true_start
) {
11129 /* We haven't reached the interesting range. */
11133 if (addr
>= grand
->nested_pmap
->nested_region_true_end
) {
11134 /* We're done with the interesting range. */
11138 tte_p
= pmap_tte(grand
, addr
);
11139 *tte_p
= ARM_TTE_TYPE_FAULT
;
11142 tte_p
= pmap_tte(grand
, start
);
11143 FLUSH_PTE_RANGE_STRONG(tte_p
, tte_p
+ num_tte
);
11144 PMAP_UPDATE_TLBS(grand
, start
, vend
, false);
11146 PMAP_UNLOCK(grand
);
11148 return KERN_SUCCESS
;
11152 pmap_unnest_options(
11156 unsigned int option
)
11158 kern_return_t kr
= KERN_FAILURE
;
11160 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST
) | DBG_FUNC_START
,
11161 VM_KERNEL_ADDRHIDE(grand
), VM_KERNEL_ADDRHIDE(vaddr
));
11164 kr
= pmap_unnest_options_ppl(grand
, vaddr
, size
, option
);
11166 kr
= pmap_unnest_options_internal(grand
, vaddr
, size
, option
);
11169 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST
) | DBG_FUNC_END
, kr
);
boolean_t
pmap_adjust_unnest_parameters(
	__unused pmap_t p,
	__unused vm_map_offset_t *s,
	__unused vm_map_offset_t *e)
{
	return TRUE; /* to get to log_unnest_badness()... */
}

/*
 * disable no-execute capability on
 * the specified pmap
 */
#if DEVELOPMENT || DEBUG
void
pmap_disable_NX(
	pmap_t pmap)
{
	pmap->nx_enabled = FALSE;
}
#else
void
pmap_disable_NX(
	__unused pmap_t pmap)
{
}
#endif

void
pt_fake_zone_init(
	int zone_index)
{
	pt_fake_zone_index = zone_index;
}

void
pt_fake_zone_info(
	int *count,
	vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
	uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
{
	*count      = inuse_pmap_pages_count;
	*cur_size   = PAGE_SIZE * (inuse_pmap_pages_count);
	*max_size   = PAGE_SIZE * (inuse_pmap_pages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
	*elem_size  = PAGE_SIZE;
	*alloc_size = PAGE_SIZE;
	*sum_size   = (alloc_pmap_pages_count) * PAGE_SIZE;

	*collectable = 1;
	*exhaustable = 0;
	*caller_acct = 1;
}
/*
 *	flush a range of hardware TLB entries.
 *	NOTE: assumes the smallest TLB entry in use will be for
 *	an ARM small page (4K).
 */

#define ARM_FULL_TLB_FLUSH_THRESHOLD 64

#if __ARM_RANGE_TLBI__
#define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
#define ARM64_FULL_TLB_FLUSH_THRESHOLD  ARM64_16K_TLB_RANGE_PAGES
#else
#define ARM64_FULL_TLB_FLUSH_THRESHOLD  256
#endif // __ARM_RANGE_TLBI__

static void
flush_mmu_tlb_region_asid_async(
	vm_offset_t va,
	unsigned length,
	pmap_t pmap)
{
#if     (__ARM_VMSA__ == 7)
	vm_offset_t end = va + length;
	uint32_t asid;

	asid = pmap->hw_asid;

	if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
		boolean_t flush_all = FALSE;

		if ((asid == 0) || (pmap->nested == TRUE)) {
			flush_all = TRUE;
		}
		if (flush_all) {
			flush_mmu_tlb_async();
		} else {
			flush_mmu_tlb_asid_async(asid);
		}

		return;
	}
	if (pmap->nested == TRUE) {
#if     !__ARM_MP_EXT__
		flush_mmu_tlb();
#else
		va = arm_trunc_page(va);
		while (va < end) {
			flush_mmu_tlb_mva_entries_async(va);
			va += ARM_SMALL_PAGE_SIZE;
		}
#endif
		return;
	}
	va = arm_trunc_page(va) | (asid & 0xff);
	flush_mmu_tlb_entries_async(va, end);

#else
	unsigned npages = length >> pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
	uint32_t asid;

	asid = pmap->hw_asid;

	if (npages > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
		boolean_t flush_all = FALSE;

		if ((asid == 0) || (pmap->nested == TRUE)) {
			flush_all = TRUE;
		}
		if (flush_all) {
			flush_mmu_tlb_async();
		} else {
			flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
		}
		return;
	}
#if __ARM_RANGE_TLBI__
	if (npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
		va = generate_rtlbi_param(npages, asid, va);
		if (pmap->nested == TRUE) {
			flush_mmu_tlb_allrange_async(va);
		} else {
			flush_mmu_tlb_range_async(va);
		}
		return;
	}
#endif
	vm_offset_t end = tlbi_asid(asid) | tlbi_addr(va + length);
	va = tlbi_asid(asid) | tlbi_addr(va);
	if (pmap->nested == TRUE) {
		flush_mmu_tlb_allentries_async(va, end);
	} else {
		flush_mmu_tlb_entries_async(va, end);
	}

#endif
}
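
/*
 * Illustrative sketch (not part of the build): the decision tree above for an
 * arm64 pmap with 16K pages, assuming ARM64_FULL_TLB_FLUSH_THRESHOLD is 256.
 * The numbers are only for illustration; the real thresholds come from the
 * macros defined above.
 *
 *	npages = length >> 14;                  // 16K leaf shift
 *	if (npages > 256)          -> flush the whole ASID (or all TLBs if nested/ASID 0)
 *	else if range TLBI present -> one range invalidate built by generate_rtlbi_param()
 *	else                       -> per-entry flush_mmu_tlb_entries_async(va, end)
 */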
MARK_AS_PMAP_TEXT static void
flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap)
{
#if     (__ARM_VMSA__ == 7)
	flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
	flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
	flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
	flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
#else
	flush_mmu_tlb_entry_async(tlbi_addr(va & ~pt_attr_twig_offmask(pmap_get_pt_attr(pmap))) | tlbi_asid(pmap->hw_asid));
#endif
}

MARK_AS_PMAP_TEXT static void
flush_mmu_tlb_full_asid_async(pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	flush_mmu_tlb_asid_async(pmap->hw_asid);
#else /* (__ARM_VMSA__ == 7) */
	flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
#endif /* (__ARM_VMSA__ == 7) */
}

void
flush_mmu_tlb_region(
	vm_offset_t va,
	unsigned length)
{
	flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
	sync_tlb_flush();
}
static pmap_io_range_t*
pmap_find_io_attr(pmap_paddr_t paddr)
{
	pmap_io_range_t find_range = {.addr = paddr & ~PAGE_MASK, .len = PAGE_SIZE};
	unsigned int begin = 0, end = num_io_rgns - 1;
	if ((num_io_rgns == 0) || (paddr < io_attr_table[begin].addr) ||
	    (paddr >= (io_attr_table[end].addr + io_attr_table[end].len))) {
		return NULL;
	}

	for (;;) {
		unsigned int middle = (begin + end) / 2;
		int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
		if (cmp == 0) {
			return &io_attr_table[middle];
		} else if (begin == end) {
			break;
		} else if (cmp > 0) {
			begin = middle + 1;
		} else {
			end = middle;
		}
	}

	return NULL;
}
unsigned int
pmap_cache_attributes(
	ppnum_t pn)
{
	pmap_paddr_t paddr;
	int pai;
	unsigned int result;
	pp_attr_t pp_attr_current;

	paddr = ptoa(pn);

	assert(vm_last_phys > vm_first_phys); // Check that pmap has been bootstrapped

	if (!pa_valid(paddr)) {
		pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
		return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
	}

	result = VM_WIMG_DEFAULT;

	pai = (int)pa_index(paddr);

	pp_attr_current = pp_attr_table[pai];
	if (pp_attr_current & PP_ATTR_WIMG_MASK) {
		result = pp_attr_current & PP_ATTR_WIMG_MASK;
	}
	return result;
}
MARK_AS_PMAP_TEXT static void
pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
{
	if ((wimg_bits_prev != wimg_bits_new)
	    && ((wimg_bits_prev == VM_WIMG_COPYBACK)
	    || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
	    && (wimg_bits_new != VM_WIMG_COPYBACK))
	    || ((wimg_bits_prev == VM_WIMG_WTHRU)
	    && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
		pmap_sync_page_attributes_phys(pn);
	}

	if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
		pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
	}
}
MARK_AS_PMAP_TEXT static __unused void
pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
{
	pmap_paddr_t paddr = ptoa(pn);
	int pai = (int)pa_index(paddr);

	if (__improbable(!pa_valid(paddr))) {
		panic("%s called on non-managed page 0x%08x", __func__, pn);
	}

	LOCK_PVH(pai);

#if XNU_MONITOR
	if (__improbable(pa_test_monitor(paddr))) {
		panic("%s invoked on PPL page 0x%08x", __func__, pn);
	}
#endif

	pmap_update_cache_attributes_locked(pn, new_cacheattr);

	UNLOCK_PVH(pai);

	pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
}

void *
pmap_map_compressor_page(ppnum_t pn)
{
#if __ARM_PTE_PHYSMAP__
	unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
	if (cacheattr != VM_WIMG_DEFAULT) {
#if XNU_MONITOR
		pmap_update_compressor_page_ppl(pn, cacheattr, VM_WIMG_DEFAULT);
#else
		pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
#endif
	}
#endif
	return (void*)phystokv(ptoa(pn));
}

void
pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
{
#if __ARM_PTE_PHYSMAP__
	unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
	if (cacheattr != VM_WIMG_DEFAULT) {
#if XNU_MONITOR
		pmap_update_compressor_page_ppl(pn, VM_WIMG_DEFAULT, cacheattr);
#else
		pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
#endif
	}
#endif
}
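
/*
 * Illustrative sketch (not part of the build): expected pairing when the
 * compressor touches a page through the physical aperture. The destination
 * buffer name is a hypothetical placeholder.
 *
 *	void *kva = pmap_map_compressor_page(pn);   // forces VM_WIMG_DEFAULT if needed
 *	memcpy(dst_buf, kva, PAGE_SIZE);            // compression work happens here
 *	pmap_unmap_compressor_page(pn, kva);        // restores the original WIMG attribute
 */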
MARK_AS_PMAP_TEXT static boolean_t
pmap_batch_set_cache_attributes_internal(
    ppnum_t pn,
    unsigned int cacheattr,
    unsigned int page_cnt,
    unsigned int page_index,
    boolean_t doit,
    unsigned int *res)
{
    pmap_paddr_t paddr;
    int pai;
    pp_attr_t pp_attr_current;
    pp_attr_t pp_attr_template;
    unsigned int wimg_bits_prev, wimg_bits_new;

    if (cacheattr & VM_WIMG_USE_DEFAULT) {
        cacheattr = VM_WIMG_DEFAULT;
    }

    if ((doit == FALSE) && (*res == 0)) {
        pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
        *res = page_cnt;
        pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
        if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt << PAGE_SHIFT) == FALSE) {
            return FALSE;
        }
    }

    paddr = ptoa(pn);

    if (!pa_valid(paddr)) {
        panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn);
    }

    pai = (int)pa_index(paddr);

    if (doit) {
        LOCK_PVH(pai);
#if XNU_MONITOR
        if (pa_test_monitor(paddr)) {
            panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
        }
#endif
    }

    do {
        pp_attr_current = pp_attr_table[pai];
        wimg_bits_prev = VM_WIMG_DEFAULT;
        if (pp_attr_current & PP_ATTR_WIMG_MASK) {
            wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
        }

        pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));

        if (!doit) {
            break;
        }

        /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
         * to avoid losing simultaneous updates to other bits like refmod. */
    } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));

    wimg_bits_new = VM_WIMG_DEFAULT;
    if (pp_attr_template & PP_ATTR_WIMG_MASK) {
        wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
    }

    if (doit) {
        if (wimg_bits_new != wimg_bits_prev) {
            pmap_update_cache_attributes_locked(pn, cacheattr);
        }
        UNLOCK_PVH(pai);
        if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
            pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
        }
    } else {
        if (wimg_bits_new == VM_WIMG_COPYBACK) {
            return FALSE;
        }
        if (wimg_bits_prev == wimg_bits_new) {
            pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
            *res = *res - 1;
            pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
            if (!platform_cache_batch_wimg(wimg_bits_new, (*res) << PAGE_SHIFT)) {
                return FALSE;
            }
        }
        return TRUE;
    }

    if (page_cnt == (page_index + 1)) {
        wimg_bits_prev = VM_WIMG_COPYBACK;
        if (((wimg_bits_prev != wimg_bits_new))
            && ((wimg_bits_prev == VM_WIMG_COPYBACK)
            || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
            && (wimg_bits_new != VM_WIMG_COPYBACK))
            || ((wimg_bits_prev == VM_WIMG_WTHRU)
            && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
            platform_cache_flush_wimg(wimg_bits_new);
        }
    }

    return TRUE;
}
boolean_t
pmap_batch_set_cache_attributes(
    ppnum_t pn,
    unsigned int cacheattr,
    unsigned int page_cnt,
    unsigned int page_index,
    boolean_t doit,
    unsigned int *res)
{
#if XNU_MONITOR
    return pmap_batch_set_cache_attributes_ppl(pn, cacheattr, page_cnt, page_index, doit, res);
#else
    return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
#endif
}
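
/*
 * Illustrative sketch (hypothetical caller, names assumed): the batch
 * interface is meant to be driven in two passes over a set of pages, a
 * doit=FALSE pass that lets the platform decide whether one bulk cache
 * operation is cheaper than per-page maintenance, followed by a doit=TRUE
 * pass that actually rewrites the WIMG attributes.
 *
 *    unsigned int res = 0;
 *    for (unsigned int i = 0; i < page_cnt; i++) {       // probe pass
 *        pmap_batch_set_cache_attributes(pn[i], cacheattr, page_cnt, i, FALSE, &res);
 *    }
 *    for (unsigned int i = 0; i < page_cnt; i++) {       // commit pass
 *        pmap_batch_set_cache_attributes(pn[i], cacheattr, page_cnt, i, TRUE, &res);
 *    }
 */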
MARK_AS_PMAP_TEXT static void
pmap_set_cache_attributes_priv(
    ppnum_t pn,
    unsigned int cacheattr,
    boolean_t external __unused)
{
    pmap_paddr_t paddr;
    int pai;
    pp_attr_t pp_attr_current;
    pp_attr_t pp_attr_template;
    unsigned int wimg_bits_prev, wimg_bits_new;

    paddr = ptoa(pn);

    if (!pa_valid(paddr)) {
        return;                         /* Not a managed page. */
    }

    if (cacheattr & VM_WIMG_USE_DEFAULT) {
        cacheattr = VM_WIMG_DEFAULT;
    }

    pai = (int)pa_index(paddr);

    if (external && pa_test_monitor(paddr)) {
        panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
    } else if (!external && !pa_test_monitor(paddr)) {
        panic("%s invoked on non-PPL page 0x%llx", __func__, (uint64_t)paddr);
    }

    do {
        pp_attr_current = pp_attr_table[pai];
        wimg_bits_prev = VM_WIMG_DEFAULT;
        if (pp_attr_current & PP_ATTR_WIMG_MASK) {
            wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
        }

        pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));

        /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
         * to avoid losing simultaneous updates to other bits like refmod. */
    } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));

    wimg_bits_new = VM_WIMG_DEFAULT;
    if (pp_attr_template & PP_ATTR_WIMG_MASK) {
        wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
    }

    if (wimg_bits_new != wimg_bits_prev) {
        pmap_update_cache_attributes_locked(pn, cacheattr);
    }

    pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
}
MARK_AS_PMAP_TEXT static void
pmap_set_cache_attributes_internal(
    ppnum_t pn,
    unsigned int cacheattr)
{
    pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
}

void
pmap_set_cache_attributes(
    ppnum_t pn,
    unsigned int cacheattr)
{
#if XNU_MONITOR
    pmap_set_cache_attributes_ppl(pn, cacheattr);
#else
    pmap_set_cache_attributes_internal(pn, cacheattr);
#endif
}
MARK_AS_PMAP_TEXT void
pmap_update_cache_attributes_locked(
    ppnum_t ppnum,
    unsigned attributes)
{
    pmap_paddr_t    phys = ptoa(ppnum);
    pv_entry_t      **pv_h;
    pv_entry_t      *pve_p;
    pt_entry_t      *pte_p;
    pmap_t          pmap;
    unsigned int    pai;
    pt_entry_t      tmplate;
    boolean_t       tlb_flush_needed = FALSE;

    PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);

#if __ARM_PTE_PHYSMAP__
    vm_offset_t kva = phystokv(phys);
    pte_p = pmap_pte(kernel_pmap, kva);

    tmplate = *pte_p;
    tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
#if XNU_MONITOR
    tmplate |= (wimg_to_pte(attributes) & ~ARM_PTE_XPRR_MASK);
#else
    tmplate |= wimg_to_pte(attributes);
#endif
#if (__ARM_VMSA__ > 7)
    if (tmplate & ARM_PTE_HINT_MASK) {
        panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
            __FUNCTION__, pte_p, (void *)kva, tmplate);
    }
#endif
    WRITE_PTE_STRONG(pte_p, tmplate);
    flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
    tlb_flush_needed = TRUE;
#endif

    pai = (unsigned int)pa_index(phys);

    pv_h = pai_to_pvh(pai);

    pte_p = PT_ENTRY_NULL;
    pve_p = PV_ENTRY_NULL;
    if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
        pte_p = pvh_ptep(pv_h);
    } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
        pve_p = pvh_list(pv_h);
        pte_p = PT_ENTRY_NULL;
    }

    while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
        vm_map_address_t va;

        if (pve_p != PV_ENTRY_NULL) {
            pte_p = pve_get_ptep(pve_p);
        }
#ifdef PVH_FLAG_IOMMU
        if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
            goto cache_skip_pve;
        }
#endif
        pmap = ptep_get_pmap(pte_p);
        va = ptep_get_va(pte_p);

        tmplate = *pte_p;
        tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
        tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes);

        WRITE_PTE_STRONG(pte_p, tmplate);
        pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
        tlb_flush_needed = TRUE;

#ifdef PVH_FLAG_IOMMU
cache_skip_pve:
#endif
        pte_p = PT_ENTRY_NULL;
        if (pve_p != PV_ENTRY_NULL) {
            pve_p = PVE_NEXT_PTR(pve_next(pve_p));
        }
    }
    if (tlb_flush_needed) {
        sync_tlb_flush();
    }

    PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);
}
#if (__ARM_VMSA__ == 7)
vm_map_address_t
pmap_create_sharedpage(
    void)
{
    pmap_paddr_t    pa;
    kern_return_t   kr;

    (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
    memset((char *) phystokv(pa), 0, PAGE_SIZE);

    kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
    assert(kr == KERN_SUCCESS);

    return (vm_map_address_t)phystokv(pa);
}
#else /* (__ARM_VMSA__ == 7) */
static void
pmap_update_tt3e(
    pmap_t pmap,
    vm_address_t address,
    tt_entry_t template)
{
    tt_entry_t *ptep, pte;

    ptep = pmap_tt3e(pmap, address);
    if (ptep == NULL) {
        panic("%s: no ptep?\n", __FUNCTION__);
    }

    pte = *ptep;
    pte = tte_to_pa(pte) | template;
    WRITE_PTE_STRONG(ptep, pte);
}

/* Note absence of non-global bit */
#define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
        | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
        | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
        | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
vm_map_address_t
pmap_create_sharedpage(
    void)
{
    kern_return_t kr;
    pmap_paddr_t pa = 0;

#if XNU_MONITOR
    pa = pmap_alloc_page_for_kern();
#else
    (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
#endif

    memset((char *) phystokv(pa), 0, PAGE_SIZE);

#ifdef CONFIG_XNUPOST
    /*
     * The kernel pmap maintains a user accessible mapping of the commpage
     * for testing purposes.
     */
    kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
    assert(kr == KERN_SUCCESS);

    /*
     * This mapping should not be global (as we only expect to reference it
     * locally).
     */
    pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);

    kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
#endif /* CONFIG_XNUPOST */

    /*
     * In order to avoid burning extra pages on mapping the shared page, we
     * create a dedicated pmap for the shared page.  We forcibly nest the
     * translation tables from this pmap into other pmaps.  The level we
     * will nest at depends on the MMU configuration (page size, TTBR range,
     * etc).
     *
     * Note that this is NOT "the nested pmap" (which is used to nest the
     * shared cache).
     *
     * Note that we update parameters of the entry for our unique needs (NG
     * entry, etc.).
     */
    sharedpage_pmap = pmap_create_options(NULL, 0x0, 0);
    assert(sharedpage_pmap != NULL);

    /* The user 64-bit mapping... */
    kr = pmap_enter(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
    assert(kr == KERN_SUCCESS);
    pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

    /* ...and the user 32-bit mapping. */
    kr = pmap_enter(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
    assert(kr == KERN_SUCCESS);
    pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

    /* For manipulation in kernel, go straight to physical page */
    return (vm_map_address_t)phystokv(pa);
}
/*
 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
 * with user controlled TTEs.
 */
#if (ARM_PGSHIFT == 14)
static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= MACH_VM_MAX_ADDRESS);
static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
#elif (ARM_PGSHIFT == 12)
static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
#else
#error Nested shared page mapping is unsupported on this config
#endif
MARK_AS_PMAP_TEXT static kern_return_t
pmap_insert_sharedpage_internal(
    pmap_t pmap)
{
    kern_return_t kr = KERN_SUCCESS;
    vm_offset_t sharedpage_vaddr;
    pt_entry_t *ttep, *src_ttep;
    unsigned int options = 0;

    VALIDATE_PMAP(pmap);
#if XNU_MONITOR
    options |= PMAP_OPTIONS_NOWAIT;
#endif /* XNU_MONITOR */

#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
#endif

    if (pmap_is_64bit(pmap)) {
        sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
    } else {
        sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
    }

    PMAP_LOCK(pmap);

    /*
     * For 4KB pages, we can force the commpage to nest at the level one
     * page table, as each entry is 1GB (i.e, there will be no overlap
     * with regular userspace mappings).  For 16KB pages, each level one
     * entry is 64GB, so we must go to the second level entry (32MB) in
     * order to nest.
     */
#if (ARM_PGSHIFT == 12)
    /* Just slam in the L1 entry. */
    ttep = pmap_tt1e(pmap, sharedpage_vaddr);

    if (*ttep != ARM_PTE_EMPTY) {
        panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
    }

    src_ttep = pmap_tt1e(sharedpage_pmap, sharedpage_vaddr);
#elif (ARM_PGSHIFT == 14)
    /* Allocate for the L2 entry if necessary, and slam it into place. */
    /*
     * As long as we are using a three level page table, the first level
     * should always exist, so we don't need to check for it.
     */
    while (*pmap_tt1e(pmap, sharedpage_vaddr) == ARM_PTE_EMPTY) {
        PMAP_UNLOCK(pmap);

        kr = pmap_expand(pmap, sharedpage_vaddr, options, PMAP_TT_L2_LEVEL);

        if (kr != KERN_SUCCESS) {
#if XNU_MONITOR
            if (kr == KERN_RESOURCE_SHORTAGE) {
                return kr;
            } else
#endif
            {
                panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
            }
        }

        PMAP_LOCK(pmap);
    }

    ttep = pmap_tt2e(pmap, sharedpage_vaddr);

    if (*ttep != ARM_PTE_EMPTY) {
        panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
    }

    src_ttep = pmap_tt2e(sharedpage_pmap, sharedpage_vaddr);
#endif

    *ttep = *src_ttep;
    FLUSH_PTE_STRONG(ttep);

    /* TODO: Should we flush in the 64-bit case? */
    flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);

#if (ARM_PGSHIFT == 12)
    flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
#elif (ARM_PGSHIFT == 14)
    flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
#endif
    sync_tlb_flush();

    PMAP_UNLOCK(pmap);

    return kr;
}
static void
pmap_unmap_sharedpage(
    pmap_t pmap)
{
    pt_entry_t *ttep;
    vm_offset_t sharedpage_vaddr;

#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
#endif

    if (pmap_is_64bit(pmap)) {
        sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
    } else {
        sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
    }

#if (ARM_PGSHIFT == 12)
    ttep = pmap_tt1e(pmap, sharedpage_vaddr);

    if (ttep == NULL) {
        return;
    }

    /* It had better be mapped to the shared page */
    if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt1e(sharedpage_pmap, sharedpage_vaddr)) {
        panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
    }
#elif (ARM_PGSHIFT == 14)
    ttep = pmap_tt2e(pmap, sharedpage_vaddr);

    if (ttep == NULL) {
        return;
    }

    /* It had better be mapped to the shared page */
    if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt2e(sharedpage_pmap, sharedpage_vaddr)) {
        panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
    }
#endif

    *ttep = ARM_TTE_EMPTY;
    flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);

#if (ARM_PGSHIFT == 12)
    flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
#elif (ARM_PGSHIFT == 14)
    flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
#endif
    sync_tlb_flush();
}
void
pmap_insert_sharedpage(
    pmap_t pmap)
{
#if XNU_MONITOR
    kern_return_t kr = KERN_FAILURE;

    while ((kr = pmap_insert_sharedpage_ppl(pmap)) == KERN_RESOURCE_SHORTAGE) {
        pmap_alloc_page_for_ppl();
    }

    pmap_ledger_check_balance(pmap);

    if (kr != KERN_SUCCESS) {
        panic("%s: failed to insert the shared page, kr=%d, "
            "pmap=%p",
            __FUNCTION__, kr,
            pmap);
    }
#else
    pmap_insert_sharedpage_internal(pmap);
#endif
}

static boolean_t
pmap_is_64bit(
    pmap_t pmap)
{
    return pmap->is_64bit;
}

#endif /* (__ARM_VMSA__ == 7) */

/* ARMTODO -- an implementation that accounts for
 * holes in the physical map, if any.
 */
boolean_t
pmap_valid_page(
    ppnum_t pn)
{
    return pa_valid(ptoa(pn));
}
boolean_t
pmap_bootloader_page(
    ppnum_t pn)
{
    pmap_paddr_t paddr = ptoa(pn);

    if (pa_valid(paddr)) {
        return FALSE;
    }

    pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
    return (io_rgn != NULL) && (io_rgn->wimg & PMAP_IO_RANGE_CARVEOUT);
}
MARK_AS_PMAP_TEXT static boolean_t
pmap_is_empty_internal(
    pmap_t pmap,
    vm_map_offset_t va_start,
    vm_map_offset_t va_end)
{
    vm_map_offset_t block_start, block_end;
    tt_entry_t *tte_p;
    pt_entry_t *pte_p;

    if (pmap == NULL) {
        return TRUE;
    }

    VALIDATE_PMAP(pmap);

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
    unsigned int initial_not_in_kdp = not_in_kdp;

    if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
        PMAP_LOCK(pmap);
    }

#if (__ARM_VMSA__ == 7)
    if (tte_index(pmap, pt_attr, va_end) >= pmap->tte_index_max) {
        if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
            PMAP_UNLOCK(pmap);
        }
        return TRUE;
    }
#endif

    /* TODO: This will be faster if we increment ttep at each level. */
    block_start = va_start;

    while (block_start < va_end) {
        pt_entry_t *bpte_p, *epte_p;

        block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
        if (block_end > va_end) {
            block_end = va_end;
        }

        tte_p = pmap_tte(pmap, block_start);
        if ((tte_p != PT_ENTRY_NULL)
            && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
            pte_p = (pt_entry_t *) ttetokv(*tte_p);
            bpte_p = &pte_p[pte_index(pmap, pt_attr, block_start)];
            epte_p = &pte_p[pte_index(pmap, pt_attr, block_end)];

            for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
                if (*pte_p != ARM_PTE_EMPTY) {
                    if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
                        PMAP_UNLOCK(pmap);
                    }
                    return FALSE;
                }
            }
        }
        block_start = block_end;
    }

    if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
        PMAP_UNLOCK(pmap);
    }

    return TRUE;
}

boolean_t
pmap_is_empty(
    pmap_t pmap,
    vm_map_offset_t va_start,
    vm_map_offset_t va_end)
{
#if XNU_MONITOR
    return pmap_is_empty_ppl(pmap, va_start, va_end);
#else
    return pmap_is_empty_internal(pmap, va_start, va_end);
#endif
}
vm_map_offset_t
pmap_max_offset(
    boolean_t is64,
    unsigned int option)
{
    return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
}

vm_map_offset_t
pmap_max_64bit_offset(
    __unused unsigned int option)
{
    vm_map_offset_t max_offset_ret = 0;

#if defined(__arm64__)
    const vm_map_offset_t min_max_offset = SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000; // end of shared region + 512MB for various purposes
    if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
        max_offset_ret = arm64_pmap_max_offset_default;
    } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
        max_offset_ret = min_max_offset;
    } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
        max_offset_ret = MACH_VM_MAX_ADDRESS;
    } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
        if (arm64_pmap_max_offset_default) {
            max_offset_ret = arm64_pmap_max_offset_default;
        } else if (max_mem > 0xC0000000) {
            max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
        } else if (max_mem > 0x40000000) {
            max_offset_ret = min_max_offset + 0x38000000;  // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
        } else {
            max_offset_ret = min_max_offset;
        }
    } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
        if (arm64_pmap_max_offset_default) {
            // Allow the boot-arg to override jumbo size
            max_offset_ret = arm64_pmap_max_offset_default;
        } else {
            max_offset_ret = MACH_VM_MAX_ADDRESS;     // Max offset is 64GB for pmaps with special "jumbo" blessing
        }
    } else {
        panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
    }

    assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
    assert(max_offset_ret >= min_max_offset);
#else
    panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
#endif

    return max_offset_ret;
}
vm_map_offset_t
pmap_max_32bit_offset(
    unsigned int option)
{
    vm_map_offset_t max_offset_ret = 0;

    if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
        max_offset_ret = arm_pmap_max_offset_default;
    } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
        max_offset_ret = 0x80000000;
    } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
        max_offset_ret = VM_MAX_ADDRESS;
    } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
        if (arm_pmap_max_offset_default) {
            max_offset_ret = arm_pmap_max_offset_default;
        } else if (max_mem > 0x20000000) {
            max_offset_ret = 0x80000000;
        } else {
            max_offset_ret = 0x80000000;
        }
    } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
        max_offset_ret = 0x80000000;
    } else {
        panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
    }

    assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
    return max_offset_ret;
}
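
/*
 * Illustrative sketch (hypothetical caller, names assumed): selecting the VM
 * map ceiling for a new task.  Only pmap_max_offset() is taken from this
 * file.
 *
 *    vm_map_offset_t max_off =
 *        pmap_max_offset(is64bit, ARM_PMAP_MAX_OFFSET_DEVICE);
 *    // ... use max_off as the maximum user address when creating the vm_map ...
 */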
#if CONFIG_DTRACE
/*
 * Constrain DTrace copyin/copyout actions
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

kern_return_t
dtrace_copyio_preflight(
    __unused addr64_t va)
{
    if (current_map() == kernel_map) {
        return KERN_FAILURE;
    }
    return KERN_SUCCESS;
}

kern_return_t
dtrace_copyio_postflight(
    __unused addr64_t va)
{
    return KERN_SUCCESS;
}
#endif /* CONFIG_DTRACE */
void
pmap_flush_context_init(__unused pmap_flush_context *pfc)
{
}

void
pmap_flush(
    __unused pmap_flush_context *cpus_to_flush)
{
    /* not implemented yet */
    return;
}
/*
 * Enforce that the address range described by kva and nbytes is not currently
 * PPL-owned, and won't become PPL-owned while pinned.  This is to prevent
 * unintentionally writing to PPL-owned memory.
 */
static void
pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes)
{
    vm_offset_t end;
    if (os_add_overflow(kva, nbytes, &end)) {
        panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
    }
    for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
        pmap_paddr_t pa = kvtophys(ckva);
        if (!pa_valid(pa)) {
            panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
        }
        pp_attr_t attr;
        unsigned int pai = (unsigned int)pa_index(pa);
        if (ckva == phystokv(pa)) {
            panic("%s(%p): attempt to pin static mapping for page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
        }
        do {
            attr = pp_attr_table[pai] & ~PP_ATTR_NO_MONITOR;
            if (attr & PP_ATTR_MONITOR) {
                panic("%s(%p): physical page 0x%llx belongs to PPL", __func__, (void*)kva, (uint64_t)pa);
            }
        } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_NO_MONITOR, &pp_attr_table[pai]));
    }
}

static void
pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes)
{
    vm_offset_t end;
    if (os_add_overflow(kva, nbytes, &end)) {
        panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
    }
    for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
        pmap_paddr_t pa = kvtophys(ckva);
        if (!pa_valid(pa)) {
            panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
        }
        if (!(pp_attr_table[pa_index(pa)] & PP_ATTR_NO_MONITOR)) {
            panic("%s(%p): physical page 0x%llx not pinned", __func__, (void*)kva, (uint64_t)pa);
        }
        assert(!(pp_attr_table[pa_index(pa)] & PP_ATTR_MONITOR));
        pa_clear_no_monitor(pa);
    }
}
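
/*
 * Illustrative sketch of the pin/write/unpin pattern used by the *_internal
 * routines in this file (see pmap_query_resident_internal()) whenever they
 * store results through a kernel pointer supplied by the caller: the backing
 * page is pinned so it cannot become PPL-owned in the middle of the write.
 * out_p and value are illustrative names.
 *
 *    pmap_pin_kernel_pages((vm_offset_t)out_p, sizeof(*out_p));
 *    *out_p = value;
 *    pmap_unpin_kernel_pages((vm_offset_t)out_p, sizeof(*out_p));
 */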
/*
 * Lock down a page, making all mappings read-only, and preventing
 * further mappings or removal of this particular kva's mapping.
 * Effectively, it makes the page at kva immutable.
 */
MARK_AS_PMAP_TEXT static void
pmap_ppl_lockdown_page(vm_address_t kva)
{
    pmap_paddr_t pa = kvtophys(kva);
    unsigned int pai = (unsigned int)pa_index(pa);
    LOCK_PVH(pai);
    pv_entry_t **pv_h = pai_to_pvh(pai);

    if (pa_test_monitor(pa)) {
        panic("%#lx: page %llx belongs to PPL", kva, pa);
    }

    if (pvh_get_flags(pv_h) & (PVH_FLAG_LOCKDOWN | PVH_FLAG_EXEC)) {
        panic("%#lx: already locked down/executable (%#llx)", kva, pvh_get_flags(pv_h));
    }

    pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);

    if (pte_p == PT_ENTRY_NULL) {
        panic("%#lx: NULL pte", kva);
    }

    pt_entry_t tmplate = *pte_p;
    if ((tmplate & ARM_PTE_APMASK) != ARM_PTE_AP(AP_RWNA)) {
        panic("%#lx: not a kernel r/w page (%#llx)", kva, tmplate & ARM_PTE_APMASK);
    }

    pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_LOCKDOWN);

    pmap_set_ptov_ap(pai, AP_RONA, FALSE);

    UNLOCK_PVH(pai);

    pmap_page_protect_options_internal((ppnum_t)atop(pa), VM_PROT_READ, 0);
}

/*
 * Release a page from being locked down to the PPL, making it writable
 * to the kernel once again.
 */
MARK_AS_PMAP_TEXT static void
pmap_ppl_unlockdown_page(vm_address_t kva)
{
    pmap_paddr_t pa = kvtophys(kva);
    unsigned int pai = (unsigned int)pa_index(pa);
    LOCK_PVH(pai);
    pv_entry_t **pv_h = pai_to_pvh(pai);

    vm_offset_t pvh_flags = pvh_get_flags(pv_h);

    if (!(pvh_flags & PVH_FLAG_LOCKDOWN)) {
        panic("unlockdown attempt on not locked down virtual %#lx/pai %d", kva, pai);
    }

    pvh_set_flags(pv_h, pvh_flags & ~PVH_FLAG_LOCKDOWN);
    pmap_set_ptov_ap(pai, AP_RWNA, FALSE);

    UNLOCK_PVH(pai);
}
#else /* XNU_MONITOR */

static void __unused
pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}

static void __unused
pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}

#endif /* !XNU_MONITOR */
#define PMAP_RESIDENT_INVALID   ((mach_vm_size_t)-1)

MARK_AS_PMAP_TEXT static mach_vm_size_t
pmap_query_resident_internal(
    pmap_t pmap,
    vm_map_address_t start,
    vm_map_address_t end,
    mach_vm_size_t *compressed_bytes_p)
{
    mach_vm_size_t resident_bytes = 0;
    mach_vm_size_t compressed_bytes = 0;

    pt_entry_t *bpte, *epte;
    pt_entry_t *pte_p;
    tt_entry_t *tte_p;

    if (pmap == NULL) {
        return PMAP_RESIDENT_INVALID;
    }

    VALIDATE_PMAP(pmap);

    /* Ensure that this request is valid, and addresses exactly one TTE. */
    if (__improbable((start % ARM_PGBYTES) || (end % ARM_PGBYTES))) {
        panic("%s: address range %p, %p not page-aligned", __func__, (void*)start, (void*)end);
    }

    if (__improbable((end < start) || ((end - start) > (PTE_PGENTRIES * ARM_PGBYTES)))) {
        panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
    }

    PMAP_LOCK(pmap);
    tte_p = pmap_tte(pmap, start);
    if (tte_p == (tt_entry_t *) NULL) {
        PMAP_UNLOCK(pmap);
        return PMAP_RESIDENT_INVALID;
    }
    if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
        __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
        pte_p = (pt_entry_t *) ttetokv(*tte_p);
        bpte = &pte_p[pte_index(pmap, pt_attr, start)];
        epte = &pte_p[pte_index(pmap, pt_attr, end)];

        for (; bpte < epte; bpte++) {
            if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
                compressed_bytes += ARM_PGBYTES;
            } else if (pa_valid(pte_to_pa(*bpte))) {
                resident_bytes += ARM_PGBYTES;
            }
        }
    }
    PMAP_UNLOCK(pmap);

    if (compressed_bytes_p) {
        pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
        *compressed_bytes_p += compressed_bytes;
        pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
    }

    return resident_bytes;
}
mach_vm_size_t
pmap_query_resident(
    pmap_t pmap,
    vm_map_address_t start,
    vm_map_address_t end,
    mach_vm_size_t *compressed_bytes_p)
{
    mach_vm_size_t total_resident_bytes;
    mach_vm_size_t compressed_bytes;
    vm_map_address_t va;

    if (pmap == PMAP_NULL) {
        if (compressed_bytes_p) {
            *compressed_bytes_p = 0;
        }
        return 0;
    }

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    total_resident_bytes = 0;
    compressed_bytes = 0;

    PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
        VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
        VM_KERNEL_ADDRHIDE(end));

    va = start;
    while (va < end) {
        vm_map_address_t l;
        mach_vm_size_t resident_bytes;

        l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
        if (l > end) {
            l = end;
        }
#if XNU_MONITOR
        resident_bytes = pmap_query_resident_ppl(pmap, va, l, compressed_bytes_p);
#else
        resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
#endif
        if (resident_bytes == PMAP_RESIDENT_INVALID) {
            break;
        }

        total_resident_bytes += resident_bytes;

        va = l;
    }

    if (compressed_bytes_p) {
        *compressed_bytes_p = compressed_bytes;
    }

    PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
        total_resident_bytes);

    return total_resident_bytes;
}
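
/*
 * Illustrative sketch (hypothetical caller, names assumed): summing the
 * resident and compressed footprint of one address range.
 *
 *    mach_vm_size_t compressed = 0;
 *    mach_vm_size_t resident =
 *        pmap_query_resident(map->pmap, entry_start, entry_end, &compressed);
 */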
#if MACH_ASSERT
void
pmap_check_ledgers(
    pmap_t pmap)
{
    int pid;
    char *procname;

    if (pmap->pmap_pid == 0) {
        /*
         * This pmap was not or is no longer fully associated
         * with a task (e.g. the old pmap after a fork()/exec() or
         * spawn()).  Its "ledger" still points at a task that is
         * now using a different (and active) address space, so
         * we can't check that all the pmap ledgers are balanced here.
         *
         * If the "pid" is set, that means that we went through
         * pmap_set_process() in task_terminate_internal(), so
         * this task's ledger should not have been re-used and
         * all the pmap ledgers should be back to 0.
         */
        return;
    }

    pid = pmap->pmap_pid;
    procname = pmap->pmap_procname;

    vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);

    PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
    PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
    PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
    PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
    PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
    PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
    PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
}
#endif /* MACH_ASSERT */
void
pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
{
}
#define PROF_START  uint64_t t, nanot;\
                    t = mach_absolute_time();

#define PROF_END    absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
                    kprintf("%s: took %llu ns\n", __func__, nanot);

#define PMAP_PGTRACE_LOCK(p)                                            \
    do {                                                                \
        *(p) = ml_set_interrupts_enabled(false);                        \
        if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
        ml_set_interrupts_enabled(*(p));                                \
    } while (true)

#define PMAP_PGTRACE_UNLOCK(p)                  \
    do {                                        \
        simple_unlock(&(pmap_pgtrace.lock));    \
        ml_set_interrupts_enabled(*(p));        \
    } while (0)

#define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
    do {                                    \
        *(pte_p) = (pte_entry);             \
        FLUSH_PTE(pte_p);                   \
    } while (0)

#define PGTRACE_MAX_MAP 16      // maximum supported va to same pa

typedef enum {
    UNDEFINED,
    PA_UNDEFINED,
    VA_UNDEFINED,
    DEFINED
} pmap_pgtrace_page_state_t;

typedef struct {
    queue_chain_t chain;

    /*
     * maps - list of va maps to upper pa
     * map_pool - map pool
     * map_waste - waste can
     */
    pmap_paddr_t pa;
    queue_head_t maps;
    queue_head_t map_pool;
    queue_head_t map_waste;
    pmap_pgtrace_page_state_t state;
} pmap_pgtrace_page_t;

static struct {
    /*
     * pages - list of tracing page info
     */
    queue_head_t pages;
    decl_simple_lock_data(, lock);
} pmap_pgtrace = {};
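
/*
 * Illustrative sketch of the locking idiom the pgtrace code below relies on:
 * PMAP_PGTRACE_LOCK() disables interrupts and spins for the pgtrace lock, and
 * PMAP_PGTRACE_UNLOCK() releases the lock and restores the interrupt state.
 *
 *    bool ints;
 *    PMAP_PGTRACE_LOCK(&ints);
 *    // ... walk or modify pmap_pgtrace.pages ...
 *    PMAP_PGTRACE_UNLOCK(&ints);
 */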
void
pmap_pgtrace_init(void)
{
    queue_init(&(pmap_pgtrace.pages));
    simple_lock_init(&(pmap_pgtrace.lock), 0);

    boolean_t enabled;

    if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
        pgtrace_enabled = enabled;
    }
}
// find a page with given pa - pmap_pgtrace should be locked
inline static pmap_pgtrace_page_t *
pmap_pgtrace_find_page(pmap_paddr_t pa)
{
    queue_head_t *q = &(pmap_pgtrace.pages);
    pmap_pgtrace_page_t *p;

    queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
        if (p->state == UNDEFINED) {
            continue;
        }
        if (p->state == PA_UNDEFINED) {
            continue;
        }
        if (p->pa == pa) {
            return p;
        }
    }

    return NULL;
}
// enter clone of given pmap, va page and range - pmap should be locked
static bool
pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
{
    bool ints;
    queue_head_t *q = &(pmap_pgtrace.pages);
    pmap_paddr_t pa_page;
    pt_entry_t *ptep, *cptep;
    pmap_pgtrace_page_t *p;
    bool found = false;

    PMAP_ASSERT_LOCKED(pmap);
    assert(va_page == arm_trunc_page(va_page));

    PMAP_PGTRACE_LOCK(&ints);

    ptep = pmap_pte(pmap, va_page);

    // target pte should exist
    if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return false;
    }

    queue_head_t *mapq;
    queue_head_t *mappool;
    pmap_pgtrace_map_t *map = NULL;

    pa_page = pte_to_pa(*ptep);

    // find if we have a page info defined for this
    queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
        mapq = &(p->maps);
        mappool = &(p->map_pool);

        switch (p->state) {
        case PA_UNDEFINED:
            queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
                if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
                    p->pa = pa_page;
                    map->range.start = start;
                    map->range.end = end;
                    found = true;
                    break;
                }
            }
            break;

        case VA_UNDEFINED:
            if (p->pa != pa_page) {
                break;
            }
            queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
                if (map->cloned == false) {
                    map->pmap = pmap;
                    map->ova = va_page;
                    map->range.start = start;
                    map->range.end = end;
                    found = true;
                    break;
                }
            }
            break;

        case DEFINED:
            if (p->pa != pa_page) {
                break;
            }
            queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
                if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
                    kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
                    break;
                } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
                    kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
                    break;
                } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
                    // range should be already defined as well
                    found = true;
                    break;
                }
            }
            break;

        default:
            panic("invalid state p->state=%x\n", p->state);
        }

        if (found == true) {
            break;
        }
    }

    // do not clone if no page info found
    if (found == false) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return false;
    }

    // copy pre, target and post ptes to clone ptes
    for (int i = 0; i < 3; i++) {
        ptep = pmap_pte(pmap, va_page + (i - 1) * ARM_PGBYTES);
        cptep = pmap_pte(kernel_pmap, map->cva[i]);
        assert(cptep != NULL);
        if (ptep == NULL) {
            PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
        } else {
            PGTRACE_WRITE_PTE(cptep, *ptep);
        }
        PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
    }

    // get ptes for original and clone
    ptep = pmap_pte(pmap, va_page);
    cptep = pmap_pte(kernel_pmap, map->cva[1]);

    // invalidate original pte and mark it as a pgtrace page
    PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
    PMAP_UPDATE_TLBS(pmap, map->ova, map->ova + ARM_PGBYTES, false);

    map->cloned = true;
    p->state = DEFINED;

    kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);

    PMAP_PGTRACE_UNLOCK(&ints);

    return true;
}
// This function removes trace bit and validate pte if applicable. Pmap must be locked.
static void
pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
{
    bool ints, found = false;
    pmap_pgtrace_page_t *p;
    pt_entry_t *ptep;

    PMAP_PGTRACE_LOCK(&ints);

    // we must have this page info
    p = pmap_pgtrace_find_page(pa);
    if (p == NULL) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return;
    }

    // find matching map
    queue_head_t *mapq = &(p->maps);
    queue_head_t *mappool = &(p->map_pool);
    pmap_pgtrace_map_t *map;

    queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
        if (map->pmap == pmap && map->ova == va) {
            found = true;
            break;
        }
    }

    if (!found) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return;
    }

    if (map->cloned == true) {
        // Restore back the pte to original state
        ptep = pmap_pte(pmap, map->ova);
        assert(ptep);
        PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
        PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);

        // revert clone pages
        for (int i = 0; i < 3; i++) {
            ptep = pmap_pte(kernel_pmap, map->cva[i]);
            assert(ptep != NULL);
            PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
            PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
        }
    }

    queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
    map->pmap = NULL;
    map->ova = (vm_map_offset_t)NULL;
    map->cloned = false;
    queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);

    kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);

    PMAP_PGTRACE_UNLOCK(&ints);
}
// remove all clones of given pa - pmap must be locked
static void
pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
{
    bool ints;
    pmap_pgtrace_page_t *p;
    pt_entry_t *ptep;

    PMAP_PGTRACE_LOCK(&ints);

    // we must have this page info
    p = pmap_pgtrace_find_page(pa);
    if (p == NULL) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return;
    }

    queue_head_t *mapq = &(p->maps);
    queue_head_t *mappool = &(p->map_pool);
    queue_head_t *mapwaste = &(p->map_waste);
    pmap_pgtrace_map_t *map;

    // move maps to waste
    while (!queue_empty(mapq)) {
        queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
        queue_enter_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
    }

    PMAP_PGTRACE_UNLOCK(&ints);

    // sanitize maps in waste
    queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
        if (map->cloned == true) {
            PMAP_LOCK(map->pmap);

            // restore back original pte
            ptep = pmap_pte(map->pmap, map->ova);
            assert(ptep);
            PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
            PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova + ARM_PGBYTES, false);

            // revert clone ptes
            for (int i = 0; i < 3; i++) {
                ptep = pmap_pte(kernel_pmap, map->cva[i]);
                assert(ptep != NULL);
                PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
                PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
            }

            PMAP_UNLOCK(map->pmap);
        }

        map->pmap = NULL;
        map->ova = (vm_map_offset_t)NULL;
        map->cloned = false;
    }

    PMAP_PGTRACE_LOCK(&ints);

    // recycle maps back to map_pool
    while (!queue_empty(mapwaste)) {
        queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
        queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
    }

    PMAP_PGTRACE_UNLOCK(&ints);
}
static void
pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
{
    uint64_t tsz;
    vm_map_offset_t end;

    if (pmap == kernel_pmap) {
        tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
        *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64 - tsz)) << (64 - tsz));
        *endp = VM_MAX_KERNEL_ADDRESS;
    } else {
        tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
        if (tsz == 64) {
            end = 0;
        } else {
            end = ((uint64_t)1 << (64 - tsz)) - 1;
        }

        *startp = 0;
        *endp = end;
    }

    assert(*endp > *startp);
}
// has pa mapped in given pmap? then clone it
static uint64_t
pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
{
    uint64_t ret = 0;
    vm_map_offset_t min, max;
    vm_map_offset_t cur_page, end_page;
    pt_entry_t *ptep;
    tt_entry_t *ttep;
    tt_entry_t tte;
    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    pmap_pgtrace_get_search_space(pmap, &min, &max);

    cur_page = arm_trunc_page(min);
    end_page = arm_trunc_page(max);
    while (cur_page <= end_page) {
        vm_map_offset_t add = 0;

        PMAP_LOCK(pmap);

        // skip uninterested space
        if (pmap == kernel_pmap &&
            ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
            (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
            add = ARM_PGBYTES;
            goto unlock_continue;
        }

        // check whether we can skip l1
        ttep = pmap_tt1e(pmap, cur_page);
        assert(ttep);
        tte = *ttep;
        if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
            add = ARM_TT_L1_SIZE;
            goto unlock_continue;
        }

        // how about l2
        tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, cur_page)];
        if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
            add = ARM_TT_L2_SIZE;
            goto unlock_continue;
        }

        // ptep finally
        ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, cur_page)]);
        if (ptep == PT_ENTRY_NULL) {
            add = ARM_TT_L3_SIZE;
            goto unlock_continue;
        }

        if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
            if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
                ret++;
            }
        }

        add = ARM_PGBYTES;

unlock_continue:
        PMAP_UNLOCK(pmap);

        // overflow
        if (cur_page + add < cur_page) {
            break;
        }

        cur_page += add;
    }

    return ret;
}
// search pv table and clone vas of given pa
static uint64_t
pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
{
    uint64_t ret = 0;
    unsigned int pai;
    pv_entry_t **pvh;
    pt_entry_t *ptep;
    pmap_t pmap;

    typedef struct {
        queue_chain_t chain;
        pmap_t pmap;
        vm_map_offset_t va;
    } pmap_va_t;

    queue_head_t pmapvaq;
    pmap_va_t *pmapva;

    queue_init(&pmapvaq);

    pai = pa_index(pa);
    LOCK_PVH(pai);
    pvh = pai_to_pvh(pai);

    // collect pmap/va pair from pvh
    if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
        ptep = pvh_ptep(pvh);
        pmap = ptep_get_pmap(ptep);

        pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
        pmapva->pmap = pmap;
        pmapva->va = ptep_get_va(ptep);

        queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
    } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
        pv_entry_t *pvep;

        pvep = pvh_list(pvh);
        while (pvep) {
            ptep = pve_get_ptep(pvep);
            pmap = ptep_get_pmap(ptep);

            pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
            pmapva->pmap = pmap;
            pmapva->va = ptep_get_va(ptep);

            queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);

            pvep = PVE_NEXT_PTR(pve_next(pvep));
        }
    }

    UNLOCK_PVH(pai);

    // clone them while making sure mapping still exists
    queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
        PMAP_LOCK(pmapva->pmap);
        ptep = pmap_pte(pmapva->pmap, pmapva->va);
        if (pte_to_pa(*ptep) == pa) {
            if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
                ret++;
            }
        }
        PMAP_UNLOCK(pmapva->pmap);

        kfree(pmapva, sizeof(pmap_va_t));
    }

    return ret;
}
// allocate a page info
static pmap_pgtrace_page_t *
pmap_pgtrace_alloc_page(void)
{
    pmap_pgtrace_page_t *p;
    queue_head_t *mapq;
    queue_head_t *mappool;
    queue_head_t *mapwaste;
    pmap_pgtrace_map_t *map;

    p = kalloc(sizeof(pmap_pgtrace_page_t));

    p->state = UNDEFINED;

    mapq = &(p->maps);
    mappool = &(p->map_pool);
    mapwaste = &(p->map_waste);
    queue_init(mapq);
    queue_init(mappool);
    queue_init(mapwaste);

    for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
        vm_map_offset_t newcva;
        pt_entry_t *cptep;
        kern_return_t kr;
        vm_map_entry_t entry;

        vm_object_reference(kernel_object);
        kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3 * ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
        if (kr != KERN_SUCCESS) {
            panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
        }
        VME_OBJECT_SET(entry, kernel_object);
        VME_OFFSET_SET(entry, newcva);
        vm_map_unlock(kernel_map);

        // fill default clone page info and add to pool
        map = kalloc(sizeof(pmap_pgtrace_map_t));
        for (int j = 0; j < 3; j++) {
            vm_map_offset_t addr = newcva + j * ARM_PGBYTES;

            // pre-expand pmap while preemption enabled
            kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
            if (kr != KERN_SUCCESS) {
                panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
            }

            cptep = pmap_pte(kernel_pmap, addr);
            assert(cptep != NULL);

            map->cva[j] = addr;
            map->cva_spte[j] = *cptep;
        }
        map->range.start = map->range.end = 0;
        map->cloned = false;
        queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
    }

    return p;
}
// free a page info
static void
pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
{
    queue_head_t *mapq;
    queue_head_t *mappool;
    queue_head_t *mapwaste;
    pmap_pgtrace_map_t *map;

    mapq = &(p->maps);
    mappool = &(p->map_pool);
    mapwaste = &(p->map_waste);

    while (!queue_empty(mapq)) {
        queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
        kfree(map, sizeof(pmap_pgtrace_map_t));
    }

    while (!queue_empty(mappool)) {
        queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
        kfree(map, sizeof(pmap_pgtrace_map_t));
    }

    while (!queue_empty(mapwaste)) {
        queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
        kfree(map, sizeof(pmap_pgtrace_map_t));
    }

    kfree(p, sizeof(pmap_pgtrace_page_t));
}
// construct page infos with the given address range
int
pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
    int ret = 0;
    pt_entry_t *ptep;
    queue_head_t *q = &(pmap_pgtrace.pages);
    bool ints;
    vm_map_offset_t cur_page, end_page;

    if (start > end) {
        kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
        return -1;
    }

    PROF_START

    // add each page in given range
    cur_page = arm_trunc_page(start);
    end_page = arm_trunc_page(end);
    while (cur_page <= end_page) {
        pmap_paddr_t pa_page = 0;
        uint64_t num_cloned = 0;
        pmap_pgtrace_page_t *p = NULL, *newp;
        bool free_newp = true;
        pmap_pgtrace_page_state_t state;

        // do all allocations outside of spinlocks
        newp = pmap_pgtrace_alloc_page();

        // keep lock orders in pmap, kernel_pmap and pgtrace lock
        if (pmap != NULL) {
            PMAP_LOCK(pmap);
        }
        if (pmap != kernel_pmap) {
            PMAP_LOCK(kernel_pmap);
        }

        // addresses are physical if pmap is null
        if (pmap == NULL) {
            ptep = NULL;
            pa_page = cur_page;
            state = VA_UNDEFINED;
        } else {
            ptep = pmap_pte(pmap, cur_page);
            if (ptep != NULL) {
                pa_page = pte_to_pa(*ptep);
                state = DEFINED;
            } else {
                state = PA_UNDEFINED;
            }
        }

        // search if we have a page info already
        PMAP_PGTRACE_LOCK(&ints);
        if (state != PA_UNDEFINED) {
            p = pmap_pgtrace_find_page(pa_page);
        }

        // add pre-allocated page info if nothing found
        if (p == NULL) {
            queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
            p = newp;
            free_newp = false;
        }

        // now p points what we want
        p->state = state;

        queue_head_t *mapq = &(p->maps);
        queue_head_t *mappool = &(p->map_pool);
        pmap_pgtrace_map_t *map;
        vm_map_offset_t start_offset, end_offset;

        // calculate trace offsets in the page
        if (cur_page > start) {
            start_offset = 0;
        } else {
            start_offset = start - cur_page;
        }
        if (cur_page == end_page) {
            end_offset = end - end_page;
        } else {
            end_offset = ARM_PGBYTES - 1;
        }

        kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);

        // fill map info
        assert(!queue_empty(mappool));
        queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
        if (p->state == PA_UNDEFINED) {
            map->pmap = pmap;
            map->ova = cur_page;
            map->range.start = start_offset;
            map->range.end = end_offset;
        } else if (p->state == VA_UNDEFINED) {
            p->pa = pa_page;
            map->range.start = start_offset;
            map->range.end = end_offset;
        } else if (p->state == DEFINED) {
            p->pa = pa_page;
            map->pmap = pmap;
            map->ova = cur_page;
            map->range.start = start_offset;
            map->range.end = end_offset;
        } else {
            panic("invalid p->state=%d\n", p->state);
        }

        // not cloned yet
        map->cloned = false;
        queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);

        // unlock locks
        PMAP_PGTRACE_UNLOCK(&ints);
        if (pmap != kernel_pmap) {
            PMAP_UNLOCK(kernel_pmap);
        }
        if (pmap != NULL) {
            PMAP_UNLOCK(pmap);
        }

        // now clone it
        if (pa_valid(pa_page)) {
            num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
        }
        if (pmap == NULL) {
            num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
        } else {
            num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
        }

        // free pre-allocations if we didn't add it to the q
        if (free_newp) {
            pmap_pgtrace_free_page(newp);
        }

        if (num_cloned == 0) {
            kprintf("%s: no mapping found for pa_page=%llx but will be added when a page entered\n", __func__, pa_page);
        }

        ret += num_cloned;

        // overflow
        if (cur_page + ARM_PGBYTES < cur_page) {
            break;
        } else {
            cur_page += ARM_PGBYTES;
        }
    }

    PROF_END

    return ret;
}
// delete page infos for given address range
int
pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
    int ret = 0;
    bool ints;
    queue_head_t *q = &(pmap_pgtrace.pages);
    pmap_pgtrace_page_t *p;
    vm_map_offset_t cur_page, end_page;

    kprintf("%s start=%llx end=%llx\n", __func__, start, end);

    PROF_START

    pt_entry_t *ptep;
    pmap_paddr_t pa_page;

    // remove page info from start to end
    cur_page = arm_trunc_page(start);
    end_page = arm_trunc_page(end);
    while (cur_page <= end_page) {
        p = NULL;

        if (pmap == NULL) {
            pa_page = cur_page;
        } else {
            PMAP_LOCK(pmap);
            ptep = pmap_pte(pmap, cur_page);
            if (ptep == NULL) {
                PMAP_UNLOCK(pmap);
                goto cont;
            }
            pa_page = pte_to_pa(*ptep);
            PMAP_UNLOCK(pmap);
        }

        // remove all clones and validate
        pmap_pgtrace_remove_all_clone(pa_page);

        // find page info and delete
        PMAP_PGTRACE_LOCK(&ints);
        p = pmap_pgtrace_find_page(pa_page);
        if (p != NULL) {
            queue_remove(q, p, pmap_pgtrace_page_t *, chain);
            ret++;
        }
        PMAP_PGTRACE_UNLOCK(&ints);

        // free outside of locks
        if (p != NULL) {
            pmap_pgtrace_free_page(p);
        }

cont:
        // overflow
        if (cur_page + ARM_PGBYTES < cur_page) {
            break;
        } else {
            cur_page += ARM_PGBYTES;
        }
    }

    PROF_END

    return ret;
}
kern_return_t
pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
{
    pt_entry_t *ptep;
    pgtrace_run_result_t res;
    pmap_pgtrace_page_t *p;
    bool ints, found = false;
    pmap_paddr_t pa;

    // Quick check if we are interested
    ptep = pmap_pte(pmap, va);
    if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
        return KERN_FAILURE;
    }

    PMAP_PGTRACE_LOCK(&ints);

    // Check again since access is serialized
    ptep = pmap_pte(pmap, va);
    if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
        PMAP_PGTRACE_UNLOCK(&ints);
        return KERN_FAILURE;
    } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
        // Somehow this cpu's tlb has not updated
        kprintf("%s Somehow this cpu's tlb has not updated?\n", __func__);
        PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);

        PMAP_PGTRACE_UNLOCK(&ints);
        return KERN_SUCCESS;
    }

    // Find if this pa is what we are tracing
    pa = pte_to_pa(*ptep);

    p = pmap_pgtrace_find_page(arm_trunc_page(pa));
    if (p == NULL) {
        panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
    }

    // find if pmap and va are also matching
    queue_head_t *mapq = &(p->maps);
    queue_head_t *mapwaste = &(p->map_waste);
    pmap_pgtrace_map_t *map;

    queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
        if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
            found = true;
            break;
        }
    }

    // if not found, search map waste as they are still valid
    if (!found) {
        queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
            if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
                found = true;
                break;
            }
        }
    }

    if (!found) {
        panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
    }

    // Decode and run it on the clone map
    bzero(&res, sizeof(res));
    pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss), // instruction
        va, map->cva,                                           // fault va and clone page vas
        ss, &res);

    // write a log if in range
    vm_map_offset_t offset = va - map->ova;
    if (map->range.start <= offset && offset <= map->range.end) {
        pgtrace_write_log(res);
    }

    PMAP_PGTRACE_UNLOCK(&ints);

    // Return to next instruction
    add_saved_state_pc(ss, sizeof(uint32_t));

    return KERN_SUCCESS;
}
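
/*
 * Illustrative sketch of the overall pgtrace flow implemented above (assumes
 * the "pgtrace" boot-arg has enabled tracing; variable names are assumed):
 *
 *    // 1. Register a traced range; clones are set up for existing mappings.
 *    pmap_pgtrace_add_page(user_pmap, trace_start, trace_end);
 *
 *    // 2. An access to the now-invalidated page faults; the abort handler
 *    //    calls pmap_pgtrace_fault(), which decodes the instruction, runs it
 *    //    against the clone mapping, and logs it if it falls in the range.
 *    kern_return_t kr = pmap_pgtrace_fault(user_pmap, fault_va, saved_state);
 *
 *    // 3. Stop tracing and tear the clones down.
 *    pmap_pgtrace_delete_page(user_pmap, trace_start, trace_end);
 */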
boolean_t
pmap_enforces_execute_only(
#if (__ARM_VMSA__ == 7)
    __unused
#endif
    pmap_t pmap)
{
#if (__ARM_VMSA__ > 7)
    return pmap != kernel_pmap;
#else
    return FALSE;
#endif
}

MARK_AS_PMAP_TEXT void
pmap_set_jit_entitled_internal(
    __unused pmap_t pmap)
{
    return;
}

void
pmap_set_jit_entitled(
    pmap_t pmap)
{
#if XNU_MONITOR
    pmap_set_jit_entitled_ppl(pmap);
#else
    pmap_set_jit_entitled_internal(pmap);
#endif
}
MARK_AS_PMAP_TEXT static kern_return_t
pmap_query_page_info_internal(
    pmap_t pmap,
    vm_map_offset_t va,
    int *disp_p)
{
    pmap_paddr_t    pa;
    int             disp;
    int             pai;
    pt_entry_t      *pte;
    pv_entry_t      **pv_h, *pve_p;

    if (pmap == PMAP_NULL || pmap == kernel_pmap) {
        pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
        *disp_p = 0;
        pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
        return KERN_INVALID_ARGUMENT;
    }

    disp = 0;

    VALIDATE_PMAP(pmap);
    PMAP_LOCK(pmap);

    pte = pmap_pte(pmap, va);
    if (pte == PT_ENTRY_NULL) {
        goto done;
    }

    pa = pte_to_pa(*pte);
    if (pa == 0) {
        if (ARM_PTE_IS_COMPRESSED(*pte, pte)) {
            disp |= PMAP_QUERY_PAGE_COMPRESSED;
            if (*pte & ARM_PTE_COMPRESSED_ALT) {
                disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
            }
        }
    } else {
        disp |= PMAP_QUERY_PAGE_PRESENT;
        pai = (int) pa_index(pa);
        if (!pa_valid(pa)) {
            goto done;
        }
        LOCK_PVH(pai);
        pv_h = pai_to_pvh(pai);
        pve_p = PV_ENTRY_NULL;
        if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
            pve_p = pvh_list(pv_h);
            while (pve_p != PV_ENTRY_NULL &&
                pve_get_ptep(pve_p) != pte) {
                pve_p = PVE_NEXT_PTR(pve_next(pve_p));
            }
        }
        if (IS_ALTACCT_PAGE(pai, pve_p)) {
            disp |= PMAP_QUERY_PAGE_ALTACCT;
        } else if (IS_REUSABLE_PAGE(pai)) {
            disp |= PMAP_QUERY_PAGE_REUSABLE;
        } else if (IS_INTERNAL_PAGE(pai)) {
            disp |= PMAP_QUERY_PAGE_INTERNAL;
        }
        UNLOCK_PVH(pai);
    }

done:
    PMAP_UNLOCK(pmap);
    pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
    *disp_p = disp;
    pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
    return KERN_SUCCESS;
}

kern_return_t
pmap_query_page_info(
    pmap_t pmap,
    vm_map_offset_t va,
    int *disp_p)
{
#if XNU_MONITOR
    return pmap_query_page_info_ppl(pmap, va, disp_p);
#else
    return pmap_query_page_info_internal(pmap, va, disp_p);
#endif
}
MARK_AS_PMAP_TEXT kern_return_t
pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
{
    return KERN_SUCCESS;
}

kern_return_t
pmap_return(boolean_t do_panic, boolean_t do_recurse)
{
#if XNU_MONITOR
    return pmap_return_ppl(do_panic, do_recurse);
#else
    return pmap_return_internal(do_panic, do_recurse);
#endif
}
MARK_AS_PMAP_TEXT static void
pmap_footprint_suspend_internal(
    vm_map_t map,
    boolean_t suspend)
{
#if DEVELOPMENT || DEBUG
    if (suspend) {
        current_thread()->pmap_footprint_suspended = TRUE;
        map->pmap->footprint_was_suspended = TRUE;
    } else {
        current_thread()->pmap_footprint_suspended = FALSE;
    }
#else /* DEVELOPMENT || DEBUG */
    (void) map;
    (void) suspend;
#endif /* DEVELOPMENT || DEBUG */
}

void
pmap_footprint_suspend(
    vm_map_t map,
    boolean_t suspend)
{
#if XNU_MONITOR
    pmap_footprint_suspend_ppl(map, suspend);
#else
    pmap_footprint_suspend_internal(map, suspend);
#endif
}
#if defined(__arm64__) && (DEVELOPMENT || DEBUG)

struct page_table_dump_header {
    uint64_t pa;
    uint64_t num_entries;
    uint64_t start_va;
    uint64_t end_va;
};

static size_t
pmap_dump_page_tables_recurse(pmap_t pmap,
    const tt_entry_t *ttp,
    unsigned int cur_level,
    uint64_t start_va,
    void *bufp,
    void *buf_end)
{
    size_t bytes_used = 0;
    uint64_t num_entries = ARM_PGBYTES / sizeof(*ttp);
    const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    uint64_t size = pt_attr->pta_level_info[cur_level].size;
    uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
    uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
    uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;

    if (cur_level == arm64_root_pgtable_level) {
        num_entries = arm64_root_pgtable_num_ttes;
    }

    uint64_t tt_size = num_entries * sizeof(tt_entry_t);
    const tt_entry_t *tt_end = &ttp[num_entries];

    if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
        return 0;
    }

    struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
    header->pa = ml_static_vtop((vm_offset_t)ttp);
    header->num_entries = num_entries;
    header->start_va = start_va;
    header->end_va = start_va + (num_entries * size);

    bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
    bytes_used += (sizeof(*header) + tt_size);
    uint64_t current_va = start_va;

    for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
        tt_entry_t tte = *ttep;

        if (!(tte & valid_mask)) {
            continue;
        }

        if ((tte & type_mask) == type_block) {
            continue;
        } else {
            if (cur_level >= PMAP_TT_MAX_LEVEL) {
                panic("%s: corrupt entry %#llx at %p, "
                    "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
                    __FUNCTION__, tte, ttep,
                    ttp, cur_level, bufp, buf_end);
            }

            const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);

            size_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1, current_va, (uint8_t*)bufp + bytes_used, buf_end);

            if (recurse_result == 0) {
                return 0;
            }

            bytes_used += recurse_result;
        }
    }

    return bytes_used;
}

size_t
pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end)
{
    if (not_in_kdp) {
        panic("pmap_dump_page_tables must only be called from kernel debugger context");
    }
    return pmap_dump_page_tables_recurse(pmap, pmap->tte, arm64_root_pgtable_level, pmap->min, bufp, buf_end);
}

#else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */

size_t
pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)
{
    return (size_t)-1;
}

#endif /* !defined(__arm64__) */
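
/*
 * Illustrative sketch: how a hypothetical debugger-side consumer could walk
 * the buffer produced by pmap_dump_page_tables().  Each table is emitted as a
 * page_table_dump_header followed by num_entries raw TTEs; only that layout
 * is assumed here.
 *
 *    const uint8_t *cur = buf;
 *    while (cur < buf + bytes_used) {
 *        const struct page_table_dump_header *h = (const void *)cur;
 *        // h->start_va .. h->end_va is described by the TTEs that follow,
 *        // copied from the table whose physical address is h->pa.
 *        cur += sizeof(*h) + h->num_entries * sizeof(tt_entry_t);
 *    }
 */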