/*
 * Copyright (c) 2011-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <mach_assert.h>
#include <mach_ldebug.h>

#include <mach/shared_region.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/vm_map.h>
#include <mach/machine/vm_param.h>
#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/bits.h>
#include <kern/thread.h>
#include <kern/sched.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>
#include <kern/trustcache.h>

#include <os/overflow.h>

#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>

#include <libkern/img4/interface.h>
#include <libkern/section_keywords.h>

#include <machine/atomic.h>
#include <machine/thread.h>
#include <machine/lowglobals.h>

#include <arm/caches_internal.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/cpu_capabilities.h>
#include <arm/cpu_number.h>
#include <arm/machine_cpu.h>
#include <arm/misc_protos.h>

#if (__ARM_VMSA__ > 7)
#include <arm64/proc_reg.h>
#include <pexpert/arm64/boot.h>

#include <arm64/pgtrace.h>
#if CONFIG_PGTRACE_NONKEXT
#include <arm64/pgtrace_decoder.h>
#endif // CONFIG_PGTRACE_NONKEXT
#endif /* (__ARM_VMSA__ > 7) */

#include <pexpert/device_tree.h>

#include <san/kasan.h>
#include <sys/cdefs.h>

#if defined(HAS_APPLE_PAC)
#include <ptrauth.h>
#endif /* defined(HAS_APPLE_PAC) */
#define PMAP_TT_L0_LEVEL        0x0
#define PMAP_TT_L1_LEVEL        0x1
#define PMAP_TT_L2_LEVEL        0x2
#define PMAP_TT_L3_LEVEL        0x3
#if (__ARM_VMSA__ == 7)
#define PMAP_TT_MAX_LEVEL       PMAP_TT_L2_LEVEL
#else
#define PMAP_TT_MAX_LEVEL       PMAP_TT_L3_LEVEL
#endif
#define PMAP_TT_LEAF_LEVEL      PMAP_TT_MAX_LEVEL
#define PMAP_TT_TWIG_LEVEL      (PMAP_TT_MAX_LEVEL - 1)
static bool alloc_asid(pmap_t pmap);
static void free_asid(pmap_t pmap);
static void flush_mmu_tlb_region_asid_async(vm_offset_t va, unsigned length, pmap_t pmap);
static void flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap);
static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
static pt_entry_t wimg_to_pte(unsigned int wimg);
struct page_table_ops {
    bool (*alloc_id)(pmap_t pmap);
    void (*free_id)(pmap_t pmap);
    void (*flush_tlb_region_async)(vm_offset_t va, unsigned length, pmap_t pmap);
    void (*flush_tlb_tte_async)(vm_offset_t va, pmap_t pmap);
    void (*flush_tlb_async)(pmap_t pmap);
    pt_entry_t (*wimg_to_pte)(unsigned int wimg);
};
static const struct page_table_ops native_pt_ops =
{
    .alloc_id = alloc_asid,
    .free_id = free_asid,
    .flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
    .flush_tlb_tte_async = flush_mmu_tlb_tte_asid_async,
    .flush_tlb_async = flush_mmu_tlb_full_asid_async,
    .wimg_to_pte = wimg_to_pte,
};
#if (__ARM_VMSA__ > 7)
const struct page_table_level_info pmap_table_level_info_16k[] =
{
    [0] = {
        .size       = ARM_16K_TT_L0_SIZE,
        .offmask    = ARM_16K_TT_L0_OFFMASK,
        .shift      = ARM_16K_TT_L0_SHIFT,
        .index_mask = ARM_16K_TT_L0_INDEX_MASK,
        .valid_mask = ARM_TTE_VALID,
        .type_mask  = ARM_TTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_BLOCK
    },
    [1] = {
        .size       = ARM_16K_TT_L1_SIZE,
        .offmask    = ARM_16K_TT_L1_OFFMASK,
        .shift      = ARM_16K_TT_L1_SHIFT,
        .index_mask = ARM_16K_TT_L1_INDEX_MASK,
        .valid_mask = ARM_TTE_VALID,
        .type_mask  = ARM_TTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_BLOCK
    },
    [2] = {
        .size       = ARM_16K_TT_L2_SIZE,
        .offmask    = ARM_16K_TT_L2_OFFMASK,
        .shift      = ARM_16K_TT_L2_SHIFT,
        .index_mask = ARM_16K_TT_L2_INDEX_MASK,
        .valid_mask = ARM_TTE_VALID,
        .type_mask  = ARM_TTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_BLOCK
    },
    [3] = {
        .size       = ARM_16K_TT_L3_SIZE,
        .offmask    = ARM_16K_TT_L3_OFFMASK,
        .shift      = ARM_16K_TT_L3_SHIFT,
        .index_mask = ARM_16K_TT_L3_INDEX_MASK,
        .valid_mask = ARM_PTE_TYPE_VALID,
        .type_mask  = ARM_PTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_L3BLOCK
    }
};
const struct page_table_level_info pmap_table_level_info_4k[] =
{
    [0] = {
        .size       = ARM_4K_TT_L0_SIZE,
        .offmask    = ARM_4K_TT_L0_OFFMASK,
        .shift      = ARM_4K_TT_L0_SHIFT,
        .index_mask = ARM_4K_TT_L0_INDEX_MASK,
        .valid_mask = ARM_TTE_VALID,
        .type_mask  = ARM_TTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_BLOCK
    },
    [1] = {
        .size       = ARM_4K_TT_L1_SIZE,
        .offmask    = ARM_4K_TT_L1_OFFMASK,
        .shift      = ARM_4K_TT_L1_SHIFT,
        .index_mask = ARM_4K_TT_L1_INDEX_MASK,
        .valid_mask = ARM_TTE_VALID,
        .type_mask  = ARM_TTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_BLOCK
    },
    [2] = {
        .size       = ARM_4K_TT_L2_SIZE,
        .offmask    = ARM_4K_TT_L2_OFFMASK,
        .shift      = ARM_4K_TT_L2_SHIFT,
        .index_mask = ARM_4K_TT_L2_INDEX_MASK,
        .valid_mask = ARM_TTE_VALID,
        .type_mask  = ARM_TTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_BLOCK
    },
    [3] = {
        .size       = ARM_4K_TT_L3_SIZE,
        .offmask    = ARM_4K_TT_L3_OFFMASK,
        .shift      = ARM_4K_TT_L3_SHIFT,
        .index_mask = ARM_4K_TT_L3_INDEX_MASK,
        .valid_mask = ARM_PTE_TYPE_VALID,
        .type_mask  = ARM_PTE_TYPE_MASK,
        .type_block = ARM_TTE_TYPE_L3BLOCK
    }
};
struct page_table_attr {
    const struct page_table_level_info * const pta_level_info;
    const struct page_table_ops * const pta_ops;
    const uintptr_t ap_ro;
    const uintptr_t ap_rw;
    const uintptr_t ap_rona;
    const uintptr_t ap_rwna;
    const uintptr_t ap_xn;
    const uintptr_t ap_x;
    const unsigned int pta_root_level;
    const unsigned int pta_max_level;
};
const struct page_table_attr pmap_pt_attr_4k = {
    .pta_level_info = pmap_table_level_info_4k,
    .pta_root_level = PMAP_TT_L1_LEVEL,
    .pta_max_level  = PMAP_TT_L3_LEVEL,
    .pta_ops = &native_pt_ops,
    .ap_ro = ARM_PTE_AP(AP_RORO),
    .ap_rw = ARM_PTE_AP(AP_RWRW),
    .ap_rona = ARM_PTE_AP(AP_RONA),
    .ap_rwna = ARM_PTE_AP(AP_RWNA),
    .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
    .ap_x = ARM_PTE_PNX,
};
const struct page_table_attr pmap_pt_attr_16k = {
    .pta_level_info = pmap_table_level_info_16k,
    .pta_root_level = PMAP_TT_L1_LEVEL,
    .pta_max_level  = PMAP_TT_L3_LEVEL,
    .pta_ops = &native_pt_ops,
    .ap_ro = ARM_PTE_AP(AP_RORO),
    .ap_rw = ARM_PTE_AP(AP_RWRW),
    .ap_rona = ARM_PTE_AP(AP_RONA),
    .ap_rwna = ARM_PTE_AP(AP_RWNA),
    .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
    .ap_x = ARM_PTE_PNX,
};

#if __ARM_16K_PG__
const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
#else /* !__ARM_16K_PG__ */
const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
#endif /* !__ARM_16K_PG__ */
#else /* (__ARM_VMSA__ > 7) */
/*
 * We don't support pmap parameterization for VMSA7, so use an opaque
 * page_table_attr structure.
 */
const struct page_table_attr * const native_pt_attr = NULL;
#endif /* (__ARM_VMSA__ > 7) */

typedef struct page_table_attr pt_attr_t;
/* Macro for getting pmap attributes; not a function for const propagation. */
#if ARM_PARAMETERIZED_PMAP
/* The page table attributes are linked to the pmap */
#define pmap_get_pt_attr(pmap) ((pmap)->pmap_pt_attr)
#define pmap_get_pt_ops(pmap) ((pmap)->pmap_pt_attr->pta_ops)
#else /* !ARM_PARAMETERIZED_PMAP */
/* The page table attributes are fixed (to allow for const propagation) */
#define pmap_get_pt_attr(pmap) (native_pt_attr)
#define pmap_get_pt_ops(pmap) (&native_pt_ops)
#endif /* !ARM_PARAMETERIZED_PMAP */
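/*
 * Typical use of the accessors above (a sketch based on call sites later in this
 * file, not additional API):
 *
 *    const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
 *    uint64_t twig_span = pt_attr_twig_size(pt_attr);
 *    pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, (unsigned)twig_span, pmap);
 *
 * When ARM_PARAMETERIZED_PMAP is disabled, both macros resolve to compile-time
 * constants, so these calls can be fully inlined and const-propagated.
 */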
#if (__ARM_VMSA__ > 7)
static inline uint64_t
pt_attr_ln_size(const pt_attr_t * const pt_attr, unsigned int level)
{
    return pt_attr->pta_level_info[level].size;
}

__unused static inline uint64_t
pt_attr_ln_shift(const pt_attr_t * const pt_attr, unsigned int level)
{
    return pt_attr->pta_level_info[level].shift;
}

__unused static inline uint64_t
pt_attr_ln_offmask(const pt_attr_t * const pt_attr, unsigned int level)
{
    return pt_attr->pta_level_info[level].offmask;
}

static inline unsigned int
pt_attr_twig_level(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_max_level - 1;
}

static inline unsigned int
pt_attr_root_level(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_root_level;
}

static __unused inline uint64_t
pt_attr_leaf_size(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_level_info[pt_attr->pta_max_level].size;
}

static __unused inline uint64_t
pt_attr_leaf_offmask(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_level_info[pt_attr->pta_max_level].offmask;
}

static inline uint64_t
pt_attr_leaf_shift(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_level_info[pt_attr->pta_max_level].shift;
}

static __unused inline uint64_t
pt_attr_leaf_index_mask(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_level_info[pt_attr->pta_max_level].index_mask;
}

static inline uint64_t
pt_attr_twig_size(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].size;
}

static inline uint64_t
pt_attr_twig_offmask(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].offmask;
}

static inline uint64_t
pt_attr_twig_shift(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].shift;
}

static __unused inline uint64_t
pt_attr_twig_index_mask(const pt_attr_t * const pt_attr)
{
    return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].index_mask;
}

static inline uint64_t
pt_attr_leaf_table_size(const pt_attr_t * const pt_attr)
{
    return pt_attr_twig_size(pt_attr);
}

static inline uint64_t
pt_attr_leaf_table_offmask(const pt_attr_t * const pt_attr)
{
    return pt_attr_twig_offmask(pt_attr);
}

static inline uintptr_t
pt_attr_leaf_rw(const pt_attr_t * const pt_attr)
{
    return pt_attr->ap_rw;
}

static inline uintptr_t
pt_attr_leaf_ro(const pt_attr_t * const pt_attr)
{
    return pt_attr->ap_ro;
}

static inline uintptr_t
pt_attr_leaf_rona(const pt_attr_t * const pt_attr)
{
    return pt_attr->ap_rona;
}

static inline uintptr_t
pt_attr_leaf_rwna(const pt_attr_t * const pt_attr)
{
    return pt_attr->ap_rwna;
}

static inline uintptr_t
pt_attr_leaf_xn(const pt_attr_t * const pt_attr)
{
    return pt_attr->ap_xn;
}

static inline uintptr_t
pt_attr_leaf_x(const pt_attr_t * const pt_attr)
{
    return pt_attr->ap_x;
}
#else /* (__ARM_VMSA__ > 7) */

static inline unsigned int
pt_attr_twig_level(__unused const pt_attr_t * const pt_attr)
{
    return PMAP_TT_L1_LEVEL;
}

static inline uint64_t
pt_attr_twig_size(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_TWIG_SIZE;
}

static inline uint64_t
pt_attr_twig_offmask(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_TWIG_OFFMASK;
}

static inline uint64_t
pt_attr_twig_shift(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_TWIG_SHIFT;
}

static __unused inline uint64_t
pt_attr_twig_index_mask(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_TWIG_INDEX_MASK;
}

__unused static inline uint64_t
pt_attr_leaf_size(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_LEAF_SIZE;
}

__unused static inline uint64_t
pt_attr_leaf_offmask(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_LEAF_OFFMASK;
}

static inline uint64_t
pt_attr_leaf_shift(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_LEAF_SHIFT;
}

static __unused inline uint64_t
pt_attr_leaf_index_mask(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_LEAF_INDEX_MASK;
}

static inline uint64_t
pt_attr_leaf_table_size(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_L1_PT_SIZE;
}

static inline uint64_t
pt_attr_leaf_table_offmask(__unused const pt_attr_t * const pt_attr)
{
    return ARM_TT_L1_PT_OFFMASK;
}

static inline uintptr_t
pt_attr_leaf_rw(__unused const pt_attr_t * const pt_attr)
{
    return ARM_PTE_AP(AP_RWRW);
}

static inline uintptr_t
pt_attr_leaf_ro(__unused const pt_attr_t * const pt_attr)
{
    return ARM_PTE_AP(AP_RORO);
}

static inline uintptr_t
pt_attr_leaf_rona(__unused const pt_attr_t * const pt_attr)
{
    return ARM_PTE_AP(AP_RONA);
}

static inline uintptr_t
pt_attr_leaf_rwna(__unused const pt_attr_t * const pt_attr)
{
    return ARM_PTE_AP(AP_RWNA);
}

static inline uintptr_t
pt_attr_leaf_xn(__unused const pt_attr_t * const pt_attr)
{
    return ARM_PTE_NX;
}

#endif /* (__ARM_VMSA__ > 7) */
static inline void
pmap_sync_tlb(bool strong __unused)
{
    sync_tlb_flush();
}

#if MACH_ASSERT
int vm_footprint_suspend_allowed = 1;

extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

int pmap_stats_assert = 1;
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...) \
	MACRO_BEGIN \
	if (pmap_stats_assert && (pmap)->pmap_stats_assert) \
	        assertf(cond, fmt, ##__VA_ARGS__); \
	MACRO_END
#else /* MACH_ASSERT */
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
#endif /* MACH_ASSERT */

#if DEVELOPMENT || DEBUG
#define PMAP_FOOTPRINT_SUSPENDED(pmap) \
	(current_thread()->pmap_footprint_suspended)
#else /* DEVELOPMENT || DEBUG */
#define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
#endif /* DEVELOPMENT || DEBUG */
#define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
#define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)

#if DEVELOPMENT || DEBUG
int panic_on_unsigned_execute = 0;
#endif /* DEVELOPMENT || DEBUG */

/* Virtual memory region for early allocation */
#if (__ARM_VMSA__ == 7)
#define VREGION1_HIGH_WINDOW    (0)
#else
#define VREGION1_HIGH_WINDOW    (PE_EARLY_BOOT_VA)
#endif
#define VREGION1_START          ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
#define VREGION1_SIZE           (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
extern unsigned int not_in_kdp;

extern vm_offset_t first_avail;

extern pmap_paddr_t avail_start;
extern pmap_paddr_t avail_end;

extern vm_offset_t virtual_space_start;     /* Next available kernel VA */
extern vm_offset_t virtual_space_end;       /* End of kernel address space */
extern vm_offset_t static_memory_end;

extern int maxproc, hard_maxproc;
#if (__ARM_VMSA__ > 7)
/* The number of address bits one TTBR can cover. */
#define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)

/*
 * The bounds on our TTBRs.  These are for sanity checking that
 * an address is accessible by a TTBR before we attempt to map it.
 */
#define ARM64_TTBR0_MIN_ADDR (0ULL)
#define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
#define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
#define ARM64_TTBR1_MAX_ADDR (~0ULL)
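/*
 * Worked example (values depend on T0SZ_BOOT, which is configuration specific):
 * if T0SZ_BOOT were 25, PGTABLE_ADDR_BITS == 39, so
 *   ARM64_TTBR0_MAX_ADDR == 0x0000007FFFFFFFFF and
 *   ARM64_TTBR1_MIN_ADDR == 0xFFFFFF8000000000,
 * i.e. TTBR0 covers the low 512 GiB and TTBR1 the high 512 GiB of the VA space.
 */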
/* The level of the root of a page table. */
const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));

/* The number of entries in the root TT of a page table. */
const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
#else
const uint64_t arm64_root_pgtable_level = 0;
const uint64_t arm64_root_pgtable_num_ttes = 0;
#endif
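/*
 * Worked example of the formulas above (assuming 8-byte TTEs, i.e. TTE_SHIFT == 3):
 * with 16 KiB hardware pages (ARM_PGSHIFT == 14) each table level resolves 11 VA bits,
 * so for PGTABLE_ADDR_BITS == 36 the root sits at level (3 - (36 - 1 - 14) / 11) == 2
 * and holds (2 << ((36 - 1 - 14) % 11)) == 2 << 10 == 2048 entries.
 */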
struct pmap                     kernel_pmap_store MARK_AS_PMAP_DATA;
SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;

struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT)));   /* store pt pages */
vm_object_t      pmap_object = &pmap_object_store;

static struct zone *pmap_zone;  /* zone of pmap structures */

decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA);
decl_simple_lock_data(, tt1_lock MARK_AS_PMAP_DATA);
unsigned int    pmap_stamp MARK_AS_PMAP_DATA;
queue_head_t    map_pmap_list MARK_AS_PMAP_DATA;

decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA);
queue_head_t    pt_page_list MARK_AS_PMAP_DATA;         /* pt page ptd entries list */

decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA);
typedef struct page_free_entry {
    struct page_free_entry  *next;
} page_free_entry_t;

#define PAGE_FREE_ENTRY_NULL    ((page_free_entry_t *) 0)

page_free_entry_t  *pmap_pages_reclaim_list MARK_AS_PMAP_DATA;  /* Reclaimed pt page list */
unsigned int       pmap_pages_request_count MARK_AS_PMAP_DATA;  /* Pending requests to reclaim pt page */
unsigned long long pmap_pages_request_acum MARK_AS_PMAP_DATA;
typedef struct tt_free_entry {
    struct tt_free_entry  *next;
} tt_free_entry_t;

#define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)

tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_PAGE_SIZE_TT_MAX   4
tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_two_page_size_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_two_page_size_tt_max MARK_AS_PMAP_DATA;
#define FREE_TWO_PAGE_SIZE_TT_MAX       4
tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
unsigned int    free_tt_count MARK_AS_PMAP_DATA;
unsigned int    free_tt_max MARK_AS_PMAP_DATA;

#define TT_FREE_ENTRY_NULL      ((tt_free_entry_t *) 0)
boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
boolean_t pmap_gc_allowed_by_time_throttle = TRUE;

unsigned int inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0;   /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0;   /* leaf user pagetable pages, in units of PAGE_SIZE */
unsigned int inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0;    /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
unsigned int inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0;  /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
unsigned int inuse_pmap_pages_count = 0;                        /* debugging */

SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte  = 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;

SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte  = 0;     /* set by arm_vm_init() - keep out of bss */
SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0;     /* set by arm_vm_init() - phys tte addr */

#if DEVELOPMENT || DEBUG
int nx_enabled = 1;                                     /* enable no-execute protection */
int allow_data_exec  = 0;                               /* No apps may execute data */
int allow_stack_exec = 0;                               /* No apps may execute from the stack */
unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
#else /* DEVELOPMENT || DEBUG */
const int nx_enabled = 1;                               /* enable no-execute protection */
const int allow_data_exec  = 0;                         /* No apps may execute data */
const int allow_stack_exec = 0;                         /* No apps may execute from the stack */
#endif /* DEVELOPMENT || DEBUG */
/*
 * pv_entry_t - structure to track the active mappings for a given page
 */
typedef struct pv_entry {
    struct pv_entry *pve_next;      /* next alias */
    pt_entry_t      *pve_ptep;      /* page table entry */
}
#if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
/* For the newer ARMv7k ABI where 64-bit types are 64-bit aligned, but pointers
 * are 32-bit:
 * Since pt_desc is 64-bit aligned and we cast often from pv_entry to
 * pt_desc.
 */
__attribute__ ((aligned(8))) pv_entry_t;
#else
pv_entry_t;
#endif

#define PV_ENTRY_NULL   ((pv_entry_t *) 0)

/*
 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
 * as a marker for pages mapped through an "alternate accounting" mapping.
 * These macros set, clear and test for this marker and extract the actual
 * value of the "pve_next" pointer.
 */
#define PVE_NEXT_ALTACCT        ((uintptr_t) 0x1)
#define PVE_NEXT_SET_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
	    PVE_NEXT_ALTACCT)
#define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
	    ~PVE_NEXT_ALTACCT)
#define PVE_NEXT_IS_ALTACCT(pve_next) \
	((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
#define PVE_NEXT_PTR(pve_next) \
	((struct pv_entry *)(((uintptr_t) (pve_next)) & \
	    ~PVE_NEXT_ALTACCT))
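/*
 * Why the low bit is available: pv_entry structures are at least 4-byte (on arm64,
 * 8-byte) aligned, so bit 0 of a pve_next pointer is always zero and can carry the
 * "alternate accounting" marker. PVE_NEXT_PTR() must therefore be applied before
 * the pointer is dereferenced, e.g.:
 *
 *    pv_entry_t *next = PVE_NEXT_PTR(pve_next(pve_p));
 */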
#if MACH_ASSERT
static void pmap_check_ledgers(pmap_t pmap);
#else
static inline void
pmap_check_ledgers(__unused pmap_t pmap)
{
}
#endif /* MACH_ASSERT */

SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table;          /* array of pv entry pointers */

pv_entry_t      *pv_free_list MARK_AS_PMAP_DATA;
pv_entry_t      *pv_kern_free_list MARK_AS_PMAP_DATA;
decl_simple_lock_data(, pv_free_list_lock MARK_AS_PMAP_DATA);
decl_simple_lock_data(, pv_kern_free_list_lock MARK_AS_PMAP_DATA);

decl_simple_lock_data(, phys_backup_lock);
/*
 * pt_desc - structure to keep info on page assigned to page tables
 */
#if (__ARM_VMSA__ == 7)
#define PT_INDEX_MAX    1
#else
#if (ARM_PGSHIFT == 14)
#define PT_INDEX_MAX    1
#else
#define PT_INDEX_MAX    4
#endif
#endif

#define PT_DESC_REFCOUNT        0x4000U
#define PT_DESC_IOMMU_REFCOUNT  0x8000U

typedef struct pt_desc {
    queue_chain_t   pt_page;
    struct pmap     *pmap;
    /*
     * Locate this struct towards the end of the pt_desc; our long term
     * goal is to make this a VLA to avoid wasting memory if we don't need
     * multiple entries.
     */
    struct {
        /*
         * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
         * For leaf pagetables, should reflect the number of non-empty PTEs
         * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
         */
        unsigned short  refcnt;
        /*
         * For non-leaf pagetables, should be 0
         * For leaf pagetables, should reflect the number of wired entries
         * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
         */
        unsigned short  wiredcnt;
        vm_offset_t     va;
    } ptd_info[PT_INDEX_MAX];
} pt_desc_t;

#define PTD_ENTRY_NULL  ((pt_desc_t *) 0)

SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;

pt_desc_t       *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
unsigned int    ptd_free_count MARK_AS_PMAP_DATA = 0;
decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA);
/*
 * physical page attribute
 */
typedef u_int16_t pp_attr_t;

#define PP_ATTR_WIMG_MASK       0x003F
#define PP_ATTR_WIMG(x)         ((x) & PP_ATTR_WIMG_MASK)

#define PP_ATTR_REFERENCED      0x0040
#define PP_ATTR_MODIFIED        0x0080

#define PP_ATTR_INTERNAL        0x0100
#define PP_ATTR_REUSABLE        0x0200
#define PP_ATTR_ALTACCT         0x0400
#define PP_ATTR_NOENCRYPT       0x0800

#define PP_ATTR_REFFAULT        0x1000
#define PP_ATTR_MODFAULT        0x2000

SECURITY_READ_ONLY_LATE(pp_attr_t*)     pp_attr_table;

typedef struct pmap_io_range {
    uint64_t addr;
    uint64_t len;
    #define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31) // Strong DSB required for pages in this range
    uint32_t wimg; // lower 16 bits treated as pp_attr_t, upper 16 bits contain additional mapping flags
    uint32_t signature; // 4CC
} __attribute__((packed)) pmap_io_range_t;

SECURITY_READ_ONLY_LATE(pmap_io_range_t*)       io_attr_table;

SECURITY_READ_ONLY_LATE(pmap_paddr_t)   vm_first_phys = (pmap_paddr_t) 0;
SECURITY_READ_ONLY_LATE(pmap_paddr_t)   vm_last_phys = (pmap_paddr_t) 0;

SECURITY_READ_ONLY_LATE(unsigned int)   num_io_rgns = 0;
SECURITY_READ_ONLY_LATE(boolean_t)      pmap_initialized = FALSE;       /* Has pmap_init completed? */

SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;

SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default  = 0x0;
#if defined(__arm64__)
SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
#endif

#define PMAP_MAX_SW_ASID ((MAX_ASID + MAX_HW_ASID - 1) / MAX_HW_ASID)
_Static_assert(PMAP_MAX_SW_ASID <= (UINT8_MAX + 1),
    "VASID bits can't be represented by an 8-bit integer");

decl_simple_lock_data(, asid_lock MARK_AS_PMAP_DATA);
static bitmap_t asid_bitmap[BITMAP_LEN(MAX_ASID)] MARK_AS_PMAP_DATA;
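/*
 * Example of the ASID split (values are configuration dependent): if MAX_ASID were
 * 2 * MAX_HW_ASID, PMAP_MAX_SW_ASID == 2, and each virtual ASID decomposes into a
 * hardware ASID (vasid % MAX_HW_ASID) plus a software epoch (vasid / MAX_HW_ASID)
 * stored in pmap->sw_asid; see alloc_asid() below.
 */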
#if (__ARM_VMSA__ > 7)
SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
#endif

#define pa_index(pa) \
	(atop((pa) - vm_first_phys))

#define pai_to_pvh(pai) \
	(&pv_head_table[pai])

#define pa_valid(x) \
	((x) >= vm_first_phys && (x) < vm_last_phys)
/* PTE Define Macros */

#define pte_is_wired(pte) \
	(((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)

#define pte_set_wired(ptep, wired) \
	do { \
	        SInt16  *ptd_wiredcnt_ptr; \
	        ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->ptd_info[ARM_PT_DESC_INDEX(ptep)].wiredcnt); \
	        if (wired) { \
	                *ptep |= ARM_PTE_WIRED; \
	                OSAddAtomic16(1, ptd_wiredcnt_ptr); \
	        } else { \
	                *ptep &= ~ARM_PTE_WIRED; \
	                OSAddAtomic16(-1, ptd_wiredcnt_ptr); \
	        } \
	} while (0)

#define pte_was_writeable(pte) \
	(((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)

#define pte_set_was_writeable(pte, was_writeable) \
	do { \
	        if ((was_writeable)) { \
	                (pte) |= ARM_PTE_WRITEABLE; \
	        } else { \
	                (pte) &= ~ARM_PTE_WRITEABLE; \
	        } \
	} while (0)
/* PVE Define Macros */

#define pve_next(pve) \
	((pve)->pve_next)

#define pve_link_field(pve) \
	(&pve_next(pve))

#define pve_link(pp, e) \
	((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))

#define pve_unlink(pp, e) \
	(pve_next(pp) = pve_next(e))

/* bits held in the ptep pointer field */

#define pve_get_ptep(pve) \
	((pve)->pve_ptep)

#define pve_set_ptep(pve, ptep_new) \
	do { \
	        (pve)->pve_ptep = (ptep_new); \
	} while (0)
908 /* mask for page descriptor index */
909 #define ARM_TT_PT_INDEX_MASK ARM_PGMASK
911 #if (__ARM_VMSA__ == 7)
912 #define ARM_PT_DESC_INDEX_MASK 0x00000
913 #define ARM_PT_DESC_INDEX_SHIFT 0
916 * Shift value used for reconstructing the virtual address for a PTE.
918 #define ARM_TT_PT_ADDR_SHIFT (10U)
920 #define ptep_get_va(ptep) \
921 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
923 #define ptep_get_pmap(ptep) \
924 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
928 #if (ARM_PGSHIFT == 12)
929 #define ARM_PT_DESC_INDEX_MASK ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
930 #define ARM_PT_DESC_INDEX_SHIFT ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
932 * Shift value used for reconstructing the virtual address for a PTE.
934 #define ARM_TT_PT_ADDR_SHIFT (9ULL)
937 #define ARM_PT_DESC_INDEX_MASK (0x00000)
938 #define ARM_PT_DESC_INDEX_SHIFT (0)
940 * Shift value used for reconstructing the virtual address for a PTE.
942 #define ARM_TT_PT_ADDR_SHIFT (11ULL)
946 #define ARM_PT_DESC_INDEX(ptep) \
947 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
949 #define ptep_get_va(ptep) \
950 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
952 #define ptep_get_pmap(ptep) \
953 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
957 #define ARM_PT_DESC_INDEX(ptep) \
958 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
960 #define ptep_get_ptd(ptep) \
961 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
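/*
 * Sanity check of ARM_TT_PT_ADDR_SHIFT (derived from the definitions above): the
 * byte offset of a PTE within its page, shifted left by ARM_TT_PT_ADDR_SHIFT, must
 * equal the VA span that PTE maps. With 4-byte PTEs (__ARM_VMSA__ == 7),
 * 4 << 10 == 4 KiB; with 8-byte PTEs, 8 << 9 == 4 KiB and 8 << 11 == 16 KiB,
 * matching the respective page sizes used by ptep_get_va().
 */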
/* PVH Define Macros */

#define PVH_TYPE_NULL   0x0UL
#define PVH_TYPE_PVEP   0x1UL
#define PVH_TYPE_PTEP   0x2UL
#define PVH_TYPE_PTDP   0x3UL

#define PVH_TYPE_MASK   (0x3UL)

#ifdef __arm64__

/* All flags listed below are stored in the PV head pointer unless otherwise noted */
#define PVH_FLAG_IOMMU          0x4UL /* Stored in each PTE, or in PV head for single-PTE PV heads */
#define PVH_FLAG_IOMMU_TABLE    (1ULL << 63) /* Stored in each PTE, or in PV head for single-PTE PV heads */
#define PVH_FLAG_CPU            (1ULL << 62)
#define PVH_LOCK_BIT            61
#define PVH_FLAG_LOCK           (1ULL << PVH_LOCK_BIT)
#define PVH_FLAG_EXEC           (1ULL << 60)
#define PVH_FLAG_LOCKDOWN       (1ULL << 59)
#define PVH_HIGH_FLAGS          (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN)

#else /* !__arm64__ */

#define PVH_LOCK_BIT    31
#define PVH_FLAG_LOCK   (1UL << PVH_LOCK_BIT)
#define PVH_HIGH_FLAGS  PVH_FLAG_LOCK

#endif

#define PVH_LIST_MASK   (~PVH_TYPE_MASK)
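/*
 * The PV head is a single pointer-sized word that doubles as a tagged union: the two
 * low bits (free because the referenced structures are at least 4-byte aligned)
 * encode PVH_TYPE_*, and on arm64 the high bits, which are always set in kernel
 * virtual addresses and are restored by pvh_list()/pvh_ptep() via the OR with
 * PVH_HIGH_FLAGS, carry the PVH_FLAG_* bits, including the per-page lock.
 */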
#define pvh_test_type(h, b) \
	((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))

#define pvh_ptep(h) \
	((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))

#define pvh_list(h) \
	((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))

#define pvh_get_flags(h) \
	(*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)

#define pvh_set_flags(h, f) \
	do { \
	        os_atomic_store((vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
	            relaxed); \
	} while (0)

#define pvh_update_head(h, e, t) \
	do { \
	        assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK); \
	        os_atomic_store((vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
	            relaxed); \
	} while (0)

#define pvh_update_head_unlocked(h, e, t) \
	do { \
	        assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK)); \
	        *(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK; \
	} while (0)

#define pvh_add(h, e) \
	do { \
	        assert(!pvh_test_type((h), PVH_TYPE_PTEP)); \
	        pve_next(e) = pvh_list(h); \
	        pvh_update_head((h), (e), PVH_TYPE_PVEP); \
	} while (0)

#define pvh_remove(h, p, e) \
	do { \
	        assert(!PVE_NEXT_IS_ALTACCT(pve_next((e)))); \
	        if ((p) == (h)) { \
	                if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
	                        pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
	                } else { \
	                        pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
	                } \
	        } else { \
	                /* \
	                 * PMAP LEDGERS: \
	                 * preserve the "alternate accounting" bit \
	                 * when updating "p" (the previous entry's \
	                 * "pve_next"). \
	                 */ \
	                boolean_t __is_altacct; \
	                __is_altacct = PVE_NEXT_IS_ALTACCT(*(p)); \
	                *(p) = PVE_NEXT_PTR(pve_next((e))); \
	                if (__is_altacct) { \
	                        PVE_NEXT_SET_ALTACCT((p)); \
	                } else { \
	                        PVE_NEXT_CLR_ALTACCT((p)); \
	                } \
	        } \
	} while (0)
1062 /* PPATTR Define Macros */
1064 #define ppattr_set_bits(h, b) \
1066 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
1069 #define ppattr_clear_bits(h, b) \
1071 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
1074 #define ppattr_test_bits(h, b) \
1075 ((*(pp_attr_t *)(h) & (b)) == (b))
1077 #define pa_set_bits(x, b) \
1080 ppattr_set_bits(&pp_attr_table[pa_index(x)], \
1084 #define pa_test_bits(x, b) \
1085 (pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
1088 #define pa_clear_bits(x, b) \
1091 ppattr_clear_bits(&pp_attr_table[pa_index(x)], \
1095 #define pa_set_modify(x) \
1096 pa_set_bits(x, PP_ATTR_MODIFIED)
1098 #define pa_clear_modify(x) \
1099 pa_clear_bits(x, PP_ATTR_MODIFIED)
1101 #define pa_set_reference(x) \
1102 pa_set_bits(x, PP_ATTR_REFERENCED)
1104 #define pa_clear_reference(x) \
1105 pa_clear_bits(x, PP_ATTR_REFERENCED)
1108 #define IS_INTERNAL_PAGE(pai) \
1109 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1110 #define SET_INTERNAL_PAGE(pai) \
1111 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1112 #define CLR_INTERNAL_PAGE(pai) \
1113 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1115 #define IS_REUSABLE_PAGE(pai) \
1116 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1117 #define SET_REUSABLE_PAGE(pai) \
1118 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1119 #define CLR_REUSABLE_PAGE(pai) \
1120 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1122 #define IS_ALTACCT_PAGE(pai, pve_p) \
1123 (((pve_p) == NULL) \
1124 ? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
1125 : PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
1126 #define SET_ALTACCT_PAGE(pai, pve_p) \
1127 if ((pve_p) == NULL) { \
1128 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1130 PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
1132 #define CLR_ALTACCT_PAGE(pai, pve_p) \
1133 if ((pve_p) == NULL) { \
1134 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1136 PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
1139 #define IS_REFFAULT_PAGE(pai) \
1140 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1141 #define SET_REFFAULT_PAGE(pai) \
1142 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1143 #define CLR_REFFAULT_PAGE(pai) \
1144 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1146 #define IS_MODFAULT_PAGE(pai) \
1147 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1148 #define SET_MODFAULT_PAGE(pai) \
1149 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1150 #define CLR_MODFAULT_PAGE(pai) \
1151 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1153 #define tte_get_ptd(tte) \
1154 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))
1157 #if (__ARM_VMSA__ == 7)
1159 #define tte_index(pmap, pt_attr, addr) \
1162 #define pte_index(pmap, pt_attr, addr) \
1167 #define ttn_index(pmap, pt_attr, addr, pt_level) \
1168 (((addr) & (pt_attr)->pta_level_info[(pt_level)].index_mask) >> (pt_attr)->pta_level_info[(pt_level)].shift)
1170 #define tt0_index(pmap, pt_attr, addr) \
1171 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L0_LEVEL)
1173 #define tt1_index(pmap, pt_attr, addr) \
1174 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L1_LEVEL)
1176 #define tt2_index(pmap, pt_attr, addr) \
1177 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L2_LEVEL)
1179 #define tt3_index(pmap, pt_attr, addr) \
1180 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L3_LEVEL)
1182 #define tte_index(pmap, pt_attr, addr) \
1183 tt2_index((pmap), (pt_attr), (addr))
1185 #define pte_index(pmap, pt_attr, addr) \
1186 tt3_index((pmap), (pt_attr), (addr))
1191 * Lock on pmap system
1194 lck_grp_t pmap_lck_grp
;
1196 #define PMAP_LOCK_INIT(pmap) { \
1197 simple_lock_init(&(pmap)->lock, 0); \
1200 #define PMAP_LOCK(pmap) { \
1201 pmap_simple_lock(&(pmap)->lock); \
1204 #define PMAP_UNLOCK(pmap) { \
1205 pmap_simple_unlock(&(pmap)->lock); \
1209 #define PMAP_ASSERT_LOCKED(pmap) { \
1210 simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED); \
1213 #define PMAP_ASSERT_LOCKED(pmap)
1216 #if defined(__arm64__)
1217 #define PVH_LOCK_WORD 1 /* Assumes little-endian */
1219 #define PVH_LOCK_WORD 0
1222 #define ASSERT_PVH_LOCKED(index) \
1224 assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK); \
1227 #define LOCK_PVH(index) \
1229 pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
1232 #define UNLOCK_PVH(index) \
1234 ASSERT_PVH_LOCKED(index); \
1235 pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
#define PMAP_UPDATE_TLBS(pmap, s, e, strong) { \
	pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (unsigned)(e - s), pmap); \
	pmap_sync_tlb(strong); \
}

#define FLUSH_PTE_RANGE(spte, epte) \
	__builtin_arm_dmb(DMB_ISH);

#define FLUSH_PTE(pte_p) \
	__builtin_arm_dmb(DMB_ISH);

#define FLUSH_PTE_STRONG(pte_p) \
	__builtin_arm_dsb(DSB_ISH);

#define FLUSH_PTE_RANGE_STRONG(spte, epte) \
	__builtin_arm_dsb(DSB_ISH);

#define WRITE_PTE_FAST(pte_p, pte_entry) \
	__unreachable_ok_push \
	if (TEST_PAGE_RATIO_4) { \
	        if (((unsigned)(pte_p)) & 0x1f) { \
	                panic("%s: WRITE_PTE_FAST is unaligned, " \
	                    "pte_p=%p, pte_entry=%p", \
	                    __FUNCTION__, \
	                    pte_p, (void*)pte_entry); \
	        } \
	        if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
	                *(pte_p) = (pte_entry); \
	                *((pte_p)+1) = (pte_entry); \
	                *((pte_p)+2) = (pte_entry); \
	                *((pte_p)+3) = (pte_entry); \
	        } else { \
	                *(pte_p) = (pte_entry); \
	                *((pte_p)+1) = (pte_entry) | 0x1000; \
	                *((pte_p)+2) = (pte_entry) | 0x2000; \
	                *((pte_p)+3) = (pte_entry) | 0x3000; \
	        } \
	} else { \
	        *(pte_p) = (pte_entry); \
	} \
	__unreachable_ok_pop

#define WRITE_PTE(pte_p, pte_entry) \
	WRITE_PTE_FAST(pte_p, pte_entry); \
	FLUSH_PTE(pte_p);

#define WRITE_PTE_STRONG(pte_p, pte_entry) \
	WRITE_PTE_FAST(pte_p, pte_entry); \
	FLUSH_PTE_STRONG(pte_p);
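/*
 * Note on WRITE_PTE_FAST above: when TEST_PAGE_RATIO_4 is true the kernel page size
 * is 4x the hardware page size, so one logical PTE write is replicated into four
 * consecutive hardware PTEs. The 0x1000/0x2000/0x3000 increments step the output
 * address by one 4 KiB hardware page per entry, while empty/compressed entries are
 * replicated unchanged.
 */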
1289 * Other useful macros.
1291 #define current_pmap() \
1292 (vm_map_pmap(current_thread()->map))
1295 #define VALIDATE_USER_PMAP(x)
1296 #define VALIDATE_PMAP(x)
1297 #define VALIDATE_LEDGER(x)
1300 #if DEVELOPMENT || DEBUG
1303 * Trace levels are controlled by a bitmask in which each
1304 * level can be enabled/disabled by the (1<<level) position
1306 * Level 1: pmap lifecycle (create/destroy/switch)
1307 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
1308 * Level 3: internal state management (tte/attributes/fast-fault)
1311 SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask
= 0;
1313 #define PMAP_TRACE(level, ...) \
1314 if (__improbable((1 << (level)) & pmap_trace_mask)) { \
1315 KDBG_RELEASE(__VA_ARGS__); \
1319 #define PMAP_TRACE(level, ...)
1325 * Internal function prototypes (forward declarations).
1328 static void pv_init(
1331 static boolean_t
pv_alloc(
1334 pv_entry_t
**pvepp
);
1336 static void pv_free(
1339 static void pv_list_free(
1344 static void ptd_bootstrap(
1345 pt_desc_t
*ptdp
, unsigned int ptd_cnt
);
1347 static inline pt_desc_t
*ptd_alloc_unlinked(bool reclaim
);
1349 static pt_desc_t
*ptd_alloc(pmap_t pmap
, bool reclaim
);
1351 static void ptd_deallocate(pt_desc_t
*ptdp
);
1353 static void ptd_init(
1354 pt_desc_t
*ptdp
, pmap_t pmap
, vm_map_address_t va
, unsigned int ttlevel
, pt_entry_t
* pte_p
);
1356 static void pmap_zone_init(
1359 static void pmap_set_reference(
1362 ppnum_t
pmap_vtophys(
1363 pmap_t pmap
, addr64_t va
);
1365 void pmap_switch_user_ttb(
1368 static kern_return_t
pmap_expand(
1369 pmap_t
, vm_map_address_t
, unsigned int options
, unsigned int level
);
1371 static int pmap_remove_range(
1372 pmap_t
, vm_map_address_t
, pt_entry_t
*, pt_entry_t
*, uint32_t *);
1374 static int pmap_remove_range_options(
1375 pmap_t
, vm_map_address_t
, pt_entry_t
*, pt_entry_t
*, uint32_t *, bool *, int);
1377 static tt_entry_t
*pmap_tt1_allocate(
1378 pmap_t
, vm_size_t
, unsigned int);
1380 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1382 static void pmap_tt1_deallocate(
1383 pmap_t
, tt_entry_t
*, vm_size_t
, unsigned int);
1385 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1387 static kern_return_t
pmap_tt_allocate(
1388 pmap_t
, tt_entry_t
**, unsigned int, unsigned int);
1390 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1392 static void pmap_tte_deallocate(
1393 pmap_t
, tt_entry_t
*, unsigned int);
1395 #ifdef __ARM64_PMAP_SUBPAGE_L1__
1396 #if (__ARM_VMSA__ <= 7)
1397 #error This is not supported for old-style page tables
1399 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
1401 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
1404 const unsigned int arm_hardware_page_size
= ARM_PGBYTES
;
1405 const unsigned int arm_pt_desc_size
= sizeof(pt_desc_t
);
1406 const unsigned int arm_pt_root_size
= PMAP_ROOT_ALLOC_SIZE
;
1408 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1410 #if (__ARM_VMSA__ > 7)
1412 static inline tt_entry_t
*pmap_tt1e(
1413 pmap_t
, vm_map_address_t
);
1415 static inline tt_entry_t
*pmap_tt2e(
1416 pmap_t
, vm_map_address_t
);
1418 static inline pt_entry_t
*pmap_tt3e(
1419 pmap_t
, vm_map_address_t
);
1421 static inline pt_entry_t
*pmap_ttne(
1422 pmap_t
, unsigned int, vm_map_address_t
);
1424 static void pmap_unmap_sharedpage(
1428 pmap_is_64bit(pmap_t
);
1432 static inline tt_entry_t
*pmap_tte(
1433 pmap_t
, vm_map_address_t
);
1435 static inline pt_entry_t
*pmap_pte(
1436 pmap_t
, vm_map_address_t
);
1438 static void pmap_update_cache_attributes_locked(
1441 boolean_t
arm_clear_fast_fault(
1443 vm_prot_t fault_type
);
1445 static pmap_paddr_t
pmap_pages_reclaim(
1448 static kern_return_t
pmap_pages_alloc(
1453 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1454 #define PMAP_PAGES_RECLAIM_NOWAIT 0x2
1456 static void pmap_pages_free(
1460 static void pmap_pin_kernel_pages(vm_offset_t kva
, size_t nbytes
);
1462 static void pmap_unpin_kernel_pages(vm_offset_t kva
, size_t nbytes
);
1464 static void pmap_trim_self(pmap_t pmap
);
1465 static void pmap_trim_subord(pmap_t subord
);
1468 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
1469 static __return_type __function_name##_internal __function_args
1471 PMAP_SUPPORT_PROTOTYPES(
1473 arm_fast_fault
, (pmap_t pmap
,
1474 vm_map_address_t va
,
1475 vm_prot_t fault_type
,
1477 bool from_user
), ARM_FAST_FAULT_INDEX
);
1480 PMAP_SUPPORT_PROTOTYPES(
1482 arm_force_fast_fault
, (ppnum_t ppnum
,
1483 vm_prot_t allow_mode
,
1484 int options
), ARM_FORCE_FAST_FAULT_INDEX
);
1486 PMAP_SUPPORT_PROTOTYPES(
1488 mapping_free_prime
, (void), MAPPING_FREE_PRIME_INDEX
);
1490 PMAP_SUPPORT_PROTOTYPES(
1492 mapping_replenish
, (void), MAPPING_REPLENISH_INDEX
);
1494 PMAP_SUPPORT_PROTOTYPES(
1496 pmap_batch_set_cache_attributes
, (ppnum_t pn
,
1497 unsigned int cacheattr
,
1498 unsigned int page_cnt
,
1499 unsigned int page_index
,
1501 unsigned int *res
), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX
);
1503 PMAP_SUPPORT_PROTOTYPES(
1505 pmap_change_wiring
, (pmap_t pmap
,
1507 boolean_t wired
), PMAP_CHANGE_WIRING_INDEX
);
1509 PMAP_SUPPORT_PROTOTYPES(
1511 pmap_create_options
, (ledger_t ledger
,
1513 unsigned int flags
), PMAP_CREATE_INDEX
);
1515 PMAP_SUPPORT_PROTOTYPES(
1517 pmap_destroy
, (pmap_t pmap
), PMAP_DESTROY_INDEX
);
1519 PMAP_SUPPORT_PROTOTYPES(
1521 pmap_enter_options
, (pmap_t pmap
,
1525 vm_prot_t fault_type
,
1528 unsigned int options
), PMAP_ENTER_OPTIONS_INDEX
);
1530 PMAP_SUPPORT_PROTOTYPES(
1532 pmap_extract
, (pmap_t pmap
,
1533 vm_map_address_t va
), PMAP_EXTRACT_INDEX
);
1535 PMAP_SUPPORT_PROTOTYPES(
1537 pmap_find_phys
, (pmap_t pmap
,
1538 addr64_t va
), PMAP_FIND_PHYS_INDEX
);
1540 #if (__ARM_VMSA__ > 7)
1541 PMAP_SUPPORT_PROTOTYPES(
1543 pmap_insert_sharedpage
, (pmap_t pmap
), PMAP_INSERT_SHAREDPAGE_INDEX
);
1547 PMAP_SUPPORT_PROTOTYPES(
1549 pmap_is_empty
, (pmap_t pmap
,
1550 vm_map_offset_t va_start
,
1551 vm_map_offset_t va_end
), PMAP_IS_EMPTY_INDEX
);
1554 PMAP_SUPPORT_PROTOTYPES(
1556 pmap_map_cpu_windows_copy
, (ppnum_t pn
,
1558 unsigned int wimg_bits
), PMAP_MAP_CPU_WINDOWS_COPY_INDEX
);
1560 PMAP_SUPPORT_PROTOTYPES(
1562 pmap_nest
, (pmap_t grand
,
1566 uint64_t size
), PMAP_NEST_INDEX
);
1568 PMAP_SUPPORT_PROTOTYPES(
1570 pmap_page_protect_options
, (ppnum_t ppnum
,
1572 unsigned int options
), PMAP_PAGE_PROTECT_OPTIONS_INDEX
);
1574 PMAP_SUPPORT_PROTOTYPES(
1576 pmap_protect_options
, (pmap_t pmap
,
1577 vm_map_address_t start
,
1578 vm_map_address_t end
,
1580 unsigned int options
,
1581 void *args
), PMAP_PROTECT_OPTIONS_INDEX
);
1583 PMAP_SUPPORT_PROTOTYPES(
1585 pmap_query_page_info
, (pmap_t pmap
,
1587 int *disp_p
), PMAP_QUERY_PAGE_INFO_INDEX
);
1589 PMAP_SUPPORT_PROTOTYPES(
1591 pmap_query_resident
, (pmap_t pmap
,
1592 vm_map_address_t start
,
1593 vm_map_address_t end
,
1594 mach_vm_size_t
* compressed_bytes_p
), PMAP_QUERY_RESIDENT_INDEX
);
1596 PMAP_SUPPORT_PROTOTYPES(
1598 pmap_reference
, (pmap_t pmap
), PMAP_REFERENCE_INDEX
);
1600 PMAP_SUPPORT_PROTOTYPES(
1602 pmap_remove_options
, (pmap_t pmap
,
1603 vm_map_address_t start
,
1604 vm_map_address_t end
,
1605 int options
), PMAP_REMOVE_OPTIONS_INDEX
);
1607 PMAP_SUPPORT_PROTOTYPES(
1609 pmap_return
, (boolean_t do_panic
,
1610 boolean_t do_recurse
), PMAP_RETURN_INDEX
);
1612 PMAP_SUPPORT_PROTOTYPES(
1614 pmap_set_cache_attributes
, (ppnum_t pn
,
1615 unsigned int cacheattr
), PMAP_SET_CACHE_ATTRIBUTES_INDEX
);
1617 PMAP_SUPPORT_PROTOTYPES(
1619 pmap_update_compressor_page
, (ppnum_t pn
,
1620 unsigned int prev_cacheattr
, unsigned int new_cacheattr
), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX
);
1622 PMAP_SUPPORT_PROTOTYPES(
1624 pmap_set_nested
, (pmap_t pmap
), PMAP_SET_NESTED_INDEX
);
1627 PMAP_SUPPORT_PROTOTYPES(
1629 pmap_set_process
, (pmap_t pmap
,
1631 char *procname
), PMAP_SET_PROCESS_INDEX
);
1634 PMAP_SUPPORT_PROTOTYPES(
1636 pmap_unmap_cpu_windows_copy
, (unsigned int index
), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX
);
1638 PMAP_SUPPORT_PROTOTYPES(
1640 pmap_unnest_options
, (pmap_t grand
,
1643 unsigned int option
), PMAP_UNNEST_OPTIONS_INDEX
);
1646 PMAP_SUPPORT_PROTOTYPES(
1648 phys_attribute_set
, (ppnum_t pn
,
1649 unsigned int bits
), PHYS_ATTRIBUTE_SET_INDEX
);
1652 PMAP_SUPPORT_PROTOTYPES(
1654 phys_attribute_clear
, (ppnum_t pn
,
1657 void *arg
), PHYS_ATTRIBUTE_CLEAR_INDEX
);
1659 PMAP_SUPPORT_PROTOTYPES(
1661 pmap_switch
, (pmap_t pmap
), PMAP_SWITCH_INDEX
);
1663 PMAP_SUPPORT_PROTOTYPES(
1665 pmap_switch_user_ttb
, (pmap_t pmap
), PMAP_SWITCH_USER_TTB_INDEX
);
1667 PMAP_SUPPORT_PROTOTYPES(
1669 pmap_clear_user_ttb
, (void), PMAP_CLEAR_USER_TTB_INDEX
);
1672 PMAP_SUPPORT_PROTOTYPES(
1674 pmap_set_jit_entitled
, (pmap_t pmap
), PMAP_SET_JIT_ENTITLED_INDEX
);
1676 PMAP_SUPPORT_PROTOTYPES(
1678 pmap_trim
, (pmap_t grand
,
1682 uint64_t size
), PMAP_TRIM_INDEX
);
1689 void pmap_footprint_suspend(vm_map_t map
,
1691 PMAP_SUPPORT_PROTOTYPES(
1693 pmap_footprint_suspend
, (vm_map_t map
,
1695 PMAP_FOOTPRINT_SUSPEND_INDEX
);
1699 boolean_t pgtrace_enabled
= 0;
1702 queue_chain_t chain
;
1705 * pmap - pmap for below addresses
1706 * ova - original va page address
1707 * cva - clone va addresses for pre, target and post pages
1708 * cva_spte - clone saved ptes
1709 * range - trace range in this map
1710 * cloned - has been cloned or not
1713 vm_map_offset_t ova
;
1714 vm_map_offset_t cva
[3];
1715 pt_entry_t cva_spte
[3];
1721 } pmap_pgtrace_map_t
;
1723 static void pmap_pgtrace_init(void);
1724 static bool pmap_pgtrace_enter_clone(pmap_t pmap
, vm_map_offset_t va_page
, vm_map_offset_t start
, vm_map_offset_t end
);
1725 static void pmap_pgtrace_remove_clone(pmap_t pmap
, pmap_paddr_t pa_page
, vm_map_offset_t va_page
);
1726 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa
);
1729 #if (__ARM_VMSA__ > 7)
1731 * The low global vector page is mapped at a fixed alias.
1732 * Since the page size is 16k for H8 and newer we map the globals to a 16k
1733 * aligned address. Readers of the globals (e.g. lldb, panic server) need
1734 * to check both addresses anyway for backward compatibility. So for now
1735 * we leave H6 and H7 where they were.
1737 #if (ARM_PGSHIFT == 14)
1738 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
1740 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
1744 #define LOWGLOBAL_ALIAS (0xFFFF1000)
1747 long long alloc_tteroot_count
__attribute__((aligned(8))) MARK_AS_PMAP_DATA
= 0LL;
1748 long long alloc_ttepages_count
__attribute__((aligned(8))) MARK_AS_PMAP_DATA
= 0LL;
1749 long long alloc_ptepages_count
__attribute__((aligned(8))) MARK_AS_PMAP_DATA
= 0LL;
1750 long long alloc_pmap_pages_count
__attribute__((aligned(8))) = 0LL;
1752 int pt_fake_zone_index
= -1; /* index of pmap fake zone */
1757 * Allocates and initializes a per-CPU data structure for the pmap.
1759 MARK_AS_PMAP_TEXT
static void
1760 pmap_cpu_data_init_internal(unsigned int cpu_number
)
1762 pmap_cpu_data_t
* pmap_cpu_data
= pmap_get_cpu_data();
1764 pmap_cpu_data
->cpu_number
= cpu_number
;
1768 pmap_cpu_data_init(void)
1770 pmap_cpu_data_init_internal(cpu_number());
1774 pmap_cpu_data_array_init(void)
1777 pmap_cpu_data_init();
1781 pmap_get_cpu_data(void)
1783 pmap_cpu_data_t
* pmap_cpu_data
= NULL
;
1785 pmap_cpu_data
= &getCpuDatap()->cpu_pmap_cpu_data
;
1787 return pmap_cpu_data
;
1797 boolean_t found_page
;
1802 * pmap_pages_reclaim() is returning a page by freeing an active pt page.
1803 * To be eligible, a pt page is assigned to a user pmap. It doesn't have any wired pte
1804 * entry and it contains at least one valid pte entry.
1806 * In a loop, check for a page in the reclaimed pt page list.
1807 * if one is present, unlink that page and return the physical page address.
1808 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
1809 * If found, invoke pmap_remove_range() on its pmap and address range then
1810 * deallocates that pt page. This will end up adding the pt page to the
1811 * reclaimed pt page list.
1812 * If no eligible page were found in the pt page list, panic.
1815 pmap_simple_lock(&pmap_pages_lock
);
1816 pmap_pages_request_count
++;
1817 pmap_pages_request_acum
++;
1820 if (pmap_pages_reclaim_list
!= (page_free_entry_t
*)NULL
) {
1821 page_free_entry_t
*page_entry
;
1823 page_entry
= pmap_pages_reclaim_list
;
1824 pmap_pages_reclaim_list
= pmap_pages_reclaim_list
->next
;
1825 pmap_simple_unlock(&pmap_pages_lock
);
1827 return (pmap_paddr_t
)ml_static_vtop((vm_offset_t
)page_entry
);
1830 pmap_simple_unlock(&pmap_pages_lock
);
1832 pmap_simple_lock(&pt_pages_lock
);
1833 ptdp
= (pt_desc_t
*)queue_first(&pt_page_list
);
1836 while (!queue_end(&pt_page_list
, (queue_entry_t
)ptdp
)) {
1837 if ((ptdp
->pmap
->nested
== FALSE
)
1838 && (pmap_simple_lock_try(&ptdp
->pmap
->lock
))) {
1839 assert(ptdp
->pmap
!= kernel_pmap
);
1840 unsigned refcnt_acc
= 0;
1841 unsigned wiredcnt_acc
= 0;
1843 for (i
= 0; i
< PT_INDEX_MAX
; i
++) {
1844 if (ptdp
->ptd_info
[i
].refcnt
== PT_DESC_REFCOUNT
) {
1845 /* Do not attempt to free a page that contains an L2 table */
1849 refcnt_acc
+= ptdp
->ptd_info
[i
].refcnt
;
1850 wiredcnt_acc
+= ptdp
->ptd_info
[i
].wiredcnt
;
1852 if ((wiredcnt_acc
== 0) && (refcnt_acc
!= 0)) {
1854 /* Leave ptdp->pmap locked here. We're about to reclaim
1855 * a tt page from it, so we don't want anyone else messing
1856 * with it while we do that. */
1859 pmap_simple_unlock(&ptdp
->pmap
->lock
);
1861 ptdp
= (pt_desc_t
*)queue_next((queue_t
)ptdp
);
1864 panic("%s: No eligible page in pt_page_list", __FUNCTION__
);
1866 int remove_count
= 0;
1867 bool need_strong_sync
= false;
1868 vm_map_address_t va
;
1870 pt_entry_t
*bpte
, *epte
;
1873 uint32_t rmv_spte
= 0;
1875 pmap_simple_unlock(&pt_pages_lock
);
1877 PMAP_ASSERT_LOCKED(pmap
); // pmap lock should be held from loop above
1879 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
1881 for (i
= 0; i
< PT_INDEX_MAX
; i
++) {
1882 va
= ptdp
->ptd_info
[i
].va
;
1884 /* If the VA is bogus, this may represent an unallocated region
1885 * or one which is in transition (already being freed or expanded).
1886 * Don't try to remove mappings here. */
1887 if (va
== (vm_offset_t
)-1) {
1891 tte_p
= pmap_tte(pmap
, va
);
1892 if ((tte_p
!= (tt_entry_t
*) NULL
)
1893 && ((*tte_p
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
)) {
1894 pte_p
= (pt_entry_t
*) ttetokv(*tte_p
);
1895 bpte
= &pte_p
[pte_index(pmap
, pt_attr
, va
)];
1896 epte
= bpte
+ PAGE_SIZE
/ sizeof(pt_entry_t
);
1898 * Use PMAP_OPTIONS_REMOVE to clear any
1899 * "compressed" markers and update the
1900 * "compressed" counter in pmap->stats.
1901 * This means that we lose accounting for
1902 * any compressed pages in this range
1903 * but the alternative is to not be able
1904 * to account for their future decompression,
1905 * which could cause the counter to drift
1908 remove_count
+= pmap_remove_range_options(
1909 pmap
, va
, bpte
, epte
,
1910 &rmv_spte
, &need_strong_sync
, PMAP_OPTIONS_REMOVE
);
1911 if (ptdp
->ptd_info
[ARM_PT_DESC_INDEX(pte_p
)].refcnt
!= 0) {
1912 panic("%s: ptdp %p, count %d", __FUNCTION__
, ptdp
, ptdp
->ptd_info
[ARM_PT_DESC_INDEX(pte_p
)].refcnt
);
1915 pmap_tte_deallocate(pmap
, tte_p
, PMAP_TT_TWIG_LEVEL
);
1917 if (remove_count
> 0) {
1918 pmap_get_pt_ops(pmap
)->flush_tlb_region_async(va
, (unsigned int)pt_attr_leaf_table_size(pt_attr
), pmap
);
1920 pmap_get_pt_ops(pmap
)->flush_tlb_tte_async(va
, pmap
);
1924 // Undo the lock we grabbed when we found ptdp above
1926 pmap_sync_tlb(need_strong_sync
);
1928 pmap_simple_lock(&pmap_pages_lock
);
1933 static kern_return_t
1939 vm_page_t m
= VM_PAGE_NULL
, m_prev
;
1941 if (option
& PMAP_PAGES_RECLAIM_NOWAIT
) {
1942 assert(size
== PAGE_SIZE
);
1943 *pa
= pmap_pages_reclaim();
1944 return KERN_SUCCESS
;
1946 if (size
== PAGE_SIZE
) {
1947 while ((m
= vm_page_grab()) == VM_PAGE_NULL
) {
1948 if (option
& PMAP_PAGES_ALLOCATE_NOWAIT
) {
1949 return KERN_RESOURCE_SHORTAGE
;
1954 vm_page_lock_queues();
1955 vm_page_wire(m
, VM_KERN_MEMORY_PTE
, TRUE
);
1956 vm_page_unlock_queues();
1958 if (size
== 2 * PAGE_SIZE
) {
1959 while (cpm_allocate(size
, &m
, 0, 1, TRUE
, 0) != KERN_SUCCESS
) {
1960 if (option
& PMAP_PAGES_ALLOCATE_NOWAIT
) {
1961 return KERN_RESOURCE_SHORTAGE
;
1968 *pa
= (pmap_paddr_t
)ptoa(VM_PAGE_GET_PHYS_PAGE(m
));
1970 vm_object_lock(pmap_object
);
1971 while (m
!= VM_PAGE_NULL
) {
1972 vm_page_insert_wired(m
, pmap_object
, (vm_object_offset_t
) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m
))) - gPhysBase
), VM_KERN_MEMORY_PTE
);
1974 m
= NEXT_PAGE(m_prev
);
1975 *(NEXT_PAGE_PTR(m_prev
)) = VM_PAGE_NULL
;
1977 vm_object_unlock(pmap_object
);
1979 OSAddAtomic(size
>> PAGE_SHIFT
, &inuse_pmap_pages_count
);
1980 OSAddAtomic64(size
>> PAGE_SHIFT
, &alloc_pmap_pages_count
);
1982 return KERN_SUCCESS
;
1991 pmap_simple_lock(&pmap_pages_lock
);
1993 if (pmap_pages_request_count
!= 0) {
1994 page_free_entry_t
*page_entry
;
1996 pmap_pages_request_count
--;
1997 page_entry
= (page_free_entry_t
*)phystokv(pa
);
1998 page_entry
->next
= pmap_pages_reclaim_list
;
1999 pmap_pages_reclaim_list
= page_entry
;
2000 pmap_simple_unlock(&pmap_pages_lock
);
2005 pmap_simple_unlock(&pmap_pages_lock
);
2008 pmap_paddr_t pa_max
;
2010 OSAddAtomic(-(size
>> PAGE_SHIFT
), &inuse_pmap_pages_count
);
2012 for (pa_max
= pa
+ size
; pa
< pa_max
; pa
= pa
+ PAGE_SIZE
) {
2013 vm_object_lock(pmap_object
);
2014 m
= vm_page_lookup(pmap_object
, (pa
- gPhysBase
));
2015 assert(m
!= VM_PAGE_NULL
);
2016 assert(VM_PAGE_WIRED(m
));
2017 vm_page_lock_queues();
2019 vm_page_unlock_queues();
2020 vm_object_unlock(pmap_object
);
2026 pmap_t pmap
, int bytes
)
2028 pmap_ledger_credit(pmap
, task_ledgers
.tkm_private
, bytes
);
2036 pmap_ledger_debit(pmap
, task_ledgers
.tkm_private
, bytes
);
2040 pmap_tt_ledger_credit(
2044 if (pmap
!= kernel_pmap
) {
2045 pmap_ledger_credit(pmap
, task_ledgers
.phys_footprint
, size
);
2046 pmap_ledger_credit(pmap
, task_ledgers
.page_table
, size
);
2051 pmap_tt_ledger_debit(
2055 if (pmap
!= kernel_pmap
) {
2056 pmap_ledger_debit(pmap
, task_ledgers
.phys_footprint
, size
);
2057 pmap_ledger_debit(pmap
, task_ledgers
.page_table
, size
);
2062 alloc_asid(pmap_t pmap
)
2067 pmap_simple_lock(&asid_lock
);
2068 vasid
= bitmap_first(&asid_bitmap
[0], MAX_ASID
);
2070 pmap_simple_unlock(&asid_lock
);
2073 assert(vasid
< MAX_ASID
);
2074 bitmap_clear(&asid_bitmap
[0], (unsigned int)vasid
);
2075 pmap_simple_unlock(&asid_lock
);
2076 // bitmap_first() returns highest-order bits first, but a 0-based scheme works
2077 // slightly better with the collision detection scheme used by pmap_switch_internal().
2078 vasid
= MAX_ASID
- 1 - vasid
;
2079 hw_asid
= vasid
% MAX_HW_ASID
;
2080 pmap
->sw_asid
= vasid
/ MAX_HW_ASID
;
2081 hw_asid
+= 1; // Account for ASID 0, which is reserved for the kernel
2082 #if __ARM_KERNEL_PROTECT__
2083 hw_asid
<<= 1; // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
2085 pmap
->hw_asid
= hw_asid
;
2090 free_asid(pmap_t pmap
)
2093 uint16_t hw_asid
= pmap
->hw_asid
;
2094 assert(hw_asid
!= 0); // Should not try to free kernel ASID
2096 #if __ARM_KERNEL_PROTECT__
2101 vasid
= ((unsigned int)pmap
->sw_asid
* MAX_HW_ASID
) + hw_asid
;
2102 vasid
= MAX_ASID
- 1 - vasid
;
2104 pmap_simple_lock(&asid_lock
);
2105 assert(!bitmap_test(&asid_bitmap
[0], vasid
));
2106 bitmap_set(&asid_bitmap
[0], vasid
);
2107 pmap_simple_unlock(&asid_lock
);
#ifndef PMAP_PV_LOAD_FACTOR
#define PMAP_PV_LOAD_FACTOR            1
#endif

#define PV_LOW_WATER_MARK_DEFAULT      (0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_ALLOC_CHUNK_INITIAL         (0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_KERN_ALLOC_CHUNK_INITIAL    (0x200 * PMAP_PV_LOAD_FACTOR)
#define PV_ALLOC_INITIAL_TARGET        (PV_ALLOC_CHUNK_INITIAL * 5)
#define PV_KERN_ALLOC_INITIAL_TARGET   (PV_KERN_ALLOC_CHUNK_INITIAL)
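
/*
 * Sizing note: the low-water marks and allocation chunks above all scale with
 * PMAP_PV_LOAD_FACTOR (1 unless overridden at build time), so a platform can
 * raise its PV reserves simply by defining a larger factor. The initial
 * targets prime the general list with five chunks and the kernel list with
 * one chunk.
 */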
uint32_t pv_free_count MARK_AS_PMAP_DATA = 0;
uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
uint32_t pv_kern_free_count MARK_AS_PMAP_DATA = 0;

uint32_t pv_low_water_mark MARK_AS_PMAP_DATA;
uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA;
uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA;
uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA;

thread_t mapping_replenish_thread;
event_t mapping_replenish_event;
event_t pmap_user_pv_throttle_event;
volatile uint32_t mappingrecurse = 0;

uint64_t pmap_pv_throttle_stat;
uint64_t pmap_pv_throttled_waiters;

unsigned pmap_mapping_thread_wakeups;
unsigned pmap_kernel_reserve_replenish_stat MARK_AS_PMAP_DATA;
unsigned pmap_user_reserve_replenish_stat MARK_AS_PMAP_DATA;
unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
	simple_lock_init(&pv_free_list_lock, 0);
	simple_lock_init(&pv_kern_free_list_lock, 0);
	pv_free_list = PV_ENTRY_NULL;
	pv_free_count = 0x0U;
	pv_kern_free_list = PV_ENTRY_NULL;
	pv_kern_free_count = 0x0U;
}
static inline void PV_ALLOC(pv_entry_t **pv_ep);
static inline void PV_KERN_ALLOC(pv_entry_t **pv_e);
static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);
static inline void PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);

static inline void pmap_pv_throttle(pmap_t p);
	pv_entry_t *pv_e;
	pv_entry_t *pv_eh;
	pv_entry_t *pv_et;
	int pv_cnt;
	unsigned j;
	pmap_paddr_t pa;
	kern_return_t ret;

	PMAP_ASSERT_LOCKED(pmap);
	ASSERT_PVH_LOCKED(pai);
	PV_ALLOC(pvepp);
	if (PV_ENTRY_NULL == *pvepp) {
		if ((pmap == NULL) || (kernel_pmap == pmap)) {
			PV_KERN_ALLOC(pvepp);

			if (PV_ENTRY_NULL == *pvepp) {
				ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);

				if (ret == KERN_RESOURCE_SHORTAGE) {
					ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
				}

				if (ret != KERN_SUCCESS) {
					panic("%s: failed to alloc page for kernel, ret=%d, "
					    "pmap=%p, pai=%u, pvepp=%p",
					    __FUNCTION__, ret, pmap, pai, pvepp);
				}

				pv_e = (pv_entry_t *)phystokv(pa);
				pv_cnt = 0;
				pv_eh = pv_et = PV_ENTRY_NULL;
				*pvepp = pv_e;
				pv_e++;

				for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
					pv_e->pve_next = pv_eh;
					pv_eh = pv_e;

					if (pv_et == PV_ENTRY_NULL) {
						pv_et = pv_e;
					}
					pv_cnt++;
					pv_e++;
				}
				PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
			}
		} else {
			pmap_pv_throttle(pmap);

			ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);

			if (ret != KERN_SUCCESS) {
				panic("%s: failed to alloc page, ret=%d, "
				    "pmap=%p, pai=%u, pvepp=%p",
				    __FUNCTION__, ret, pmap, pai, pvepp);
			}

			pv_e = (pv_entry_t *)phystokv(pa);
			pv_cnt = 0;
			pv_eh = pv_et = PV_ENTRY_NULL;
			*pvepp = pv_e;
			pv_e++;

			for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
				pv_e->pve_next = pv_eh;
				pv_eh = pv_e;

				if (pv_et == PV_ENTRY_NULL) {
					pv_et = pv_e;
				}
				pv_cnt++;
				pv_e++;
			}

			PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
		}
	}
	assert(PV_ENTRY_NULL != *pvepp);
}

	PV_FREE_LIST(pvep, pvep, 1);

	PV_FREE_LIST(pvehp, pvetp, cnt);
static inline void
pv_water_mark_check(void)
{
	if (__improbable((pv_free_count < pv_low_water_mark) || (pv_kern_free_count < pv_kern_low_water_mark))) {
		if (!mappingrecurse && os_atomic_cmpxchg(&mappingrecurse, 0, 1, acq_rel)) {
			thread_wakeup(&mapping_replenish_event);
		}
	}
}
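
/*
 * pv_water_mark_check() is the cheap producer-side check: it reads the free
 * counts without taking the list locks and only attempts the mappingrecurse
 * compare-and-swap when a list has dipped below its low-water mark, so at
 * most one waker posts mapping_replenish_event per depletion episode. The
 * replenish thread is expected to reset mappingrecurse once it has refilled
 * the lists and is about to block again.
 */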
static inline void
PV_ALLOC(pv_entry_t **pv_ep)
{
	assert(*pv_ep == PV_ENTRY_NULL);
	pmap_simple_lock(&pv_free_list_lock);
	/*
	 * If the kernel reserved pool is low, let non-kernel mappings allocate
	 * synchronously, possibly subject to a throttle.
	 */
	if ((pv_kern_free_count >= pv_kern_low_water_mark) && ((*pv_ep = pv_free_list) != 0)) {
		pv_free_list = (pv_entry_t *)(*pv_ep)->pve_next;
		(*pv_ep)->pve_next = PV_ENTRY_NULL;
		pv_free_count--;
	}

	pmap_simple_unlock(&pv_free_list_lock);
}
static inline void
PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt)
{
	pmap_simple_lock(&pv_free_list_lock);
	pv_et->pve_next = (pv_entry_t *)pv_free_list;
	pv_free_list = pv_eh;
	pv_free_count += pv_cnt;
	pmap_simple_unlock(&pv_free_list_lock);
}
static inline void
PV_KERN_ALLOC(pv_entry_t **pv_e)
{
	assert(*pv_e == PV_ENTRY_NULL);
	pmap_simple_lock(&pv_kern_free_list_lock);

	if ((*pv_e = pv_kern_free_list) != 0) {
		pv_kern_free_list = (pv_entry_t *)(*pv_e)->pve_next;
		(*pv_e)->pve_next = PV_ENTRY_NULL;
		pv_kern_free_count--;
		pmap_kern_reserve_alloc_stat++;
	}

	pmap_simple_unlock(&pv_kern_free_list_lock);
}
static inline void
PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt)
{
	pmap_simple_lock(&pv_kern_free_list_lock);
	pv_et->pve_next = pv_kern_free_list;
	pv_kern_free_list = pv_eh;
	pv_kern_free_count += pv_cnt;
	pmap_simple_unlock(&pv_kern_free_list_lock);
}
static inline void
pmap_pv_throttle(__unused pmap_t p)
{
	assert(p != kernel_pmap);
	/* Apply throttle on non-kernel mappings */
	if (pv_kern_free_count < (pv_kern_low_water_mark / 2)) {
		pmap_pv_throttle_stat++;
		/* This doesn't need to be strictly accurate, merely a hint
		 * to eliminate the timeout when the reserve is replenished.
		 */
		pmap_pv_throttled_waiters++;
		assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
		thread_block(THREAD_CONTINUE_NULL);
	}
}
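
/*
 * The throttle above only fires once the kernel reserve has fallen below
 * half of its low-water mark. A throttled (non-kernel) allocator waits on
 * pmap_user_pv_throttle_event with a 1ms timeout, so it recovers on its own
 * even if no wakeup is ever posted; the replenish thread additionally wakes
 * all throttled waiters once the kernel reserve has been refilled.
 */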
/*
 * Creates a target number of free pv_entry_t objects for the kernel free list
 * and the general free list.
 */
MARK_AS_PMAP_TEXT static kern_return_t
mapping_free_prime_internal(void)
{
	pv_entry_t *pv_e;
	pv_entry_t *pv_eh;
	pv_entry_t *pv_et;
	int pv_cnt;
	unsigned j;
	pmap_paddr_t pa;
	kern_return_t ret;
	int alloc_options = 0;
	int needed_pv_cnt = 0;
	int target_pv_free_cnt = 0;

	SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_called = FALSE;
	SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_done = FALSE;

	if (mapping_free_prime_internal_done) {
		return KERN_FAILURE;
	}

	if (!mapping_free_prime_internal_called) {
		mapping_free_prime_internal_called = TRUE;

		pv_low_water_mark = PV_LOW_WATER_MARK_DEFAULT;

		/* Alterable via sysctl */
		pv_kern_low_water_mark = PV_KERN_LOW_WATER_MARK_DEFAULT;

		pv_kern_alloc_chunk = PV_KERN_ALLOC_CHUNK_INITIAL;
		pv_alloc_chunk = PV_ALLOC_CHUNK_INITIAL;
	}

	pv_cnt = 0;
	pv_eh = pv_et = PV_ENTRY_NULL;
	target_pv_free_cnt = PV_ALLOC_INITIAL_TARGET;

	/*
	 * We don't take the lock to read pv_free_count, as we should not be
	 * invoking this from a multithreaded context.
	 */
	needed_pv_cnt = target_pv_free_cnt - pv_free_count;

	if (needed_pv_cnt > target_pv_free_cnt) {
		needed_pv_cnt = 0;
	}

	while (pv_cnt < needed_pv_cnt) {
		ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);

		assert(ret == KERN_SUCCESS);

		pv_page_count++;

		pv_e = (pv_entry_t *)phystokv(pa);

		for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
			pv_e->pve_next = pv_eh;
			pv_eh = pv_e;

			if (pv_et == PV_ENTRY_NULL) {
				pv_et = pv_e;
			}
			pv_cnt++;
			pv_e++;
		}
	}

	if (pv_cnt) {
		PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
	}

	pv_cnt = 0;
	pv_eh = pv_et = PV_ENTRY_NULL;
	target_pv_free_cnt = PV_KERN_ALLOC_INITIAL_TARGET;

	/*
	 * We don't take the lock to read pv_kern_free_count, as we should not
	 * be invoking this from a multithreaded context.
	 */
	needed_pv_cnt = target_pv_free_cnt - pv_kern_free_count;

	if (needed_pv_cnt > target_pv_free_cnt) {
		needed_pv_cnt = 0;
	}

	while (pv_cnt < needed_pv_cnt) {
		ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);

		assert(ret == KERN_SUCCESS);

		pv_page_count++;

		pv_e = (pv_entry_t *)phystokv(pa);

		for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
			pv_e->pve_next = pv_eh;
			pv_eh = pv_e;

			if (pv_et == PV_ENTRY_NULL) {
				pv_et = pv_e;
			}
			pv_cnt++;
			pv_e++;
		}
	}

	if (pv_cnt) {
		PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
	}

	mapping_free_prime_internal_done = TRUE;
	return KERN_SUCCESS;
}
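
/*
 * Each page carved up above yields PAGE_SIZE / sizeof(pv_entry_t) entries
 * (one fewer on the allocation path earlier in this file, where the first
 * entry is handed straight back to the caller through *pvepp). As a purely
 * illustrative example, with 16KB pages and a 16-byte pv_entry_t a single
 * page contributes 1024 entries, so the default PV_ALLOC_INITIAL_TARGET of
 * 5 * 0x200 = 2560 entries is reached after three pages.
 */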
void
mapping_free_prime(void)
{
	kern_return_t kr = KERN_FAILURE;

	kr = mapping_free_prime_internal();

	if (kr != KERN_SUCCESS) {
		panic("%s: failed, kr=%d",
		    __FUNCTION__, kr);
	}
}

void mapping_replenish(void);

void
mapping_adjust(void)
{
	kern_return_t mres;

	mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
	if (mres != KERN_SUCCESS) {
		panic("%s: mapping_replenish thread creation failed",
		    __FUNCTION__);
	}
	thread_deallocate(mapping_replenish_thread);
}
/*
 * Fills the kernel and general PV free lists back up to their low watermarks.
 */
MARK_AS_PMAP_TEXT static kern_return_t
mapping_replenish_internal(void)
{
	pv_entry_t *pv_e;
	pv_entry_t *pv_eh;
	pv_entry_t *pv_et;
	int pv_cnt;
	unsigned j;
	pmap_paddr_t pa;
	kern_return_t ret = KERN_SUCCESS;

	while (pv_kern_free_count < pv_kern_low_water_mark) {
		pv_cnt = 0;
		pv_eh = pv_et = PV_ENTRY_NULL;

		ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
		assert(ret == KERN_SUCCESS);

		pv_page_count++;

		pv_e = (pv_entry_t *)phystokv(pa);

		for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
			pv_e->pve_next = pv_eh;
			pv_eh = pv_e;

			if (pv_et == PV_ENTRY_NULL) {
				pv_et = pv_e;
			}
			pv_cnt++;
			pv_e++;
		}
		pmap_kernel_reserve_replenish_stat += pv_cnt;
		PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
	}

	while (pv_free_count < pv_low_water_mark) {
		pv_cnt = 0;
		pv_eh = pv_et = PV_ENTRY_NULL;

		ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
		assert(ret == KERN_SUCCESS);

		pv_page_count++;

		pv_e = (pv_entry_t *)phystokv(pa);

		for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
			pv_e->pve_next = pv_eh;
			pv_eh = pv_e;

			if (pv_et == PV_ENTRY_NULL) {
				pv_et = pv_e;
			}
			pv_cnt++;
			pv_e++;
		}
		pmap_user_reserve_replenish_stat += pv_cnt;
		PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
	}

	return ret;
}
/*
 * Continuation function that keeps the PV free lists from running out of free
 * elements.
 */
__attribute__((noreturn))
void
mapping_replenish(void)
{
	kern_return_t kr;

	/* We qualify for VM privileges...*/
	current_thread()->options |= TH_OPT_VMPRIV;

	for (;;) {
		kr = mapping_replenish_internal();

		if (kr != KERN_SUCCESS) {
			panic("%s: failed, kr=%d", __FUNCTION__, kr);
		}

		/*
		 * Wake threads throttled while the kernel reserve was being replenished.
		 */
		if (pmap_pv_throttled_waiters) {
			pmap_pv_throttled_waiters = 0;
			thread_wakeup(&pmap_user_pv_throttle_event);
		}

		/* Check if the kernel pool has been depleted since the
		 * first pass, to reduce refill latency.
		 */
		if (pv_kern_free_count < pv_kern_low_water_mark) {
			continue;
		}
		/* Block sans continuation to avoid yielding kernel stack */
		assert_wait(&mapping_replenish_event, THREAD_UNINT);
		mappingrecurse = 0;
		thread_block(THREAD_CONTINUE_NULL);
		pmap_mapping_thread_wakeups++;
	}
}
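
/*
 * mapping_replenish() deliberately blocks with assert_wait()/thread_block()
 * rather than scheduling a continuation: keeping its kernel stack lets it
 * resume in the middle of the loop without re-deriving state, at the cost of
 * holding a stack while idle. Running with TH_OPT_VMPRIV lets its own page
 * allocations draw on the VM privileged reserve while it refills the PV
 * lists.
 */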
static void
ptd_bootstrap(
	pt_desc_t *ptdp,
	unsigned int ptd_cnt)
{
	simple_lock_init(&ptd_free_list_lock, 0);
	while (ptd_cnt != 0) {
		(*(void **)ptdp) = (void *)ptd_free_list;
		ptd_free_list = ptdp;
		ptdp++;
		ptd_cnt--;
		ptd_free_count++;
	}
	ptd_preboot = FALSE;
}
static pt_desc_t*
ptd_alloc_unlinked(bool reclaim)
{
	pt_desc_t *ptdp;
	unsigned i;

	if (!ptd_preboot) {
		pmap_simple_lock(&ptd_free_list_lock);
	}

	if (ptd_free_count == 0) {
		unsigned int ptd_cnt;
		pt_desc_t *ptdp_next;

		if (ptd_preboot) {
			ptdp = (pt_desc_t *)avail_start;
			avail_start += ARM_PGBYTES;
			ptdp_next = ptdp;
			ptd_cnt = ARM_PGBYTES / sizeof(pt_desc_t);
		} else {
			pmap_paddr_t pa;
			kern_return_t ret;

			pmap_simple_unlock(&ptd_free_list_lock);

			if (pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
				if (reclaim) {
					ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
					assert(ret == KERN_SUCCESS);
				} else {
					return NULL;
				}
			}
			ptdp = (pt_desc_t *)phystokv(pa);

			pmap_simple_lock(&ptd_free_list_lock);
			ptdp_next = ptdp;
			ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
		}

		while (ptd_cnt != 0) {
			(*(void **)ptdp_next) = (void *)ptd_free_list;
			ptd_free_list = ptdp_next;
			ptdp_next++;
			ptd_cnt--;
			ptd_free_count++;
		}
	}

	if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
		ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
		ptd_free_count--;
	} else {
		panic("%s: out of ptd entry",
		    __FUNCTION__);
	}

	if (!ptd_preboot) {
		pmap_simple_unlock(&ptd_free_list_lock);
	}

	ptdp->pt_page.next = NULL;
	ptdp->pt_page.prev = NULL;

	for (i = 0; i < PT_INDEX_MAX; i++) {
		ptdp->ptd_info[i].va = (vm_offset_t)-1;
		ptdp->ptd_info[i].refcnt = 0;
		ptdp->ptd_info[i].wiredcnt = 0;
	}

	return ptdp;
}
static inline pt_desc_t*
ptd_alloc(pmap_t pmap, bool reclaim)
{
	pt_desc_t *ptdp = ptd_alloc_unlinked(reclaim);

	if (ptdp == NULL) {
		return NULL;
	}

	ptdp->pmap = pmap;
	if (pmap != kernel_pmap) {
		/* We should never try to reclaim kernel pagetable pages in
		 * pmap_pages_reclaim(), so don't enter them into the list. */
		pmap_simple_lock(&pt_pages_lock);
		queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
		pmap_simple_unlock(&pt_pages_lock);
	}

	pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
	return ptdp;
}
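
/*
 * Only user page-table descriptors are linked onto pt_page_list above: that
 * list is what pmap_pages_reclaim() walks when it needs to steal a page-table
 * page back from a user pmap, and kernel page tables must never be reclaimed
 * that way, hence the kernel_pmap exclusion.
 */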
static void
ptd_deallocate(pt_desc_t *ptdp)
{
	pmap_t pmap = ptdp->pmap;

	if (ptd_preboot) {
		panic("%s: early boot, ",
		    __FUNCTION__);
	}

	if (ptdp->pt_page.next != NULL) {
		pmap_simple_lock(&pt_pages_lock);
		queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
		pmap_simple_unlock(&pt_pages_lock);
	}
	pmap_simple_lock(&ptd_free_list_lock);
	(*(void **)ptdp) = (void *)ptd_free_list;
	ptd_free_list = (pt_desc_t *)ptdp;
	ptd_free_count++;
	pmap_simple_unlock(&ptd_free_list_lock);

	pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
}
2777 vm_map_address_t va
,
2781 if (ptdp
->pmap
!= pmap
) {
2782 panic("%s: pmap mismatch, "
2783 "ptdp=%p, pmap=%p, va=%p, level=%u, pte_p=%p",
2785 ptdp
, pmap
, (void*)va
, level
, pte_p
);
2788 #if (__ARM_VMSA__ == 7)
2790 ptdp
->ptd_info
[ARM_PT_DESC_INDEX(pte_p
)].va
= (vm_offset_t
) va
& ~(ARM_TT_L1_PT_OFFMASK
);
2792 assert(level
> pt_attr_root_level(pmap_get_pt_attr(pmap
)));
2793 ptdp
->ptd_info
[ARM_PT_DESC_INDEX(pte_p
)].va
= (vm_offset_t
) va
& ~(pt_attr_ln_offmask(pmap_get_pt_attr(pmap
), level
- 1));
2795 if (level
< PMAP_TT_MAX_LEVEL
) {
2796 ptdp
->ptd_info
[ARM_PT_DESC_INDEX(pte_p
)].refcnt
= PT_DESC_REFCOUNT
;
2805 return pa_valid(addr
);
2808 #if (__ARM_VMSA__ == 7)
2811 * Given an offset and a map, compute the address of the
2812 * corresponding translation table entry.
2814 static inline tt_entry_t
*
2815 pmap_tte(pmap_t pmap
,
2816 vm_map_address_t addr
)
2818 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
2820 if (!(tte_index(pmap
, pt_attr
, addr
) < pmap
->tte_index_max
)) {
2821 return (tt_entry_t
*)NULL
;
2823 return &pmap
->tte
[tte_index(pmap
, pt_attr
, addr
)];
2828 * Given an offset and a map, compute the address of the
2829 * pte. If the address is invalid with respect to the map
2830 * then PT_ENTRY_NULL is returned (and the map may need to grow).
2832 * This is only used internally.
2834 static inline pt_entry_t
*
2837 vm_map_address_t addr
)
2843 ttp
= pmap_tte(pmap
, addr
);
2844 if (ttp
== (tt_entry_t
*)NULL
) {
2845 return PT_ENTRY_NULL
;
2849 if ((tte
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_BLOCK
) {
2850 panic("%s: Attempt to demote L1 block, tte=0x%lx, "
2852 __FUNCTION__
, (unsigned long)tte
,
2856 if ((tte
& ARM_TTE_TYPE_MASK
) != ARM_TTE_TYPE_TABLE
) {
2857 return PT_ENTRY_NULL
;
2859 ptp
= (pt_entry_t
*) ttetokv(tte
) + ptenum(addr
);
2863 __unused
static inline tt_entry_t
*
2864 pmap_ttne(pmap_t pmap
,
2865 unsigned int target_level
,
2866 vm_map_address_t addr
)
2868 tt_entry_t
* ret_ttep
= NULL
;
2870 switch (target_level
) {
2872 ret_ttep
= pmap_tte(pmap
, addr
);
2875 ret_ttep
= (tt_entry_t
*)pmap_pte(pmap
, addr
);
2878 panic("%s: bad level, "
2879 "pmap=%p, target_level=%u, addr=%p",
2881 pmap
, target_level
, (void *)addr
);
2889 static inline tt_entry_t
*
2890 pmap_ttne(pmap_t pmap
,
2891 unsigned int target_level
,
2892 vm_map_address_t addr
)
2894 tt_entry_t
* ttp
= NULL
;
2895 tt_entry_t
* ttep
= NULL
;
2896 tt_entry_t tte
= ARM_TTE_EMPTY
;
2897 unsigned int cur_level
;
2899 const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
2903 assert(target_level
<= pt_attr
->pta_max_level
);
2905 for (cur_level
= pt_attr
->pta_root_level
; cur_level
<= target_level
; cur_level
++) {
2906 ttep
= &ttp
[ttn_index(pmap
, pt_attr
, addr
, cur_level
)];
2908 if (cur_level
== target_level
) {
2915 if ((tte
& (ARM_TTE_TYPE_MASK
| ARM_TTE_VALID
)) == (ARM_TTE_TYPE_BLOCK
| ARM_TTE_VALID
)) {
2916 panic("%s: Attempt to demote L%u block, tte=0x%llx, "
2917 "pmap=%p, target_level=%u, addr=%p",
2918 __FUNCTION__
, cur_level
, tte
,
2919 pmap
, target_level
, (void*)addr
);
2922 if ((tte
& (ARM_TTE_TYPE_MASK
| ARM_TTE_VALID
)) != (ARM_TTE_TYPE_TABLE
| ARM_TTE_VALID
)) {
2923 return TT_ENTRY_NULL
;
2926 ttp
= (tt_entry_t
*)phystokv(tte
& ARM_TTE_TABLE_MASK
);
2933 * Given an offset and a map, compute the address of level 1 translation table entry.
2934 * If the tranlation is invalid then PT_ENTRY_NULL is returned.
2936 static inline tt_entry_t
*
2937 pmap_tt1e(pmap_t pmap
,
2938 vm_map_address_t addr
)
2940 return pmap_ttne(pmap
, PMAP_TT_L1_LEVEL
, addr
);
2944 * Given an offset and a map, compute the address of level 2 translation table entry.
2945 * If the tranlation is invalid then PT_ENTRY_NULL is returned.
2947 static inline tt_entry_t
*
2948 pmap_tt2e(pmap_t pmap
,
2949 vm_map_address_t addr
)
2951 return pmap_ttne(pmap
, PMAP_TT_L2_LEVEL
, addr
);
2956 * Given an offset and a map, compute the address of level 3 translation table entry.
2957 * If the tranlation is invalid then PT_ENTRY_NULL is returned.
2959 static inline pt_entry_t
*
2962 vm_map_address_t addr
)
2964 return (pt_entry_t
*)pmap_ttne(pmap
, PMAP_TT_L3_LEVEL
, addr
);
2967 static inline tt_entry_t
*
2970 vm_map_address_t addr
)
2972 return pmap_tt2e(pmap
, addr
);
2975 static inline pt_entry_t
*
2978 vm_map_address_t addr
)
2980 return pmap_tt3e(pmap
, addr
);
2991 * Map memory at initialization. The physical addresses being
2992 * mapped are not managed and are never unmapped.
2994 * For now, VM is already on, we only need to map the
2999 vm_map_address_t virt
,
3009 while (start
< end
) {
3010 kr
= pmap_enter(kernel_pmap
, virt
, (ppnum_t
)atop(start
),
3011 prot
, VM_PROT_NONE
, flags
, FALSE
);
3013 if (kr
!= KERN_SUCCESS
) {
3014 panic("%s: failed pmap_enter, "
3015 "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
3017 (void *) virt
, (void *) start
, (void *) end
, prot
, flags
);
3027 pmap_map_bd_with_options(
3028 vm_map_address_t virt
,
3036 vm_map_address_t vaddr
;
3038 pt_entry_t mem_attr
;
3040 switch (options
& PMAP_MAP_BD_MASK
) {
3041 case PMAP_MAP_BD_WCOMB
:
3042 mem_attr
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB
);
3043 #if (__ARM_VMSA__ > 7)
3044 mem_attr
|= ARM_PTE_SH(SH_OUTER_MEMORY
);
3046 mem_attr
|= ARM_PTE_SH
;
3049 case PMAP_MAP_BD_POSTED
:
3050 mem_attr
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED
);
3052 case PMAP_MAP_BD_POSTED_REORDERED
:
3053 mem_attr
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED
);
3055 case PMAP_MAP_BD_POSTED_COMBINED_REORDERED
:
3056 mem_attr
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED
);
3059 mem_attr
= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE
);
3063 tmplate
= pa_to_pte(start
) | ARM_PTE_AP((prot
& VM_PROT_WRITE
) ? AP_RWNA
: AP_RONA
) |
3064 mem_attr
| ARM_PTE_TYPE
| ARM_PTE_NX
| ARM_PTE_PNX
| ARM_PTE_AF
;
3065 #if __ARM_KERNEL_PROTECT__
3066 tmplate
|= ARM_PTE_NG
;
3067 #endif /* __ARM_KERNEL_PROTECT__ */
3071 while (paddr
< end
) {
3072 ptep
= pmap_pte(kernel_pmap
, vaddr
);
3073 if (ptep
== PT_ENTRY_NULL
) {
3074 panic("%s: no PTE for vaddr=%p, "
3075 "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
3076 __FUNCTION__
, (void*)vaddr
,
3077 (void*)virt
, (void*)start
, (void*)end
, prot
, options
);
3080 assert(!ARM_PTE_IS_COMPRESSED(*ptep
, ptep
));
3081 WRITE_PTE_STRONG(ptep
, tmplate
);
3083 pte_increment_pa(tmplate
);
3089 flush_mmu_tlb_region(virt
, (unsigned)(end
- start
));
3096 * Back-door routine for mapping kernel VM at initialization.
3097 * Useful for mapping memory outside the range
3098 * [vm_first_phys, vm_last_phys] (i.e., devices).
3099 * Otherwise like pmap_map.
3103 vm_map_address_t virt
,
3110 vm_map_address_t vaddr
;
3113 /* not cacheable and not buffered */
3114 tmplate
= pa_to_pte(start
)
3115 | ARM_PTE_TYPE
| ARM_PTE_AF
| ARM_PTE_NX
| ARM_PTE_PNX
3116 | ARM_PTE_AP((prot
& VM_PROT_WRITE
) ? AP_RWNA
: AP_RONA
)
3117 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE
);
3118 #if __ARM_KERNEL_PROTECT__
3119 tmplate
|= ARM_PTE_NG
;
3120 #endif /* __ARM_KERNEL_PROTECT__ */
3124 while (paddr
< end
) {
3125 ptep
= pmap_pte(kernel_pmap
, vaddr
);
3126 if (ptep
== PT_ENTRY_NULL
) {
3127 panic("pmap_map_bd");
3129 assert(!ARM_PTE_IS_COMPRESSED(*ptep
, ptep
));
3130 WRITE_PTE_STRONG(ptep
, tmplate
);
3132 pte_increment_pa(tmplate
);
3138 flush_mmu_tlb_region(virt
, (unsigned)(end
- start
));
3145 * Back-door routine for mapping kernel VM at initialization.
3146 * Useful for mapping memory specific physical addresses in early
3147 * boot (i.e., before kernel_map is initialized).
3149 * Maps are in the VM_HIGH_KERNEL_WINDOW area.
3153 pmap_map_high_window_bd(
3154 vm_offset_t pa_start
,
3158 pt_entry_t
*ptep
, pte
;
3159 #if (__ARM_VMSA__ == 7)
3160 vm_map_address_t va_start
= VM_HIGH_KERNEL_WINDOW
;
3161 vm_map_address_t va_max
= VM_MAX_KERNEL_ADDRESS
;
3163 vm_map_address_t va_start
= VREGION1_START
;
3164 vm_map_address_t va_max
= VREGION1_START
+ VREGION1_SIZE
;
3166 vm_map_address_t va_end
;
3167 vm_map_address_t va
;
3170 offset
= pa_start
& PAGE_MASK
;
3174 if (len
> (va_max
- va_start
)) {
3175 panic("%s: area too large, "
3176 "pa_start=%p, len=%p, prot=0x%x",
3178 (void*)pa_start
, (void*)len
, prot
);
3182 for (; va_start
< va_max
; va_start
+= PAGE_SIZE
) {
3183 ptep
= pmap_pte(kernel_pmap
, va_start
);
3184 assert(!ARM_PTE_IS_COMPRESSED(*ptep
, ptep
));
3185 if (*ptep
== ARM_PTE_TYPE_FAULT
) {
3189 if (va_start
> va_max
) {
3190 panic("%s: insufficient pages, "
3191 "pa_start=%p, len=%p, prot=0x%x",
3193 (void*)pa_start
, (void*)len
, prot
);
3196 for (va_end
= va_start
+ PAGE_SIZE
; va_end
< va_start
+ len
; va_end
+= PAGE_SIZE
) {
3197 ptep
= pmap_pte(kernel_pmap
, va_end
);
3198 assert(!ARM_PTE_IS_COMPRESSED(*ptep
, ptep
));
3199 if (*ptep
!= ARM_PTE_TYPE_FAULT
) {
3200 va_start
= va_end
+ PAGE_SIZE
;
3205 for (va
= va_start
; va
< va_end
; va
+= PAGE_SIZE
, pa_start
+= PAGE_SIZE
) {
3206 ptep
= pmap_pte(kernel_pmap
, va
);
3207 pte
= pa_to_pte(pa_start
)
3208 | ARM_PTE_TYPE
| ARM_PTE_AF
| ARM_PTE_NX
| ARM_PTE_PNX
3209 | ARM_PTE_AP((prot
& VM_PROT_WRITE
) ? AP_RWNA
: AP_RONA
)
3210 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT
);
3211 #if (__ARM_VMSA__ > 7)
3212 pte
|= ARM_PTE_SH(SH_OUTER_MEMORY
);
3216 #if __ARM_KERNEL_PROTECT__
3218 #endif /* __ARM_KERNEL_PROTECT__ */
3219 WRITE_PTE_STRONG(ptep
, pte
);
3221 PMAP_UPDATE_TLBS(kernel_pmap
, va_start
, va_start
+ len
, false);
3223 kasan_notify_address(va_start
, len
);
3228 #define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))
3231 pmap_compute_io_rgns(void)
3234 pmap_io_range_t
*ranges
;
3238 unsigned int prop_size
;
3240 err
= DTLookupEntry(NULL
, "/defaults", &entry
);
3241 assert(err
== kSuccess
);
3243 if (kSuccess
!= DTGetProperty(entry
, "pmap-io-ranges", &prop
, &prop_size
)) {
3248 for (unsigned int i
= 0; i
< (prop_size
/ sizeof(*ranges
)); ++i
) {
3249 if (ranges
[i
].addr
& PAGE_MASK
) {
3250 panic("pmap I/O region %u addr 0x%llx is not page-aligned", i
, ranges
[i
].addr
);
3252 if (ranges
[i
].len
& PAGE_MASK
) {
3253 panic("pmap I/O region %u length 0x%llx is not page-aligned", i
, ranges
[i
].len
);
3255 if (os_add_overflow(ranges
[i
].addr
, ranges
[i
].len
, &rgn_end
)) {
3256 panic("pmap I/O region %u addr 0x%llx length 0x%llx wraps around", i
, ranges
[i
].addr
, ranges
[i
].len
);
3258 if (((ranges
[i
].addr
<= gPhysBase
) && (rgn_end
> gPhysBase
)) ||
3259 ((ranges
[i
].addr
< avail_end
) && (rgn_end
>= avail_end
)) ||
3260 ((ranges
[i
].addr
> gPhysBase
) && (rgn_end
< avail_end
))) {
3261 panic("pmap I/O region %u addr 0x%llx length 0x%llx overlaps physical memory", i
, ranges
[i
].addr
, ranges
[i
].len
);
3267 return num_io_rgns
* sizeof(*ranges
);
/*
 * return < 0 for a < b
 *          0 for a == b
 *        > 0 for a > b
 */
typedef int (*cmpfunc_t)(const void *a, const void *b);

extern void
qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);

static int
cmp_io_rgns(const void *a, const void *b)
{
	const pmap_io_range_t *range_a = a;
	const pmap_io_range_t *range_b = b;
	if ((range_b->addr + range_b->len) <= range_a->addr) {
		return 1;
	} else if ((range_a->addr + range_a->len) <= range_b->addr) {
		return -1;
	} else {
		return 0;
	}
}
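
/*
 * cmp_io_rgns() treats any two overlapping ranges as equal, so after the
 * qsort() in pmap_load_io_rgns() the io_attr_table entries are ordered by
 * non-overlapping physical address, presumably so that later attribute
 * lookups can search the table efficiently.
 */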
3295 pmap_load_io_rgns(void)
3298 pmap_io_range_t
*ranges
;
3301 unsigned int prop_size
;
3303 if (num_io_rgns
== 0) {
3307 err
= DTLookupEntry(NULL
, "/defaults", &entry
);
3308 assert(err
== kSuccess
);
3310 err
= DTGetProperty(entry
, "pmap-io-ranges", &prop
, &prop_size
);
3311 assert(err
== kSuccess
);
3314 for (unsigned int i
= 0; i
< (prop_size
/ sizeof(*ranges
)); ++i
) {
3315 io_attr_table
[i
] = ranges
[i
];
3318 qsort(io_attr_table
, num_io_rgns
, sizeof(*ranges
), cmp_io_rgns
);
/*
 * pmap_get_arm64_prot
 *
 * return effective armv8 VMSA block protections including
 * table AP/PXN/XN overrides of a pmap entry
 *
 */
uint64_t
pmap_get_arm64_prot(
	pmap_t pmap,
	vm_offset_t addr)
{
	tt_entry_t tte = 0;
	unsigned int level = 0;
	uint64_t tte_type = 0;
	uint64_t effective_prot_bits = 0;
	uint64_t aggregate_tte = 0;
	uint64_t table_ap_bits = 0, table_xn = 0, table_pxn = 0;
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
		tte = *pmap_ttne(pmap, level, addr);

		if (!(tte & ARM_TTE_VALID)) {
			return 0;
		}

		tte_type = tte & ARM_TTE_TYPE_MASK;

		if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
		    (level == pt_attr->pta_max_level)) {
			/* Block or page mapping; both have the same protection bit layout. */
			break;
		} else if (tte_type == ARM_TTE_TYPE_TABLE) {
			/* All of the table bits we care about are overrides, so just OR them together. */
			aggregate_tte |= tte;
		}
	}

	table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
	table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
	table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);

	/* Start with the PTE bits. */
	effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);

	/* Table AP bits mask out block/page AP bits */
	effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));

	/* XN/PXN bits can be OR'd in. */
	effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
	effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);

	return effective_prot_bits;
}
#endif /* __arm64__ */
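
/*
 * Worked example for the aggregation above (illustrative only): if an
 * intermediate table entry on the walk sets ARM_TTE_TABLE_XN while the leaf
 * entry itself has no XN bit, the table override is OR'd into aggregate_tte
 * and the returned protection carries ARM_PTE_NX anyway. Table AP overrides
 * are applied subtractively in the same spirit: they can only strip access
 * from the leaf permissions, never grant more.
 */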
3382 * Bootstrap the system enough to run with virtual memory.
3384 * The early VM initialization code has already allocated
3385 * the first CPU's translation table and made entries for
3386 * all the one-to-one mappings to be found there.
3388 * We must set up the kernel pmap structures, the
3389 * physical-to-virtual translation lookup tables for the
3390 * physical memory to be managed (between avail_start and
3393 * Map the kernel's code and data, and allocate the system page table.
3394 * Page_size must already be set.
3397 * first_avail first available physical page -
3398 * after kernel page tables
3399 * avail_start PA of first managed physical page
3400 * avail_end PA of last managed physical page
3407 pmap_paddr_t pmap_struct_start
;
3408 vm_size_t pv_head_size
;
3409 vm_size_t ptd_root_table_size
;
3410 vm_size_t pp_attr_table_size
;
3411 vm_size_t io_attr_table_size
;
3412 unsigned int npages
;
3413 vm_map_offset_t maxoffset
;
3415 lck_grp_init(&pmap_lck_grp
, "pmap", LCK_GRP_ATTR_NULL
);
3418 #if DEVELOPMENT || DEBUG
3419 if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask
, sizeof(pmap_trace_mask
))) {
3420 kprintf("Kernel traces for pmap operations enabled\n");
3425 * Initialize the kernel pmap.
3428 #if ARM_PARAMETERIZED_PMAP
3429 kernel_pmap
->pmap_pt_attr
= native_pt_attr
;
3430 #endif /* ARM_PARAMETERIZED_PMAP */
3432 kernel_pmap
->disable_jop
= 0;
3433 #endif /* HAS_APPLE_PAC */
3434 kernel_pmap
->tte
= cpu_tte
;
3435 kernel_pmap
->ttep
= cpu_ttep
;
3436 #if (__ARM_VMSA__ > 7)
3437 kernel_pmap
->min
= ARM64_TTBR1_MIN_ADDR
;
3439 kernel_pmap
->min
= VM_MIN_KERNEL_AND_KEXT_ADDRESS
;
3441 kernel_pmap
->max
= VM_MAX_KERNEL_ADDRESS
;
3442 os_atomic_init(&kernel_pmap
->ref_count
, 1);
3443 kernel_pmap
->gc_status
= 0;
3444 kernel_pmap
->nx_enabled
= TRUE
;
3446 kernel_pmap
->is_64bit
= TRUE
;
3448 kernel_pmap
->is_64bit
= FALSE
;
3450 kernel_pmap
->stamp
= os_atomic_inc(&pmap_stamp
, relaxed
);
3452 kernel_pmap
->nested_region_grand_addr
= 0x0ULL
;
3453 kernel_pmap
->nested_region_subord_addr
= 0x0ULL
;
3454 kernel_pmap
->nested_region_size
= 0x0ULL
;
3455 kernel_pmap
->nested_region_asid_bitmap
= NULL
;
3456 kernel_pmap
->nested_region_asid_bitmap_size
= 0x0UL
;
3458 #if (__ARM_VMSA__ == 7)
3459 kernel_pmap
->tte_index_max
= 4 * NTTES
;
3461 kernel_pmap
->prev_tte
= (tt_entry_t
*) NULL
;
3462 kernel_pmap
->hw_asid
= 0;
3463 kernel_pmap
->sw_asid
= 0;
3465 PMAP_LOCK_INIT(kernel_pmap
);
3466 #if (__ARM_VMSA__ == 7)
3467 simple_lock_init(&kernel_pmap
->tt1_lock
, 0);
3468 kernel_pmap
->cpu_ref
= 0;
3470 memset((void *) &kernel_pmap
->stats
, 0, sizeof(kernel_pmap
->stats
));
3472 /* allocate space for and initialize the bookkeeping structures */
3473 io_attr_table_size
= pmap_compute_io_rgns();
3474 npages
= (unsigned int)atop(mem_size
);
3475 pp_attr_table_size
= npages
* sizeof(pp_attr_t
);
3476 pv_head_size
= round_page(sizeof(pv_entry_t
*) * npages
);
3477 // allocate enough initial PTDs to map twice the available physical memory
3478 ptd_root_table_size
= sizeof(pt_desc_t
) * (mem_size
/ ((PAGE_SIZE
/ sizeof(pt_entry_t
)) * ARM_PGBYTES
)) * 2;
3480 pmap_struct_start
= avail_start
;
3482 pp_attr_table
= (pp_attr_t
*) phystokv(avail_start
);
3483 avail_start
= PMAP_ALIGN(avail_start
+ pp_attr_table_size
, __alignof(pp_attr_t
));
3484 io_attr_table
= (pmap_io_range_t
*) phystokv(avail_start
);
3485 avail_start
= PMAP_ALIGN(avail_start
+ io_attr_table_size
, __alignof(pv_entry_t
*));
3486 pv_head_table
= (pv_entry_t
**) phystokv(avail_start
);
3487 avail_start
= PMAP_ALIGN(avail_start
+ pv_head_size
, __alignof(pt_desc_t
));
3488 ptd_root_table
= (pt_desc_t
*)phystokv(avail_start
);
3489 avail_start
= round_page(avail_start
+ ptd_root_table_size
);
3491 memset((char *)phystokv(pmap_struct_start
), 0, avail_start
- pmap_struct_start
);
3493 pmap_load_io_rgns();
3494 ptd_bootstrap(ptd_root_table
, (unsigned int)(ptd_root_table_size
/ sizeof(pt_desc_t
)));
3496 pmap_cpu_data_array_init();
3498 vm_first_phys
= gPhysBase
;
3499 vm_last_phys
= trunc_page(avail_end
);
3501 simple_lock_init(&pmaps_lock
, 0);
3502 simple_lock_init(&asid_lock
, 0);
3503 simple_lock_init(&tt1_lock
, 0);
3504 queue_init(&map_pmap_list
);
3505 queue_enter(&map_pmap_list
, kernel_pmap
, pmap_t
, pmaps
);
3506 free_page_size_tt_list
= TT_FREE_ENTRY_NULL
;
3507 free_page_size_tt_count
= 0;
3508 free_page_size_tt_max
= 0;
3509 free_two_page_size_tt_list
= TT_FREE_ENTRY_NULL
;
3510 free_two_page_size_tt_count
= 0;
3511 free_two_page_size_tt_max
= 0;
3512 free_tt_list
= TT_FREE_ENTRY_NULL
;
3516 simple_lock_init(&pt_pages_lock
, 0);
3517 queue_init(&pt_page_list
);
3519 simple_lock_init(&pmap_pages_lock
, 0);
3520 pmap_pages_request_count
= 0;
3521 pmap_pages_request_acum
= 0;
3522 pmap_pages_reclaim_list
= PAGE_FREE_ENTRY_NULL
;
3524 virtual_space_start
= vstart
;
3525 virtual_space_end
= VM_MAX_KERNEL_ADDRESS
;
3527 bitmap_full(&asid_bitmap
[0], MAX_ASID
);
3531 if (PE_parse_boot_argn("arm_maxoffset", &maxoffset
, sizeof(maxoffset
))) {
3532 maxoffset
= trunc_page(maxoffset
);
3533 if ((maxoffset
>= pmap_max_offset(FALSE
, ARM_PMAP_MAX_OFFSET_MIN
))
3534 && (maxoffset
<= pmap_max_offset(FALSE
, ARM_PMAP_MAX_OFFSET_MAX
))) {
3535 arm_pmap_max_offset_default
= maxoffset
;
3538 #if defined(__arm64__)
3539 if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset
, sizeof(maxoffset
))) {
3540 maxoffset
= trunc_page(maxoffset
);
3541 if ((maxoffset
>= pmap_max_offset(TRUE
, ARM_PMAP_MAX_OFFSET_MIN
))
3542 && (maxoffset
<= pmap_max_offset(TRUE
, ARM_PMAP_MAX_OFFSET_MAX
))) {
3543 arm64_pmap_max_offset_default
= maxoffset
;
3548 #if DEVELOPMENT || DEBUG
3549 PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute
, sizeof(panic_on_unsigned_execute
));
3550 #endif /* DEVELOPMENT || DEBUG */
3552 pmap_nesting_size_min
= ARM_NESTING_SIZE_MIN
;
3553 pmap_nesting_size_max
= ARM_NESTING_SIZE_MAX
;
3555 simple_lock_init(&phys_backup_lock
, 0);
3559 PE_parse_boot_argn("pmap_stats_assert",
3561 sizeof(pmap_stats_assert
));
3562 PE_parse_boot_argn("vm_footprint_suspend_allowed",
3563 &vm_footprint_suspend_allowed
,
3564 sizeof(vm_footprint_suspend_allowed
));
3565 #endif /* MACH_ASSERT */
3568 /* Shadow the CPU copy windows, as they fall outside of the physical aperture */
3569 kasan_map_shadow(CPUWINDOWS_BASE
, CPUWINDOWS_TOP
- CPUWINDOWS_BASE
, true);
3576 vm_offset_t
*startp
,
3580 *startp
= virtual_space_start
;
3581 *endp
= virtual_space_end
;
3586 pmap_virtual_region(
3587 unsigned int region_select
,
3588 vm_map_offset_t
*startp
,
3592 boolean_t ret
= FALSE
;
3593 #if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
3594 if (region_select
== 0) {
3596 * In this config, the bootstrap mappings should occupy their own L2
3597 * TTs, as they should be immutable after boot. Having the associated
3598 * TTEs and PTEs in their own pages allows us to lock down those pages,
3599 * while allowing the rest of the kernel address range to be remapped.
3601 #if (__ARM_VMSA__ > 7)
3602 *startp
= LOW_GLOBAL_BASE_ADDRESS
& ~ARM_TT_L2_OFFMASK
;
3604 #error Unsupported configuration
3606 *size
= ((VM_MAX_KERNEL_ADDRESS
- *startp
) & ~PAGE_MASK
);
3610 #if (__ARM_VMSA__ > 7)
3611 unsigned long low_global_vr_mask
= 0;
3612 vm_map_size_t low_global_vr_size
= 0;
3615 if (region_select
== 0) {
3616 #if (__ARM_VMSA__ == 7)
3617 *startp
= gVirtBase
& 0xFFC00000;
3618 *size
= ((virtual_space_start
- (gVirtBase
& 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
3620 /* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
3621 if (!TEST_PAGE_SIZE_4K
) {
3622 *startp
= gVirtBase
& 0xFFFFFFFFFE000000;
3623 *size
= ((virtual_space_start
- (gVirtBase
& 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
3625 *startp
= gVirtBase
& 0xFFFFFFFFFF800000;
3626 *size
= ((virtual_space_start
- (gVirtBase
& 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
3631 if (region_select
== 1) {
3632 *startp
= VREGION1_START
;
3633 *size
= VREGION1_SIZE
;
3636 #if (__ARM_VMSA__ > 7)
3637 /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
3638 if (!TEST_PAGE_SIZE_4K
) {
3639 low_global_vr_mask
= 0xFFFFFFFFFE000000;
3640 low_global_vr_size
= 0x2000000;
3642 low_global_vr_mask
= 0xFFFFFFFFFF800000;
3643 low_global_vr_size
= 0x800000;
3646 if (((gVirtBase
& low_global_vr_mask
) != LOW_GLOBAL_BASE_ADDRESS
) && (region_select
== 2)) {
3647 *startp
= LOW_GLOBAL_BASE_ADDRESS
;
3648 *size
= low_global_vr_size
;
3652 if (region_select
== 3) {
3653 /* In this config, we allow the bootstrap mappings to occupy the same
3654 * page table pages as the heap.
3656 *startp
= VM_MIN_KERNEL_ADDRESS
;
3657 *size
= LOW_GLOBAL_BASE_ADDRESS
- *startp
;
3669 return (unsigned int)atop(avail_end
- first_avail
);
3676 __unused boolean_t might_free
)
3678 return pmap_next_page(pnum
);
3686 if (first_avail
!= avail_end
) {
3687 *pnum
= (ppnum_t
)atop(first_avail
);
3688 first_avail
+= PAGE_SIZE
;
3696 * Initialize the pmap module.
3697 * Called by vm_init, to initialize any structures that the pmap
3698 * system needs to map virtual memory.
3705 * Protect page zero in the kernel map.
3706 * (can be overruled by permanent transltion
3707 * table entries at page zero - see arm_vm_init).
3709 vm_protect(kernel_map
, 0, PAGE_SIZE
, TRUE
, VM_PROT_NONE
);
3711 pmap_initialized
= TRUE
;
3717 * Initialize the pmap object (for tracking the vm_page_t
3718 * structures for pages we allocate to be page tables in
3721 _vm_object_allocate(mem_size
, pmap_object
);
3722 pmap_object
->copy_strategy
= MEMORY_OBJECT_COPY_NONE
;
3727 * The values of [hard_]maxproc may have been scaled, make sure
3728 * they are still less than the value of MAX_ASID.
3730 if (maxproc
> MAX_ASID
) {
3733 if (hard_maxproc
> MAX_ASID
) {
3734 hard_maxproc
= MAX_ASID
;
3738 pmap_pgtrace_init();
3748 pmap_paddr_t phys
= ptoa(ppnum
);
3750 assert(phys
!= vm_page_fictitious_addr
);
3752 if (!pa_valid(phys
)) {
3756 pai
= (int)pa_index(phys
);
3757 pv_h
= pai_to_pvh(pai
);
3759 return pvh_test_type(pv_h
, PVH_TYPE_NULL
);
3764 pmap_assert_free(ppnum_t ppnum
)
3766 assertf(pmap_verify_free(ppnum
), "page = 0x%x", ppnum
);
3773 * Initialize zones used by pmap.
3780 * Create the zone of physical maps
3781 * and the physical-to-virtual entries.
3783 pmap_zone
= zinit((vm_size_t
) sizeof(struct pmap
), (vm_size_t
) sizeof(struct pmap
) * 256,
3789 pmap_ledger_alloc_init(size_t size
)
3791 panic("%s: unsupported, "
3798 pmap_ledger_alloc(void)
3800 panic("%s: unsupported",
3806 pmap_ledger_free(ledger_t ledger
)
3808 panic("%s: unsupported, "
3814 * Create and return a physical map.
3816 * If the size specified for the map
3817 * is zero, the map is an actual physical
3818 * map, and may be referenced by the
3821 * If the size specified is non-zero,
3822 * the map will be used in software only, and
3823 * is bounded by that size.
3825 MARK_AS_PMAP_TEXT
static pmap_t
3826 pmap_create_options_internal(
3832 unsigned tte_index_max
;
3834 bool is_64bit
= flags
& PMAP_CREATE_64BIT
;
3835 #if defined(HAS_APPLE_PAC)
3836 bool disable_jop
= flags
& PMAP_CREATE_DISABLE_JOP
;
3837 #endif /* defined(HAS_APPLE_PAC) */
3840 * A software use-only map doesn't even need a pmap.
3847 * Allocate a pmap struct from the pmap_zone. Then allocate
3848 * the translation table of the right size for the pmap.
3850 if ((p
= (pmap_t
) zalloc(pmap_zone
)) == PMAP_NULL
) {
3854 if (flags
& PMAP_CREATE_64BIT
) {
3855 p
->min
= MACH_VM_MIN_ADDRESS
;
3856 p
->max
= MACH_VM_MAX_ADDRESS
;
3858 p
->min
= VM_MIN_ADDRESS
;
3859 p
->max
= VM_MAX_ADDRESS
;
3862 #if defined(HAS_APPLE_PAC)
3863 p
->disable_jop
= disable_jop
;
3864 #endif /* defined(HAS_APPLE_PAC) */
3866 p
->nested_region_true_start
= 0;
3867 p
->nested_region_true_end
= ~0;
3869 os_atomic_init(&p
->ref_count
, 1);
3871 p
->stamp
= os_atomic_inc(&pmap_stamp
, relaxed
);
3872 p
->nx_enabled
= TRUE
;
3873 p
->is_64bit
= is_64bit
;
3875 p
->nested_pmap
= PMAP_NULL
;
3877 #if ARM_PARAMETERIZED_PMAP
3878 p
->pmap_pt_attr
= native_pt_attr
;
3879 #endif /* ARM_PARAMETERIZED_PMAP */
3881 if (!pmap_get_pt_ops(p
)->alloc_id(p
)) {
3890 #if (__ARM_VMSA__ == 7)
3891 simple_lock_init(&p
->tt1_lock
, 0);
3894 memset((void *) &p
->stats
, 0, sizeof(p
->stats
));
3896 p
->tt_entry_free
= (tt_entry_t
*)0;
3898 p
->tte
= pmap_tt1_allocate(p
, PMAP_ROOT_ALLOC_SIZE
, 0);
3900 goto tt1_alloc_fail
;
3903 p
->ttep
= ml_static_vtop((vm_offset_t
)p
->tte
);
3904 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE
), VM_KERNEL_ADDRHIDE(p
), VM_KERNEL_ADDRHIDE(p
->min
), VM_KERNEL_ADDRHIDE(p
->max
), p
->ttep
);
3906 #if (__ARM_VMSA__ == 7)
3907 tte_index_max
= p
->tte_index_max
= NTTES
;
3909 tte_index_max
= (PMAP_ROOT_ALLOC_SIZE
/ sizeof(tt_entry_t
));
3911 p
->prev_tte
= (tt_entry_t
*) NULL
;
3913 /* nullify the translation table */
3914 for (i
= 0; i
< tte_index_max
; i
++) {
3915 p
->tte
[i
] = ARM_TTE_TYPE_FAULT
;
3918 FLUSH_PTE_RANGE(p
->tte
, p
->tte
+ tte_index_max
);
3921 * initialize the rest of the structure
3923 p
->nested_region_grand_addr
= 0x0ULL
;
3924 p
->nested_region_subord_addr
= 0x0ULL
;
3925 p
->nested_region_size
= 0x0ULL
;
3926 p
->nested_region_asid_bitmap
= NULL
;
3927 p
->nested_region_asid_bitmap_size
= 0x0UL
;
3929 p
->nested_has_no_bounds_ref
= false;
3930 p
->nested_no_bounds_refcnt
= 0;
3931 p
->nested_bounds_set
= false;
3935 p
->pmap_stats_assert
= TRUE
;
3937 strlcpy(p
->pmap_procname
, "<nil>", sizeof(p
->pmap_procname
));
3938 #endif /* MACH_ASSERT */
3939 #if DEVELOPMENT || DEBUG
3940 p
->footprint_was_suspended
= FALSE
;
3941 #endif /* DEVELOPMENT || DEBUG */
3943 pmap_simple_lock(&pmaps_lock
);
3944 queue_enter(&map_pmap_list
, p
, pmap_t
, pmaps
);
3945 pmap_simple_unlock(&pmaps_lock
);
3950 pmap_get_pt_ops(p
)->free_id(p
);
3952 zfree(pmap_zone
, p
);
3957 pmap_create_options(
3964 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE
) | DBG_FUNC_START
, size
, flags
);
3966 ledger_reference(ledger
);
3968 pmap
= pmap_create_options_internal(ledger
, size
, flags
);
3970 if (pmap
== PMAP_NULL
) {
3971 ledger_dereference(ledger
);
3974 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE
) | DBG_FUNC_END
, VM_KERNEL_ADDRHIDE(pmap
), PMAP_VASID(pmap
), pmap
->hw_asid
);
3980 MARK_AS_PMAP_TEXT
static void
3981 pmap_set_process_internal(
3982 __unused pmap_t pmap
,
3984 __unused
char *procname
)
3991 VALIDATE_PMAP(pmap
);
3993 pmap
->pmap_pid
= pid
;
3994 strlcpy(pmap
->pmap_procname
, procname
, sizeof(pmap
->pmap_procname
));
3995 if (pmap_ledgers_panic_leeway
) {
3998 * Some processes somehow trigger some issues that make
3999 * the pmap stats and ledgers go off track, causing
4000 * some assertion failures and ledger panics.
4001 * Turn off the sanity checks if we allow some ledger leeway
4002 * because of that. We'll still do a final check in
4003 * pmap_check_ledgers() for discrepancies larger than the
4004 * allowed leeway after the address space has been fully
4007 pmap
->pmap_stats_assert
= FALSE
;
4008 ledger_disable_panic_on_negative(pmap
->ledger
,
4009 task_ledgers
.phys_footprint
);
4010 ledger_disable_panic_on_negative(pmap
->ledger
,
4011 task_ledgers
.internal
);
4012 ledger_disable_panic_on_negative(pmap
->ledger
,
4013 task_ledgers
.internal_compressed
);
4014 ledger_disable_panic_on_negative(pmap
->ledger
,
4015 task_ledgers
.iokit_mapped
);
4016 ledger_disable_panic_on_negative(pmap
->ledger
,
4017 task_ledgers
.alternate_accounting
);
4018 ledger_disable_panic_on_negative(pmap
->ledger
,
4019 task_ledgers
.alternate_accounting_compressed
);
4021 #endif /* MACH_ASSERT */
4023 #endif /* MACH_ASSERT*/
4032 pmap_set_process_internal(pmap
, pid
, procname
);
4034 #endif /* MACH_ASSERT */
4037 * We maintain stats and ledgers so that a task's physical footprint is:
4038 * phys_footprint = ((internal - alternate_accounting)
4039 * + (internal_compressed - alternate_accounting_compressed)
4041 * + purgeable_nonvolatile
4042 * + purgeable_nonvolatile_compressed
4044 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
4049 * Retire the given physical map from service.
4050 * Should only be called if the map contains
4051 * no valid mappings.
4053 MARK_AS_PMAP_TEXT
static void
4054 pmap_destroy_internal(
4057 if (pmap
== PMAP_NULL
) {
4061 VALIDATE_PMAP(pmap
);
4063 __unused
const pt_attr_t
* const pt_attr
= pmap_get_pt_attr(pmap
);
4065 int32_t ref_count
= os_atomic_dec(&pmap
->ref_count
, relaxed
);
4066 if (ref_count
> 0) {
4068 } else if (ref_count
< 0) {
4069 panic("pmap %p: refcount underflow", pmap
);
4070 } else if (pmap
== kernel_pmap
) {
4071 panic("pmap %p: attempt to destroy kernel pmap", pmap
);
4076 #if (__ARM_VMSA__ > 7)
4077 pmap_unmap_sharedpage(pmap
);
4078 #endif /* (__ARM_VMSA__ > 7) */
4080 pmap_simple_lock(&pmaps_lock
);
4081 while (pmap
->gc_status
& PMAP_GC_INFLIGHT
) {
4082 pmap
->gc_status
|= PMAP_GC_WAIT
;
4083 assert_wait((event_t
) &pmap
->gc_status
, THREAD_UNINT
);
4084 pmap_simple_unlock(&pmaps_lock
);
4085 (void) thread_block(THREAD_CONTINUE_NULL
);
4086 pmap_simple_lock(&pmaps_lock
);
4088 queue_remove(&map_pmap_list
, pmap
, pmap_t
, pmaps
);
4089 pmap_simple_unlock(&pmaps_lock
);
4091 #if (__ARM_VMSA__ == 7)
4092 if (pmap
->cpu_ref
!= 0) {
4093 panic("%s: cpu_ref=%u, "
4095 __FUNCTION__
, pmap
->cpu_ref
,
4098 #endif /* (__ARM_VMSA__ == 7) */
4100 pmap_trim_self(pmap
);
4103 * Free the memory maps, then the
4106 #if (__ARM_VMSA__ == 7)
4110 for (i
= 0; i
< pmap
->tte_index_max
; i
++) {
4111 ttep
= &pmap
->tte
[i
];
4112 if ((*ttep
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
4113 pmap_tte_deallocate(pmap
, ttep
, PMAP_TT_L1_LEVEL
);
4117 #else /* (__ARM_VMSA__ == 7) */
4121 for (level
= pt_attr
->pta_max_level
- 1; level
>= pt_attr
->pta_root_level
; level
--) {
4122 for (c
= pmap
->min
; c
< pmap
->max
; c
+= pt_attr_ln_size(pt_attr
, level
)) {
4123 ttep
= pmap_ttne(pmap
, level
, c
);
4125 if ((ttep
!= PT_ENTRY_NULL
) && (*ttep
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
4127 pmap_tte_deallocate(pmap
, ttep
, level
);
4132 #endif /* (__ARM_VMSA__ == 7) */
4137 #if (__ARM_VMSA__ == 7)
4138 pmap_tt1_deallocate(pmap
, pmap
->tte
, pmap
->tte_index_max
* sizeof(tt_entry_t
), 0);
4139 pmap
->tte_index_max
= 0;
4140 #else /* (__ARM_VMSA__ == 7) */
4141 pmap_tt1_deallocate(pmap
, pmap
->tte
, PMAP_ROOT_ALLOC_SIZE
, 0);
4142 #endif /* (__ARM_VMSA__ == 7) */
4143 pmap
->tte
= (tt_entry_t
*) NULL
;
4147 #if (__ARM_VMSA__ == 7)
4148 if (pmap
->prev_tte
) {
4149 pmap_tt1_deallocate(pmap
, pmap
->prev_tte
, PMAP_ROOT_ALLOC_SIZE
, 0);
4150 pmap
->prev_tte
= (tt_entry_t
*) NULL
;
4152 #endif /* (__ARM_VMSA__ == 7) */
4154 assert((tt_free_entry_t
*)pmap
->tt_entry_free
== NULL
);
4156 pmap_get_pt_ops(pmap
)->flush_tlb_async(pmap
);
4159 /* return its asid to the pool */
4160 pmap_get_pt_ops(pmap
)->free_id(pmap
);
4161 pmap_check_ledgers(pmap
);
4163 if (pmap
->nested_region_asid_bitmap
) {
4164 kfree(pmap
->nested_region_asid_bitmap
, pmap
->nested_region_asid_bitmap_size
* sizeof(unsigned int));
4167 zfree(pmap_zone
, pmap
);
4176 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY
) | DBG_FUNC_START
, VM_KERNEL_ADDRHIDE(pmap
), PMAP_VASID(pmap
), pmap
->hw_asid
);
4178 ledger
= pmap
->ledger
;
4180 pmap_destroy_internal(pmap
);
4182 ledger_dereference(ledger
);
4184 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY
) | DBG_FUNC_END
);
}

/*
 * Add a reference to the specified pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_reference_internal(
	pmap_t pmap)
{
	if (pmap != PMAP_NULL) {
		VALIDATE_PMAP(pmap);
		os_atomic_inc(&pmap->ref_count, relaxed);
	}
}

void
pmap_reference(
	pmap_t pmap)
{
	pmap_reference_internal(pmap);
}
4214 tt_entry_t
*tt1
= NULL
;
4215 tt_free_entry_t
*tt1_free
;
4218 vm_address_t va_end
;
4221 pmap_simple_lock(&tt1_lock
);
4222 if ((size
== PAGE_SIZE
) && (free_page_size_tt_count
!= 0)) {
4223 free_page_size_tt_count
--;
4224 tt1
= (tt_entry_t
*)free_page_size_tt_list
;
4225 free_page_size_tt_list
= ((tt_free_entry_t
*)tt1
)->next
;
4226 } else if ((size
== 2 * PAGE_SIZE
) && (free_two_page_size_tt_count
!= 0)) {
4227 free_two_page_size_tt_count
--;
4228 tt1
= (tt_entry_t
*)free_two_page_size_tt_list
;
4229 free_two_page_size_tt_list
= ((tt_free_entry_t
*)tt1
)->next
;
4230 } else if ((size
< PAGE_SIZE
) && (free_tt_count
!= 0)) {
4232 tt1
= (tt_entry_t
*)free_tt_list
;
4233 free_tt_list
= (tt_free_entry_t
*)((tt_free_entry_t
*)tt1
)->next
;
4236 pmap_simple_unlock(&tt1_lock
);
4239 pmap_tt_ledger_credit(pmap
, size
);
4240 return (tt_entry_t
*)tt1
;
4243 ret
= pmap_pages_alloc(&pa
, (unsigned)((size
< PAGE_SIZE
)? PAGE_SIZE
: size
), ((option
& PMAP_TT_ALLOCATE_NOWAIT
)? PMAP_PAGES_ALLOCATE_NOWAIT
: 0));
4245 if (ret
== KERN_RESOURCE_SHORTAGE
) {
4246 return (tt_entry_t
*)0;
4250 if (size
< PAGE_SIZE
) {
4251 va
= phystokv(pa
) + size
;
4252 tt_free_entry_t
*local_free_list
= (tt_free_entry_t
*)va
;
4253 tt_free_entry_t
*next_free
= NULL
;
4254 for (va_end
= phystokv(pa
) + PAGE_SIZE
; va
< va_end
; va
= va
+ size
) {
4255 tt1_free
= (tt_free_entry_t
*)va
;
4256 tt1_free
->next
= next_free
;
4257 next_free
= tt1_free
;
4259 pmap_simple_lock(&tt1_lock
);
4260 local_free_list
->next
= free_tt_list
;
4261 free_tt_list
= next_free
;
4262 free_tt_count
+= ((PAGE_SIZE
/ size
) - 1);
4263 if (free_tt_count
> free_tt_max
) {
4264 free_tt_max
= free_tt_count
;
4266 pmap_simple_unlock(&tt1_lock
);
4269 /* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
4270 * Depending on the device, this can vary between 512b and 16K. */
4271 OSAddAtomic((uint32_t)(size
/ PMAP_ROOT_ALLOC_SIZE
), (pmap
== kernel_pmap
? &inuse_kernel_tteroot_count
: &inuse_user_tteroot_count
));
4272 OSAddAtomic64(size
/ PMAP_ROOT_ALLOC_SIZE
, &alloc_tteroot_count
);
4273 pmap_tt_ledger_credit(pmap
, size
);
4275 return (tt_entry_t
*) phystokv(pa
);
4279 pmap_tt1_deallocate(
4285 tt_free_entry_t
*tt_entry
;
4287 tt_entry
= (tt_free_entry_t
*)tt
;
4289 pmap_simple_lock(&tt1_lock
);
4291 if (size
< PAGE_SIZE
) {
4293 if (free_tt_count
> free_tt_max
) {
4294 free_tt_max
= free_tt_count
;
4296 tt_entry
->next
= free_tt_list
;
4297 free_tt_list
= tt_entry
;
4300 if (size
== PAGE_SIZE
) {
4301 free_page_size_tt_count
++;
4302 if (free_page_size_tt_count
> free_page_size_tt_max
) {
4303 free_page_size_tt_max
= free_page_size_tt_count
;
4305 tt_entry
->next
= free_page_size_tt_list
;
4306 free_page_size_tt_list
= tt_entry
;
4309 if (size
== 2 * PAGE_SIZE
) {
4310 free_two_page_size_tt_count
++;
4311 if (free_two_page_size_tt_count
> free_two_page_size_tt_max
) {
4312 free_two_page_size_tt_max
= free_two_page_size_tt_count
;
4314 tt_entry
->next
= free_two_page_size_tt_list
;
4315 free_two_page_size_tt_list
= tt_entry
;
4318 if (option
& PMAP_TT_DEALLOCATE_NOBLOCK
) {
4319 pmap_simple_unlock(&tt1_lock
);
4320 pmap_tt_ledger_debit(pmap
, size
);
4324 while (free_page_size_tt_count
> FREE_PAGE_SIZE_TT_MAX
) {
4325 free_page_size_tt_count
--;
4326 tt
= (tt_entry_t
*)free_page_size_tt_list
;
4327 free_page_size_tt_list
= ((tt_free_entry_t
*)tt
)->next
;
4329 pmap_simple_unlock(&tt1_lock
);
4331 pmap_pages_free(ml_static_vtop((vm_offset_t
)tt
), PAGE_SIZE
);
4333 OSAddAtomic(-(int32_t)(PAGE_SIZE
/ PMAP_ROOT_ALLOC_SIZE
), (pmap
== kernel_pmap
? &inuse_kernel_tteroot_count
: &inuse_user_tteroot_count
));
4335 pmap_simple_lock(&tt1_lock
);
4338 while (free_two_page_size_tt_count
> FREE_TWO_PAGE_SIZE_TT_MAX
) {
4339 free_two_page_size_tt_count
--;
4340 tt
= (tt_entry_t
*)free_two_page_size_tt_list
;
4341 free_two_page_size_tt_list
= ((tt_free_entry_t
*)tt
)->next
;
4343 pmap_simple_unlock(&tt1_lock
);
4345 pmap_pages_free(ml_static_vtop((vm_offset_t
)tt
), 2 * PAGE_SIZE
);
4347 OSAddAtomic(-2 * (int32_t)(PAGE_SIZE
/ PMAP_ROOT_ALLOC_SIZE
), (pmap
== kernel_pmap
? &inuse_kernel_tteroot_count
: &inuse_user_tteroot_count
));
4349 pmap_simple_lock(&tt1_lock
);
4351 pmap_simple_unlock(&tt1_lock
);
4352 pmap_tt_ledger_debit(pmap
, size
);
4355 static kern_return_t
4360 unsigned int options
)
4366 if ((tt_free_entry_t
*)pmap
->tt_entry_free
!= NULL
) {
4367 tt_free_entry_t
*tt_free_next
;
4369 tt_free_next
= ((tt_free_entry_t
*)pmap
->tt_entry_free
)->next
;
4370 *ttp
= (tt_entry_t
*)pmap
->tt_entry_free
;
4371 pmap
->tt_entry_free
= (tt_entry_t
*)tt_free_next
;
4379 * Allocate a VM page for the level x page table entries.
4381 while (pmap_pages_alloc(&pa
, PAGE_SIZE
, ((options
& PMAP_TT_ALLOCATE_NOWAIT
)? PMAP_PAGES_ALLOCATE_NOWAIT
: 0)) != KERN_SUCCESS
) {
4382 if (options
& PMAP_OPTIONS_NOWAIT
) {
4383 return KERN_RESOURCE_SHORTAGE
;
4388 while ((ptdp
= ptd_alloc(pmap
, false)) == NULL
) {
4389 if (options
& PMAP_OPTIONS_NOWAIT
) {
4390 pmap_pages_free(pa
, PAGE_SIZE
);
4391 return KERN_RESOURCE_SHORTAGE
;
4396 if (level
< PMAP_TT_MAX_LEVEL
) {
4397 OSAddAtomic64(1, &alloc_ttepages_count
);
4398 OSAddAtomic(1, (pmap
== kernel_pmap
? &inuse_kernel_ttepages_count
: &inuse_user_ttepages_count
));
4400 OSAddAtomic64(1, &alloc_ptepages_count
);
4401 OSAddAtomic(1, (pmap
== kernel_pmap
? &inuse_kernel_ptepages_count
: &inuse_user_ptepages_count
));
4404 pmap_tt_ledger_credit(pmap
, PAGE_SIZE
);
4406 PMAP_ZINFO_PALLOC(pmap
, PAGE_SIZE
);
4408 pvh_update_head_unlocked(pai_to_pvh(pa_index(pa
)), ptdp
, PVH_TYPE_PTDP
);
4410 __unreachable_ok_push
4411 if (TEST_PAGE_RATIO_4
) {
4413 vm_address_t va_end
;
4417 for (va_end
= phystokv(pa
) + PAGE_SIZE
, va
= phystokv(pa
) + ARM_PGBYTES
; va
< va_end
; va
= va
+ ARM_PGBYTES
) {
4418 ((tt_free_entry_t
*)va
)->next
= (tt_free_entry_t
*)pmap
->tt_entry_free
;
4419 pmap
->tt_entry_free
= (tt_entry_t
*)va
;
4423 __unreachable_ok_pop
4425 *ttp
= (tt_entry_t
*)phystokv(pa
);
4429 return KERN_SUCCESS
;
4440 unsigned pt_acc_cnt
;
4441 unsigned i
, max_pt_index
= PAGE_RATIO
;
4442 vm_offset_t free_page
= 0;
4446 ptdp
= ptep_get_ptd((vm_offset_t
)ttp
);
4448 ptdp
->ptd_info
[ARM_PT_DESC_INDEX(ttp
)].va
= (vm_offset_t
)-1;
4450 if ((level
< PMAP_TT_MAX_LEVEL
) && (ptdp
->ptd_info
[ARM_PT_DESC_INDEX(ttp
)].refcnt
== PT_DESC_REFCOUNT
)) {
4451 ptdp
->ptd_info
[ARM_PT_DESC_INDEX(ttp
)].refcnt
= 0;
4454 if (ptdp
->ptd_info
[ARM_PT_DESC_INDEX(ttp
)].refcnt
!= 0) {
4455 panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp
, ptdp
->ptd_info
[ARM_PT_DESC_INDEX(ttp
)].refcnt
);
4458 ptdp
->ptd_info
[ARM_PT_DESC_INDEX(ttp
)].refcnt
= 0;
4460 for (i
= 0, pt_acc_cnt
= 0; i
< max_pt_index
; i
++) {
4461 pt_acc_cnt
+= ptdp
->ptd_info
[i
].refcnt
;
4464 if (pt_acc_cnt
== 0) {
4465 tt_free_entry_t
*tt_free_list
= (tt_free_entry_t
*)&pmap
->tt_entry_free
;
4466 unsigned pt_free_entry_cnt
= 1;
4468 while (pt_free_entry_cnt
< max_pt_index
&& tt_free_list
) {
4469 tt_free_entry_t
*tt_free_list_next
;
4471 tt_free_list_next
= tt_free_list
->next
;
4472 if ((((vm_offset_t
)tt_free_list_next
) - ((vm_offset_t
)ttp
& ~PAGE_MASK
)) < PAGE_SIZE
) {
4473 pt_free_entry_cnt
++;
4475 tt_free_list
= tt_free_list_next
;
4477 if (pt_free_entry_cnt
== max_pt_index
) {
4478 tt_free_entry_t
*tt_free_list_cur
;
4480 free_page
= (vm_offset_t
)ttp
& ~PAGE_MASK
;
4481 tt_free_list
= (tt_free_entry_t
*)&pmap
->tt_entry_free
;
4482 tt_free_list_cur
= (tt_free_entry_t
*)&pmap
->tt_entry_free
;
4484 while (tt_free_list_cur
) {
4485 tt_free_entry_t
*tt_free_list_next
;
4487 tt_free_list_next
= tt_free_list_cur
->next
;
4488 if ((((vm_offset_t
)tt_free_list_next
) - free_page
) < PAGE_SIZE
) {
4489 tt_free_list
->next
= tt_free_list_next
->next
;
4491 tt_free_list
= tt_free_list_next
;
4493 tt_free_list_cur
= tt_free_list_next
;
4496 ((tt_free_entry_t
*)ttp
)->next
= (tt_free_entry_t
*)pmap
->tt_entry_free
;
4497 pmap
->tt_entry_free
= ttp
;
4500 ((tt_free_entry_t
*)ttp
)->next
= (tt_free_entry_t
*)pmap
->tt_entry_free
;
4501 pmap
->tt_entry_free
= ttp
;
4506 if (free_page
!= 0) {
4507 ptd_deallocate(ptep_get_ptd((vm_offset_t
)free_page
));
4508 *(pt_desc_t
**)pai_to_pvh(pa_index(ml_static_vtop(free_page
))) = NULL
;
4509 pmap_pages_free(ml_static_vtop(free_page
), PAGE_SIZE
);
4510 if (level
< PMAP_TT_MAX_LEVEL
) {
4511 OSAddAtomic(-1, (pmap
== kernel_pmap
? &inuse_kernel_ttepages_count
: &inuse_user_ttepages_count
));
4513 OSAddAtomic(-1, (pmap
== kernel_pmap
? &inuse_kernel_ptepages_count
: &inuse_user_ptepages_count
));
4515 PMAP_ZINFO_PFREE(pmap
, PAGE_SIZE
);
4516 pmap_tt_ledger_debit(pmap
, PAGE_SIZE
);
4526 tt_entry_t tte
= *ttep
;
4529 panic("pmap_tte_deallocate(): null tt_entry ttep==%p\n", ttep
);
4532 if (((level
+ 1) == PMAP_TT_MAX_LEVEL
) && (tte_get_ptd(tte
)->ptd_info
[ARM_PT_DESC_INDEX(ttetokv(*ttep
))].refcnt
!= 0)) {
4533 panic("pmap_tte_deallocate(): pmap=%p ttep=%p ptd=%p refcnt=0x%x \n", pmap
, ttep
,
4534 tte_get_ptd(tte
), (tte_get_ptd(tte
)->ptd_info
[ARM_PT_DESC_INDEX(ttetokv(*ttep
))].refcnt
));
4537 #if (__ARM_VMSA__ == 7)
4539 tt_entry_t
*ttep_4M
= (tt_entry_t
*) ((vm_offset_t
)ttep
& 0xFFFFFFF0);
4542 for (i
= 0; i
< 4; i
++, ttep_4M
++) {
4543 *ttep_4M
= (tt_entry_t
) 0;
4545 FLUSH_PTE_RANGE_STRONG(ttep_4M
- 4, ttep_4M
);
4548 *ttep
= (tt_entry_t
) 0;
4549 FLUSH_PTE_STRONG(ttep
);
4554 pmap_tte_deallocate(
4562 PMAP_ASSERT_LOCKED(pmap
);
4567 if (tte_get_ptd(tte
)->pmap
!= pmap
) {
4568 panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
4569 tte_get_ptd(tte
), tte_get_ptd(tte
)->pmap
, pmap
);
4573 pmap_tte_remove(pmap
, ttep
, level
);
4575 if ((tte
& ARM_TTE_TYPE_MASK
) == ARM_TTE_TYPE_TABLE
) {
4578 pt_entry_t
*pte_p
= ((pt_entry_t
*) (ttetokv(tte
) & ~ARM_PGMASK
));
4581 for (i
= 0; i
< (ARM_PGBYTES
/ sizeof(*pte_p
)); i
++, pte_p
++) {
4582 if (ARM_PTE_IS_COMPRESSED(*pte_p
, pte_p
)) {
4583 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
4584 (uint64_t)tte
, pmap
, pte_p
, (uint64_t)(*pte_p
));
4585 } else if (((*pte_p
) & ARM_PTE_TYPE_MASK
) != ARM_PTE_TYPE_FAULT
) {
4586 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
4587 (uint64_t)tte
, pmap
, pte_p
, (uint64_t)(*pte_p
));
4594 /* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
4595 * aligned on 1K boundaries. We clear the surrounding "chunk" of 4 TTEs above. */
4596 pa
= tte_to_pa(tte
) & ~ARM_PGMASK
;
4597 pmap_tt_deallocate(pmap
, (tt_entry_t
*) phystokv(pa
), level
+ 1);
4603 * Remove a range of hardware page-table entries.
4604 * The entries given are the first (inclusive)
4605 * and last (exclusive) entries for the VM pages.
4606 * The virtual address is the va for the first pte.
4608 * The pmap must be locked.
4609 * If the pmap is not the kernel pmap, the range must lie
4610 * entirely within one pte-page. This is NOT checked.
4611 * Assumes that the pte-page exists.
4613 * Returns the number of PTE changed, and sets *rmv_cnt
4614 * to the number of SPTE changed.
4619 vm_map_address_t va
,
4624 bool need_strong_sync
= false;
4625 int num_changed
= pmap_remove_range_options(pmap
, va
, bpte
, epte
, rmv_cnt
,
4626 &need_strong_sync
, PMAP_OPTIONS_REMOVE
);
4627 if (num_changed
> 0) {
4628 PMAP_UPDATE_TLBS(pmap
, va
, va
+ (PAGE_SIZE
* (epte
- bpte
)), need_strong_sync
);
#ifdef PVH_FLAG_EXEC

/*
 * Update the access protection bits of the physical aperture mapping for a page.
 * This is useful, for example, in guaranteeing that a verified executable page
 * has no writable mappings anywhere in the system, including the physical
 * aperture.  flush_tlb_async can be set to true to avoid unnecessary TLB
 * synchronization overhead in cases where the call to this function is
 * guaranteed to be followed by other TLB operations.
 */
static void
pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
{
#if __ARM_PTE_PHYSMAP__
	ASSERT_PVH_LOCKED(pai);
	vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
	pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);

	pt_entry_t tmplate = *pte_p;
	if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
		return;
	}
	tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
#if (__ARM_VMSA__ > 7)
	if (tmplate & ARM_PTE_HINT_MASK) {
		panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
		    __func__, pte_p, (void *)kva, tmplate);
	}
#endif
	WRITE_PTE_STRONG(pte_p, tmplate);
	flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
	if (!flush_tlb_async) {
		/* ... */
	}
#endif
}

#endif /* defined(PVH_FLAG_EXEC) */
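/*
 * pmap_set_ptov_ap() is called with AP_RWNA from pmap_remove_pv() and
 * pmap_page_protect_options_internal() once an executable (PVH_FLAG_EXEC) page has
 * no CPU mappings left, restoring the physical aperture alias of that page to its
 * normal kernel read/write permission.
 */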
static void
pmap_remove_pv(
	pmap_t pmap,
	pt_entry_t *cpte,
	int pai,
	int *num_internal,
	int *num_alt_internal,
	int *num_reusable,
	int *num_external)
{
	pv_entry_t **pv_h, **pve_pp;
	pv_entry_t *pve_p;

	ASSERT_PVH_LOCKED(pai);
	pv_h = pai_to_pvh(pai);
	vm_offset_t pvh_flags = pvh_get_flags(pv_h);

	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0)) {
			panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
		}
		if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
			assert(IS_INTERNAL_PAGE(pai));
			(*num_internal)++;
			(*num_alt_internal)++;
			CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
		} else if (IS_INTERNAL_PAGE(pai)) {
			if (IS_REUSABLE_PAGE(pai)) {
				(*num_reusable)++;
			} else {
				(*num_internal)++;
			}
		} else {
			(*num_external)++;
		}
		pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_pp = pv_h;
		pve_p = pvh_list(pv_h);

		while (pve_p != PV_ENTRY_NULL &&
		    (pve_get_ptep(pve_p) != cpte)) {
			pve_pp = pve_link_field(pve_p);
			pve_p = PVE_NEXT_PTR(pve_next(pve_p));
		}

		if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0)) {
			panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
		}

#if MACH_ASSERT
		if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
			pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
			while (check_pve_p != PV_ENTRY_NULL) {
				if (pve_get_ptep(check_pve_p) == cpte) {
					panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
					    __func__, cpte, pmap, pv_h, pve_p, pai);
				}
				check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
			}
		}
#endif

		if (IS_ALTACCT_PAGE(pai, pve_p)) {
			assert(IS_INTERNAL_PAGE(pai));
			(*num_internal)++;
			(*num_alt_internal)++;
			CLR_ALTACCT_PAGE(pai, pve_p);
		} else if (IS_INTERNAL_PAGE(pai)) {
			if (IS_REUSABLE_PAGE(pai)) {
				(*num_reusable)++;
			} else {
				(*num_internal)++;
			}
		} else {
			(*num_external)++;
		}

		pvh_remove(pv_h, pve_pp, pve_p);
		if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
			pvh_set_flags(pv_h, pvh_flags);
		}
	} else {
		panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
		    __func__, *pv_h, cpte, pmap, pv_h, pai);
	}

#ifdef PVH_FLAG_EXEC
	if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
		pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
	}
#endif
}
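/*
 * A PV head is either a single PTE pointer (PVH_TYPE_PTEP) or a list of pv_entry_t
 * structures (PVH_TYPE_PVEP); pmap_remove_pv() handles both shapes, updates the
 * internal/reusable/external/alt-internal counters for its caller, and preserves the
 * head flags whenever the list remains non-empty.
 */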
static int
pmap_remove_range_options(
	pmap_t pmap,
	vm_map_address_t va,
	pt_entry_t *bpte,
	pt_entry_t *epte,
	uint32_t *rmv_cnt,
	bool *need_strong_sync __unused,
	int options)
{
	pt_entry_t *cpte;
	pmap_paddr_t pa;
	int num_removed, num_unwired;
	int num_pte_changed;
	int pai = 0;
	int num_external, num_internal, num_reusable;
	int num_alt_internal;
	uint64_t num_compressed, num_alt_compressed;

	PMAP_ASSERT_LOCKED(pmap);

	num_removed = 0;
	num_unwired = 0;
	num_pte_changed = 0;
	num_external = 0;
	num_internal = 0;
	num_reusable = 0;
	num_compressed = 0;
	num_alt_internal = 0;
	num_alt_compressed = 0;

	for (cpte = bpte; cpte < epte;
	    cpte += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
		pt_entry_t spte;
		boolean_t managed = FALSE;

		spte = *cpte;

#if CONFIG_PGTRACE
		if (pgtrace_enabled) {
			pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
		}
#endif

		while (!managed) {
			if (pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_REMOVE) &&
			    (ARM_PTE_IS_COMPRESSED(spte, cpte))) {
				/*
				 * "pmap" must be locked at this point,
				 * so this should not race with another
				 * pmap_remove_range() or pmap_enter().
				 */

				/* one less "compressed"... */
				num_compressed++;
				if (spte & ARM_PTE_COMPRESSED_ALT) {
					/* ... but it used to be "ALTACCT" */
					num_alt_compressed++;
				}

				WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
				/*
				 * "refcnt" also accounts for
				 * our "compressed" markers,
				 * so let's update it here.
				 */
				if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
					panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
				}
				spte = *cpte;
			}
			/*
			 * It may be possible for the pte to transition from managed
			 * to unmanaged in this timeframe; for now, elide the assert.
			 * We should break out as a consequence of checking pa_valid.
			 */
			//assert(!ARM_PTE_IS_COMPRESSED(spte));
			pa = pte_to_pa(spte);
			if (!pa_valid(pa)) {
				break;
			}
			pai = (int)pa_index(pa);
			LOCK_PVH(pai);
			spte = *cpte;
			pa = pte_to_pa(spte);
			if (pai == (int)pa_index(pa)) {
				managed = TRUE;
				break; // Leave pai locked as we will unlock it after we free the PV entry
			}
			UNLOCK_PVH(pai);
		}

		if (ARM_PTE_IS_COMPRESSED(*cpte, cpte)) {
			/*
			 * There used to be a valid mapping here but it
			 * has already been removed when the page was
			 * sent to the VM compressor, so nothing left to
			 * remove now...
			 */
			continue;
		}

		/* remove the translation, do not flush the TLB */
		if (*cpte != ARM_PTE_TYPE_FAULT) {
			assertf(!ARM_PTE_IS_COMPRESSED(*cpte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
			assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
#if MACH_ASSERT
			if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
				panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
				    cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
			}
#endif
			WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
			num_pte_changed++;
		}

		if ((spte != ARM_PTE_TYPE_FAULT) &&
		    (pmap != kernel_pmap)) {
			assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
			assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
			if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
				panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
			}
			if (rmv_cnt) {
				(*rmv_cnt)++;
			}
		}

		if (pte_is_wired(spte)) {
			pte_set_wired(cpte, 0);
			num_unwired++;
		}
		/*
		 * if not managed, we're done
		 */
		if (!managed) {
			continue;
		}
		/*
		 * find and remove the mapping from the chain for this
		 * physical address.
		 */
		pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);

		num_removed++;
		UNLOCK_PVH(pai);
	}

	/*
	 *	Update the counts
	 */
	OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
	pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));

	if (pmap != kernel_pmap) {
		/* sanity checks... */
#if MACH_ASSERT
		if (pmap->stats.internal < num_internal) {
			if ((!pmap_stats_assert ||
			    !pmap->pmap_stats_assert)) {
				printf("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
				    /* ... */
				    pmap->pmap_procname,
				    /* ... */
				    pmap->stats.internal,
				    pmap->stats.reusable);
			} else {
				panic("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
				    /* ... */
				    pmap->pmap_procname,
				    /* ... */
				    pmap->stats.internal,
				    pmap->stats.reusable);
			}
		}
#endif /* MACH_ASSERT */
		PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
		    pmap,
		    "pmap=%p num_external=%d stats.external=%d",
		    pmap, num_external, pmap->stats.external);
		PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
		    pmap,
		    "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
		    pmap,
		    num_internal, pmap->stats.internal,
		    num_reusable, pmap->stats.reusable);
		PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
		    pmap,
		    "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
		    pmap,
		    num_internal, pmap->stats.internal,
		    num_reusable, pmap->stats.reusable);
		PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
		    pmap,
		    "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
		    pmap, num_compressed, num_alt_compressed,
		    pmap->stats.compressed);

		/* update pmap stats... */
		OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
		if (num_external) {
			OSAddAtomic(-num_external, &pmap->stats.external);
		}
		if (num_internal) {
			OSAddAtomic(-num_internal, &pmap->stats.internal);
		}
		if (num_reusable) {
			OSAddAtomic(-num_reusable, &pmap->stats.reusable);
		}
		if (num_compressed) {
			OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
		}
		/* ... and ledgers */
		pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
		pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
		pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
		pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
		pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
		/* make needed adjustments to phys_footprint */
		pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
		    machine_ptob((num_internal -
		    num_alt_internal) +
		    (num_compressed -
		    num_alt_compressed)));
	}

	/* flush the ptable entries we have written */
	if (num_pte_changed > 0) {
		FLUSH_PTE_RANGE_STRONG(bpte, epte);
	}

	return num_pte_changed;
}
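/*
 * The value returned by pmap_remove_range_options() is the number of PTEs actually
 * modified; pmap_remove_range() and pmap_remove_options_internal() use it to decide
 * whether a TLB invalidation for the range is needed at all.
 */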
/*
 *	Remove the given range of addresses
 *	from the specified map.
 *
 *	It is assumed that the start and end are properly
 *	rounded to the hardware page size.
 */
void
pmap_remove(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end)
{
	pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
}
MARK_AS_PMAP_TEXT static int
pmap_remove_options_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options)
{
	int remove_count = 0;
	pt_entry_t *bpte, *epte;
	pt_entry_t *pte_p;
	tt_entry_t *tte_p;
	uint32_t rmv_spte = 0;
	bool need_strong_sync = false;
	bool flush_tte = false;

	if (__improbable(end < start)) {
		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
	}

	VALIDATE_PMAP(pmap);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	tte_p = pmap_tte(pmap, start);

	if (tte_p == (tt_entry_t *) NULL) {
		/* ... */
	} else if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		pte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte = &pte_p[ptenum(start)];
		epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));

		remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
		    &rmv_spte, &need_strong_sync, options);

		if (rmv_spte && (ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
		    (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
			pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
			flush_tte = true;
		}
	}

	if (remove_count > 0) {
		PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
	} else if (flush_tte > 0) {
		pmap_get_pt_ops(pmap)->flush_tlb_tte_async(start, pmap);
	}

	return remove_count;
}
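/*
 * pmap_remove_options_internal() never spans more than one twig-level page table.
 * When a removal drops a user page table's refcnt to zero and the pmap is not
 * nested, the table page itself is reclaimed via pmap_tte_deallocate().
 */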
void
pmap_remove_options(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	int options)
{
	int remove_count = 0;
	vm_map_address_t va;
	vm_map_address_t l;

	if (pmap == PMAP_NULL) {
		return;
	}

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
	    VM_KERNEL_ADDRHIDE(end));

#if MACH_ASSERT
	if ((start | end) & PAGE_MASK) {
		panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
		    pmap, (uint64_t)start, (uint64_t)end);
	}
	if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
		panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
		    pmap, (uint64_t)start, (uint64_t)end);
	}
#endif

	/*
	 *      Invalidate the translation buffer first
	 */
	va = start;
	while (va < end) {
		l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
		if (l > end) {
			l = end;
		}

		remove_count += pmap_remove_options_internal(pmap, va, l, options);

		va = l;
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
}
/*
 *	Remove phys addr if mapped in specified map
 */
void
pmap_remove_some_phys(
	__unused pmap_t map,
	__unused ppnum_t pn)
{
	/* Implement to support working set code */
}
void
pmap_set_pmap(
	pmap_t pmap,
#if !__ARM_USER_PROTECT__
	__unused
#endif
	thread_t thread)
{
	/* ... */
#if __ARM_USER_PROTECT__
	if (pmap->tte_index_max == NTTES) {
		thread->machine.uptw_ttc = 2;
	} else {
		thread->machine.uptw_ttc = 1;
	}
	thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
	thread->machine.asid = pmap->hw_asid;
#endif
}
static void
pmap_flush_core_tlb_asid(pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	flush_core_tlb_asid(pmap->hw_asid);
#else
	flush_core_tlb_asid(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
#endif
}
MARK_AS_PMAP_TEXT static void
pmap_switch_internal(
	pmap_t pmap)
{
	VALIDATE_PMAP(pmap);
	pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
	uint16_t asid_index = pmap->hw_asid;
	boolean_t do_asid_flush = FALSE;

#if __ARM_KERNEL_PROTECT__
	/* ... */
#endif

#if (__ARM_VMSA__ == 7)
	pmap_simple_lock(&pmap->tt1_lock);
#endif

	pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;

#if MAX_ASID > MAX_HW_ASID
	if (asid_index > 0) {
		assert(asid_index < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));

		/* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
		uint8_t asid_high_bits = pmap->sw_asid;
		uint8_t last_asid_high_bits = cpu_data_ptr->cpu_asid_high_bits[asid_index];

		if (asid_high_bits != last_asid_high_bits) {
			/*
			 * If the virtual ASID of the new pmap does not match the virtual ASID
			 * last seen on this CPU for the physical ASID (that was a mouthful),
			 * then this switch runs the risk of aliasing.  We need to flush the
			 * TLB for this physical ASID in this case.
			 */
			cpu_data_ptr->cpu_asid_high_bits[asid_index] = asid_high_bits;
			do_asid_flush = TRUE;
		}
	}
#endif /* MAX_ASID > MAX_HW_ASID */

	pmap_switch_user_ttb_internal(pmap);

#if (__ARM_VMSA__ > 7)
	/* If we're switching to a different nested pmap (i.e. shared region), we'll need
	 * to flush the userspace mappings for that region.  Those mappings are global
	 * and will not be protected by the ASID.  It should also be cheaper to flush the
	 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
	if ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap)) {
		/* ... */
	}
#endif
	if (do_asid_flush) {
		pmap_flush_core_tlb_asid(pmap);
#if DEVELOPMENT || DEBUG
		os_atomic_inc(&pmap_asid_flushes, relaxed);
#endif
	}

#if (__ARM_VMSA__ == 7)
	pmap_simple_unlock(&pmap->tt1_lock);
#endif
}

void
pmap_switch(
	pmap_t pmap)
{
	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
	pmap_switch_internal(pmap);
	PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
}
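/*
 * ASID handling on the switch path: when more software ASIDs exist than hardware
 * ASIDs (MAX_ASID > MAX_HW_ASID), the per-CPU cpu_asid_high_bits[] array remembers
 * which "virtual" ASID last used each hardware ASID; a mismatch forces a per-core
 * TLB flush for that ASID via pmap_flush_core_tlb_asid().
 */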
void
pmap_page_protect(
	ppnum_t ppnum,
	vm_prot_t prot)
{
	pmap_page_protect_options(ppnum, prot, 0, NULL);
}
/*
 *	Routine:	pmap_page_protect_options
 *
 *	Function:
 *		Lower the permission for all mappings to a given
 *		page.
 */
MARK_AS_PMAP_TEXT static void
pmap_page_protect_options_internal(
	ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options)
{
	pmap_paddr_t phys = ptoa(ppnum);
	pv_entry_t **pv_h;
	pv_entry_t **pve_pp;
	pv_entry_t *pve_p;
	pv_entry_t *pveh_p;
	pv_entry_t *pvet_p;
	pt_entry_t *pte_p;
	pv_entry_t *new_pve_p;
	pt_entry_t *new_pte_p;
	vm_offset_t pvh_flags;
	int pai;
	boolean_t remove;
	boolean_t set_NX;
	boolean_t tlb_flush_needed = FALSE;
	unsigned int pvh_cnt = 0;

	assert(ppnum != vm_page_fictitious_addr);

	/* Only work with managed pages. */
	if (!pa_valid(phys)) {
		return;
	}

	/*
	 * Determine the new protection.
	 */
	switch (prot) {
	case VM_PROT_ALL:
		return;         /* nothing to do */
	case VM_PROT_READ:
	case VM_PROT_READ | VM_PROT_EXECUTE:
		remove = FALSE;
		break;
	default:
		remove = TRUE;
		break;
	}

	pai = (int)pa_index(phys);
	LOCK_PVH(pai);
	pv_h = pai_to_pvh(pai);
	pvh_flags = pvh_get_flags(pv_h);

	pte_p = PT_ENTRY_NULL;
	pve_p = PV_ENTRY_NULL;
	pve_pp = pv_h;
	pveh_p = PV_ENTRY_NULL;
	pvet_p = PV_ENTRY_NULL;
	new_pve_p = PV_ENTRY_NULL;
	new_pte_p = PT_ENTRY_NULL;
	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		pte_p = pvh_ptep(pv_h);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_p = pvh_list(pv_h);
		pveh_p = pve_p;
	}

	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		vm_map_address_t va;
		pmap_t pmap;
		pt_entry_t tmplate;
		boolean_t update = FALSE;

		if (pve_p != PV_ENTRY_NULL) {
			pte_p = pve_get_ptep(pve_p);
		}

#ifdef PVH_FLAG_IOMMU
		if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
			if (options & PMAP_OPTIONS_COMPRESSOR) {
				panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
				    ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
			}
			if (pve_p != PV_ENTRY_NULL) {
				pv_entry_t *temp_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
				pvh_remove(pv_h, pve_pp, pve_p);
				pveh_p = pvh_list(pv_h);
				pve_next(pve_p) = new_pve_p;
				/* ... */
			}
			goto protect_skip_pve;
		}
#endif
		pmap = ptep_get_pmap(pte_p);
		va = ptep_get_va(pte_p);

		if (pte_p == PT_ENTRY_NULL) {
			panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
			    pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
		} else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
#if MACH_ASSERT
			if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
				pv_entry_t *check_pve_p = pveh_p;
				while (check_pve_p != PV_ENTRY_NULL) {
					if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
						panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
						    pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
					}
					check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
				}
			}
#endif
			panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
			    pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
		}

#if DEVELOPMENT || DEBUG
		if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
		if ((prot & VM_PROT_EXECUTE))
#endif
		{ set_NX = FALSE;} else {
			set_NX = TRUE;
		}

		/* Remove the mapping if new protection is NONE */
		if (remove) {
			boolean_t is_altacct = FALSE;

			if (IS_ALTACCT_PAGE(pai, pve_p)) {
				is_altacct = TRUE;
			} else {
				is_altacct = FALSE;
			}

			if (pte_is_wired(*pte_p)) {
				pte_set_wired(pte_p, 0);
				if (pmap != kernel_pmap) {
					pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
					OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
				}
			}

			if (*pte_p != ARM_PTE_TYPE_FAULT &&
			    pmap != kernel_pmap &&
			    (options & PMAP_OPTIONS_COMPRESSOR) &&
			    IS_INTERNAL_PAGE(pai)) {
				assert(!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p));
				/* mark this PTE as having been "compressed" */
				tmplate = ARM_PTE_COMPRESSED;
				if (is_altacct) {
					tmplate |= ARM_PTE_COMPRESSED_ALT;
				}
			} else {
				tmplate = ARM_PTE_TYPE_FAULT;
			}

			if ((*pte_p != ARM_PTE_TYPE_FAULT) &&
			    tmplate == ARM_PTE_TYPE_FAULT &&
			    (pmap != kernel_pmap)) {
				if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt)) <= 0) {
					panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
				}
			}

			if (*pte_p != tmplate) {
				WRITE_PTE_STRONG(pte_p, tmplate);
				update = TRUE;
			}
			pvh_cnt++;
			pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
			OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);

			/*
			 * We only ever compress internal pages.
			 */
			if (options & PMAP_OPTIONS_COMPRESSOR) {
				assert(IS_INTERNAL_PAGE(pai));
			}

			if (pmap != kernel_pmap) {
				if (IS_REUSABLE_PAGE(pai) &&
				    IS_INTERNAL_PAGE(pai) &&
				    !is_altacct) {
					PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
					OSAddAtomic(-1, &pmap->stats.reusable);
				} else if (IS_INTERNAL_PAGE(pai)) {
					PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
					OSAddAtomic(-1, &pmap->stats.internal);
				} else {
					PMAP_STATS_ASSERTF(pmap->stats.external > 0, pmap, "stats.external %d", pmap->stats.external);
					OSAddAtomic(-1, &pmap->stats.external);
				}

				if ((options & PMAP_OPTIONS_COMPRESSOR) &&
				    IS_INTERNAL_PAGE(pai)) {
					/* adjust "compressed" stats */
					OSAddAtomic64(+1, &pmap->stats.compressed);
					PMAP_STATS_PEAK(pmap->stats.compressed);
					pmap->stats.compressed_lifetime++;
				}

				if (IS_ALTACCT_PAGE(pai, pve_p)) {
					assert(IS_INTERNAL_PAGE(pai));
					pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
					pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
						pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
					}

					/*
					 * Cleanup our marker before
					 * we free this pv_entry.
					 */
					CLR_ALTACCT_PAGE(pai, pve_p);
				} else if (IS_REUSABLE_PAGE(pai)) {
					assert(IS_INTERNAL_PAGE(pai));
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
						/* was not in footprint, but is now */
						pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
					}
				} else if (IS_INTERNAL_PAGE(pai)) {
					pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);

					/*
					 * Update all stats related to physical footprint, which only
					 * deals with internal pages.
					 */
					if (options & PMAP_OPTIONS_COMPRESSOR) {
						/*
						 * This removal is only being done so we can send this page to
						 * the compressor; therefore it mustn't affect total task footprint.
						 */
						pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
					} else {
						/*
						 * This internal page isn't going to the compressor, so adjust stats to keep
						 * phys_footprint up to date.
						 */
						pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
					}
				} else {
					/* external page: no impact on ledgers */
				}
			}

			if (pve_p != PV_ENTRY_NULL) {
				assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
			}
		} else {
			pt_entry_t spte;
			const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);

			spte = *pte_p;

			if (pmap == kernel_pmap) {
				tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
			} else {
				tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
			}

			pte_set_was_writeable(tmplate, false);
			/*
			 * While the naive implementation of this would serve to add execute
			 * permission, this is not how the VM uses this interface, or how
			 * x86_64 implements it.  So ignore requests to add execute permissions.
			 */
			if (set_NX) {
				tmplate |= pt_attr_leaf_xn(pt_attr);
			}

			if (*pte_p != ARM_PTE_TYPE_FAULT &&
			    !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p) &&
			    *pte_p != tmplate) {
				WRITE_PTE_STRONG(pte_p, tmplate);
				update = TRUE;
			}
		}

		/* Invalidate TLBs for all CPUs using it */
		if (update) {
			tlb_flush_needed = TRUE;
			pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
		}

#ifdef PVH_FLAG_IOMMU
protect_skip_pve:
#endif
		pte_p = PT_ENTRY_NULL;
		pvet_p = pve_p;
		if (pve_p != PV_ENTRY_NULL) {
			if (remove) {
				assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
			}
			pve_pp = pve_link_field(pve_p);
			pve_p = PVE_NEXT_PTR(pve_next(pve_p));
		}
	}

#ifdef PVH_FLAG_EXEC
	if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
		pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
	}
#endif
	if (tlb_flush_needed) {
		/* ... */
	}

	/* if we removed a bunch of entries, take care of them now */
	if (remove) {
		if (new_pve_p != PV_ENTRY_NULL) {
			pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
			pvh_set_flags(pv_h, pvh_flags);
		} else if (new_pte_p != PT_ENTRY_NULL) {
			pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
			pvh_set_flags(pv_h, pvh_flags);
		} else {
			pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
		}
	}

	UNLOCK_PVH(pai);

	if (remove && (pvet_p != PV_ENTRY_NULL)) {
		pv_list_free(pveh_p, pvet_p, pvh_cnt);
	}
}
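/*
 * For each mapping of the page, the loop above either removes it outright (the
 * VM_PROT_NONE / compressor case, with the matching ledger and stat adjustments) or
 * downgrades it to read-only without ever re-granting execute.  Per-mapping TLB
 * invalidations are issued asynchronously and tlb_flush_needed tracks whether a
 * final synchronization is required.
 */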
void
pmap_page_protect_options(
	ppnum_t ppnum,
	vm_prot_t prot,
	unsigned int options,
	__unused void *arg)
{
	pmap_paddr_t phys = ptoa(ppnum);

	assert(ppnum != vm_page_fictitious_addr);

	/* Only work with managed pages. */
	if (!pa_valid(phys)) {
		return;
	}

	/*
	 * Determine the new protection.
	 */
	if (prot == VM_PROT_ALL) {
		return;         /* nothing to do */
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);

	pmap_page_protect_options_internal(ppnum, prot, options);

	PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
}
/*
 * Indicates if the pmap layer enforces some additional restrictions on the
 * given set of protections.
 */
bool
pmap_has_prot_policy(__unused vm_prot_t prot)
{
	return FALSE;
}

/*
 *	Set the physical protection on the
 *	specified range of this map as requested.
 *	VERY IMPORTANT: Will not increase permissions.
 *	VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
 */
void
pmap_protect(
	pmap_t pmap,
	vm_map_address_t b,
	vm_map_address_t e,
	vm_prot_t prot)
{
	pmap_protect_options(pmap, b, e, prot, 0, NULL);
}
MARK_AS_PMAP_TEXT static void
pmap_protect_options_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	vm_prot_t prot,
	unsigned int options,
	__unused void *args)
{
	const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
	tt_entry_t *tte_p;
	pt_entry_t *bpte_p, *epte_p;
	pt_entry_t *pte_p;
	boolean_t set_NX = TRUE;
#if (__ARM_VMSA__ > 7)
	boolean_t set_XO = FALSE;
#endif
	boolean_t should_have_removed = FALSE;
	bool need_strong_sync = false;

	if (__improbable(end < start)) {
		panic("%s called with bogus range: %p, %p", __func__, (void*)start, (void*)end);
	}

#if DEVELOPMENT || DEBUG
	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
			should_have_removed = TRUE;
		}
	} else
#endif
	{
		/* Determine the new protection. */
		switch (prot) {
#if (__ARM_VMSA__ > 7)
		case VM_PROT_EXECUTE:
			set_XO = TRUE;
			/* fall through */
#endif
		case VM_PROT_READ:
		case VM_PROT_READ | VM_PROT_EXECUTE:
			break;
		case VM_PROT_READ | VM_PROT_WRITE:
		case VM_PROT_ALL:
			return;         /* nothing to do */
		default:
			should_have_removed = TRUE;
		}
	}

	if (should_have_removed) {
		panic("%s: should have been a remove operation, "
		    "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
		    __func__,
		    pmap, (void *)start, (void *)end, prot, options, args);
	}

#if DEVELOPMENT || DEBUG
	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
	if ((prot & VM_PROT_EXECUTE))
#endif
	{
		set_NX = FALSE;
	} else {
		set_NX = TRUE;
	}

	VALIDATE_PMAP(pmap);

	tte_p = pmap_tte(pmap, start);

	if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		bpte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte_p = &bpte_p[ptenum(start)];
		epte_p = bpte_p + arm_atop(end - start);

		for (pte_p = bpte_p;
		    pte_p < epte_p;
		    pte_p += PAGE_SIZE / ARM_PGBYTES) {
			pt_entry_t spte;
			pt_entry_t tmplate;
#if DEVELOPMENT || DEBUG
			boolean_t force_write = FALSE;
#endif

			spte = *pte_p;

			if ((spte == ARM_PTE_TYPE_FAULT) ||
			    ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
				continue;
			}

			pmap_paddr_t pa;
			int pai = 0;
			boolean_t managed = FALSE;

			while (!managed) {
				/*
				 * It may be possible for the pte to transition from managed
				 * to unmanaged in this timeframe; for now, elide the assert.
				 * We should break out as a consequence of checking pa_valid.
				 */
				// assert(!ARM_PTE_IS_COMPRESSED(spte));
				pa = pte_to_pa(spte);
				if (!pa_valid(pa)) {
					break;
				}
				pai = (int)pa_index(pa);
				LOCK_PVH(pai);
				spte = *pte_p;
				pa = pte_to_pa(spte);
				if (pai == (int)pa_index(pa)) {
					managed = TRUE;
					break; // Leave the PVH locked as we will unlock it after we free the PTE
				}
				UNLOCK_PVH(pai);
			}

			if ((spte == ARM_PTE_TYPE_FAULT) ||
			    ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
				continue;
			}

			if (pmap == kernel_pmap) {
#if DEVELOPMENT || DEBUG
				if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
					force_write = TRUE;
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
				} else
#endif
				{
					tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
				}
			} else {
#if DEVELOPMENT || DEBUG
				if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
					force_write = TRUE;
					tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pt_attr));
				} else
#endif
				{
					tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
				}
			}

			/*
			 * XXX Removing "NX" would
			 * grant "execute" access
			 * immediately, bypassing any
			 * checks VM might want to do
			 * in its soft fault path.
			 * pmap_protect() and co. are
			 * not allowed to increase
			 * access permissions.
			 */
			if (set_NX) {
				tmplate |= pt_attr_leaf_xn(pt_attr);
			} else {
#if (__ARM_VMSA__ > 7)
				if (pmap == kernel_pmap) {
					/* do NOT clear "PNX"! */
					tmplate |= ARM_PTE_NX;
				} else {
					/* do NOT clear "NX"! */
					tmplate |= pt_attr_leaf_x(pt_attr);
					if (set_XO) {
						tmplate &= ~ARM_PTE_APMASK;
						tmplate |= pt_attr_leaf_rona(pt_attr);
					}
				}
#endif
			}

#if DEVELOPMENT || DEBUG
			if (force_write) {
				/*
				 * TODO: Run CS/Monitor checks here.
				 */
				if (managed) {
					/*
					 * We are marking the page as writable,
					 * so we consider it to be modified and
					 * referenced.
					 */
					pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
					tmplate |= ARM_PTE_AF;

					if (IS_REFFAULT_PAGE(pai)) {
						CLR_REFFAULT_PAGE(pai);
					}

					if (IS_MODFAULT_PAGE(pai)) {
						CLR_MODFAULT_PAGE(pai);
					}
				}
			} else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
				/*
				 * An immediate request for anything other than
				 * write should still mark the page as
				 * referenced if managed.
				 */
				if (managed) {
					pa_set_bits(pa, PP_ATTR_REFERENCED);
					tmplate |= ARM_PTE_AF;

					if (IS_REFFAULT_PAGE(pai)) {
						CLR_REFFAULT_PAGE(pai);
					}
				}
			}
#endif

			/* We do not expect to write fast fault the entry. */
			pte_set_was_writeable(tmplate, false);

			WRITE_PTE_FAST(pte_p, tmplate);

			if (managed) {
				ASSERT_PVH_LOCKED(pai);
				UNLOCK_PVH(pai);
			}
		}

		FLUSH_PTE_RANGE_STRONG(bpte_p, epte_p);
		PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
	}
}
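/*
 * pmap_protect_options_internal() never increases access: a request that would end
 * at VM_PROT_NONE is expected to arrive as a removal instead (hence the
 * should_have_removed panic), and existing NX/PNX bits are preserved rather than
 * cleared.
 */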
void
pmap_protect_options(
	pmap_t pmap,
	vm_map_address_t b,
	vm_map_address_t e,
	vm_prot_t prot,
	unsigned int options,
	__unused void *args)
{
	vm_map_address_t l, beg;

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	if ((b | e) & PAGE_MASK) {
		panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
		    pmap, (uint64_t)b, (uint64_t)e);
	}

#if DEVELOPMENT || DEBUG
	if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
		if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
			pmap_remove_options(pmap, b, e, options);
			return;
		}
	} else
#endif
	{
		/* Determine the new protection. */
		switch (prot) {
		case VM_PROT_EXECUTE:
		case VM_PROT_READ:
		case VM_PROT_READ | VM_PROT_EXECUTE:
			break;
		case VM_PROT_READ | VM_PROT_WRITE:
		case VM_PROT_ALL:
			return;         /* nothing to do */
		default:
			pmap_remove_options(pmap, b, e, options);
			return;
		}
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
	    VM_KERNEL_ADDRHIDE(e));

	beg = b;

	while (beg < e) {
		l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));

		if (l > e) {
			l = e;
		}

		pmap_protect_options_internal(pmap, beg, l, prot, options, args);

		beg = l;
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
}
/* Map a (possibly) autogenned block */
kern_return_t
pmap_map_block(
	pmap_t pmap,
	addr64_t va,
	ppnum_t pa,
	uint32_t size,
	vm_prot_t prot,
	int attr,
	__unused unsigned int flags)
{
	kern_return_t kr;
	addr64_t original_va = va;
	uint32_t page;

	for (page = 0; page < size; page++) {
		kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);

		if (kr != KERN_SUCCESS) {
			/*
			 * This will panic for now, as it is unclear that
			 * removing the mappings is correct.
			 */
			panic("%s: failed pmap_enter, "
			    "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
			    __func__,
			    pmap, va, pa, size, prot, flags);

			pmap_remove(pmap, original_va, va - original_va);
			return kr;
		}

		va += PAGE_SIZE;
		pa++;
	}

	return KERN_SUCCESS;
}
/*
 *	Insert the given physical page (p) at
 *	the specified virtual address (v) in the
 *	target physical map with the protection requested.
 *
 *	If specified, the page will be wired down, meaning
 *	that the related pte can not be reclaimed.
 *
 *	NB:  This is the only routine which MAY NOT lazy-evaluate
 *	or lose information.  That is, this routine must actually
 *	insert this page into the given map eventually (must make
 *	forward progress eventually).
 */
kern_return_t
pmap_enter(
	pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired)
{
	return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
}
static void
pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
{
	if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
		SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
		if (pte & ARM_PTE_WIRED) {
			OSAddAtomic16(1, ptd_wiredcnt_ptr);
			pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
		} else {
			OSAddAtomic16(-1, ptd_wiredcnt_ptr);
			pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
			OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
		}
	}

	if (*pte_p != ARM_PTE_TYPE_FAULT &&
	    !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
		WRITE_PTE_STRONG(pte_p, pte);
		PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE, false);
	} else {
		WRITE_PTE(pte_p, pte);
		__builtin_arm_isb(ISB_SY);
	}

	PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + PAGE_SIZE), pte);
}
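/*
 * pmap_enter_pte() keeps the page table's wired count and the pmap wired statistics
 * in sync for user pmaps, and picks between a strong PTE write plus TLB update (when
 * a valid, non-compressed mapping is being replaced) and a plain write followed by
 * an ISB.
 */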
MARK_AS_PMAP_TEXT static pt_entry_t
wimg_to_pte(unsigned int wimg)
{
	pt_entry_t pte;

	switch (wimg & (VM_WIMG_MASK)) {
	case VM_WIMG_IO:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_POSTED:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_POSTED_REORDERED:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_POSTED_COMBINED_REORDERED:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_WCOMB:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
		pte |= ARM_PTE_NX | ARM_PTE_PNX;
		break;
	case VM_WIMG_WTHRU:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#endif
		break;
	case VM_WIMG_COPYBACK:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#endif
		break;
	case VM_WIMG_INNERWBACK:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_INNER_MEMORY);
#endif
		break;
	default:
		pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
#if (__ARM_VMSA__ > 7)
		pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#endif
		break;
	}

	return pte;
}
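/*
 * Sketch of how the translation above is consumed; this mirrors the pattern used by
 * pmap_enter_options_internal() below, which clears any previously set attribute
 * index and shareability bits from the PTE template before or-ing in the result.
 */
#if 0
	pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
	pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
#endif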
static boolean_t
pmap_enter_pv(
	pmap_t pmap,
	pt_entry_t *pte_p,
	int pai,
	unsigned int options,
	pv_entry_t **pve_p,
	boolean_t *is_altacct)
{
	pv_entry_t **pv_h;
	pv_h = pai_to_pvh(pai);
	boolean_t first_cpu_mapping;

	ASSERT_PVH_LOCKED(pai);

	vm_offset_t pvh_flags = pvh_get_flags(pv_h);

#ifdef PVH_FLAG_IOMMU
	/* An IOMMU mapping may already be present for a page that hasn't yet
	 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
	 * if this is the first CPU mapping.  We base internal/reusable
	 * accounting on the options specified for the first CPU mapping.
	 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
	 * as there are *any* mappings of the page.  The accounting for a
	 * page should not need to change until the page is recycled by the
	 * VM layer, and we assert that there are no mappings when a page
	 * is recycled.  An IOMMU mapping of a freed/recycled page is
	 * considered a security violation & potential DMA corruption path.*/
	first_cpu_mapping = ((pmap != NULL) && !(pvh_flags & PVH_FLAG_CPU));
	if (first_cpu_mapping) {
		pvh_flags |= PVH_FLAG_CPU;
	}
#else
	first_cpu_mapping = pvh_test_type(pv_h, PVH_TYPE_NULL);
#endif

	if (first_cpu_mapping) {
		if (options & PMAP_OPTIONS_INTERNAL) {
			SET_INTERNAL_PAGE(pai);
		} else {
			CLR_INTERNAL_PAGE(pai);
		}
		if ((options & PMAP_OPTIONS_INTERNAL) &&
		    (options & PMAP_OPTIONS_REUSABLE)) {
			SET_REUSABLE_PAGE(pai);
		} else {
			CLR_REUSABLE_PAGE(pai);
		}
	}

	if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
		pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
		if (pmap != NULL && pmap != kernel_pmap &&
		    ((options & PMAP_OPTIONS_ALT_ACCT) ||
		    PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
		    IS_INTERNAL_PAGE(pai)) {
			/*
			 * Make a note to ourselves that this mapping is using alternative
			 * accounting.  We'll need this in order to know which ledger to
			 * debit when the mapping is removed.
			 *
			 * The altacct bit must be set while the pv head is locked.  Defer
			 * the ledger accounting until after we've dropped the lock.
			 */
			SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
			*is_altacct = TRUE;
		} else {
			CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
		}
	} else {
		if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
			pt_entry_t *pte1_p;

			/*
			 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
			 */
			pte1_p = pvh_ptep(pv_h);
			pvh_set_flags(pv_h, pvh_flags);
			if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
				return FALSE;
			}

			pve_set_ptep(*pve_p, pte1_p);
			(*pve_p)->pve_next = PV_ENTRY_NULL;

			if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
				/*
				 * transfer "altacct" from
				 * pp_attr to this pve
				 */
				CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
				SET_ALTACCT_PAGE(pai, *pve_p);
			}
			pvh_update_head(pv_h, *pve_p, PVH_TYPE_PVEP);
			*pve_p = PV_ENTRY_NULL;
		} else if (!pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
			panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
			    __func__, *pv_h, pte_p, pmap, pv_h);
		}
		/*
		 * Set up pv_entry for this new mapping and then
		 * add it to the list for this physical page.
		 */
		pvh_set_flags(pv_h, pvh_flags);
		if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
			return FALSE;
		}

		pve_set_ptep(*pve_p, pte_p);
		(*pve_p)->pve_next = PV_ENTRY_NULL;

		pvh_add(pv_h, *pve_p);

		if (pmap != NULL && pmap != kernel_pmap &&
		    ((options & PMAP_OPTIONS_ALT_ACCT) ||
		    PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
		    IS_INTERNAL_PAGE(pai)) {
			/*
			 * Make a note to ourselves that this
			 * mapping is using alternative
			 * accounting.  We'll need this in order
			 * to know which ledger to debit when
			 * the mapping is removed.
			 *
			 * The altacct bit must be set while
			 * the pv head is locked.  Defer the
			 * ledger accounting until after we've
			 * dropped the lock.
			 */
			SET_ALTACCT_PAGE(pai, *pve_p);
			*is_altacct = TRUE;
		}

		*pve_p = PV_ENTRY_NULL;
	}

	pvh_set_flags(pv_h, pvh_flags);

	return TRUE;
}
MARK_AS_PMAP_TEXT static kern_return_t
pmap_enter_options_internal(
	pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options)
{
	pmap_paddr_t pa = ptoa(pn);
	pt_entry_t pte;
	pt_entry_t spte;
	pt_entry_t *pte_p;
	pv_entry_t *pve_p;
	boolean_t set_NX;
	boolean_t set_XO = FALSE;
	boolean_t refcnt_updated;
	boolean_t wiredcnt_updated;
	unsigned int wimg_bits;
	boolean_t was_compressed, was_alt_compressed;
	kern_return_t kr = KERN_SUCCESS;

	VALIDATE_PMAP(pmap);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	if ((v) & PAGE_MASK) {
		panic("pmap_enter_options() pmap %p v 0x%llx\n",
		    pmap, (uint64_t)v);
	}

	if ((prot & VM_PROT_EXECUTE) && (prot & VM_PROT_WRITE) && (pmap == kernel_pmap)) {
		panic("pmap_enter_options(): WX request on kernel_pmap");
	}

#if DEVELOPMENT || DEBUG
	if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
#else
	if ((prot & VM_PROT_EXECUTE))
#endif
	{ set_NX = FALSE;} else {
		set_NX = TRUE;
	}

#if (__ARM_VMSA__ > 7)
	if (prot == VM_PROT_EXECUTE) {
		set_XO = TRUE;
	}
#endif

	assert(pn != vm_page_fictitious_addr);

	refcnt_updated = FALSE;
	wiredcnt_updated = FALSE;
	pve_p = PV_ENTRY_NULL;
	was_compressed = FALSE;
	was_alt_compressed = FALSE;

	/*
	 *	Expand pmap to include this pte.  Assume that
	 *	pmap is always expanded to include enough hardware
	 *	pages to map one VM page.
	 */
	while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
		/* Must unlock to expand the pmap. */
		kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);

		if (kr != KERN_SUCCESS) {
			return kr;
		}
	}

	if (options & PMAP_OPTIONS_NOENTER) {
		return KERN_SUCCESS;
	}

Pmap_enter_retry:

	spte = *pte_p;

	if (ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
		/*
		 * "pmap" should be locked at this point, so this should
		 * not race with another pmap_enter() or pmap_remove_range().
		 */
		assert(pmap != kernel_pmap);

		/* one less "compressed" */
		OSAddAtomic64(-1, &pmap->stats.compressed);
		pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
		    PAGE_SIZE);

		was_compressed = TRUE;
		if (spte & ARM_PTE_COMPRESSED_ALT) {
			was_alt_compressed = TRUE;
			pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
		} else {
			/* was part of the footprint */
			pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
		}

		/* clear "compressed" marker */
		/* XXX is it necessary since we're about to overwrite it ? */
		WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
		spte = ARM_PTE_TYPE_FAULT;

		/*
		 * We're replacing a "compressed" marker with a valid PTE,
		 * so no change for "refcnt".
		 */
		refcnt_updated = TRUE;
	}

	if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
		pmap_remove_range(pmap, v, pte_p, pte_p + 1, 0);
	}

	pte = pa_to_pte(pa) | ARM_PTE_TYPE;

	/* Don't bother tracking wiring for kernel PTEs.  We use ARM_PTE_WIRED to track
	 * wired memory statistics for user pmaps, but kernel PTEs are assumed
	 * to be wired in nearly all cases.  For VM layer functionality, the wired
	 * count in vm_page_t is sufficient. */
	if (wired && pmap != kernel_pmap) {
		pte |= ARM_PTE_WIRED;
	}

	if (set_NX) {
		pte |= pt_attr_leaf_xn(pt_attr);
	} else {
#if (__ARM_VMSA__ > 7)
		if (pmap == kernel_pmap) {
			/* ... */
		} else {
			pte |= pt_attr_leaf_x(pt_attr);
		}
#endif
	}

	if (pmap == kernel_pmap) {
#if __ARM_KERNEL_PROTECT__
		/* ... */
#endif /* __ARM_KERNEL_PROTECT__ */
		if (prot & VM_PROT_WRITE) {
			pte |= ARM_PTE_AP(AP_RWNA);
			pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
		} else {
			pte |= ARM_PTE_AP(AP_RONA);
			pa_set_bits(pa, PP_ATTR_REFERENCED);
		}
#if (__ARM_VMSA__ == 7)
		if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH)) {
			pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
		}
#endif
	} else {
		if (!pmap->nested) {
			/* ... */
		} else if ((pmap->nested_region_asid_bitmap)
		    && (v >= pmap->nested_region_subord_addr)
		    && (v < (pmap->nested_region_subord_addr + pmap->nested_region_size))) {
			unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));

			if ((pmap->nested_region_asid_bitmap)
			    && testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
				/* ... */
			}
		}

		if (pmap->nested_pmap != NULL) {
			vm_map_address_t nest_vaddr;
			pt_entry_t *nest_pte_p;

			nest_vaddr = v - pmap->nested_region_grand_addr + pmap->nested_region_subord_addr;

			if ((nest_vaddr >= pmap->nested_region_subord_addr)
			    && (nest_vaddr < (pmap->nested_region_subord_addr + pmap->nested_region_size))
			    && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
			    && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
			    && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p, nest_pte_p))
			    && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
				unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));

				if ((pmap->nested_pmap->nested_region_asid_bitmap)
				    && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
					panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
					    nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
				}
			}
		}

		if (prot & VM_PROT_WRITE) {
			if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
				if (fault_type & VM_PROT_WRITE) {
					if (set_XO) {
						pte |= pt_attr_leaf_rwna(pt_attr);
					} else {
						pte |= pt_attr_leaf_rw(pt_attr);
					}
					pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
				} else {
					if (set_XO) {
						pte |= pt_attr_leaf_rona(pt_attr);
					} else {
						pte |= pt_attr_leaf_ro(pt_attr);
					}
					pa_set_bits(pa, PP_ATTR_REFERENCED);
					pte_set_was_writeable(pte, true);
				}
			} else {
				if (set_XO) {
					pte |= pt_attr_leaf_rwna(pt_attr);
				} else {
					pte |= pt_attr_leaf_rw(pt_attr);
				}
				pa_set_bits(pa, PP_ATTR_REFERENCED);
			}
		} else {
			if (set_XO) {
				pte |= pt_attr_leaf_rona(pt_attr);
			} else {
				pte |= pt_attr_leaf_ro(pt_attr);
			}
			pa_set_bits(pa, PP_ATTR_REFERENCED);
		}
	}

	volatile uint16_t *refcnt = NULL;
	volatile uint16_t *wiredcnt = NULL;
	if (pmap != kernel_pmap) {
		refcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
		wiredcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
		/* Bump the wired count to keep the PTE page from being reclaimed.  We need this because
		 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
		 * a new PV entry. */
		if (!wiredcnt_updated) {
			OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
			wiredcnt_updated = TRUE;
		}
		if (!refcnt_updated) {
			OSAddAtomic16(1, (volatile int16_t*)refcnt);
			refcnt_updated = TRUE;
		}
	}

	if (pa_valid(pa)) {
		int pai;
		boolean_t is_altacct, is_internal;

		is_internal = FALSE;
		is_altacct = FALSE;

		pai = (int)pa_index(pa);

		LOCK_PVH(pai);

Pmap_enter_loop:
		if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
			wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
		} else {
			wimg_bits = pmap_cache_attributes(pn);
		}

		/* We may be retrying this operation after dropping the PVH lock.
		 * Cache attributes for the physical page may have changed while the lock
		 * was dropped, so clear any cache attributes we may have previously set
		 * in the PTE template. */
		pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
		pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);

		if (pte == *pte_p) {
			/*
			 * This pmap_enter operation has been completed by another thread
			 * undo refcnt on pt and return
			 */
			UNLOCK_PVH(pai);
			goto Pmap_enter_cleanup;
		} else if (pte_to_pa(*pte_p) == pa) {
			pmap_enter_pte(pmap, pte_p, pte, v);
			UNLOCK_PVH(pai);
			goto Pmap_enter_cleanup;
		} else if (*pte_p != ARM_PTE_TYPE_FAULT) {
			/*
			 * pte has been modified by another thread
			 * hold refcnt on pt and retry pmap_enter operation
			 */
			UNLOCK_PVH(pai);
			goto Pmap_enter_retry;
		}
		if (!pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct)) {
			goto Pmap_enter_loop;
		}

		pmap_enter_pte(pmap, pte_p, pte, v);

		if (pmap != kernel_pmap) {
			if (IS_REUSABLE_PAGE(pai) &&
			    !is_altacct) {
				assert(IS_INTERNAL_PAGE(pai));
				OSAddAtomic(+1, &pmap->stats.reusable);
				PMAP_STATS_PEAK(pmap->stats.reusable);
			} else if (IS_INTERNAL_PAGE(pai)) {
				OSAddAtomic(+1, &pmap->stats.internal);
				PMAP_STATS_PEAK(pmap->stats.internal);
				is_internal = TRUE;
			} else {
				OSAddAtomic(+1, &pmap->stats.external);
				PMAP_STATS_PEAK(pmap->stats.external);
			}
		}

		UNLOCK_PVH(pai);

		if (pmap != kernel_pmap) {
			pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);

			if (is_internal) {
				/*
				 * Make corresponding adjustments to
				 * phys_footprint statistics.
				 */
				pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
				if (is_altacct) {
					/*
					 * If this page is internal and
					 * in an IOKit region, credit
					 * the task's total count of
					 * dirty, internal IOKit pages.
					 * It should *not* count towards
					 * the task's total physical
					 * memory footprint, because
					 * this entire region was
					 * already billed to the task
					 * at the time the mapping was
					 * created.
					 *
					 * Put another way, this is
					 * internal++ and
					 * alternate_accounting++, so
					 * net effect on phys_footprint
					 * is 0.  That means: don't
					 * touch phys_footprint here.
					 */
					pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
				} else {
					pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
				}
			}
		}

		OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
		if (pmap->stats.resident_count > pmap->stats.resident_max) {
			pmap->stats.resident_max = pmap->stats.resident_count;
		}
	} else {
		if (prot & VM_PROT_EXECUTE) {
			kr = KERN_FAILURE;
			goto Pmap_enter_cleanup;
		}

		wimg_bits = pmap_cache_attributes(pn);
		if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
			wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
		}

		pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);

		pmap_enter_pte(pmap, pte_p, pte, v);
	}

	goto Pmap_enter_return;

Pmap_enter_cleanup:

	if (refcnt != NULL) {
		assert(refcnt_updated);
		if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
			panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
		}
	}

Pmap_enter_return:

#if CONFIG_PGTRACE
	if (pgtrace_enabled) {
		// Clone and invalidate original mapping if eligible
		for (int i = 0; i < PAGE_RATIO; i++) {
			pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES * i, 0, 0);
		}
	}
#endif

	if (pve_p != PV_ENTRY_NULL) {
		/* ... */
	}

	if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
		panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
	}

	return kr;
}
kern_return_t
pmap_enter_options(
	pmap_t pmap,
	vm_map_address_t v,
	ppnum_t pn,
	vm_prot_t prot,
	vm_prot_t fault_type,
	unsigned int flags,
	boolean_t wired,
	unsigned int options,
	__unused void *arg)
{
	kern_return_t kr = KERN_FAILURE;

	PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);

	kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);
	pv_water_mark_check();

	PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);

	return kr;
}
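/*
 * Illustrative sketch only (not part of the original source): a minimal call through
 * the function above to establish one wired, writable mapping.  The pmap, virtual
 * address and physical page number are assumed to come from the caller; flags of 0
 * request default cacheability and a fault_type of VM_PROT_NONE indicates that no
 * access has been attempted yet.
 */
#if 0
static kern_return_t
example_enter_wired_page(pmap_t pmap, vm_map_address_t v, ppnum_t pn)
{
	return pmap_enter_options(pmap, v, pn, VM_PROT_READ | VM_PROT_WRITE,
	    VM_PROT_NONE, 0, TRUE, 0, NULL);
}
#endif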
/*
 *	Routine:	pmap_change_wiring
 *	Function:	Change the wiring attribute for a map/virtual-address
 *			pair.
 *	In/out conditions:
 *		The mapping must already exist in the pmap.
 */
MARK_AS_PMAP_TEXT static void
pmap_change_wiring_internal(
	pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired)
{
	pt_entry_t *pte_p;
	pmap_paddr_t pa;

	/* Don't bother tracking wiring for kernel PTEs.  We use ARM_PTE_WIRED to track
	 * wired memory statistics for user pmaps, but kernel PTEs are assumed
	 * to be wired in nearly all cases.  For VM layer functionality, the wired
	 * count in vm_page_t is sufficient. */
	if (pmap == kernel_pmap) {
		return;
	}
	VALIDATE_USER_PMAP(pmap);

	pte_p = pmap_pte(pmap, v);
	assert(pte_p != PT_ENTRY_NULL);
	pa = pte_to_pa(*pte_p);

	while (pa_valid(pa)) {
		pmap_paddr_t new_pa;

		LOCK_PVH((int)pa_index(pa));
		new_pa = pte_to_pa(*pte_p);

		if (pa == new_pa) {
			break;
		}

		UNLOCK_PVH((int)pa_index(pa));
		pa = new_pa;
	}

	if (wired && !pte_is_wired(*pte_p)) {
		pte_set_wired(pte_p, wired);
		OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
		pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
	} else if (!wired && pte_is_wired(*pte_p)) {
		PMAP_STATS_ASSERTF(pmap->stats.wired_count >= 1, pmap, "stats.wired_count %d", pmap->stats.wired_count);
		pte_set_wired(pte_p, wired);
		OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
		pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
	}

	if (pa_valid(pa)) {
		UNLOCK_PVH((int)pa_index(pa));
	}
}

void
pmap_change_wiring(
	pmap_t pmap,
	vm_map_address_t v,
	boolean_t wired)
{
	pmap_change_wiring_internal(pmap, v, wired);
}
MARK_AS_PMAP_TEXT static ppnum_t
pmap_find_phys_internal(
	pmap_t pmap,
	addr64_t va)
{
	ppnum_t ppn = 0;

	VALIDATE_PMAP(pmap);

	if (pmap != kernel_pmap) {
		/* ... */
	}

	ppn = pmap_vtophys(pmap, va);

	if (pmap != kernel_pmap) {
		/* ... */
	}

	return ppn;
}

ppnum_t
pmap_find_phys(
	pmap_t pmap,
	addr64_t va)
{
	pmap_paddr_t pa = 0;

	if (pmap == kernel_pmap) {
		/* ... */
	} else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
		/* ... */
	}

	if (pa) {
		return (ppnum_t)(pa >> PAGE_SHIFT);
	}

	/* ... */
	return pmap_find_phys_internal(pmap, va);
	/* ... */
	return pmap_vtophys(pmap, va);
}
pmap_paddr_t
kvtophys(
	vm_offset_t va)
{
	pmap_paddr_t pa;

	/* ... */
	pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va)) << PAGE_SHIFT;
	if (pa) {
		pa |= (va & PAGE_MASK);
	}

	return (pmap_paddr_t)pa;
}

ppnum_t
pmap_vtophys(
	pmap_t pmap,
	addr64_t va)
{
	ppnum_t ppn = 0;

	if ((va < pmap->min) || (va >= pmap->max)) {
		return 0;
	}

#if (__ARM_VMSA__ == 7)
	tt_entry_t *tte_p, tte;
	pt_entry_t *pte_p;

	tte_p = pmap_tte(pmap, va);
	if (tte_p == (tt_entry_t *) NULL) {
		return (ppnum_t) 0;
	}

	tte = *tte_p;
	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		pte_p = (pt_entry_t *) ttetokv(tte) + ptenum(va);
		ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
#if DEVELOPMENT || DEBUG
		if (ppn != 0 &&
		    ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
			panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
			    pmap, va, pte_p, (uint64_t) (*pte_p), ppn);
		}
#endif /* DEVELOPMENT || DEBUG */
	} else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
		if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER) {
			ppn = (ppnum_t) atop(suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK));
		} else {
			ppn = (ppnum_t) atop(sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK));
		}
	}
#else
	tt_entry_t *ttp;
	tt_entry_t tte;

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	/* Level 0 currently unused */

	/* Get first-level (1GB) entry */
	ttp = pmap_tt1e(pmap, va);
	tte = *ttp;
	if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
		return 0;
	}

	tte = ((tt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, va)];

	if ((tte & ARM_TTE_VALID) != (ARM_TTE_VALID)) {
		return 0;
	}

	if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
		ppn = (ppnum_t) atop((tte & ARM_TTE_BLOCK_L2_MASK) | (va & ARM_TT_L2_OFFMASK));
	} else {
		tte = ((tt_entry_t *) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, va)];
		ppn = (ppnum_t) atop((tte & ARM_PTE_MASK) | (va & ARM_TT_L3_OFFMASK));
	}
#endif

	return ppn;
}
MARK_AS_PMAP_TEXT static vm_offset_t
pmap_extract_internal(
	pmap_t pmap,
	vm_map_address_t va)
{
	pmap_paddr_t pa = 0;
	ppnum_t ppn = 0;

	VALIDATE_PMAP(pmap);

	ppn = pmap_vtophys(pmap, va);

	if (ppn != 0) {
		pa = ptoa(ppn) | ((va) & PAGE_MASK);
	}

	return pa;
}

/*
 *	Routine:	pmap_extract
 *	Function:
 *		Extract the physical page address associated
 *		with the given map/virtual_address pair.
 */
vm_offset_t
pmap_extract(
	pmap_t pmap,
	vm_map_address_t va)
{
	pmap_paddr_t pa = 0;

	if (pmap == kernel_pmap) {
		/* ... */
	} else if (pmap == vm_map_pmap(current_thread()->map)) {
		/* ... */
	}

	if (pa) {
		return pa;
	}

	return pmap_extract_internal(pmap, va);
}
/*
 *	pmap_init_pte_page - Initialize a page table page.
 */
void
pmap_init_pte_page(
	pmap_t pmap,
	pt_entry_t *pte_p,
	vm_offset_t va,
	unsigned int ttlevel,
	boolean_t alloc_ptd,
	boolean_t clean_page)
{
	pt_desc_t *ptdp = NULL;
	vm_offset_t *pvh;

	pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));

	if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
		if (alloc_ptd) {
			/*
			 * This path should only be invoked from arm_vm_init.  If we are emulating 16KB pages
			 * on 4KB hardware, we may already have allocated a page table descriptor for a
			 * bootstrap request, so we check for an existing PTD here.
			 */
			ptdp = ptd_alloc(pmap, true);
			pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
		} else {
			panic("pmap_init_pte_page(): pte_p %p", pte_p);
		}
	} else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
		ptdp = (pt_desc_t *)(pvh_list(pvh));
	} else {
		panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
	}

	if (clean_page) {
		bzero(pte_p, ARM_PGBYTES);
		// below barrier ensures the page zeroing is visible to PTW before
		// it is linked to the PTE of previous level
		__builtin_arm_dmb(DMB_ISHST);
	}

	ptd_init(ptdp, pmap, va, ttlevel, pte_p);
}
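/*
 * The DMB ISHST above orders the zeroing of the new table page before any later
 * store that links the page into the higher-level table, so the hardware table
 * walker can never observe stale contents through the new TTE.
 */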
/*
 *	Routine:	pmap_expand
 *
 *	Expands a pmap to be able to map the specified virtual address.
 *
 *	Allocates new memory for the default (COARSE) translation table
 *	entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
 *	also allocates space for the corresponding pv entries.
 *
 *	Nothing should be locked.
 */
static kern_return_t
pmap_expand(
    pmap_t pmap,
    vm_map_address_t v,
    unsigned int options,
    unsigned int level)
{
    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

#if (__ARM_VMSA__ == 7)
    vm_offset_t     pa;
    tt_entry_t      *tte_p;
    tt_entry_t      *tt_p;
    unsigned int    i;

    while (tte_index(pmap, pt_attr, v) >= pmap->tte_index_max) {
        tte_p = pmap_tt1_allocate(pmap, 2 * ARM_PGBYTES, ((options & PMAP_OPTIONS_NOWAIT)? PMAP_TT_ALLOCATE_NOWAIT : 0));
        if (tte_p == (tt_entry_t *)0) {
            return KERN_RESOURCE_SHORTAGE;
        }

        PMAP_LOCK(pmap);
        if (pmap->tte_index_max > NTTES) {
            pmap_tt1_deallocate(pmap, tte_p, 2 * ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
            PMAP_UNLOCK(pmap);
            break;
        }

        pmap_simple_lock(&pmap->tt1_lock);
        for (i = 0; i < pmap->tte_index_max; i++) {
            tte_p[i] = pmap->tte[i];
        }
        for (i = NTTES; i < 2 * NTTES; i++) {
            tte_p[i] = ARM_TTE_TYPE_FAULT;
        }

        FLUSH_PTE_RANGE(tte_p, tte_p + (2 * NTTES));  // DMB

        /* Order is important here, so that pmap_switch_user_ttb() sees things
         * in the correct sequence.
         * --update of pmap->tte[p] must happen prior to updating pmap->tte_index_max,
         *   separated by at least a DMB, so that context switch does not see a 1 GB
         *   L1 table with a 2GB size.
         * --update of pmap->tte[p] must also happen prior to setting pmap->prev_tte,
         *   separated by at least a DMB, so that context switch does not see an L1
         *   table to be freed without also seeing its replacement.*/

        tt_entry_t *prev_tte = pmap->tte;

        pmap->tte = tte_p;
        pmap->ttep = ml_static_vtop((vm_offset_t)pmap->tte);

        __builtin_arm_dmb(DMB_ISH);

        pmap->tte_index_max = 2 * NTTES;
        pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);

        for (i = 0; i < NTTES; i++) {
            prev_tte[i] = ARM_TTE_TYPE_FAULT;
        }

        /* We need a strong flush here because a TLB flush will be
         * issued from pmap_switch_user_ttb() as soon as this pmap
         * is no longer active on any CPU.  We need to ensure all
         * prior stores to the TTE region have retired before that. */
        FLUSH_PTE_RANGE_STRONG(prev_tte, prev_tte + NTTES);  // DSB
        pmap->prev_tte = prev_tte;

        pmap_simple_unlock(&pmap->tt1_lock);
        PMAP_UNLOCK(pmap);
        if (current_pmap() == pmap) {
            pmap_set_pmap(pmap, current_thread());
        }
    }

    if (level == 1) {
        return KERN_SUCCESS;
    }

    {
        tt_entry_t *tte_next_p;

        PMAP_LOCK(pmap);
        pa = 0;
        if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
            PMAP_UNLOCK(pmap);
            return KERN_SUCCESS;
        }
        tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
        for (i = 0, tte_next_p = tte_p; i < 4; i++) {
            if (tte_to_pa(*tte_next_p)) {
                pa = tte_to_pa(*tte_next_p);
                break;
            }
            tte_next_p++;
        }
        pa = pa & ~PAGE_MASK;
        if (pa) {
            tte_p = &pmap->tte[ttenum(v)];
            *tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
            PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
                VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
            PMAP_UNLOCK(pmap);
            return KERN_SUCCESS;
        }
        PMAP_UNLOCK(pmap);
    }
    v = v & ~ARM_TT_L1_PT_OFFMASK;

    while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
        /*
         *	Allocate a VM page for the level 2 page table entries.
         */
        while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
            if (options & PMAP_OPTIONS_NOWAIT) {
                return KERN_RESOURCE_SHORTAGE;
            }
            VM_PAGE_WAIT();
        }

        PMAP_LOCK(pmap);
        /*
         *	See if someone else expanded us first
         */
        if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
            tt_entry_t *tte_next_p;

            pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE, TRUE);
            pa = kvtophys((vm_offset_t)tt_p);
            tte_p = &pmap->tte[ttenum(v)];
            for (i = 0, tte_next_p = tte_p; i < 4; i++) {
                *tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
                PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
                    VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
                tte_next_p++;
                pa = pa + 0x400;
            }
            FLUSH_PTE_RANGE(tte_p, tte_p + 4);

            pa = 0x0ULL;
            tt_p = (tt_entry_t *)NULL;
        }
        PMAP_UNLOCK(pmap);
        if (tt_p != (tt_entry_t *)NULL) {
            pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
            tt_p = (tt_entry_t *)NULL;
        }
    }

    return KERN_SUCCESS;
#else
    pmap_paddr_t    pa;
    unsigned int    ttlevel = pt_attr_root_level(pt_attr);
    tt_entry_t      *tte_p;
    tt_entry_t      *tt_p;

    tt_p = (tt_entry_t *)NULL;

    for (; ttlevel < level; ttlevel++) {
        PMAP_LOCK(pmap);

        if (pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL) {
            PMAP_UNLOCK(pmap);
            while (pmap_tt_allocate(pmap, &tt_p, ttlevel + 1, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
                if (options & PMAP_OPTIONS_NOWAIT) {
                    return KERN_RESOURCE_SHORTAGE;
                }
                VM_PAGE_WAIT();
            }
            PMAP_LOCK(pmap);
            if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
                pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE, TRUE);
                pa = kvtophys((vm_offset_t)tt_p);
                tte_p = pmap_ttne(pmap, ttlevel, v);
                *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
                PMAP_TRACE(ttlevel + 1, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
                    VM_KERNEL_ADDRHIDE((v & ~pt_attr_ln_offmask(pt_attr, ttlevel)) + pt_attr_ln_size(pt_attr, ttlevel)), *tte_p);
                pa = 0x0ULL;
                tt_p = (tt_entry_t *)NULL;
            }
        }

        PMAP_UNLOCK(pmap);

        if (tt_p != (tt_entry_t *)NULL) {
            pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
            tt_p = (tt_entry_t *)NULL;
        }
    }

    return KERN_SUCCESS;
#endif
}
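/*
 * Illustrative sketch (hypothetical caller, not part of the original source):
 * expanding a pmap down to the leaf level before entering a mapping, honoring
 * PMAP_OPTIONS_NOWAIT so the caller can decide whether to block.
 */
#if 0
    kern_return_t kr;

    kr = pmap_expand(pmap, v, options & PMAP_OPTIONS_NOWAIT, PMAP_TT_MAX_LEVEL);
    if (kr == KERN_RESOURCE_SHORTAGE) {
        /* With PMAP_OPTIONS_NOWAIT set, retry later or fail the request. */
    }
#endif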
/*
 *	Routine:	pmap_collect
 *
 *	Garbage collects the physical map system for
 *	pages which are no longer used.
 *	Success need not be guaranteed -- that is, there
 *	may well be pages which are not referenced, but
 *	others may be collected.
 */
void
pmap_collect(pmap_t pmap)
{
    if (pmap == PMAP_NULL) {
        return;
    }

    PMAP_LOCK(pmap);
    if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
        /* TODO: Scan for vm page assigned to top level page tables with no reference */
    }
    PMAP_UNLOCK(pmap);

    return;
}

/*
 *	Routine:	pmap_gc
 *
 *	Pmap garbage collection
 *	Called by the pageout daemon when pages are scarce.
 */
void
pmap_gc(
    void)
{
    pmap_t      pmap, pmap_next;
    boolean_t   gc_wait;

    if (pmap_gc_allowed &&
        (pmap_gc_allowed_by_time_throttle ||
        pmap_gc_forced)) {
        pmap_gc_forced = FALSE;
        pmap_gc_allowed_by_time_throttle = FALSE;
        pmap_simple_lock(&pmaps_lock);
        pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
        while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
            if (!(pmap->gc_status & PMAP_GC_INFLIGHT)) {
                pmap->gc_status |= PMAP_GC_INFLIGHT;
            }

            pmap_simple_unlock(&pmaps_lock);

            pmap_collect(pmap);

            pmap_simple_lock(&pmaps_lock);
            gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
            pmap->gc_status &= ~(PMAP_GC_INFLIGHT | PMAP_GC_WAIT);
            pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
            if (gc_wait) {
                if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next)) {
                    pmap_next->gc_status |= PMAP_GC_INFLIGHT;
                }
                pmap_simple_unlock(&pmaps_lock);
                thread_wakeup((event_t) &pmap->gc_status);
                pmap_simple_lock(&pmaps_lock);
            }
            pmap = pmap_next;
        }
        pmap_simple_unlock(&pmaps_lock);
    }
}
/*
 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
 */
uint64_t
pmap_release_pages_fast(void)
{
    return 0;
}

/*
 *      By default, don't attempt pmap GC more frequently
 *      than once per minute.
 */
void
compute_pmap_gc_throttle(
    void *arg __unused)
{
    pmap_gc_allowed_by_time_throttle = TRUE;
}

/*
 * pmap_attribute_cache_sync(vm_offset_t pa)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 */
kern_return_t
pmap_attribute_cache_sync(
    ppnum_t pp,
    vm_size_t size,
    __unused vm_machine_attribute_t attribute,
    __unused vm_machine_attribute_val_t * value)
{
    if (size > PAGE_SIZE) {
        panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
    } else {
        cache_sync_page(pp);
    }

    return KERN_SUCCESS;
}
/*
 * pmap_sync_page_data_phys(ppnum_t pp)
 *
 * Invalidates all of the instruction cache on a physical page and
 * pushes any dirty data from the data cache for the same physical page
 */
void
pmap_sync_page_data_phys(
    ppnum_t pp)
{
    cache_sync_page(pp);
}

/*
 * pmap_sync_page_attributes_phys(ppnum_t pp)
 *
 * Write back and invalidate all cachelines on a physical page.
 */
void
pmap_sync_page_attributes_phys(
    ppnum_t pp)
{
    flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
}

/* temporary workaround */
boolean_t
coredumpok(
    vm_map_t map,
    mach_vm_offset_t va)
{
    pt_entry_t *pte_p;
    pt_entry_t spte;

    pte_p = pmap_pte(map->pmap, va);
    if (pte_p == PT_ENTRY_NULL) {
        return FALSE;
    }
    spte = *pte_p;
    return (spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
}

void
fillPage(ppnum_t pn, unsigned int fill)
{
    unsigned int *addr;
    int count;

    addr = (unsigned int *) phystokv(ptoa(pn));
    count = PAGE_SIZE / sizeof(unsigned int);
    while (count--) {
        *addr++ = fill;
    }
}

extern void mapping_set_mod(ppnum_t pn);

void
mapping_set_mod(
    ppnum_t pn)
{
    pmap_set_modify(pn);
}

extern void mapping_set_ref(ppnum_t pn);

void
mapping_set_ref(
    ppnum_t pn)
{
    pmap_set_reference(pn);
}
/*
 *	Clear specified attribute bits.
 *
 *	Try to force an arm_fast_fault() for all mappings of
 *	the page - to force attributes to be set again at fault time.
 *	If the forcing succeeds, clear the cached bits at the head.
 *	Otherwise, something must have been wired, so leave the cached
 *	attributes alone.
 */
MARK_AS_PMAP_TEXT static void
phys_attribute_clear_internal(
    ppnum_t pn,
    unsigned int bits,
    int options,
    void *arg)
{
    pmap_paddr_t    pa = ptoa(pn);
    vm_prot_t       allow_mode = VM_PROT_ALL;

    if ((bits & PP_ATTR_MODIFIED) &&
        (options & PMAP_OPTIONS_NOFLUSH) &&
        (arg == NULL)) {
        panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
            "should not clear 'modified' without flushing TLBs\n",
            pn, bits, options, arg);
    }

    assert(pn != vm_page_fictitious_addr);

    if (options & PMAP_OPTIONS_CLEAR_WRITE) {
        assert(bits == PP_ATTR_MODIFIED);

        pmap_page_protect_options_internal(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0);
        /*
         * We short circuit this case; it should not need to
         * invoke arm_force_fast_fault, so just clear the modified bit.
         * pmap_page_protect has taken care of resetting
         * the state so that we'll see the next write as a fault to
         * the VM (i.e. we don't want a fast fault).
         */
        pa_clear_bits(pa, bits);
        return;
    }
    if (bits & PP_ATTR_REFERENCED) {
        allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
    }
    if (bits & PP_ATTR_MODIFIED) {
        allow_mode &= ~VM_PROT_WRITE;
    }

    if (bits == PP_ATTR_NOENCRYPT) {
        /*
         * We short circuit this case; it should not need to
         * invoke arm_force_fast_fault, so just clear and
         * return.  On ARM, this bit is just a debugging aid.
         */
        pa_clear_bits(pa, bits);
        return;
    }

    if (arm_force_fast_fault_internal(pn, allow_mode, options)) {
        pa_clear_bits(pa, bits);
    }
    return;
}

static void
phys_attribute_clear(
    ppnum_t pn,
    unsigned int bits,
    int options,
    void *arg)
{
    /*
     * Do we really want this tracepoint?  It will be extremely chatty.
     * Also, should we have a corresponding trace point for the set path?
     */
    PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);

    phys_attribute_clear_internal(pn, bits, options, arg);

    PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
}
/*
 *	Set specified attribute bits.
 *
 *	Set cached value in the pv head because we have
 *	no per-mapping hardware support for referenced and
 *	modify bits.
 */
MARK_AS_PMAP_TEXT static void
phys_attribute_set_internal(
    ppnum_t pn,
    unsigned int bits)
{
    pmap_paddr_t    pa = ptoa(pn);
    assert(pn != vm_page_fictitious_addr);

    pa_set_bits(pa, bits);

    return;
}

static void
phys_attribute_set(
    ppnum_t pn,
    unsigned int bits)
{
    phys_attribute_set_internal(pn, bits);
}

/*
 *	Check specified attribute bits.
 *
 *	use the software cached bits (since no hw support).
 */
static boolean_t
phys_attribute_test(
    ppnum_t pn,
    unsigned int bits)
{
    pmap_paddr_t    pa = ptoa(pn);
    assert(pn != vm_page_fictitious_addr);
    return pa_test_bits(pa, bits);
}

/*
 *	Set the modify/reference bits on the specified physical page.
 */
void
pmap_set_modify(ppnum_t pn)
{
    phys_attribute_set(pn, PP_ATTR_MODIFIED);
}

/*
 *	Clear the modify bits on the specified physical page.
 */
void
pmap_clear_modify(
    ppnum_t pn)
{
    phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
}

/*
 *	pmap_is_modified:
 *
 *	Return whether or not the specified physical page is modified
 *	by any physical maps.
 */
boolean_t
pmap_is_modified(
    ppnum_t pn)
{
    return phys_attribute_test(pn, PP_ATTR_MODIFIED);
}

/*
 *	Set the reference bit on the specified physical page.
 */
static void
pmap_set_reference(
    ppnum_t pn)
{
    phys_attribute_set(pn, PP_ATTR_REFERENCED);
}

/*
 *	Clear the reference bits on the specified physical page.
 */
void
pmap_clear_reference(
    ppnum_t pn)
{
    phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
}

/*
 * pmap_is_referenced:
 *
 *	Return whether or not the specified physical page is referenced
 *	by any physical maps.
 */
boolean_t
pmap_is_referenced(
    ppnum_t pn)
{
    return phys_attribute_test(pn, PP_ATTR_REFERENCED);
}

/*
 * pmap_get_refmod(phys)
 *  returns the referenced and modified bits of the specified
 *  physical page.
 */
unsigned int
pmap_get_refmod(
    ppnum_t pn)
{
    return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
           | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
}

/*
 * pmap_clear_refmod(phys, mask)
 *  clears the referenced and modified bits as specified by the mask
 *  of the specified physical page.
 */
void
pmap_clear_refmod_options(
    ppnum_t pn,
    unsigned int mask,
    unsigned int options,
    void *arg)
{
    unsigned int bits;

    bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
        ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
    phys_attribute_clear(pn, bits, options, arg);
}

void
pmap_clear_refmod(
    ppnum_t pn,
    unsigned int mask)
{
    pmap_clear_refmod_options(pn, mask, 0, NULL);
}
unsigned int
pmap_disconnect_options(
    ppnum_t pn,
    unsigned int options,
    void *arg)
{
    if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
        /*
         * On ARM, the "modified" bit is managed by software, so
         * we know up-front if the physical page is "modified",
         * without having to scan all the PTEs pointing to it.
         * The caller should have made the VM page "busy" so no one
         * should be able to establish any new mapping and "modify"
         * the page behind us.
         */
        if (pmap_is_modified(pn)) {
            /*
             * The page has been modified and will be sent to
             * the VM compressor.
             */
            options |= PMAP_OPTIONS_COMPRESSOR;
        } else {
            /*
             * The page hasn't been modified and will be freed
             * instead of compressed.
             */
        }
    }

    /* disconnect the page */
    pmap_page_protect_options(pn, 0, options, arg);

    /* return ref/chg status */
    return pmap_get_refmod(pn);
}

/*
 *	Routine:	pmap_disconnect
 *
 *	Disconnect all mappings for this page and return reference and change status
 *	in generic format.
 */
unsigned int
pmap_disconnect(
    ppnum_t pn)
{
    pmap_page_protect(pn, 0);     /* disconnect the page */
    return pmap_get_refmod(pn);   /* return ref/chg status */
}
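/*
 * Illustrative sketch (hypothetical caller, not part of the original source):
 * the compressor path uses the returned ref/mod bits to decide whether a page
 * needs to be compressed.  'pn' is a hypothetical local.
 */
#if 0
    unsigned int refmod = pmap_disconnect_options(pn, PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED, NULL);
    if (refmod & VM_MEM_MODIFIED) {
        /* page contents were dirtied; the VM will compress rather than free */
    }
#endif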
boolean_t
pmap_has_managed_page(ppnum_t first, ppnum_t last)
{
    if (ptoa(first) >= vm_last_phys) {
        return FALSE;
    }
    if (ptoa(last) < vm_first_phys) {
        return FALSE;
    }

    return TRUE;
}

/*
 * The state maintained by the noencrypt functions is used as a
 * debugging aid on ARM.  This incurs some overhead on the part
 * of the caller.  A special case check in phys_attribute_clear
 * (the most expensive path) currently minimizes this overhead,
 * but stubbing these functions out on RELEASE kernels yields
 * further wins.
 */
boolean_t
pmap_is_noencrypt(
    ppnum_t pn)
{
#if DEVELOPMENT || DEBUG
    boolean_t result = FALSE;

    if (!pa_valid(ptoa(pn))) {
        return FALSE;
    }

    result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));

    return result;
#else
#pragma unused(pn)
    return FALSE;
#endif
}

void
pmap_set_noencrypt(
    ppnum_t pn)
{
#if DEVELOPMENT || DEBUG
    if (!pa_valid(ptoa(pn))) {
        return;
    }

    phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
#else
#pragma unused(pn)
#endif
}

void
pmap_clear_noencrypt(
    ppnum_t pn)
{
#if DEVELOPMENT || DEBUG
    if (!pa_valid(ptoa(pn))) {
        return;
    }

    phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
#else
#pragma unused(pn)
#endif
}

void
pmap_lock_phys_page(ppnum_t pn)
{
    int             pai;
    pmap_paddr_t    phys = ptoa(pn);

    if (pa_valid(phys)) {
        pai = (int)pa_index(phys);
        LOCK_PVH(pai);
    } else {
        simple_lock(&phys_backup_lock, LCK_GRP_NULL);
    }
}

void
pmap_unlock_phys_page(ppnum_t pn)
{
    int             pai;
    pmap_paddr_t    phys = ptoa(pn);

    if (pa_valid(phys)) {
        pai = (int)pa_index(phys);
        UNLOCK_PVH(pai);
    } else {
        simple_unlock(&phys_backup_lock);
    }
}
MARK_AS_PMAP_TEXT static void
pmap_switch_user_ttb_internal(
    pmap_t pmap)
{
    VALIDATE_PMAP(pmap);
    pmap_cpu_data_t *cpu_data_ptr;
    cpu_data_ptr = pmap_get_cpu_data();

#if (__ARM_VMSA__ == 7)
    if ((cpu_data_ptr->cpu_user_pmap != PMAP_NULL)
        && (cpu_data_ptr->cpu_user_pmap != kernel_pmap)) {
        unsigned int c;
        tt_entry_t *tt_entry = cpu_data_ptr->cpu_user_pmap->prev_tte;

        c = os_atomic_dec(&cpu_data_ptr->cpu_user_pmap->cpu_ref, acq_rel);
        if ((c == 0) && (tt_entry != NULL)) {
            /* We saved off the old 1-page tt1 in pmap_expand() in case other cores were still using it.
             * Now that the user pmap's cpu_ref is 0, we should be able to safely free it.*/

            cpu_data_ptr->cpu_user_pmap->prev_tte = NULL;
#if !__ARM_USER_PROTECT__
            set_mmu_ttb(kernel_pmap->ttep);
            set_context_id(kernel_pmap->hw_asid);
#endif
            /* Now that we can guarantee the old 1-page L1 table is no longer active on any CPU,
             * flush any cached intermediate translations that may point to it.  Note that to be truly
             * safe from prefetch-related issues, this table PA must have been cleared from TTBR0 prior
             * to this call.  __ARM_USER_PROTECT__ effectively guarantees that for all current configurations.*/
            flush_mmu_tlb_asid(cpu_data_ptr->cpu_user_pmap->hw_asid);
            pmap_tt1_deallocate(cpu_data_ptr->cpu_user_pmap, tt_entry, ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
        }
    }
    cpu_data_ptr->cpu_user_pmap = pmap;
    cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
    os_atomic_inc(&pmap->cpu_ref, acq_rel);

#if MACH_ASSERT && __ARM_USER_PROTECT__
    {
        unsigned int ttbr0_val, ttbr1_val;
        __asm__ volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
        __asm__ volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
        if (ttbr0_val != ttbr1_val) {
            panic("Misaligned ttbr0 %08X\n", ttbr0_val);
        }
    }
#endif
    if (pmap->tte_index_max == NTTES) {
        /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x40000000 */
        __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(2));
        __builtin_arm_isb(ISB_SY);
#if !__ARM_USER_PROTECT__
        set_mmu_ttb(pmap->ttep);
#endif
    } else {
#if !__ARM_USER_PROTECT__
        set_mmu_ttb(pmap->ttep);
#endif
        /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x80000000 */
        __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(1));
        __builtin_arm_isb(ISB_SY);
#if MACH_ASSERT && __ARM_USER_PROTECT__
        if (pmap->ttep & 0x1000) {
            panic("Misaligned ttbr0 %08X\n", pmap->ttep);
        }
#endif
    }

#if !__ARM_USER_PROTECT__
    set_context_id(pmap->hw_asid);
#endif

#else /* (__ARM_VMSA__ == 7) */

    if (pmap != kernel_pmap) {
        cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
    }

    if (pmap == kernel_pmap) {
        pmap_clear_user_ttb_internal();
    } else {
        set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->hw_asid) << TTBR_ASID_SHIFT));
    }

#if defined(HAS_APPLE_PAC) && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__)
    if (!(BootArgs->bootFlags & kBootFlagsDisableJOP) && !(BootArgs->bootFlags & kBootFlagsDisableUserJOP)) {
        uint64_t sctlr = __builtin_arm_rsr64("SCTLR_EL1");
        bool jop_enabled = sctlr & SCTLR_JOP_KEYS_ENABLED;
        if (!jop_enabled && !pmap->disable_jop) {
            // turn on JOP
            sctlr |= SCTLR_JOP_KEYS_ENABLED;
            __builtin_arm_wsr64("SCTLR_EL1", sctlr);
            // no ISB necessary because this won't take effect until eret returns to EL0
        } else if (jop_enabled && pmap->disable_jop) {
            // turn off JOP
            sctlr &= ~SCTLR_JOP_KEYS_ENABLED;
            __builtin_arm_wsr64("SCTLR_EL1", sctlr);
        }
    }
#endif /* HAS_APPLE_PAC && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__) */
#endif /* (__ARM_VMSA__ == 7) */
}

void
pmap_switch_user_ttb(
    pmap_t pmap)
{
    PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
    pmap_switch_user_ttb_internal(pmap);
    PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
}
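/*
 * Illustrative arithmetic only (assumed example values, not from the original
 * source): on __ARM_VMSA__ > 7 the TTBR0 write above packs the root table
 * physical address and the ASID into a single register value, e.g. with
 * ttep = 0x80001C000 and hw_asid = 5:
 *   (0x80001C000 & TTBR_BADDR_MASK) | ((uint64_t)5 << TTBR_ASID_SHIFT)
 * so a context switch changes the translation root and ASID together.
 */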
MARK_AS_PMAP_TEXT static void
pmap_clear_user_ttb_internal(void)
{
#if (__ARM_VMSA__ > 7)
    set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
#else
    set_mmu_ttb(kernel_pmap->ttep);
#endif
}

void
pmap_clear_user_ttb(void)
{
    pmap_clear_user_ttb_internal();
}
/*
 *	Routine:	arm_force_fast_fault
 *
 *	Force all mappings for this page to fault according
 *	to the access modes allowed, so we can gather ref/modify
 *	bits.
 */
MARK_AS_PMAP_TEXT static boolean_t
arm_force_fast_fault_internal(
    ppnum_t ppnum,
    vm_prot_t allow_mode,
    int options)
{
    pmap_paddr_t     phys = ptoa(ppnum);
    pv_entry_t      *pve_p;
    pt_entry_t      *pte_p;
    int              pai;
    boolean_t        result;
    pv_entry_t     **pv_h;
    boolean_t        is_reusable, is_internal;
    boolean_t        tlb_flush_needed = FALSE;
    boolean_t        ref_fault;
    boolean_t        mod_fault;

    assert(ppnum != vm_page_fictitious_addr);

    if (!pa_valid(phys)) {
        return FALSE;   /* Not a managed page. */
    }

    result = TRUE;
    ref_fault = FALSE;
    mod_fault = FALSE;
    pai = (int)pa_index(phys);
    LOCK_PVH(pai);
    pv_h = pai_to_pvh(pai);

    pte_p = PT_ENTRY_NULL;
    pve_p = PV_ENTRY_NULL;
    if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
        pte_p = pvh_ptep(pv_h);
    } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
        pve_p = pvh_list(pv_h);
    }

    is_reusable = IS_REUSABLE_PAGE(pai);
    is_internal = IS_INTERNAL_PAGE(pai);

    while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
        vm_map_address_t va;
        pmap_t           pmap;
        pt_entry_t       spte;
        pt_entry_t       tmplate;
        boolean_t        update_pte;

        if (pve_p != PV_ENTRY_NULL) {
            pte_p = pve_get_ptep(pve_p);
        }

        if (pte_p == PT_ENTRY_NULL) {
            panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
        }
#ifdef PVH_FLAG_IOMMU
        if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
            goto fff_skip_pve;
        }
#endif
        if (*pte_p == ARM_PTE_EMPTY) {
            panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
        }
        if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
            panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
        }

        pmap = ptep_get_pmap(pte_p);
        va = ptep_get_va(pte_p);

        assert(va >= pmap->min && va < pmap->max);

        if (pte_is_wired(*pte_p) || pmap == kernel_pmap) {
            result = FALSE;
            break;
        }

        spte = *pte_p;
        tmplate = spte;
        update_pte = FALSE;

        if ((allow_mode & VM_PROT_READ) != VM_PROT_READ) {
            /* read protection sets the pte to fault */
            tmplate = tmplate & ~ARM_PTE_AF;
            update_pte = TRUE;
            ref_fault = TRUE;
        }
        if ((allow_mode & VM_PROT_WRITE) != VM_PROT_WRITE) {
            /* take away write permission if set */
            if (pmap == kernel_pmap) {
                if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) {
                    tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
                    pte_set_was_writeable(tmplate, true);
                    update_pte = TRUE;
                    mod_fault = TRUE;
                }
            } else {
                if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWRW)) {
                    tmplate = ((tmplate & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pmap_get_pt_attr(pmap)));
                    pte_set_was_writeable(tmplate, true);
                    update_pte = TRUE;
                    mod_fault = TRUE;
                }
            }
        }

        if (update_pte) {
            if (*pte_p != ARM_PTE_TYPE_FAULT &&
                !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
                WRITE_PTE_STRONG(pte_p, tmplate);
                pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
                tlb_flush_needed = TRUE;
            } else {
                WRITE_PTE(pte_p, tmplate);
                __builtin_arm_isb(ISB_SY);
            }
        }

        /* update pmap stats and ledgers */
        if (IS_ALTACCT_PAGE(pai, pve_p)) {
            /*
             * We do not track "reusable" status for
             * "alternate accounting" mappings.
             */
        } else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
            is_reusable &&
            is_internal &&
            pmap != kernel_pmap) {
            /* one less "reusable" */
            PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
            OSAddAtomic(-1, &pmap->stats.reusable);
            /* one more "internal" */
            OSAddAtomic(+1, &pmap->stats.internal);
            PMAP_STATS_PEAK(pmap->stats.internal);
            PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
            pmap_ledger_credit(pmap, task_ledgers.internal, machine_ptob(1));
            assert(!IS_ALTACCT_PAGE(pai, pve_p));
            assert(IS_INTERNAL_PAGE(pai));
            pmap_ledger_credit(pmap, task_ledgers.phys_footprint, machine_ptob(1));

            /*
             * Avoid the cost of another trap to handle the fast
             * fault when we next write to this page:  let's just
             * handle that now since we already have all the
             * necessary information.
             */
            arm_clear_fast_fault(ppnum, VM_PROT_WRITE);
        } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
            !is_reusable &&
            is_internal &&
            pmap != kernel_pmap) {
            /* one more "reusable" */
            OSAddAtomic(+1, &pmap->stats.reusable);
            PMAP_STATS_PEAK(pmap->stats.reusable);
            PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
            /* one less "internal" */
            PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
            OSAddAtomic(-1, &pmap->stats.internal);
            pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(1));
            assert(!IS_ALTACCT_PAGE(pai, pve_p));
            assert(IS_INTERNAL_PAGE(pai));
            pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
        }

#ifdef PVH_FLAG_IOMMU
fff_skip_pve:
#endif
        pte_p = PT_ENTRY_NULL;
        if (pve_p != PV_ENTRY_NULL) {
            pve_p = PVE_NEXT_PTR(pve_next(pve_p));
        }
    }

    if (tlb_flush_needed) {
        sync_tlb_flush();
    }

    /* update global "reusable" status for this page */
    if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
        is_reusable && is_internal) {
        CLR_REUSABLE_PAGE(pai);
    } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
        !is_reusable && is_internal) {
        SET_REUSABLE_PAGE(pai);
    }

    if (mod_fault) {
        SET_MODFAULT_PAGE(pai);
    }
    if (ref_fault) {
        SET_REFFAULT_PAGE(pai);
    }

    UNLOCK_PVH(pai);
    return result;
}
boolean_t
arm_force_fast_fault(
    ppnum_t ppnum,
    vm_prot_t allow_mode,
    int options,
    __unused void *arg)
{
    pmap_paddr_t phys = ptoa(ppnum);

    assert(ppnum != vm_page_fictitious_addr);

    if (!pa_valid(phys)) {
        return FALSE;   /* Not a managed page. */
    }

    return arm_force_fast_fault_internal(ppnum, allow_mode, options);
}
/*
 *	Routine:	arm_clear_fast_fault
 *
 *	Clear pending force fault for all mappings for this page based on
 *	the observed fault type, update ref/modify bits.
 */
boolean_t
arm_clear_fast_fault(
    ppnum_t ppnum,
    vm_prot_t fault_type)
{
    pmap_paddr_t    pa = ptoa(ppnum);
    pv_entry_t     *pve_p;
    pt_entry_t     *pte_p;
    int             pai;
    boolean_t       result;
    boolean_t       tlb_flush_needed = FALSE;
    pv_entry_t    **pv_h;

    assert(ppnum != vm_page_fictitious_addr);

    if (!pa_valid(pa)) {
        return FALSE;   /* Not a managed page. */
    }

    result = FALSE;
    pai = (int)pa_index(pa);
    ASSERT_PVH_LOCKED(pai);
    pv_h = pai_to_pvh(pai);

    pte_p = PT_ENTRY_NULL;
    pve_p = PV_ENTRY_NULL;
    if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
        pte_p = pvh_ptep(pv_h);
    } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
        pve_p = pvh_list(pv_h);
    }

    while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
        vm_map_address_t va;
        pmap_t           pmap;
        pt_entry_t       spte;
        pt_entry_t       tmplate;

        if (pve_p != PV_ENTRY_NULL) {
            pte_p = pve_get_ptep(pve_p);
        }

        if (pte_p == PT_ENTRY_NULL) {
            panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
        }
#ifdef PVH_FLAG_IOMMU
        if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
            goto cff_skip_pve;
        }
#endif
        if (*pte_p == ARM_PTE_EMPTY) {
            panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
        }

        pmap = ptep_get_pmap(pte_p);
        va = ptep_get_va(pte_p);

        assert(va >= pmap->min && va < pmap->max);

        spte = *pte_p;
        tmplate = spte;

        if ((fault_type & VM_PROT_WRITE) && (pte_was_writeable(spte))) {
            if (pmap == kernel_pmap) {
                tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
            } else {
                tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap)));
            }

            tmplate |= ARM_PTE_AF;

            pte_set_was_writeable(tmplate, false);
            pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
        } else if ((fault_type & VM_PROT_READ) && ((spte & ARM_PTE_AF) != ARM_PTE_AF)) {
            tmplate = spte | ARM_PTE_AF;

            pa_set_bits(pa, PP_ATTR_REFERENCED);
        }

        if (spte != tmplate) {
            if (spte != ARM_PTE_TYPE_FAULT) {
                WRITE_PTE_STRONG(pte_p, tmplate);
                pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
                tlb_flush_needed = TRUE;
            } else {
                WRITE_PTE(pte_p, tmplate);
                __builtin_arm_isb(ISB_SY);
            }
            result = TRUE;
        }

#ifdef PVH_FLAG_IOMMU
cff_skip_pve:
#endif
        pte_p = PT_ENTRY_NULL;
        if (pve_p != PV_ENTRY_NULL) {
            pve_p = PVE_NEXT_PTR(pve_next(pve_p));
        }
    }
    if (tlb_flush_needed) {
        sync_tlb_flush();
    }
    return result;
}
/*
 * Determine if the fault was induced by software tracking of
 * modify/reference bits.  If so, re-enable the mapping (and set
 * the appropriate bits).
 *
 * Returns KERN_SUCCESS if the fault was induced and was
 * successfully handled.
 *
 * Returns KERN_FAILURE if the fault was not induced and
 * the function was unable to deal with it.
 *
 * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
 * disallows this type of access.
 */
MARK_AS_PMAP_TEXT static kern_return_t
arm_fast_fault_internal(
    pmap_t pmap,
    vm_map_address_t va,
    vm_prot_t fault_type,
    __unused bool was_af_fault,
    __unused bool from_user)
{
    kern_return_t   result = KERN_FAILURE;
    pt_entry_t     *ptep;
    pt_entry_t      spte = ARM_PTE_TYPE_FAULT;
    int             pai;
    pmap_paddr_t    pa;

    VALIDATE_PMAP(pmap);

    PMAP_LOCK(pmap);

    /*
     * If the entry doesn't exist, is completely invalid, or is already
     * valid, we can't fix it here.
     */
    ptep = pmap_pte(pmap, va);
    if (ptep != PT_ENTRY_NULL) {
        spte = *ptep;

        pa = pte_to_pa(spte);

        if ((spte == ARM_PTE_TYPE_FAULT) ||
            ARM_PTE_IS_COMPRESSED(spte, ptep)) {
            PMAP_UNLOCK(pmap);
            return result;
        }

        if (!pa_valid(pa)) {
            PMAP_UNLOCK(pmap);
            return result;
        }
        pai = (int)pa_index(pa);
        LOCK_PVH(pai);
    } else {
        PMAP_UNLOCK(pmap);
        return result;
    }

    if ((IS_REFFAULT_PAGE(pai)) ||
        ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
        /*
         * An attempted access will always clear ref/mod fault state, as
         * appropriate for the fault type.  arm_clear_fast_fault will
         * update the associated PTEs for the page as appropriate; if
         * any PTEs are updated, we redrive the access.  If the mapping
         * does not actually allow for the attempted access, the
         * following fault will (hopefully) fail to update any PTEs, and
         * thus cause arm_fast_fault to decide that it failed to handle
         * the fault.
         */
        if (IS_REFFAULT_PAGE(pai)) {
            CLR_REFFAULT_PAGE(pai);
        }
        if ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)) {
            CLR_MODFAULT_PAGE(pai);
        }

        if (arm_clear_fast_fault((ppnum_t)atop(pa), fault_type)) {
            /*
             * Should this preserve KERN_PROTECTION_FAILURE?  The
             * cost of not doing so is another fault in a case
             * that should already result in an exception.
             */
            result = KERN_SUCCESS;
        }
    }

    UNLOCK_PVH(pai);
    PMAP_UNLOCK(pmap);
    return result;
}
kern_return_t
arm_fast_fault(
    pmap_t pmap,
    vm_map_address_t va,
    vm_prot_t fault_type,
    bool was_af_fault,
    __unused bool from_user)
{
    kern_return_t result = KERN_FAILURE;

    if (va < pmap->min || va >= pmap->max) {
        return result;
    }

    PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
        VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
        from_user);

#if (__ARM_VMSA__ == 7)
    if (pmap != kernel_pmap) {
        pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
        pmap_t cur_pmap;
        pmap_t cur_user_pmap;

        cur_pmap = current_pmap();
        cur_user_pmap = cpu_data_ptr->cpu_user_pmap;

        if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
            if (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp) {
                pmap_set_pmap(pmap, current_thread());
                result = KERN_SUCCESS;
                goto done;
            }
        }
    }
#endif

    result = arm_fast_fault_internal(pmap, va, fault_type, was_af_fault, from_user);

#if (__ARM_VMSA__ == 7)
done:
#endif

    PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);

    return result;
}
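/*
 * Illustrative sketch of the software ref/mod flow implemented above
 * (hypothetical caller, simplified; 'pn', 'far' and the surrounding flow are
 * assumptions, not part of the original source): arm_force_fast_fault() knocks
 * out AF/write permission so the next access traps, and arm_fast_fault() then
 * records the bits and restores the mapping.
 */
#if 0
    /* 1. Ask for faults on any access so reference state can be regathered. */
    arm_force_fast_fault(pn, VM_PROT_NONE, 0, NULL);

    /* 2. Later, from the data-abort handler, redrive the access. */
    if (arm_fast_fault(pmap, far, VM_PROT_READ, false, true) == KERN_SUCCESS) {
        /* The fault was only ref/mod bookkeeping; retry the instruction. */
    }
#endif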
/*
 *	pmap_copy_page copies the specified (machine independent) page.
 */
void
pmap_copy_page(
    ppnum_t psrc,
    ppnum_t pdst)
{
    bcopy_phys((addr64_t) (ptoa(psrc)),
        (addr64_t) (ptoa(pdst)),
        PAGE_SIZE);
}

/*
 *	pmap_copy_part_page copies the specified (machine independent)
 *	part of a page.
 */
void
pmap_copy_part_page(
    ppnum_t psrc,
    vm_offset_t src_offset,
    ppnum_t pdst,
    vm_offset_t dst_offset,
    vm_size_t len)
{
    bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
        (addr64_t) (ptoa(pdst) + dst_offset),
        len);
}

/*
 *	pmap_zero_page zeros the specified (machine independent) page.
 */
void
pmap_zero_page(
    ppnum_t pn)
{
    assert(pn != vm_page_fictitious_addr);
    bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
}

/*
 *	pmap_zero_part_page
 *	zeros the specified (machine independent) part of a page.
 */
void
pmap_zero_part_page(
    ppnum_t pn,
    vm_offset_t offset,
    vm_size_t len)
{
    assert(pn != vm_page_fictitious_addr);
    assert(offset + len <= PAGE_SIZE);
    bzero_phys((addr64_t) (ptoa(pn) + offset), len);
}
/*
 * nop in current arm implementation
 */
void
inval_copy_windows(
    __unused thread_t t)
{
}

void
pmap_map_globals(
    void)
{
    pt_entry_t *ptep, pte;

    ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
    assert(ptep != PT_ENTRY_NULL);
    assert(*ptep == ARM_PTE_EMPTY);

    pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
#if __ARM_KERNEL_PROTECT__
    pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */
    pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
#if (__ARM_VMSA__ > 7)
    pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
#else
    pte |= ARM_PTE_SH;
#endif
    *ptep = pte;
    FLUSH_PTE_RANGE(ptep, (ptep + 1));
    PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE, false);
}
vm_offset_t
pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
{
    if (__improbable(index >= CPUWINDOWS_MAX)) {
        panic("%s: invalid index %u", __func__, index);
    }
    return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
}
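/*
 * Illustrative arithmetic only (hypothetical values, not from the original
 * source): with PAGE_SIZE = 16KB, CPUWINDOWS_MAX = 4, cpu_num = 2 and
 * index = 1, the window address works out to
 *   CPUWINDOWS_BASE + 0x4000 * (4 * 2 + 1) = CPUWINDOWS_BASE + 0x24000,
 * i.e. each CPU owns a contiguous group of CPUWINDOWS_MAX page-sized slots.
 */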
MARK_AS_PMAP_TEXT static unsigned int
pmap_map_cpu_windows_copy_internal(
    ppnum_t pn,
    vm_prot_t prot,
    unsigned int wimg_bits)
{
    pt_entry_t *ptep = NULL, pte;
    pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
    unsigned int cpu_num;
    unsigned int i;
    vm_offset_t cpu_copywindow_vaddr = 0;
    bool need_strong_sync = false;

    cpu_num = pmap_cpu_data->cpu_number;

    for (i = 0; i < CPUWINDOWS_MAX; i++) {
        cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
        ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
        assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
        if (*ptep == ARM_PTE_TYPE_FAULT) {
            break;
        }
    }
    if (i == CPUWINDOWS_MAX) {
        panic("pmap_map_cpu_windows_copy: out of window\n");
    }

    pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
#if __ARM_KERNEL_PROTECT__
    pte |= ARM_PTE_NG;
#endif /* __ARM_KERNEL_PROTECT__ */

    pte |= wimg_to_pte(wimg_bits);

    if (prot & VM_PROT_WRITE) {
        pte |= ARM_PTE_AP(AP_RWNA);
    } else {
        pte |= ARM_PTE_AP(AP_RONA);
    }

    WRITE_PTE_FAST(ptep, pte);
    /*
     * Invalidate tlb. Cover nested cpu_copywindow_vaddr usage with the interrupted context
     * in pmap_unmap_cpu_windows_copy() after clearing the pte and before tlb invalidate.
     */
    FLUSH_PTE_STRONG(ptep);
    PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[i]);
    pmap_cpu_data->copywindow_strong_sync[i] = need_strong_sync;

    return i;
}
unsigned int
pmap_map_cpu_windows_copy(
    ppnum_t pn,
    vm_prot_t prot,
    unsigned int wimg_bits)
{
    return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
}
MARK_AS_PMAP_TEXT static void
pmap_unmap_cpu_windows_copy_internal(
    unsigned int index)
{
    pt_entry_t *ptep;
    unsigned int cpu_num;
    vm_offset_t cpu_copywindow_vaddr = 0;
    pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();

    cpu_num = pmap_cpu_data->cpu_number;

    cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
    /* Issue full-system DSB to ensure prior operations on the per-CPU window
     * (which are likely to have been on I/O memory) are complete before
     * tearing down the mapping. */
    __builtin_arm_dsb(DSB_SY);
    ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
    WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
    PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index]);
}

void
pmap_unmap_cpu_windows_copy(
    unsigned int index)
{
    return pmap_unmap_cpu_windows_copy_internal(index);
}
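/*
 * Illustrative sketch (hypothetical caller, not part of the original source):
 * a per-CPU copy window maps an arbitrary physical page into a reserved kernel
 * VA slot just long enough to touch it, then tears the mapping down.
 * Preemption must stay disabled so the window remains owned by this CPU.
 */
#if 0
    unsigned int index = pmap_map_cpu_windows_copy(pn, VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_DEFAULT);
    void *window = (void *)pmap_cpu_windows_copy_addr(cpu_number(), index);

    bzero(window, PAGE_SIZE);           /* operate on the physical page */

    pmap_unmap_cpu_windows_copy(index);
#endif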
/*
 * Indicate that a pmap is intended to be used as a nested pmap
 * within one or more larger address spaces.  This must be set
 * before pmap_nest() is called with this pmap as the 'subordinate'.
 */
MARK_AS_PMAP_TEXT static void
pmap_set_nested_internal(
    pmap_t pmap)
{
    VALIDATE_PMAP(pmap);
    pmap->nested = TRUE;
}

void
pmap_set_nested(
    pmap_t pmap)
{
    pmap_set_nested_internal(pmap);
}
/*
 * pmap_trim_range(pmap, start, end)
 *
 * pmap  = pmap to operate on
 * start = start of the range
 * end   = end of the range
 *
 * Attempts to deallocate TTEs for the given range in the nested range.
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_range(
    pmap_t pmap,
    addr64_t start,
    addr64_t end)
{
    addr64_t cur;
    addr64_t nested_region_start;
    addr64_t nested_region_end;
    addr64_t adjusted_start;
    addr64_t adjusted_end;
    addr64_t adjust_offmask;
    tt_entry_t *tte_p;
    pt_entry_t *pte_p;
    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

    if (__improbable(end < start)) {
        panic("%s: invalid address range, "
            "pmap=%p, start=%p, end=%p",
            __func__,
            pmap, (void*)start, (void*)end);
    }

    nested_region_start = pmap->nested ? pmap->nested_region_subord_addr : pmap->nested_region_subord_addr;
    nested_region_end = nested_region_start + pmap->nested_region_size;

    if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
        panic("%s: range outside nested region %p-%p, "
            "pmap=%p, start=%p, end=%p",
            __func__, (void *)nested_region_start, (void *)nested_region_end,
            pmap, (void*)start, (void*)end);
    }

    /* Contract the range to TT page boundaries. */
    adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
    adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
    adjusted_end = end & ~adjust_offmask;
    bool modified = false;

    /* Iterate over the range, trying to remove TTEs. */
    for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
        PMAP_LOCK(pmap);

        tte_p = pmap_tte(pmap, cur);

        if (tte_p == (tt_entry_t *) NULL) {
            goto done;
        }

        if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
            pte_p = (pt_entry_t *) ttetokv(*tte_p);

            if ((ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
                (pmap != kernel_pmap)) {
                if (pmap->nested == TRUE) {
                    /* Deallocate for the nested map. */
                    pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
                } else {
                    /* Just remove for the parent map. */
                    pmap_tte_remove(pmap, tte_p, pt_attr_twig_level(pt_attr));
                }

                pmap_get_pt_ops(pmap)->flush_tlb_tte_async(cur, pmap);
                modified = true;
            }
        }

done:
        PMAP_UNLOCK(pmap);
    }

    if (modified) {
        sync_tlb_flush();
    }

#if (__ARM_VMSA__ > 7)
    /* Remove empty L2 TTs. */
    adjusted_start = ((start + ARM_TT_L1_OFFMASK) & ~ARM_TT_L1_OFFMASK);
    adjusted_end = end & ~ARM_TT_L1_OFFMASK;

    for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += ARM_TT_L1_SIZE) {
        /* For each L1 entry in our range... */
        PMAP_LOCK(pmap);

        bool remove_tt1e = true;
        tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
        tt_entry_t * tt2e_start;
        tt_entry_t * tt2e_end;
        tt_entry_t * tt2e_p;
        tt_entry_t tt1e;

        if (tt1e_p == NULL) {
            PMAP_UNLOCK(pmap);
            continue;
        }

        tt1e = *tt1e_p;

        if (tt1e == ARM_TTE_TYPE_FAULT) {
            PMAP_UNLOCK(pmap);
            continue;
        }

        tt2e_start = &((tt_entry_t *) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
        tt2e_end = &tt2e_start[TTE_PGENTRIES];

        for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
            if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
                /*
                 * If any TTEs are populated, don't remove the
                 * L1 TT.
                 */
                remove_tt1e = false;
            }
        }

        if (remove_tt1e) {
            pmap_tte_deallocate(pmap, tt1e_p, PMAP_TT_L1_LEVEL);
            PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE, false);
        }

        PMAP_UNLOCK(pmap);
    }
#endif /* (__ARM_VMSA__ > 7) */
}
/*
 * pmap_trim_internal(grand, subord, vstart, nstart, size)
 *
 * grand  = pmap subord is nested in
 * subord = nested pmap
 * vstart = start of the used range in grand
 * nstart = start of the used range in nstart
 * size   = size of the used range
 *
 * Attempts to trim the shared region page tables down to only cover the given
 * range in subord and grand.
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_internal(
    pmap_t grand,
    pmap_t subord,
    addr64_t vstart,
    addr64_t nstart,
    uint64_t size)
{
    addr64_t vend, nend;
    addr64_t adjust_offmask;

    if (__improbable(os_add_overflow(vstart, size, &vend))) {
        panic("%s: grand addr wraps around, "
            "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
            __func__, grand, subord, (void*)vstart, (void*)nstart, size);
    }

    if (__improbable(os_add_overflow(nstart, size, &nend))) {
        panic("%s: nested addr wraps around, "
            "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
            __func__, grand, subord, (void*)vstart, (void*)nstart, size);
    }

    VALIDATE_PMAP(grand);
    VALIDATE_PMAP(subord);

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);

    PMAP_LOCK(subord);

    if (!subord->nested) {
        panic("%s: subord is not nestable, "
            "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
            __func__, grand, subord, (void*)vstart, (void*)nstart, size);
    }

    if (grand->nested) {
        panic("%s: grand is nestable, "
            "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
            __func__, grand, subord, (void*)vstart, (void*)nstart, size);
    }

    if (grand->nested_pmap != subord) {
        panic("%s: grand->nested != subord, "
            "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
            __func__, grand, subord, (void*)vstart, (void*)nstart, size);
    }

    if ((vstart < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
        panic("%s: grand range not in nested region, "
            "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
            __func__, grand, subord, (void*)vstart, (void*)nstart, size);
    }

    if ((nstart < grand->nested_region_grand_addr) || (nend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
        panic("%s: subord range not in nested region, "
            "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
            __func__, grand, subord, (void*)vstart, (void*)nstart, size);
    }

    if (!grand->nested_has_no_bounds_ref) {
        assert(subord->nested_bounds_set);

        if (!grand->nested_bounds_set) {
            /* Inherit the bounds from subord. */
            grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
            grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
            grand->nested_bounds_set = true;
        }

        PMAP_UNLOCK(subord);
        return;
    }

    if ((!subord->nested_bounds_set) && size) {
        adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);

        subord->nested_region_true_start = nstart;
        subord->nested_region_true_end = nend;
        subord->nested_region_true_start &= ~adjust_offmask;

        if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
            panic("%s: padded true end wraps around, "
                "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
                __func__, grand, subord, (void*)vstart, (void*)nstart, size);
        }

        subord->nested_region_true_end &= ~adjust_offmask;
        subord->nested_bounds_set = true;
    }

    if (subord->nested_bounds_set) {
        /* Inherit the bounds from subord. */
        grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
        grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
        grand->nested_bounds_set = true;

        /* If we know the bounds, we can trim the pmap. */
        grand->nested_has_no_bounds_ref = false;
        PMAP_UNLOCK(subord);
    } else {
        /* Don't trim if we don't know the bounds. */
        PMAP_UNLOCK(subord);
        return;
    }

    /* Trim grand to only cover the given range. */
    pmap_trim_range(grand, grand->nested_region_grand_addr, grand->nested_region_true_start);
    pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_grand_addr + grand->nested_region_size));

    /* Try to trim subord. */
    pmap_trim_subord(subord);
}
MARK_AS_PMAP_TEXT static void
pmap_trim_self(pmap_t pmap)
{
    if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
        /* If we have a no bounds ref, we need to drop it. */
        PMAP_LOCK(pmap->nested_pmap);
        pmap->nested_has_no_bounds_ref = false;
        boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
        vm_map_offset_t nested_region_true_start = (pmap->nested_pmap->nested_region_true_start - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
        vm_map_offset_t nested_region_true_end = (pmap->nested_pmap->nested_region_true_end - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
        PMAP_UNLOCK(pmap->nested_pmap);

        if (nested_bounds_set) {
            pmap_trim_range(pmap, pmap->nested_region_grand_addr, nested_region_true_start);
            pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_grand_addr + pmap->nested_region_size));
        }
        /*
         * Try trimming the nested pmap, in case we had the
         * last reference.
         */
        pmap_trim_subord(pmap->nested_pmap);
    }
}
/*
 * pmap_trim_subord(grand, subord)
 *
 * grand  = pmap that we have nested subord in
 * subord = nested pmap we are attempting to trim
 *
 * Trims subord if possible
 */
MARK_AS_PMAP_TEXT static void
pmap_trim_subord(pmap_t subord)
{
    bool contract_subord = false;

    PMAP_LOCK(subord);

    subord->nested_no_bounds_refcnt--;

    if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
        /* If this was the last no bounds reference, trim subord. */
        contract_subord = true;
    }

    PMAP_UNLOCK(subord);

    if (contract_subord) {
        pmap_trim_range(subord, subord->nested_region_subord_addr, subord->nested_region_true_start);
        pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_subord_addr + subord->nested_region_size);
    }
}

void
pmap_trim(
    pmap_t grand,
    pmap_t subord,
    addr64_t vstart,
    addr64_t nstart,
    uint64_t size)
{
    pmap_trim_internal(grand, subord, vstart, nstart, size);
}
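/*
 * Illustrative sketch (hypothetical names and values, not part of the original
 * source): once the shared-region bounds are known, the VM layer calls
 * pmap_trim() with the same (vstart, nstart, size) triple it used for
 * pmap_nest(), and any twig tables outside the truly used range are released
 * from both 'grand' and 'subord'.
 */
#if 0
    pmap_trim(task_pmap, shared_region_pmap, vstart, nstart, used_size);
#endif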
/*
 *	kern_return_t pmap_nest(grand, subord, vstart, size)
 *
 *	grand  = the pmap that we will nest subord into
 *	subord = the pmap that goes into the grand
 *	vstart = start of range in pmap to be inserted
 *	nstart = start of range in pmap nested pmap
 *	size   = Size of nest area (up to 16TB)
 *
 *	Inserts a pmap into another.  This is used to implement shared segments.
 *
 */
MARK_AS_PMAP_TEXT static kern_return_t
pmap_nest_internal(
    pmap_t grand,
    pmap_t subord,
    addr64_t vstart,
    addr64_t nstart,
    uint64_t size)
{
    kern_return_t kr = KERN_FAILURE;
    vm_map_offset_t vaddr, nvaddr;
    tt_entry_t     *stte_p;
    tt_entry_t     *gtte_p;
    unsigned int    i;
    unsigned int    num_tte;
    unsigned int    nested_region_asid_bitmap_size;
    unsigned int*   nested_region_asid_bitmap;
    int expand_options = 0;

    addr64_t vend, nend;
    if (__improbable(os_add_overflow(vstart, size, &vend))) {
        panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
    }
    if (__improbable(os_add_overflow(nstart, size, &nend))) {
        panic("%s: %p nested addr wraps around: 0x%llx + 0x%llx", __func__, subord, nstart, size);
    }

    VALIDATE_PMAP(grand);
    VALIDATE_PMAP(subord);

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
    assert(pmap_get_pt_attr(subord) == pt_attr);

    if (((size | vstart | nstart) & (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL) {
        panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx\n", grand, vstart, nstart, size);
    }

    if (!subord->nested) {
        panic("%s: subordinate pmap %p is not nestable", __func__, subord);
    }

    if ((grand->nested_pmap != PMAP_NULL) && (grand->nested_pmap != subord)) {
        panic("pmap_nest() pmap %p has a nested pmap\n", grand);
    }

    if (subord->nested_region_asid_bitmap == NULL) {
        nested_region_asid_bitmap_size = (unsigned int)(size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY);

        nested_region_asid_bitmap = kalloc(nested_region_asid_bitmap_size * sizeof(unsigned int));
        bzero(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));

        PMAP_LOCK(subord);
        if (subord->nested_region_asid_bitmap == NULL) {
            subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
            subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
            subord->nested_region_subord_addr = nstart;
            subord->nested_region_size = (mach_vm_offset_t) size;
            nested_region_asid_bitmap = NULL;
        }
        PMAP_UNLOCK(subord);
        if (nested_region_asid_bitmap != NULL) {
            kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
        }
    }
    if ((subord->nested_region_subord_addr + subord->nested_region_size) < nend) {
        uint64_t new_size;
        unsigned int new_nested_region_asid_bitmap_size;
        unsigned int* new_nested_region_asid_bitmap;

        nested_region_asid_bitmap = NULL;
        nested_region_asid_bitmap_size = 0;
        new_size = nend - subord->nested_region_subord_addr;

        /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
        new_nested_region_asid_bitmap_size = (unsigned int)((new_size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY)) + 1;

        new_nested_region_asid_bitmap = kalloc(new_nested_region_asid_bitmap_size * sizeof(unsigned int));
        PMAP_LOCK(subord);
        if (subord->nested_region_size < new_size) {
            bzero(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));
            bcopy(subord->nested_region_asid_bitmap, new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
            nested_region_asid_bitmap_size = subord->nested_region_asid_bitmap_size;
            nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
            subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
            subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
            subord->nested_region_size = new_size;
            new_nested_region_asid_bitmap = NULL;
        }
        PMAP_UNLOCK(subord);
        if (nested_region_asid_bitmap != NULL)
        { kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));}
        if (new_nested_region_asid_bitmap != NULL)
        { kfree(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));}
    }

    PMAP_LOCK(subord);
    if (grand->nested_pmap == PMAP_NULL) {
        grand->nested_pmap = subord;

        if (!subord->nested_bounds_set) {
            /*
             * We are nesting without the shared regions bounds
             * being known.  We'll have to trim the pmap later.
             */
            grand->nested_has_no_bounds_ref = true;
            subord->nested_no_bounds_refcnt++;
        }

        grand->nested_region_grand_addr = vstart;
        grand->nested_region_subord_addr = nstart;
        grand->nested_region_size = (mach_vm_offset_t) size;
    } else {
        if ((grand->nested_region_grand_addr > vstart)) {
            panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand);
        } else if ((grand->nested_region_grand_addr + grand->nested_region_size) < vend) {
            grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_grand_addr + size);
        }
    }
#if (__ARM_VMSA__ == 7)
    nvaddr = (vm_map_offset_t) nstart;
    vaddr = (vm_map_offset_t) vstart;
    num_tte = size >> ARM_TT_L1_SHIFT;

    for (i = 0; i < num_tte; i++) {
        if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
            goto expand_next;
        }

        stte_p = pmap_tte(subord, nvaddr);
        if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
            PMAP_UNLOCK(subord);
            kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_L2_LEVEL);

            if (kr != KERN_SUCCESS) {
                PMAP_LOCK(grand);
                goto done;
            }

            PMAP_LOCK(subord);
        }
        PMAP_UNLOCK(subord);
        PMAP_LOCK(grand);
        stte_p = pmap_tte(grand, vaddr);
        if (stte_p == (tt_entry_t *)NULL) {
            PMAP_UNLOCK(grand);
            kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);

            if (kr != KERN_SUCCESS) {
                PMAP_LOCK(grand);
                goto done;
            }
        } else {
            PMAP_UNLOCK(grand);
            kr = KERN_SUCCESS;
        }
        PMAP_LOCK(subord);

expand_next:
        nvaddr += ARM_TT_L1_SIZE;
        vaddr += ARM_TT_L1_SIZE;
    }

#else
    nvaddr = (vm_map_offset_t) nstart;
    num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));

    for (i = 0; i < num_tte; i++) {
        if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
            goto expand_next;
        }

        stte_p = pmap_tte(subord, nvaddr);
        if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
            PMAP_UNLOCK(subord);
            kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_LEAF_LEVEL);

            if (kr != KERN_SUCCESS) {
                PMAP_LOCK(grand);
                goto done;
            }

            PMAP_LOCK(subord);
        }
expand_next:
        nvaddr += pt_attr_twig_size(pt_attr);
    }
#endif
    PMAP_UNLOCK(subord);
    /*
     * copy tte's from subord pmap into grand pmap
     */

    PMAP_LOCK(grand);
    nvaddr = (vm_map_offset_t) nstart;
    vaddr = (vm_map_offset_t) vstart;

#if (__ARM_VMSA__ == 7)
    for (i = 0; i < num_tte; i++) {
        if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
            goto nest_next;
        }

        stte_p = pmap_tte(subord, nvaddr);
        gtte_p = pmap_tte(grand, vaddr);
        *gtte_p = *stte_p;

nest_next:
        nvaddr += ARM_TT_L1_SIZE;
        vaddr += ARM_TT_L1_SIZE;
    }
#else
    for (i = 0; i < num_tte; i++) {
        if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
            goto nest_next;
        }

        stte_p = pmap_tte(subord, nvaddr);
        gtte_p = pmap_tte(grand, vaddr);
        if (gtte_p == PT_ENTRY_NULL) {
            PMAP_UNLOCK(grand);
            kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_TWIG_LEVEL);
            PMAP_LOCK(grand);

            if (kr != KERN_SUCCESS) {
                goto done;
            }

            gtte_p = pmap_tt2e(grand, vaddr);
        }
        *gtte_p = *stte_p;

nest_next:
        vaddr += pt_attr_twig_size(pt_attr);
        nvaddr += pt_attr_twig_size(pt_attr);
    }
#endif

    kr = KERN_SUCCESS;
done:

    stte_p = pmap_tte(grand, vstart);
    FLUSH_PTE_RANGE_STRONG(stte_p, stte_p + num_tte);

#if (__ARM_VMSA__ > 7)
    /*
     * check for overflow on LP64 arch
     */
    assert((size & 0xFFFFFFFF00000000ULL) == 0);
#endif
    PMAP_UPDATE_TLBS(grand, vstart, vend, false);

    PMAP_UNLOCK(grand);
    return kr;
}
kern_return_t
pmap_nest(
    pmap_t grand,
    pmap_t subord,
    addr64_t vstart,
    addr64_t nstart,
    uint64_t size)
{
    kern_return_t kr = KERN_FAILURE;

    PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
        VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
        VM_KERNEL_ADDRHIDE(vstart));

    kr = pmap_nest_internal(grand, subord, vstart, nstart, size);

    PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);

    return kr;
}
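/*
 * Illustrative sketch (hypothetical values, not part of the original source):
 * nesting a shared-region pmap into a task pmap.  The subordinate must have
 * been marked with pmap_set_nested() first, and vstart, nstart and size must
 * all be twig-table aligned or pmap_nest_internal() panics.
 */
#if 0
    pmap_set_nested(shared_region_pmap);
    kr = pmap_nest(task_pmap, shared_region_pmap,
        SHARED_REGION_BASE_ARM64, SHARED_REGION_BASE_ARM64, SHARED_REGION_SIZE_ARM64);
#endif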
/*
 *	kern_return_t pmap_unnest(grand, vaddr)
 *
 *	grand = the pmap that will have the virtual range unnested
 *	vaddr = start of range in pmap to be unnested
 *	size  = size of range in pmap to be unnested
 *
 */
kern_return_t
pmap_unnest(
    pmap_t grand,
    addr64_t vaddr,
    uint64_t size)
{
    return pmap_unnest_options(grand, vaddr, size, 0);
}
MARK_AS_PMAP_TEXT static kern_return_t
pmap_unnest_options_internal(
    pmap_t grand,
    addr64_t vaddr,
    uint64_t size,
    unsigned int option)
{
    vm_map_offset_t start;
    vm_map_offset_t addr;
    tt_entry_t     *tte_p;
    unsigned int    current_index;
    unsigned int    start_index;
    unsigned int    max_index;
    unsigned int    num_tte;
    unsigned int    i;
    addr64_t        vend;

    if (__improbable(os_add_overflow(vaddr, size, &vend))) {
        panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__, grand, vaddr, size);
    }

    VALIDATE_PMAP(grand);

    __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);

    if (((size | vaddr) & pt_attr_twig_offmask(pt_attr)) != 0x0ULL) {
        panic("pmap_unnest(): unaligned request");
    }

    if ((option & PMAP_UNNEST_CLEAN) == 0) {
        if (grand->nested_pmap == NULL) {
            panic("%s: %p has no nested pmap", __func__, grand);
        }

        if ((vaddr < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
            panic("%s: %p: unnest request to region not-fully-nested region [%p, %p)", __func__, grand, (void*)vaddr, (void*)vend);
        }

        PMAP_LOCK(grand->nested_pmap);

        start = vaddr - grand->nested_region_grand_addr + grand->nested_region_subord_addr;
        start_index = (unsigned int)((vaddr - grand->nested_region_grand_addr) >> pt_attr_twig_shift(pt_attr));
        max_index = (unsigned int)(start_index + (size >> pt_attr_twig_shift(pt_attr)));
        num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));

        for (current_index = start_index, addr = start; current_index < max_index; current_index++, addr += pt_attr_twig_size(pt_attr)) {
            pt_entry_t *bpte, *epte, *cpte;

            if (addr < grand->nested_pmap->nested_region_true_start) {
                /* We haven't reached the interesting range. */
                continue;
            }

            if (addr >= grand->nested_pmap->nested_region_true_end) {
                /* We're done with the interesting range. */
                break;
            }

            bpte = pmap_pte(grand->nested_pmap, addr);
            epte = bpte + (pt_attr_leaf_index_mask(pt_attr) >> pt_attr_leaf_shift(pt_attr));

            if (!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap)) {
                setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);

                for (cpte = bpte; cpte <= epte; cpte++) {
                    pmap_paddr_t    pa;
                    int             pai = 0;
                    boolean_t       managed = FALSE;
                    pt_entry_t      spte;

                    if ((*cpte != ARM_PTE_TYPE_FAULT)
                        && (!ARM_PTE_IS_COMPRESSED(*cpte, cpte))) {
                        spte = *cpte;
                        while (!managed) {
                            pa = pte_to_pa(spte);
                            if (!pa_valid(pa)) {
                                break;
                            }
                            pai = (int)pa_index(pa);
                            LOCK_PVH(pai);
                            spte = *cpte;
                            pa = pte_to_pa(spte);
                            if (pai == (int)pa_index(pa)) {
                                managed = TRUE;
                                break; // Leave the PVH locked as we'll unlock it after we update the PTE
                            }
                            UNLOCK_PVH(pai);
                        }

                        if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
                            WRITE_PTE_FAST(cpte, (spte | ARM_PTE_NG));
                        }

                        if (managed) {
                            ASSERT_PVH_LOCKED(pai);
                            UNLOCK_PVH(pai);
                        }
                    }
                }
            }

            FLUSH_PTE_RANGE_STRONG(bpte, epte);
            flush_mmu_tlb_region_asid_async(start, (unsigned)size, grand->nested_pmap);
        }

        sync_tlb_flush();

        PMAP_UNLOCK(grand->nested_pmap);
    }

    PMAP_LOCK(grand);

    /*
     *	invalidate all pdes for segment at vaddr in pmap grand
     */
    start = vaddr;
    addr = vaddr;

    num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));

    for (i = 0; i < num_tte; i++, addr += pt_attr_twig_size(pt_attr)) {
        if (addr < grand->nested_pmap->nested_region_true_start) {
            /* We haven't reached the interesting range. */
            continue;
        }

        if (addr >= grand->nested_pmap->nested_region_true_end) {
            /* We're done with the interesting range. */
            break;
        }

        tte_p = pmap_tte(grand, addr);
        *tte_p = ARM_TTE_TYPE_FAULT;
    }

    tte_p = pmap_tte(grand, start);
    FLUSH_PTE_RANGE_STRONG(tte_p, tte_p + num_tte);
    PMAP_UPDATE_TLBS(grand, start, vend, false);

    PMAP_UNLOCK(grand);

    return KERN_SUCCESS;
}
kern_return_t
pmap_unnest_options(
    pmap_t grand,
    addr64_t vaddr,
    uint64_t size,
    unsigned int option)
{
    kern_return_t kr = KERN_FAILURE;

    PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
        VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));

    kr = pmap_unnest_options_internal(grand, vaddr, size, option);

    PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);

    return kr;
}
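/*
 * Illustrative sketch (hypothetical caller, not part of the original source):
 * tearing a shared region back out of a task pmap.  Passing PMAP_UNNEST_CLEAN
 * skips the pass that marks the nested PTEs non-global, which is typically
 * only appropriate when the whole address space is being destroyed anyway.
 */
#if 0
    pmap_unnest_options(task_pmap, SHARED_REGION_BASE_ARM64, SHARED_REGION_SIZE_ARM64, 0);
#endif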
boolean_t
pmap_adjust_unnest_parameters(
    __unused pmap_t p,
    __unused vm_map_offset_t *s,
    __unused vm_map_offset_t *e)
{
    return TRUE; /* to get to log_unnest_badness()... */
}
/*
 *	disable no-execute capability on
 *	the specified pmap
 */
#if DEVELOPMENT || DEBUG
void
pmap_disable_NX(
    pmap_t pmap)
{
    pmap->nx_enabled = FALSE;
}
#else
void
pmap_disable_NX(
    __unused pmap_t pmap)
{
}
#endif

void
pt_fake_zone_init(
    int zone_index)
{
    pt_fake_zone_index = zone_index;
}

void
pt_fake_zone_info(
    int *count,
    vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
    uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
{
    *count      = inuse_pmap_pages_count;
    *cur_size   = PAGE_SIZE * (inuse_pmap_pages_count);
    *max_size   = PAGE_SIZE * (inuse_pmap_pages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
    *elem_size  = PAGE_SIZE;
    *alloc_size = PAGE_SIZE;
    *sum_size   = (alloc_pmap_pages_count) * PAGE_SIZE;

    *collectable = 1;
    *exhaustable = 0;
    *caller_acct = 1;
}
/*
 *	flush a range of hardware TLB entries.
 *	NOTE: assumes the smallest TLB entry in use will be for
 *	an ARM small page (4K).
 */

#define ARM_FULL_TLB_FLUSH_THRESHOLD 64

#if __ARM_RANGE_TLBI__
#define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
#define ARM64_FULL_TLB_FLUSH_THRESHOLD  ARM64_16K_TLB_RANGE_PAGES
#else
#define ARM64_FULL_TLB_FLUSH_THRESHOLD  256
#endif // __ARM_RANGE_TLBI__

static void
flush_mmu_tlb_region_asid_async(
    vm_offset_t va,
    unsigned length,
    pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
    vm_offset_t end = va + length;
    uint32_t    asid;

    asid = pmap->hw_asid;

    if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
        boolean_t flush_all = FALSE;

        if ((asid == 0) || (pmap->nested == TRUE)) {
            flush_all = TRUE;
        }
        if (flush_all) {
            flush_mmu_tlb_async();
        } else {
            flush_mmu_tlb_asid_async(asid);
        }

        return;
    }
    if (pmap->nested == TRUE) {
#if !__ARM_MP_EXT__
        flush_mmu_tlb();
#else
        va = arm_trunc_page(va);
        while (va < end) {
            flush_mmu_tlb_mva_entries_async(va);
            va += ARM_SMALL_PAGE_SIZE;
        }
#endif
        return;
    }

    va = arm_trunc_page(va) | (asid & 0xff);
    flush_mmu_tlb_entries_async(va, end);

#else
    unsigned npages = length >> pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
    uint32_t asid;

    asid = pmap->hw_asid;

    if (npages > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
        boolean_t flush_all = FALSE;

        if ((asid == 0) || (pmap->nested == TRUE)) {
            flush_all = TRUE;
        }
        if (flush_all) {
            flush_mmu_tlb_async();
        } else {
            flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
        }
        return;
    }
#if __ARM_RANGE_TLBI__
    if (npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
        va = generate_rtlbi_param(npages, asid, va);
        if (pmap->nested == TRUE) {
            flush_mmu_tlb_allrange_async(va);
        } else {
            flush_mmu_tlb_range_async(va);
        }
        return;
    }
#endif
    vm_offset_t end = tlbi_asid(asid) | tlbi_addr(va + length);
    va = tlbi_asid(asid) | tlbi_addr(va);
    if (pmap->nested == TRUE) {
        flush_mmu_tlb_allentries_async(va, end);
    } else {
        flush_mmu_tlb_entries_async(va, end);
    }

#endif
}
MARK_AS_PMAP_TEXT static void
flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap)
{
#if	(__ARM_VMSA__ == 7)
	flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
	flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
	flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
	flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
#else
	flush_mmu_tlb_entry_async(tlbi_addr(va & ~pt_attr_twig_offmask(pmap_get_pt_attr(pmap))) | tlbi_asid(pmap->hw_asid));
#endif
}

MARK_AS_PMAP_TEXT static void
flush_mmu_tlb_full_asid_async(pmap_t pmap)
{
#if (__ARM_VMSA__ == 7)
	flush_mmu_tlb_asid_async(pmap->hw_asid);
#else /* (__ARM_VMSA__ == 7) */
	flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
#endif /* (__ARM_VMSA__ == 7) */
}

void
flush_mmu_tlb_region(
	vm_offset_t va,
	unsigned length)
{
	flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
	sync_tlb_flush();
}
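
/*
 * Illustrative sketch (not from the original source): the *_async flush
 * helpers above only post TLB invalidate requests; a caller is expected to
 * follow them with a synchronizing barrier before relying on the new
 * translations.  The helper name below is hypothetical and assumes
 * sync_tlb_flush() is the barrier used elsewhere in this file.
 */
#if 0	/* illustrative only, not compiled */
static void
example_flush_user_range(pmap_t pmap, vm_offset_t va, unsigned length)
{
	/* Post the per-ASID invalidations for [va, va + length)... */
	flush_mmu_tlb_region_asid_async(va, length, pmap);
	/* ...then wait for all outstanding TLB maintenance to complete. */
	sync_tlb_flush();
}
#endif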
static pmap_io_range_t*
pmap_find_io_attr(pmap_paddr_t paddr)
{
	pmap_io_range_t find_range = {.addr = paddr & ~PAGE_MASK, .len = PAGE_SIZE};
	unsigned int begin = 0, end = num_io_rgns - 1;
	if ((num_io_rgns == 0) || (paddr < io_attr_table[begin].addr) ||
	    (paddr >= (io_attr_table[end].addr + io_attr_table[end].len))) {
		return NULL;
	}

	for (;;) {
		unsigned int middle = (begin + end) / 2;
		int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
		if (cmp == 0) {
			return &io_attr_table[middle];
		} else if (begin == end) {
			break;
		} else if (cmp > 0) {
			begin = middle + 1;
		} else {
			end = middle;
		}
	}

	return NULL;
}
unsigned int
pmap_cache_attributes(
	ppnum_t pn)
{
	pmap_paddr_t    paddr;
	int             pai;
	unsigned int    result;
	pp_attr_t       pp_attr_current;

	paddr = ptoa(pn);

	assert(vm_last_phys > vm_first_phys); // Check that pmap has been bootstrapped

	if (!pa_valid(paddr)) {
		pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
		return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
	}

	result = VM_WIMG_DEFAULT;

	pai = (int)pa_index(paddr);

	pp_attr_current = pp_attr_table[pai];
	if (pp_attr_current & PP_ATTR_WIMG_MASK) {
		result = pp_attr_current & PP_ATTR_WIMG_MASK;
	}
	return result;
}
MARK_AS_PMAP_TEXT static void
pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
{
	if ((wimg_bits_prev != wimg_bits_new)
	    && ((wimg_bits_prev == VM_WIMG_COPYBACK)
	    || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
	    && (wimg_bits_new != VM_WIMG_COPYBACK))
	    || ((wimg_bits_prev == VM_WIMG_WTHRU)
	    && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
		pmap_sync_page_attributes_phys(pn);
	}

	if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
		pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
	}
}
MARK_AS_PMAP_TEXT static __unused void
pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
{
	pmap_paddr_t paddr = ptoa(pn);
	int pai = (int)pa_index(paddr);

	if (__improbable(!pa_valid(paddr))) {
		panic("%s called on non-managed page 0x%08x", __func__, pn);
	}

	LOCK_PVH(pai);

	pmap_update_cache_attributes_locked(pn, new_cacheattr);

	UNLOCK_PVH(pai);

	pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
}

void *
pmap_map_compressor_page(ppnum_t pn)
{
#if __ARM_PTE_PHYSMAP__
	unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
	if (cacheattr != VM_WIMG_DEFAULT) {
		pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
	}
#endif
	return (void*)phystokv(ptoa(pn));
}

void
pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
{
#if __ARM_PTE_PHYSMAP__
	unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
	if (cacheattr != VM_WIMG_DEFAULT) {
		pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
	}
#endif
}
MARK_AS_PMAP_TEXT static boolean_t
pmap_batch_set_cache_attributes_internal(
	ppnum_t pn,
	unsigned int cacheattr,
	unsigned int page_cnt,
	unsigned int page_index,
	boolean_t doit,
	unsigned int *res)
{
	pmap_paddr_t    paddr;
	int             pai;
	pp_attr_t       pp_attr_current;
	pp_attr_t       pp_attr_template;
	unsigned int    wimg_bits_prev, wimg_bits_new;

	if (cacheattr & VM_WIMG_USE_DEFAULT) {
		cacheattr = VM_WIMG_DEFAULT;
	}

	if ((doit == FALSE) && (*res == 0)) {
		pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
		*res = page_cnt;
		pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
		if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt << PAGE_SHIFT) == FALSE) {
			return FALSE;
		}
	}

	paddr = ptoa(pn);

	if (!pa_valid(paddr)) {
		panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn);
	}

	pai = (int)pa_index(paddr);

	if (doit) {
		LOCK_PVH(pai);
	}

	do {
		pp_attr_current = pp_attr_table[pai];
		wimg_bits_prev = VM_WIMG_DEFAULT;
		if (pp_attr_current & PP_ATTR_WIMG_MASK) {
			wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
		}

		pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));

		if (!doit) {
			break;
		}

		/* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
		 * to avoid losing simultaneous updates to other bits like refmod. */
	} while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));

	wimg_bits_new = VM_WIMG_DEFAULT;
	if (pp_attr_template & PP_ATTR_WIMG_MASK) {
		wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
	}

	if (doit) {
		if (wimg_bits_new != wimg_bits_prev) {
			pmap_update_cache_attributes_locked(pn, cacheattr);
		}
		UNLOCK_PVH(pai);
		if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
			pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
		}
	} else {
		if (wimg_bits_new == VM_WIMG_COPYBACK) {
			return FALSE;
		}
		if (wimg_bits_prev == wimg_bits_new) {
			pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
			*res = *res - 1;
			pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
			if (!platform_cache_batch_wimg(wimg_bits_new, (*res) << PAGE_SHIFT)) {
				return FALSE;
			}
		}
		return TRUE;
	}

	if (page_cnt == (page_index + 1)) {
		wimg_bits_prev = VM_WIMG_COPYBACK;
		if (((wimg_bits_prev != wimg_bits_new))
		    && ((wimg_bits_prev == VM_WIMG_COPYBACK)
		    || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
		    && (wimg_bits_new != VM_WIMG_COPYBACK))
		    || ((wimg_bits_prev == VM_WIMG_WTHRU)
		    && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
			platform_cache_flush_wimg(wimg_bits_new);
		}
	}

	return TRUE;
}

boolean_t
pmap_batch_set_cache_attributes(
	ppnum_t pn,
	unsigned int cacheattr,
	unsigned int page_cnt,
	unsigned int page_index,
	boolean_t doit,
	unsigned int *res)
{
	return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
}
MARK_AS_PMAP_TEXT static void
pmap_set_cache_attributes_priv(
	ppnum_t pn,
	unsigned int cacheattr,
	boolean_t external __unused)
{
	pmap_paddr_t    paddr;
	int             pai;
	pp_attr_t       pp_attr_current;
	pp_attr_t       pp_attr_template;
	unsigned int    wimg_bits_prev, wimg_bits_new;

	paddr = ptoa(pn);

	if (!pa_valid(paddr)) {
		return;                         /* Not a managed page. */
	}

	if (cacheattr & VM_WIMG_USE_DEFAULT) {
		cacheattr = VM_WIMG_DEFAULT;
	}

	pai = (int)pa_index(paddr);

	LOCK_PVH(pai);

	do {
		pp_attr_current = pp_attr_table[pai];
		wimg_bits_prev = VM_WIMG_DEFAULT;
		if (pp_attr_current & PP_ATTR_WIMG_MASK) {
			wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
		}

		pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));

		/* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
		 * to avoid losing simultaneous updates to other bits like refmod. */
	} while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));

	wimg_bits_new = VM_WIMG_DEFAULT;
	if (pp_attr_template & PP_ATTR_WIMG_MASK) {
		wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
	}

	if (wimg_bits_new != wimg_bits_prev) {
		pmap_update_cache_attributes_locked(pn, cacheattr);
	}

	UNLOCK_PVH(pai);

	pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
}

MARK_AS_PMAP_TEXT static void
pmap_set_cache_attributes_internal(
	ppnum_t pn,
	unsigned int cacheattr)
{
	pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
}

void
pmap_set_cache_attributes(
	ppnum_t pn,
	unsigned int cacheattr)
{
	pmap_set_cache_attributes_internal(pn, cacheattr);
}
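
/*
 * Illustrative sketch (not from the original source): the WIMG update above
 * is done as a 16-bit compare-and-swap loop so concurrent updates to other
 * pp_attr_table bits (e.g. refmod) are not lost.  The helper name below is
 * hypothetical; it only restates that pattern in isolation.
 */
#if 0	/* illustrative only, not compiled */
static void
example_update_pp_attr_wimg(int pai, unsigned int cacheattr)
{
	pp_attr_t old_attr, new_attr;

	do {
		old_attr = pp_attr_table[pai];
		/* Preserve every bit except the WIMG field, then install the new WIMG. */
		new_attr = (old_attr & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & VM_WIMG_MASK);
	} while (!OSCompareAndSwap16(old_attr, new_attr, &pp_attr_table[pai]));
}
#endif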
MARK_AS_PMAP_TEXT void
pmap_update_cache_attributes_locked(
	ppnum_t ppnum,
	unsigned attributes)
{
	pmap_paddr_t    phys = ptoa(ppnum);
	pv_entry_t      *pve_p;
	pt_entry_t      *pte_p;
	pv_entry_t      **pv_h;
	pt_entry_t      tmplate;
	unsigned int    pai;
	boolean_t       tlb_flush_needed = FALSE;

	PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);

#if __ARM_PTE_PHYSMAP__
	vm_offset_t kva = phystokv(phys);
	pte_p = pmap_pte(kernel_pmap, kva);

	tmplate = *pte_p;
	tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
	tmplate |= wimg_to_pte(attributes);
#if (__ARM_VMSA__ > 7)
	if (tmplate & ARM_PTE_HINT_MASK) {
		panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
		    __FUNCTION__, pte_p, (void *)kva, tmplate);
	}
#endif
	WRITE_PTE_STRONG(pte_p, tmplate);
	flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
	tlb_flush_needed = TRUE;
#endif

	pai = (unsigned int)pa_index(phys);

	pv_h = pai_to_pvh(pai);

	pte_p = PT_ENTRY_NULL;
	pve_p = PV_ENTRY_NULL;
	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		pte_p = pvh_ptep(pv_h);
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		pve_p = pvh_list(pv_h);
		pte_p = PT_ENTRY_NULL;
	}

	while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
		vm_map_address_t va;
		pmap_t pmap;

		if (pve_p != PV_ENTRY_NULL) {
			pte_p = pve_get_ptep(pve_p);
		}
#ifdef PVH_FLAG_IOMMU
		if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
			goto cache_skip_pve;
		}
#endif
		pmap = ptep_get_pmap(pte_p);
		va = ptep_get_va(pte_p);

		tmplate = *pte_p;
		tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
		tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes);

		WRITE_PTE_STRONG(pte_p, tmplate);
		pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
		tlb_flush_needed = TRUE;

#ifdef PVH_FLAG_IOMMU
cache_skip_pve:
#endif
		pte_p = PT_ENTRY_NULL;
		if (pve_p != PV_ENTRY_NULL) {
			pve_p = PVE_NEXT_PTR(pve_next(pve_p));
		}
	}
	if (tlb_flush_needed) {
		sync_tlb_flush();
	}

	PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);
}
#if (__ARM_VMSA__ == 7)
vm_map_address_t
pmap_create_sharedpage(
	void)
{
	pmap_paddr_t    pa;
	kern_return_t   kr;

	(void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
	memset((char *) phystokv(pa), 0, PAGE_SIZE);

	kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);

	return (vm_map_address_t)phystokv(pa);
}
#else
static void
pmap_update_tt3e(
	pmap_t pmap,
	vm_address_t address,
	tt_entry_t template)
{
	tt_entry_t *ptep, pte;

	ptep = pmap_tt3e(pmap, address);
	if (ptep == NULL) {
		panic("%s: no ptep?\n", __FUNCTION__);
	}

	pte = *ptep;
	pte = tte_to_pa(pte) | template;
	WRITE_PTE_STRONG(ptep, pte);
}

/* Note absence of non-global bit */
#define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
	        | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
	        | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
	        | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)

vm_map_address_t
pmap_create_sharedpage(
	void)
{
	kern_return_t   kr;
	pmap_paddr_t    pa = 0;

	(void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);

	memset((char *) phystokv(pa), 0, PAGE_SIZE);

#ifdef CONFIG_XNUPOST
	/*
	 * The kernel pmap maintains a user accessible mapping of the commpage
	 * to test PAN.
	 */
	kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);

	/*
	 * This mapping should not be global (as we only expect to reference it
	 * during testing).
	 */
	pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);

#if KASAN
	kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
#endif
#endif /* CONFIG_XNUPOST */

	/*
	 * In order to avoid burning extra pages on mapping the shared page, we
	 * create a dedicated pmap for the shared page.  We forcibly nest the
	 * translation tables from this pmap into other pmaps.  The level we
	 * will nest at depends on the MMU configuration (page size, TTBR range,
	 * etc.).
	 *
	 * Note that this is NOT "the nested pmap" (which is used to nest the
	 * shared cache).
	 *
	 * Note that we update parameters of the entry for our unique needs (NG
	 * entry, etc.).
	 */
	sharedpage_pmap = pmap_create_options(NULL, 0x0, 0);
	assert(sharedpage_pmap != NULL);

	/* The user 64-bit mapping... */
	kr = pmap_enter(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

	/* ...and the user 32-bit mapping. */
	kr = pmap_enter(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
	assert(kr == KERN_SUCCESS);
	pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);

	/* For manipulation in kernel, go straight to physical page */
	return (vm_map_address_t)phystokv(pa);
}
/*
 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
 * with user controlled TTEs.
 */
#if (ARM_PGSHIFT == 14)
static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= MACH_VM_MAX_ADDRESS);
static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
#elif (ARM_PGSHIFT == 12)
static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
#else
#error Nested shared page mapping is unsupported on this config
#endif
MARK_AS_PMAP_TEXT static kern_return_t
pmap_insert_sharedpage_internal(
	pmap_t pmap)
{
	kern_return_t kr = KERN_SUCCESS;
	vm_offset_t sharedpage_vaddr;
	pt_entry_t *ttep, *src_ttep;
	int options = 0;

	VALIDATE_PMAP(pmap);

#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
#endif

	if (pmap_is_64bit(pmap)) {
		sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
	} else {
		sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
	}

	PMAP_LOCK(pmap);

	/*
	 * For 4KB pages, we can force the commpage to nest at the level one
	 * page table, as each entry is 1GB (i.e, there will be no overlap
	 * with regular userspace mappings).  For 16KB pages, each level one
	 * entry is 64GB, so we must go to the second level entry (32MB) in
	 * order to nest.
	 */
#if (ARM_PGSHIFT == 12)
	(void)options;

	/* Just slam in the L1 entry.  */
	ttep = pmap_tt1e(pmap, sharedpage_vaddr);

	if (*ttep != ARM_PTE_EMPTY) {
		panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
	}

	src_ttep = pmap_tt1e(sharedpage_pmap, sharedpage_vaddr);
#elif (ARM_PGSHIFT == 14)
	/* Allocate for the L2 entry if necessary, and slam it into place. */
	/*
	 * As long as we are use a three level page table, the first level
	 * should always exist, so we don't need to check for it.
	 */
	while (*pmap_tt1e(pmap, sharedpage_vaddr) == ARM_PTE_EMPTY) {
		PMAP_UNLOCK(pmap);

		kr = pmap_expand(pmap, sharedpage_vaddr, options, PMAP_TT_L2_LEVEL);

		if (kr != KERN_SUCCESS) {
			panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
		}

		PMAP_LOCK(pmap);
	}

	ttep = pmap_tt2e(pmap, sharedpage_vaddr);

	if (*ttep != ARM_PTE_EMPTY) {
		panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
	}

	src_ttep = pmap_tt2e(sharedpage_pmap, sharedpage_vaddr);
#endif

	*ttep = *src_ttep;
	FLUSH_PTE_STRONG(ttep);

	/* TODO: Should we flush in the 64-bit case? */
	flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);

#if (ARM_PGSHIFT == 12)
	flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
#elif (ARM_PGSHIFT == 14)
	flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
#endif
	sync_tlb_flush();

	PMAP_UNLOCK(pmap);

	return kr;
}
static void
pmap_unmap_sharedpage(
	pmap_t pmap)
{
	pt_entry_t *ttep;
	vm_offset_t sharedpage_vaddr;

#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
#error We assume a single page.
#endif

	if (pmap_is_64bit(pmap)) {
		sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
	} else {
		sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
	}

#if (ARM_PGSHIFT == 12)
	ttep = pmap_tt1e(pmap, sharedpage_vaddr);

	if (ttep == NULL) {
		return;
	}

	/* It had better be mapped to the shared page */
	if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt1e(sharedpage_pmap, sharedpage_vaddr)) {
		panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
	}
#elif (ARM_PGSHIFT == 14)
	ttep = pmap_tt2e(pmap, sharedpage_vaddr);

	if (ttep == NULL) {
		return;
	}

	/* It had better be mapped to the shared page */
	if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt2e(sharedpage_pmap, sharedpage_vaddr)) {
		panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
	}
#endif

	*ttep = ARM_TTE_EMPTY;
	flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);

#if (ARM_PGSHIFT == 12)
	flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
#elif (ARM_PGSHIFT == 14)
	flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
#endif
	sync_tlb_flush();
}

void
pmap_insert_sharedpage(
	pmap_t pmap)
{
	pmap_insert_sharedpage_internal(pmap);
}

static boolean_t
pmap_is_64bit(
	pmap_t pmap)
{
	return pmap->is_64bit;
}

/* ARMTODO -- an implementation that accounts for
 * holes in the physical map, if any.
 */
boolean_t
pmap_valid_page(
	ppnum_t pn)
{
	return pa_valid(ptoa(pn));
}
MARK_AS_PMAP_TEXT static boolean_t
pmap_is_empty_internal(
	pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end)
{
	vm_map_offset_t block_start, block_end;
	tt_entry_t *tte_p;

	if (pmap == NULL) {
		return TRUE;
	}

	VALIDATE_PMAP(pmap);

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	unsigned int initial_not_in_kdp = not_in_kdp;

	if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
		PMAP_LOCK(pmap);
	}

#if (__ARM_VMSA__ == 7)
	if (tte_index(pmap, pt_attr, va_end) >= pmap->tte_index_max) {
		if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
			PMAP_UNLOCK(pmap);
		}
		return TRUE;
	}
#endif

	/* TODO: This will be faster if we increment ttep at each level. */
	block_start = va_start;

	while (block_start < va_end) {
		pt_entry_t *bpte_p, *epte_p;
		pt_entry_t *pte_p;

		block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
		if (block_end > va_end) {
			block_end = va_end;
		}

		tte_p = pmap_tte(pmap, block_start);
		if ((tte_p != PT_ENTRY_NULL)
		    && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
			pte_p = (pt_entry_t *) ttetokv(*tte_p);
			bpte_p = &pte_p[pte_index(pmap, pt_attr, block_start)];
			epte_p = &pte_p[pte_index(pmap, pt_attr, block_end)];

			for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
				if (*pte_p != ARM_PTE_EMPTY) {
					if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
						PMAP_UNLOCK(pmap);
					}
					return FALSE;
				}
			}
		}
		block_start = block_end;
	}

	if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
		PMAP_UNLOCK(pmap);
	}

	return TRUE;
}

boolean_t
pmap_is_empty(
	pmap_t pmap,
	vm_map_offset_t va_start,
	vm_map_offset_t va_end)
{
	return pmap_is_empty_internal(pmap, va_start, va_end);
}
	unsigned int option)
{
	return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
}

vm_map_offset_t
pmap_max_64bit_offset(
	__unused unsigned int option)
{
	vm_map_offset_t max_offset_ret = 0;

#if defined(__arm64__)
	const vm_map_offset_t min_max_offset = SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000; // end of shared region + 512MB for various purposes
	if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
		max_offset_ret = arm64_pmap_max_offset_default;
	} else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
		max_offset_ret = min_max_offset;
	} else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
		max_offset_ret = MACH_VM_MAX_ADDRESS;
	} else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
		if (arm64_pmap_max_offset_default) {
			max_offset_ret = arm64_pmap_max_offset_default;
		} else if (max_mem > 0xC0000000) {
			max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
		} else if (max_mem > 0x40000000) {
			max_offset_ret = min_max_offset + 0x38000000;  // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
		} else {
			max_offset_ret = min_max_offset;
		}
	} else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
		if (arm64_pmap_max_offset_default) {
			// Allow the boot-arg to override jumbo size
			max_offset_ret = arm64_pmap_max_offset_default;
		} else {
			max_offset_ret = MACH_VM_MAX_ADDRESS;     // Max offset is 64GB for pmaps with special "jumbo" blessing
		}
	} else {
		panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
	}

	assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
	assert(max_offset_ret >= min_max_offset);
#else
	panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
#endif

	return max_offset_ret;
}

vm_map_offset_t
pmap_max_32bit_offset(
	unsigned int option)
{
	vm_map_offset_t max_offset_ret = 0;

	if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
		max_offset_ret = arm_pmap_max_offset_default;
	} else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
		max_offset_ret = 0x66000000;
	} else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
		max_offset_ret = VM_MAX_ADDRESS;
	} else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
		if (arm_pmap_max_offset_default) {
			max_offset_ret = arm_pmap_max_offset_default;
		} else if (max_mem > 0x20000000) {
			max_offset_ret = 0x80000000;
		} else {
			max_offset_ret = 0x66000000;
		}
	} else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
		max_offset_ret = 0x80000000;
	} else {
		panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
	}

	assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
	return max_offset_ret;
}
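
/*
 * Illustrative sketch (not from the original source): how a caller might
 * select a VM map ceiling using the option constants handled above.  The
 * function name below is hypothetical.
 */
#if 0	/* illustrative only, not compiled */
static vm_map_offset_t
example_pick_map_ceiling(boolean_t is64)
{
	/* ARM_PMAP_MAX_OFFSET_DEVICE scales the ceiling with max_mem;
	 * ARM_PMAP_MAX_OFFSET_MIN/_MAX give the fixed lower/upper bounds. */
	return pmap_max_offset(is64, ARM_PMAP_MAX_OFFSET_DEVICE);
}
#endif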
#if CONFIG_DTRACE
/*
 * Constrain DTrace copyin/copyout actions
 */
extern kern_return_t dtrace_copyio_preflight(addr64_t);
extern kern_return_t dtrace_copyio_postflight(addr64_t);

kern_return_t
dtrace_copyio_preflight(
	__unused addr64_t va)
{
	if (current_map() == kernel_map) {
		return KERN_FAILURE;
	} else {
		return KERN_SUCCESS;
	}
}

kern_return_t
dtrace_copyio_postflight(
	__unused addr64_t va)
{
	return KERN_SUCCESS;
}
#endif /* CONFIG_DTRACE */

void
pmap_flush_context_init(__unused pmap_flush_context *pfc)
{
}

void
pmap_flush(
	__unused pmap_flush_context *cpus_to_flush)
{
	/* not implemented yet */
	return;
}

static void __unused
pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}

static void __unused
pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
{
}
#define PMAP_RESIDENT_INVALID   ((mach_vm_size_t)-1)

MARK_AS_PMAP_TEXT static mach_vm_size_t
pmap_query_resident_internal(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	mach_vm_size_t *compressed_bytes_p)
{
	mach_vm_size_t  resident_bytes = 0;
	mach_vm_size_t  compressed_bytes = 0;

	pt_entry_t *bpte, *epte;
	pt_entry_t *pte_p;
	tt_entry_t *tte_p;

	if (pmap == NULL) {
		return PMAP_RESIDENT_INVALID;
	}

	VALIDATE_PMAP(pmap);

	/* Ensure that this request is valid, and addresses exactly one TTE. */
	if (__improbable((start % ARM_PGBYTES) || (end % ARM_PGBYTES))) {
		panic("%s: address range %p, %p not page-aligned", __func__, (void*)start, (void*)end);
	}

	if (__improbable((end < start) || ((end - start) > (PTE_PGENTRIES * ARM_PGBYTES)))) {
		panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
	}

	PMAP_LOCK(pmap);
	tte_p = pmap_tte(pmap, start);
	if (tte_p == (tt_entry_t *) NULL) {
		PMAP_UNLOCK(pmap);
		return PMAP_RESIDENT_INVALID;
	}
	if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
		__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
		pte_p = (pt_entry_t *) ttetokv(*tte_p);
		bpte = &pte_p[pte_index(pmap, pt_attr, start)];
		epte = &pte_p[pte_index(pmap, pt_attr, end)];

		for (; bpte < epte; bpte++) {
			if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
				compressed_bytes += ARM_PGBYTES;
			} else if (pa_valid(pte_to_pa(*bpte))) {
				resident_bytes += ARM_PGBYTES;
			}
		}
	}
	PMAP_UNLOCK(pmap);

	if (compressed_bytes_p) {
		pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
		*compressed_bytes_p += compressed_bytes;
		pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
	}

	return resident_bytes;
}
mach_vm_size_t
pmap_query_resident(
	pmap_t pmap,
	vm_map_address_t start,
	vm_map_address_t end,
	mach_vm_size_t *compressed_bytes_p)
{
	mach_vm_size_t total_resident_bytes;
	mach_vm_size_t compressed_bytes;
	vm_map_address_t va;

	if (pmap == PMAP_NULL) {
		if (compressed_bytes_p) {
			*compressed_bytes_p = 0;
		}
		return 0;
	}

	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	total_resident_bytes = 0;
	compressed_bytes = 0;

	PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
	    VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
	    VM_KERNEL_ADDRHIDE(end));

	for (va = start; va < end;) {
		vm_map_address_t l;
		mach_vm_size_t resident_bytes;

		l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));

		if (l > end) {
			l = end;
		}
		resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
		if (resident_bytes == PMAP_RESIDENT_INVALID) {
			break;
		}

		total_resident_bytes += resident_bytes;

		va = l;
	}

	if (compressed_bytes_p) {
		*compressed_bytes_p = compressed_bytes;
	}

	PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
	    total_resident_bytes);

	return total_resident_bytes;
}
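
/*
 * Illustrative sketch (not from the original source): querying how much of a
 * VA range is resident versus compressed.  The function name is hypothetical;
 * it only demonstrates the calling convention of pmap_query_resident().
 */
#if 0	/* illustrative only, not compiled */
static void
example_report_residency(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
	mach_vm_size_t compressed = 0;
	mach_vm_size_t resident;

	/* compressed accumulates across the twig-sized chunks walked internally. */
	resident = pmap_query_resident(pmap, start, end, &compressed);
	kprintf("range %p-%p: %llu bytes resident, %llu bytes compressed\n",
	    (void *)start, (void *)end, resident, compressed);
}
#endif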
void
pmap_check_ledgers(
	pmap_t pmap)
{
	int     pid;
	char    *procname;

	if (pmap->pmap_pid == 0) {
		/*
		 * This pmap was not or is no longer fully associated
		 * with a task (e.g. the old pmap after a fork()/exec() or
		 * spawn()).  Its "ledger" still points at a task that is
		 * now using a different (and active) address space, so
		 * we can't check that all the pmap ledgers are balanced here.
		 *
		 * If the "pid" is set, that means that we went through
		 * pmap_set_process() in task_terminate_internal(), so
		 * this task's ledger should not have been re-used and
		 * all the pmap ledgers should be back to 0.
		 */
		return;
	}

	pid = pmap->pmap_pid;
	procname = pmap->pmap_procname;

	vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);

	PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
	PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
	PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
	PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
	PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
	PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
	PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
}
#endif /* MACH_ASSERT */

void
pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
{
}
#define PROF_START  uint64_t t, nanot;\
	            t = mach_absolute_time();

#define PROF_END    absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
	            kprintf("%s: took %llu ns\n", __func__, nanot);

#define PMAP_PGTRACE_LOCK(p)                                \
    do {                                                    \
	*(p) = ml_set_interrupts_enabled(false);            \
	if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
	ml_set_interrupts_enabled(*(p));                    \
    } while (true)

#define PMAP_PGTRACE_UNLOCK(p)                  \
    do {                                        \
	simple_unlock(&(pmap_pgtrace.lock));    \
	ml_set_interrupts_enabled(*(p));        \
    } while (0)

#define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
    do {                                    \
	*(pte_p) = (pte_entry);             \
	FLUSH_PTE(pte_p);                   \
    } while (0)

#define PGTRACE_MAX_MAP 16      // maximum supported va to same pa

typedef enum {
	UNDEFINED,
	PA_UNDEFINED,
	VA_UNDEFINED,
	DEFINED
} pmap_pgtrace_page_state_t;

typedef struct {
	queue_chain_t chain;

	/*
	 *   pa             - pa
	 *   maps           - list of va maps to upper pa
	 *   map_pool       - map pool
	 *   map_waste      - waste can
	 *   state          - state
	 */
	pmap_paddr_t pa;
	queue_head_t maps;
	queue_head_t map_pool;
	queue_head_t map_waste;
	pmap_pgtrace_page_state_t state;
} pmap_pgtrace_page_t;

static struct {
	/*
	 *   pages       - list of tracing page info
	 */
	queue_head_t pages;
	decl_simple_lock_data(, lock);
} pmap_pgtrace = {};

void
pmap_pgtrace_init(void)
{
	queue_init(&(pmap_pgtrace.pages));
	simple_lock_init(&(pmap_pgtrace.lock), 0);

	boolean_t enabled;

	if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
		pgtrace_enabled = enabled;
	}
}
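
/*
 * Illustrative sketch (not from the original source): the pgtrace lock macros
 * above stash the previous interrupt-enable state through their pointer
 * argument, so every critical section passes the same local flag to both
 * macros.  The function name below is hypothetical and only documents the
 * expected usage pattern.
 */
#if 0	/* illustrative only, not compiled */
static void
example_pgtrace_critical_section(void)
{
	bool ints;      /* previous interrupt state, filled in by the LOCK macro */

	PMAP_PGTRACE_LOCK(&ints);
	/* ... inspect or update pmap_pgtrace.pages here ... */
	PMAP_PGTRACE_UNLOCK(&ints);
}
#endif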
// find a page with given pa - pmap_pgtrace should be locked
inline static pmap_pgtrace_page_t *
pmap_pgtrace_find_page(pmap_paddr_t pa)
{
	queue_head_t *q = &(pmap_pgtrace.pages);
	pmap_pgtrace_page_t *p;

	queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
		if (p->state == UNDEFINED) {
			continue;
		}
		if (p->state == PA_UNDEFINED) {
			continue;
		}
		if (p->pa == pa) {
			return p;
		}
	}

	return NULL;
}
// enter clone of given pmap, va page and range - pmap should be locked
static bool
pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
{
	bool ints;
	queue_head_t *q = &(pmap_pgtrace.pages);
	pmap_paddr_t pa_page;
	pt_entry_t *ptep, *cptep;
	pmap_pgtrace_page_t *p;
	bool found = false;

	PMAP_ASSERT_LOCKED(pmap);
	assert(va_page == arm_trunc_page(va_page));

	PMAP_PGTRACE_LOCK(&ints);

	ptep = pmap_pte(pmap, va_page);

	// target pte should exist
	if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
		PMAP_PGTRACE_UNLOCK(&ints);
		return false;
	}

	queue_head_t *mapq;
	queue_head_t *mappool;
	pmap_pgtrace_map_t *map = NULL;

	pa_page = pte_to_pa(*ptep);

	// find if we have a page info defined for this
	queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
		mapq = &(p->maps);
		mappool = &(p->map_pool);

		switch (p->state) {
		case PA_UNDEFINED:
			queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
				if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
					p->pa = pa_page;
					map->range.start = start;
					map->range.end = end;
					found = true;
					break;
				}
			}
			break;

		case VA_UNDEFINED:
			if (p->pa != pa_page) {
				break;
			}
			queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
				if (map->cloned == false) {
					map->pmap = pmap;
					map->ova = va_page;
					map->range.start = start;
					map->range.end = end;
					found = true;
					break;
				}
			}
			break;

		case DEFINED:
			if (p->pa != pa_page) {
				break;
			}
			queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
				if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
					kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
					break;
				} else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
					kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
					break;
				} else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
					// range should be already defined as well
					found = true;
					break;
				}
			}
			break;

		default:
			panic("invalid state p->state=%x\n", p->state);
		}

		if (found == true) {
			break;
		}
	}

	// do not clone if no page info found
	if (found == false) {
		PMAP_PGTRACE_UNLOCK(&ints);
		return false;
	}

	// copy pre, target and post ptes to clone ptes
	for (int i = 0; i < 3; i++) {
		ptep = pmap_pte(pmap, va_page + (i - 1) * ARM_PGBYTES);
		cptep = pmap_pte(kernel_pmap, map->cva[i]);
		assert(cptep != NULL);
		if (ptep == NULL) {
			PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
		} else {
			PGTRACE_WRITE_PTE(cptep, *ptep);
		}
		PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
	}

	// get ptes for original and clone
	ptep = pmap_pte(pmap, va_page);
	cptep = pmap_pte(kernel_pmap, map->cva[1]);

	// invalidate original pte and mark it as a pgtrace page
	PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
	PMAP_UPDATE_TLBS(pmap, map->ova, map->ova + ARM_PGBYTES, false);

	map->cloned = true;
	p->state = DEFINED;

	kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);

	PMAP_PGTRACE_UNLOCK(&ints);

	return true;
}
// This function removes trace bit and validate pte if applicable. Pmap must be locked.
static void
pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
{
	bool ints, found = false;
	pmap_pgtrace_page_t *p;
	pt_entry_t *ptep;

	PMAP_PGTRACE_LOCK(&ints);

	// we must have this page info
	p = pmap_pgtrace_find_page(pa);
	if (p == NULL) {
		goto unlock_exit;
	}

	// find matching map
	queue_head_t *mapq = &(p->maps);
	queue_head_t *mappool = &(p->map_pool);
	pmap_pgtrace_map_t *map;

	queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
		if (map->pmap == pmap && map->ova == va) {
			found = true;
			break;
		}
	}

	if (!found) {
		goto unlock_exit;
	}

	if (map->cloned == true) {
		// Restore back the pte to original state
		ptep = pmap_pte(pmap, map->ova);
		assert(ptep);
		PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
		PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);

		// revert clone pages
		for (int i = 0; i < 3; i++) {
			ptep = pmap_pte(kernel_pmap, map->cva[i]);
			assert(ptep != NULL);
			PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
			PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
		}
	}

	queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
	map->pmap = NULL;
	map->ova = (vm_map_offset_t)NULL;
	map->cloned = false;
	queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);

	kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);

unlock_exit:
	PMAP_PGTRACE_UNLOCK(&ints);
}
// remove all clones of given pa - pmap must be locked
static void
pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
{
	bool ints;
	pmap_pgtrace_page_t *p;
	pt_entry_t *ptep;

	PMAP_PGTRACE_LOCK(&ints);

	// we must have this page info
	p = pmap_pgtrace_find_page(pa);
	if (p == NULL) {
		PMAP_PGTRACE_UNLOCK(&ints);
		return;
	}

	queue_head_t *mapq = &(p->maps);
	queue_head_t *mappool = &(p->map_pool);
	queue_head_t *mapwaste = &(p->map_waste);
	pmap_pgtrace_map_t *map;

	// move maps to waste
	while (!queue_empty(mapq)) {
		queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
		queue_enter_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
	}

	PMAP_PGTRACE_UNLOCK(&ints);

	// sanitize maps in waste
	queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
		if (map->cloned == true) {
			PMAP_LOCK(map->pmap);

			// restore back original pte
			ptep = pmap_pte(map->pmap, map->ova);
			assert(ptep);
			PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
			PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova + ARM_PGBYTES, false);

			// revert clone ptes
			for (int i = 0; i < 3; i++) {
				ptep = pmap_pte(kernel_pmap, map->cva[i]);
				assert(ptep != NULL);
				PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
				PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
			}

			PMAP_UNLOCK(map->pmap);
		}

		map->pmap = NULL;
		map->ova = (vm_map_offset_t)NULL;
		map->cloned = false;
	}

	PMAP_PGTRACE_LOCK(&ints);

	// recycle maps back to map_pool
	while (!queue_empty(mapwaste)) {
		queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
		queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
	}

	PMAP_PGTRACE_UNLOCK(&ints);
}
static void
pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
{
	uint64_t tsz;
	vm_map_offset_t end;

	if (pmap == kernel_pmap) {
		tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
		*startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64 - tsz)) << (64 - tsz));
		*endp = VM_MAX_KERNEL_ADDRESS;
	} else {
		tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
		if (tsz == 64) {
			end = 0;
		} else {
			end = ((uint64_t)1 << (64 - tsz)) - 1;
		}

		*startp = 0;
		*endp = end;
	}

	assert(*endp > *startp);

	return;
}
// has pa mapped in given pmap? then clone it
static uint64_t
pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
{
	uint64_t ret = 0;
	vm_map_offset_t min, max;
	vm_map_offset_t cur_page, end_page;
	pt_entry_t *ptep;
	tt_entry_t *ttep;
	tt_entry_t tte;
	__unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	pmap_pgtrace_get_search_space(pmap, &min, &max);

	cur_page = arm_trunc_page(min);
	end_page = arm_trunc_page(max);
	while (cur_page <= end_page) {
		vm_map_offset_t add = 0;

		PMAP_LOCK(pmap);

		// skip uninterested space
		if (pmap == kernel_pmap &&
		    ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
		    (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
			add = ARM_PGBYTES;
			goto unlock_continue;
		}

		// check whether we can skip l1
		ttep = pmap_tt1e(pmap, cur_page);
		assert(ttep);
		tte = *ttep;
		if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
			add = ARM_TT_L1_SIZE;
			goto unlock_continue;
		}

		// how about l2
		tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, cur_page)];

		if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
			add = ARM_TT_L2_SIZE;
			goto unlock_continue;
		}

		// ptep finally
		ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, cur_page)]);
		if (ptep == PT_ENTRY_NULL) {
			add = ARM_TT_L3_SIZE;
			goto unlock_continue;
		}

		if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
			if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
				ret++;
			}
		}

		add = ARM_PGBYTES;

unlock_continue:
		PMAP_UNLOCK(pmap);

		// overflow
		if (cur_page + add < cur_page) {
			break;
		}

		cur_page += add;
	}

	return ret;
}
// search pv table and clone vas of given pa
static uint64_t
pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
{
	uint64_t ret = 0;
	unsigned long pai;
	pv_entry_t **pvh;
	pt_entry_t *ptep;
	pmap_t pmap;

	typedef struct {
		queue_chain_t chain;
		pmap_t pmap;
		vm_map_offset_t va;
	} pmap_va_t;

	queue_head_t pmapvaq;
	pmap_va_t *pmapva;

	queue_init(&pmapvaq);

	pai = pa_index(pa);
	LOCK_PVH(pai);
	pvh = pai_to_pvh(pai);

	// collect pmap/va pair from pvh
	if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
		ptep = pvh_ptep(pvh);
		pmap = ptep_get_pmap(ptep);

		pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
		pmapva->pmap = pmap;
		pmapva->va = ptep_get_va(ptep);

		queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
	} else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
		pv_entry_t *pvep;

		pvep = pvh_list(pvh);
		while (pvep) {
			ptep = pve_get_ptep(pvep);
			pmap = ptep_get_pmap(ptep);

			pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
			pmapva->pmap = pmap;
			pmapva->va = ptep_get_va(ptep);

			queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);

			pvep = PVE_NEXT_PTR(pve_next(pvep));
		}
	}

	UNLOCK_PVH(pai);

	// clone them while making sure mapping still exists
	queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
		PMAP_LOCK(pmapva->pmap);
		ptep = pmap_pte(pmapva->pmap, pmapva->va);
		if (pte_to_pa(*ptep) == pa) {
			if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
				ret++;
			}
		}
		PMAP_UNLOCK(pmapva->pmap);

		kfree(pmapva, sizeof(pmap_va_t));
	}

	return ret;
}
// allocate a page info
static pmap_pgtrace_page_t *
pmap_pgtrace_alloc_page(void)
{
	pmap_pgtrace_page_t *p;
	queue_head_t *mapq;
	queue_head_t *mappool;
	queue_head_t *mapwaste;
	pmap_pgtrace_map_t *map;

	p = kalloc(sizeof(pmap_pgtrace_page_t));
	assert(p);

	p->state = UNDEFINED;

	mapq = &(p->maps);
	mappool = &(p->map_pool);
	mapwaste = &(p->map_waste);
	queue_init(mapq);
	queue_init(mappool);
	queue_init(mapwaste);

	for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
		vm_map_offset_t newcva;
		pt_entry_t *cptep;
		kern_return_t kr;
		vm_map_entry_t entry;

		// get a clone va
		vm_object_reference(kernel_object);
		kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3 * ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
		if (kr != KERN_SUCCESS) {
			panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
		}
		VME_OBJECT_SET(entry, kernel_object);
		VME_OFFSET_SET(entry, newcva);
		vm_map_unlock(kernel_map);

		// fill default clone page info and add to pool
		map = kalloc(sizeof(pmap_pgtrace_map_t));
		for (int j = 0; j < 3; j++) {
			vm_map_offset_t addr = newcva + j * ARM_PGBYTES;

			// pre-expand pmap while preemption enabled
			kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
			if (kr != KERN_SUCCESS) {
				panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
			}

			cptep = pmap_pte(kernel_pmap, addr);
			assert(cptep != NULL);

			map->cva[j] = addr;
			map->cva_spte[j] = *cptep;
		}
		map->range.start = map->range.end = 0;
		map->cloned = false;
		queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
	}

	return p;
}
// free a page info
static void
pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
{
	queue_head_t *mapq;
	queue_head_t *mappool;
	queue_head_t *mapwaste;
	pmap_pgtrace_map_t *map;

	assert(p);

	mapq = &(p->maps);
	mappool = &(p->map_pool);
	mapwaste = &(p->map_waste);

	while (!queue_empty(mapq)) {
		queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
		kfree(map, sizeof(pmap_pgtrace_map_t));
	}

	while (!queue_empty(mappool)) {
		queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
		kfree(map, sizeof(pmap_pgtrace_map_t));
	}

	while (!queue_empty(mapwaste)) {
		queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
		kfree(map, sizeof(pmap_pgtrace_map_t));
	}

	kfree(p, sizeof(pmap_pgtrace_page_t));
}
// construct page infos with the given address range
int
pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
	int ret = 0;
	pt_entry_t *ptep;
	queue_head_t *q = &(pmap_pgtrace.pages);
	bool ints;
	vm_map_offset_t cur_page, end_page;

	if (start > end) {
		kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
		return -1;
	}

	PROF_START

	// add each page in given range
	cur_page = arm_trunc_page(start);
	end_page = arm_trunc_page(end);
	while (cur_page <= end_page) {
		pmap_paddr_t pa_page = 0;
		uint64_t num_cloned = 0;
		pmap_pgtrace_page_t *p = NULL, *newp;
		bool free_newp = true;
		pmap_pgtrace_page_state_t state;

		// do all allocations outside of spinlocks
		newp = pmap_pgtrace_alloc_page();

		// keep lock orders in pmap, kernel_pmap and pgtrace lock
		if (pmap != NULL) {
			PMAP_LOCK(pmap);
		}
		if (pmap != kernel_pmap) {
			PMAP_LOCK(kernel_pmap);
		}

		// addresses are physical if pmap is null
		if (pmap == NULL) {
			ptep = NULL;
			pa_page = cur_page;
			state = VA_UNDEFINED;
		} else {
			ptep = pmap_pte(pmap, cur_page);
			if (ptep != NULL) {
				pa_page = pte_to_pa(*ptep);
				state = DEFINED;
			} else {
				state = PA_UNDEFINED;
			}
		}

		// search if we have a page info already
		PMAP_PGTRACE_LOCK(&ints);
		if (state != PA_UNDEFINED) {
			p = pmap_pgtrace_find_page(pa_page);
		}

		// add pre-allocated page info if nothing found
		if (p == NULL) {
			queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
			p = newp;
			free_newp = false;
		}

		// now p points what we want
		p->state = state;

		queue_head_t *mapq = &(p->maps);
		queue_head_t *mappool = &(p->map_pool);
		pmap_pgtrace_map_t *map;
		vm_map_offset_t start_offset, end_offset;

		// calculate trace offsets in the page
		if (cur_page > start) {
			start_offset = 0;
		} else {
			start_offset = start - cur_page;
		}
		if (cur_page == end_page) {
			end_offset = end - end_page;
		} else {
			end_offset = ARM_PGBYTES - 1;
		}

		kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);

		// fill map info
		assert(!queue_empty(mappool));
		queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
		if (p->state == PA_UNDEFINED) {
			map->pmap = pmap;
			map->ova = cur_page;
			map->range.start = start_offset;
			map->range.end = end_offset;
		} else if (p->state == VA_UNDEFINED) {
			p->pa = pa_page;
			map->range.start = start_offset;
			map->range.end = end_offset;
		} else if (p->state == DEFINED) {
			p->pa = pa_page;
			map->pmap = pmap;
			map->ova = cur_page;
			map->range.start = start_offset;
			map->range.end = end_offset;
		} else {
			panic("invalid p->state=%d\n", p->state);
		}

		// not cloned yet
		map->cloned = false;
		queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);

		// unlock locks
		PMAP_PGTRACE_UNLOCK(&ints);
		if (pmap != kernel_pmap) {
			PMAP_UNLOCK(kernel_pmap);
		}
		if (pmap != NULL) {
			PMAP_UNLOCK(pmap);
		}

		// now clone it
		if (pa_valid(pa_page)) {
			num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
		}
		if (pmap == NULL) {
			num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
		} else {
			num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
		}

		// free pre-allocations if we didn't add it to the q
		if (free_newp) {
			pmap_pgtrace_free_page(newp);
		}

		if (num_cloned == 0) {
			kprintf("%s: no mapping found for pa_page=%llx but will be added when a page entered\n", __func__, pa_page);
		}

		ret += num_cloned;

		// overflow
		if (cur_page + ARM_PGBYTES < cur_page) {
			break;
		} else {
			cur_page += ARM_PGBYTES;
		}
	}

	PROF_END

	return ret;
}
// delete page infos for given address range
int
pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
{
	int ret = 0;
	bool ints;
	queue_head_t *q = &(pmap_pgtrace.pages);
	pmap_pgtrace_page_t *p;
	vm_map_offset_t cur_page, end_page;

	kprintf("%s start=%llx end=%llx\n", __func__, start, end);

	PROF_START

	pt_entry_t *ptep;
	pmap_paddr_t pa_page;

	// remove page info from start to end
	cur_page = arm_trunc_page(start);
	end_page = arm_trunc_page(end);
	while (cur_page <= end_page) {
		p = NULL;

		if (pmap == NULL) {
			pa_page = cur_page;
		} else {
			PMAP_LOCK(pmap);
			ptep = pmap_pte(pmap, cur_page);
			if (ptep == NULL) {
				PMAP_UNLOCK(pmap);
				goto cont;
			}
			pa_page = pte_to_pa(*ptep);
			PMAP_UNLOCK(pmap);
		}

		// remove all clones and validate
		pmap_pgtrace_remove_all_clone(pa_page);

		// find page info and delete
		PMAP_PGTRACE_LOCK(&ints);
		p = pmap_pgtrace_find_page(pa_page);
		if (p != NULL) {
			queue_remove(q, p, pmap_pgtrace_page_t *, chain);
			ret++;
		}
		PMAP_PGTRACE_UNLOCK(&ints);

		// free outside of locks
		if (p != NULL) {
			pmap_pgtrace_free_page(p);
		}

cont:
		// overflow
		if (cur_page + ARM_PGBYTES < cur_page) {
			break;
		} else {
			cur_page += ARM_PGBYTES;
		}
	}

	PROF_END

	return ret;
}
kern_return_t
pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
{
	pt_entry_t *ptep;
	pgtrace_run_result_t res;
	pmap_pgtrace_page_t *p;
	bool ints, found = false;
	pmap_paddr_t pa;

	// Quick check if we are interested
	ptep = pmap_pte(pmap, va);
	if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
		return KERN_FAILURE;
	}

	PMAP_PGTRACE_LOCK(&ints);

	// Check again since access is serialized
	ptep = pmap_pte(pmap, va);
	if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
		PMAP_PGTRACE_UNLOCK(&ints);
		return KERN_FAILURE;
	} else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
		// Somehow this cpu's tlb has not updated
		kprintf("%s Somehow this cpu's tlb has not updated?\n", __func__);
		PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);

		PMAP_PGTRACE_UNLOCK(&ints);
		return KERN_SUCCESS;
	}

	// Find if this pa is what we are tracing
	pa = pte_to_pa(*ptep);

	p = pmap_pgtrace_find_page(arm_trunc_page(pa));
	if (p == NULL) {
		panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
	}

	// find if pmap and va are also matching
	queue_head_t *mapq = &(p->maps);
	queue_head_t *mapwaste = &(p->map_waste);
	pmap_pgtrace_map_t *map;

	queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
		if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
			found = true;
			break;
		}
	}

	// if not found, search map waste as they are still valid
	if (!found) {
		queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
			if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
				found = true;
				break;
			}
		}
	}

	if (!found) {
		panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
	}

	// Decode and run it on the clone map
	bzero(&res, sizeof(res));
	pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss), // instruction
	    va, map->cva,                                      // fault va and clone page vas
	    ss, &res);

	// write a log if in range
	vm_map_offset_t offset = va - map->ova;
	if (map->range.start <= offset && offset <= map->range.end) {
		pgtrace_write_log(res);
	}

	PMAP_PGTRACE_UNLOCK(&ints);

	// Return to next instruction
	add_saved_state_pc(ss, sizeof(uint32_t));

	return KERN_SUCCESS;
}
boolean_t
pmap_enforces_execute_only(
#if (__ARM_VMSA__ == 7)
	__unused
#endif
	pmap_t pmap)
{
#if (__ARM_VMSA__ > 7)
	return pmap != kernel_pmap;
#else
	return FALSE;
#endif
}

MARK_AS_PMAP_TEXT void
pmap_set_jit_entitled_internal(
	__unused pmap_t pmap)
{
	return;
}

void
pmap_set_jit_entitled(
	pmap_t pmap)
{
	pmap_set_jit_entitled_internal(pmap);
}
MARK_AS_PMAP_TEXT static kern_return_t
pmap_query_page_info_internal(
	pmap_t          pmap,
	vm_map_offset_t va,
	int             *disp_p)
{
	pmap_paddr_t    pa;
	int             disp;
	int             pai;
	pt_entry_t      *pte;
	pv_entry_t      **pv_h, *pve_p;

	if (pmap == PMAP_NULL || pmap == kernel_pmap) {
		pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
		*disp_p = 0;
		pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
		return KERN_INVALID_ARGUMENT;
	}

	disp = 0;

	VALIDATE_PMAP(pmap);
	PMAP_LOCK(pmap);

	pte = pmap_pte(pmap, va);
	if (pte == PT_ENTRY_NULL) {
		goto done;
	}

	pa = pte_to_pa(*pte);
	if (pa == 0) {
		if (ARM_PTE_IS_COMPRESSED(*pte, pte)) {
			disp |= PMAP_QUERY_PAGE_COMPRESSED;
			if (*pte & ARM_PTE_COMPRESSED_ALT) {
				disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
			}
		}
	} else {
		disp |= PMAP_QUERY_PAGE_PRESENT;
		pai = (int) pa_index(pa);
		if (!pa_valid(pa)) {
			goto done;
		}
		LOCK_PVH(pai);
		pv_h = pai_to_pvh(pai);
		pve_p = PV_ENTRY_NULL;
		if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
			pve_p = pvh_list(pv_h);
			while (pve_p != PV_ENTRY_NULL &&
			    pve_get_ptep(pve_p) != pte) {
				pve_p = PVE_NEXT_PTR(pve_next(pve_p));
			}
		}
		if (IS_ALTACCT_PAGE(pai, pve_p)) {
			disp |= PMAP_QUERY_PAGE_ALTACCT;
		} else if (IS_REUSABLE_PAGE(pai)) {
			disp |= PMAP_QUERY_PAGE_REUSABLE;
		} else if (IS_INTERNAL_PAGE(pai)) {
			disp |= PMAP_QUERY_PAGE_INTERNAL;
		}
		UNLOCK_PVH(pai);
	}

done:
	PMAP_UNLOCK(pmap);
	pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
	*disp_p = disp;
	pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
	return KERN_SUCCESS;
}

kern_return_t
pmap_query_page_info(
	pmap_t          pmap,
	vm_map_offset_t va,
	int             *disp_p)
{
	return pmap_query_page_info_internal(pmap, va, disp_p);
}
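
/*
 * Illustrative sketch (not from the original source): interpreting the
 * disposition bits returned above.  The helper name is hypothetical.
 */
#if 0	/* illustrative only, not compiled */
static boolean_t
example_page_is_compressed(pmap_t pmap, vm_map_offset_t va)
{
	int disp = 0;

	if (pmap_query_page_info(pmap, va, &disp) != KERN_SUCCESS) {
		return FALSE;
	}
	/* A compressed entry has no physical page behind it, so PRESENT and
	 * COMPRESSED are reported for different PTE states. */
	return (disp & PMAP_QUERY_PAGE_COMPRESSED) != 0;
}
#endif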
MARK_AS_PMAP_TEXT kern_return_t
pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
{
	return KERN_SUCCESS;
}

kern_return_t
pmap_return(boolean_t do_panic, boolean_t do_recurse)
{
	return pmap_return_internal(do_panic, do_recurse);
}

MARK_AS_PMAP_TEXT static void
pmap_footprint_suspend_internal(
	vm_map_t        map,
	boolean_t       suspend)
{
#if DEVELOPMENT || DEBUG
	if (suspend) {
		current_thread()->pmap_footprint_suspended = TRUE;
		map->pmap->footprint_was_suspended = TRUE;
	} else {
		current_thread()->pmap_footprint_suspended = FALSE;
	}
#else /* DEVELOPMENT || DEBUG */
	(void) map;
	(void) suspend;
#endif /* DEVELOPMENT || DEBUG */
}

void
pmap_footprint_suspend(
	vm_map_t        map,
	boolean_t       suspend)
{
	pmap_footprint_suspend_internal(map, suspend);
}
#if defined(__arm64__) && (DEVELOPMENT || DEBUG)

struct page_table_dump_header {
	uint64_t pa;
	uint64_t num_entries;
	uint64_t start_va;
	uint64_t end_va;
};

static size_t
pmap_dump_page_tables_recurse(pmap_t pmap,
    const tt_entry_t *ttp,
    unsigned int cur_level,
    uint64_t start_va,
    void *bufp,
    void *buf_end)
{
	size_t bytes_used = 0;
	uint64_t num_entries = ARM_PGBYTES / sizeof(*ttp);
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);

	uint64_t size = pt_attr->pta_level_info[cur_level].size;
	uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
	uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
	uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;

	if (cur_level == arm64_root_pgtable_level) {
		num_entries = arm64_root_pgtable_num_ttes;
	}

	uint64_t tt_size = num_entries * sizeof(tt_entry_t);
	const tt_entry_t *tt_end = &ttp[num_entries];

	if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
		return 0;
	}

	struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
	header->pa = ml_static_vtop((vm_offset_t)ttp);
	header->num_entries = num_entries;
	header->start_va = start_va;
	header->end_va = start_va + (num_entries * size);

	bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
	bytes_used += (sizeof(*header) + tt_size);
	uint64_t current_va = start_va;

	for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
		tt_entry_t tte = *ttep;

		if (!(tte & valid_mask)) {
			continue;
		}

		if ((tte & type_mask) == type_block) {
			continue;
		} else {
			if (cur_level >= PMAP_TT_MAX_LEVEL) {
				panic("%s: corrupt entry %#llx at %p, "
				    "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
				    __FUNCTION__, tte, ttep,
				    ttp, cur_level, bufp, buf_end);
			}

			const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);

			size_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1, current_va, (uint8_t*)bufp + bytes_used, buf_end);

			if (recurse_result == 0) {
				return 0;
			}

			bytes_used += recurse_result;
		}
	}

	return bytes_used;
}

size_t
pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end)
{
	if (not_in_kdp) {
		panic("pmap_dump_page_tables must only be called from kernel debugger context");
	}
	return pmap_dump_page_tables_recurse(pmap, pmap->tte, arm64_root_pgtable_level, pmap->min, bufp, buf_end);
}

#else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */

size_t
pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)
{
	return (size_t)-1;
}

#endif /* !defined(__arm64__) */
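
/*
 * Illustrative sketch (not from the original source): consuming the buffer
 * produced by pmap_dump_page_tables().  Each table is emitted as a
 * page_table_dump_header followed by the raw TTEs; the walk below assumes the
 * arm64 DEVELOPMENT/DEBUG layout defined above and is for exposition only.
 */
#if 0	/* illustrative only, not compiled */
static void
example_walk_page_table_dump(const void *buf, size_t bytes_used)
{
	const uint8_t *cursor = buf;
	const uint8_t *end = cursor + bytes_used;

	while ((size_t)(end - cursor) >= sizeof(struct page_table_dump_header)) {
		const struct page_table_dump_header *hdr = (const void *)cursor;

		kprintf("table pa=0x%llx va=[0x%llx, 0x%llx) entries=%llu\n",
		    hdr->pa, hdr->start_va, hdr->end_va, hdr->num_entries);

		/* Skip the header and the copied TTEs to reach the next table. */
		cursor += sizeof(*hdr) + hdr->num_entries * sizeof(tt_entry_t);
	}
}
#endif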