/*
 * Copyright (c) 2011-2019 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <string.h>
#include <mach_assert.h>
#include <mach_ldebug.h>

#include <mach/shared_region.h>
#include <mach/vm_param.h>
#include <mach/vm_prot.h>
#include <mach/vm_map.h>
#include <mach/machine/vm_param.h>
#include <mach/machine/vm_types.h>

#include <mach/boolean.h>
#include <kern/bits.h>
#include <kern/thread.h>
#include <kern/sched.h>
#include <kern/zalloc.h>
#include <kern/kalloc.h>
#include <kern/ledger.h>
#include <kern/spl.h>
#include <kern/trustcache.h>

#include <os/overflow.h>

#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_protos.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/cpm.h>

#include <libkern/img4/interface.h>
#include <libkern/section_keywords.h>

#include <machine/atomic.h>
#include <machine/thread.h>
#include <machine/lowglobals.h>

#include <arm/caches_internal.h>
#include <arm/cpu_data.h>
#include <arm/cpu_data_internal.h>
#include <arm/cpu_capabilities.h>
#include <arm/cpu_number.h>
#include <arm/machine_cpu.h>
#include <arm/misc_protos.h>
#include <arm/trap.h>

#if (__ARM_VMSA__ > 7)
#include <arm64/proc_reg.h>
#include <pexpert/arm64/boot.h>
#if CONFIG_PGTRACE
#include <stdint.h>
#include <arm64/pgtrace.h>
#if CONFIG_PGTRACE_NONKEXT
#include <arm64/pgtrace_decoder.h>
#endif // CONFIG_PGTRACE_NONKEXT
#endif
#endif

#include <pexpert/device_tree.h>

#include <san/kasan.h>
#include <sys/cdefs.h>

#if defined(HAS_APPLE_PAC)
#include <ptrauth.h>
#endif

97#define PMAP_TT_L0_LEVEL 0x0
98#define PMAP_TT_L1_LEVEL 0x1
99#define PMAP_TT_L2_LEVEL 0x2
100#define PMAP_TT_L3_LEVEL 0x3
101#if (__ARM_VMSA__ == 7)
102#define PMAP_TT_MAX_LEVEL PMAP_TT_L2_LEVEL
103#else
104#define PMAP_TT_MAX_LEVEL PMAP_TT_L3_LEVEL
105#endif
106#define PMAP_TT_LEAF_LEVEL PMAP_TT_MAX_LEVEL
107#define PMAP_TT_TWIG_LEVEL (PMAP_TT_MAX_LEVEL - 1)
108
109static bool alloc_asid(pmap_t pmap);
110static void free_asid(pmap_t pmap);
111static void flush_mmu_tlb_region_asid_async(vm_offset_t va, unsigned length, pmap_t pmap);
112static void flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap);
113static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
114static pt_entry_t wimg_to_pte(unsigned int wimg);
115
116struct page_table_ops {
117 bool (*alloc_id)(pmap_t pmap);
118 void (*free_id)(pmap_t pmap);
119 void (*flush_tlb_region_async)(vm_offset_t va, unsigned length, pmap_t pmap);
120 void (*flush_tlb_tte_async)(vm_offset_t va, pmap_t pmap);
121 void (*flush_tlb_async)(pmap_t pmap);
122 pt_entry_t (*wimg_to_pte)(unsigned int wimg);
123};
124
125static const struct page_table_ops native_pt_ops =
126{
127 .alloc_id = alloc_asid,
128 .free_id = free_asid,
129 .flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
130 .flush_tlb_tte_async = flush_mmu_tlb_tte_asid_async,
131 .flush_tlb_async = flush_mmu_tlb_full_asid_async,
132 .wimg_to_pte = wimg_to_pte,
133};
134
135#if (__ARM_VMSA__ > 7)
136const struct page_table_level_info pmap_table_level_info_16k[] =
137{
138 [0] = {
139 .size = ARM_16K_TT_L0_SIZE,
140 .offmask = ARM_16K_TT_L0_OFFMASK,
141 .shift = ARM_16K_TT_L0_SHIFT,
142 .index_mask = ARM_16K_TT_L0_INDEX_MASK,
143 .valid_mask = ARM_TTE_VALID,
144 .type_mask = ARM_TTE_TYPE_MASK,
145 .type_block = ARM_TTE_TYPE_BLOCK
146 },
147 [1] = {
148 .size = ARM_16K_TT_L1_SIZE,
149 .offmask = ARM_16K_TT_L1_OFFMASK,
150 .shift = ARM_16K_TT_L1_SHIFT,
151 .index_mask = ARM_16K_TT_L1_INDEX_MASK,
152 .valid_mask = ARM_TTE_VALID,
153 .type_mask = ARM_TTE_TYPE_MASK,
154 .type_block = ARM_TTE_TYPE_BLOCK
155 },
156 [2] = {
157 .size = ARM_16K_TT_L2_SIZE,
158 .offmask = ARM_16K_TT_L2_OFFMASK,
159 .shift = ARM_16K_TT_L2_SHIFT,
160 .index_mask = ARM_16K_TT_L2_INDEX_MASK,
161 .valid_mask = ARM_TTE_VALID,
162 .type_mask = ARM_TTE_TYPE_MASK,
163 .type_block = ARM_TTE_TYPE_BLOCK
164 },
165 [3] = {
166 .size = ARM_16K_TT_L3_SIZE,
167 .offmask = ARM_16K_TT_L3_OFFMASK,
168 .shift = ARM_16K_TT_L3_SHIFT,
169 .index_mask = ARM_16K_TT_L3_INDEX_MASK,
170 .valid_mask = ARM_PTE_TYPE_VALID,
171 .type_mask = ARM_PTE_TYPE_MASK,
172 .type_block = ARM_TTE_TYPE_L3BLOCK
173 }
174};
175
176const struct page_table_level_info pmap_table_level_info_4k[] =
177{
178 [0] = {
179 .size = ARM_4K_TT_L0_SIZE,
180 .offmask = ARM_4K_TT_L0_OFFMASK,
181 .shift = ARM_4K_TT_L0_SHIFT,
182 .index_mask = ARM_4K_TT_L0_INDEX_MASK,
183 .valid_mask = ARM_TTE_VALID,
184 .type_mask = ARM_TTE_TYPE_MASK,
185 .type_block = ARM_TTE_TYPE_BLOCK
186 },
187 [1] = {
188 .size = ARM_4K_TT_L1_SIZE,
189 .offmask = ARM_4K_TT_L1_OFFMASK,
190 .shift = ARM_4K_TT_L1_SHIFT,
191 .index_mask = ARM_4K_TT_L1_INDEX_MASK,
192 .valid_mask = ARM_TTE_VALID,
193 .type_mask = ARM_TTE_TYPE_MASK,
194 .type_block = ARM_TTE_TYPE_BLOCK
195 },
196 [2] = {
197 .size = ARM_4K_TT_L2_SIZE,
198 .offmask = ARM_4K_TT_L2_OFFMASK,
199 .shift = ARM_4K_TT_L2_SHIFT,
200 .index_mask = ARM_4K_TT_L2_INDEX_MASK,
201 .valid_mask = ARM_TTE_VALID,
202 .type_mask = ARM_TTE_TYPE_MASK,
203 .type_block = ARM_TTE_TYPE_BLOCK
204 },
205 [3] = {
206 .size = ARM_4K_TT_L3_SIZE,
207 .offmask = ARM_4K_TT_L3_OFFMASK,
208 .shift = ARM_4K_TT_L3_SHIFT,
209 .index_mask = ARM_4K_TT_L3_INDEX_MASK,
210 .valid_mask = ARM_PTE_TYPE_VALID,
211 .type_mask = ARM_PTE_TYPE_MASK,
212 .type_block = ARM_TTE_TYPE_L3BLOCK
213 }
214};
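/*
 * Illustrative sketch (not part of the original source): how the per-level
 * fields above are typically consumed.  For a given VA, the index into the
 * level-N translation table is obtained by masking with index_mask and
 * shifting right by shift (compare ttn_index() further down); offmask
 * isolates the byte offset covered by one entry at that level.
 */
__unused static inline uint64_t
pmap_level_index_example(const struct page_table_level_info *level_info, vm_map_address_t va)
{
	/* e.g. &pmap_table_level_info_16k[2] yields the L2 table index for va */
	return (va & level_info->index_mask) >> level_info->shift;
}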
215
216struct page_table_attr {
217 const struct page_table_level_info * const pta_level_info;
218 const struct page_table_ops * const pta_ops;
219 const uintptr_t ap_ro;
220 const uintptr_t ap_rw;
221 const uintptr_t ap_rona;
222 const uintptr_t ap_rwna;
223 const uintptr_t ap_xn;
224 const uintptr_t ap_x;
225 const unsigned int pta_root_level;
226 const unsigned int pta_max_level;
227};
228
229const struct page_table_attr pmap_pt_attr_4k = {
230 .pta_level_info = pmap_table_level_info_4k,
231 .pta_root_level = PMAP_TT_L1_LEVEL,
232 .pta_max_level = PMAP_TT_L3_LEVEL,
233 .pta_ops = &native_pt_ops,
234 .ap_ro = ARM_PTE_AP(AP_RORO),
235 .ap_rw = ARM_PTE_AP(AP_RWRW),
236 .ap_rona = ARM_PTE_AP(AP_RONA),
237 .ap_rwna = ARM_PTE_AP(AP_RWNA),
238 .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
239 .ap_x = ARM_PTE_PNX,
240};
241
242const struct page_table_attr pmap_pt_attr_16k = {
243 .pta_level_info = pmap_table_level_info_16k,
244 .pta_root_level = PMAP_TT_L1_LEVEL,
245 .pta_max_level = PMAP_TT_L3_LEVEL,
246 .pta_ops = &native_pt_ops,
247 .ap_ro = ARM_PTE_AP(AP_RORO),
248 .ap_rw = ARM_PTE_AP(AP_RWRW),
249 .ap_rona = ARM_PTE_AP(AP_RONA),
250 .ap_rwna = ARM_PTE_AP(AP_RWNA),
251 .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
252 .ap_x = ARM_PTE_PNX,
253};
254
255#if __ARM_16K_PG__
256const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
257#else /* !__ARM_16K_PG__ */
258const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
259#endif /* !__ARM_16K_PG__ */
260
261
262#else /* (__ARM_VMSA__ > 7) */
263/*
264 * We don't support pmap parameterization for VMSA7, so use an opaque
265 * page_table_attr structure.
266 */
267const struct page_table_attr * const native_pt_attr = NULL;
268#endif /* (__ARM_VMSA__ > 7) */
269
270typedef struct page_table_attr pt_attr_t;
271
272/* Macro for getting pmap attributes; not a function for const propagation. */
273#if ARM_PARAMETERIZED_PMAP
274/* The page table attributes are linked to the pmap */
275#define pmap_get_pt_attr(pmap) ((pmap)->pmap_pt_attr)
276#define pmap_get_pt_ops(pmap) ((pmap)->pmap_pt_attr->pta_ops)
277#else /* !ARM_PARAMETERIZED_PMAP */
278/* The page table attributes are fixed (to allow for const propagation) */
279#define pmap_get_pt_attr(pmap) (native_pt_attr)
280#define pmap_get_pt_ops(pmap) (&native_pt_ops)
281#endif /* !ARM_PARAMETERIZED_PMAP */
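/*
 * Illustrative sketch (not part of the original source): the two accessors
 * above let callers stay agnostic about whether the pmap is parameterized.
 * A TLB shootdown for a VA range, for instance, dispatches through the ops
 * table like this (PMAP_UPDATE_TLBS below does the same, via pmap_sync_tlb):
 */
__unused static inline void
pmap_flush_range_example(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
	pmap_get_pt_ops(pmap)->flush_tlb_region_async(start, (unsigned)(end - start), pmap);
	sync_tlb_flush();       /* wait for the asynchronous invalidates to complete */
}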
282
283#if (__ARM_VMSA__ > 7)
284static inline uint64_t
285pt_attr_ln_size(const pt_attr_t * const pt_attr, unsigned int level)
286{
287 return pt_attr->pta_level_info[level].size;
288}
289
290__unused static inline uint64_t
291pt_attr_ln_shift(const pt_attr_t * const pt_attr, unsigned int level)
292{
293 return pt_attr->pta_level_info[level].shift;
294}
295
296__unused static inline uint64_t
297pt_attr_ln_offmask(const pt_attr_t * const pt_attr, unsigned int level)
298{
299 return pt_attr->pta_level_info[level].offmask;
300}
301
302static inline unsigned int
303pt_attr_twig_level(const pt_attr_t * const pt_attr)
304{
305 return pt_attr->pta_max_level - 1;
306}
307
308static inline unsigned int
309pt_attr_root_level(const pt_attr_t * const pt_attr)
310{
311 return pt_attr->pta_root_level;
312}
313
314static __unused inline uint64_t
315pt_attr_leaf_size(const pt_attr_t * const pt_attr)
316{
317 return pt_attr->pta_level_info[pt_attr->pta_max_level].size;
318}
319
320static __unused inline uint64_t
321pt_attr_leaf_offmask(const pt_attr_t * const pt_attr)
322{
323 return pt_attr->pta_level_info[pt_attr->pta_max_level].offmask;
324}
325
326static inline uint64_t
327pt_attr_leaf_shift(const pt_attr_t * const pt_attr)
328{
329 return pt_attr->pta_level_info[pt_attr->pta_max_level].shift;
330}
331
332static __unused inline uint64_t
333pt_attr_leaf_index_mask(const pt_attr_t * const pt_attr)
334{
335 return pt_attr->pta_level_info[pt_attr->pta_max_level].index_mask;
336}
337
338static inline uint64_t
339pt_attr_twig_size(const pt_attr_t * const pt_attr)
340{
341 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].size;
342}
343
344static inline uint64_t
345pt_attr_twig_offmask(const pt_attr_t * const pt_attr)
346{
347 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].offmask;
348}
349
350static inline uint64_t
351pt_attr_twig_shift(const pt_attr_t * const pt_attr)
352{
353 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].shift;
354}
355
356static __unused inline uint64_t
357pt_attr_twig_index_mask(const pt_attr_t * const pt_attr)
358{
359 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].index_mask;
360}
361
362static inline uint64_t
363pt_attr_leaf_table_size(const pt_attr_t * const pt_attr)
364{
365 return pt_attr_twig_size(pt_attr);
366}
367
368static inline uint64_t
369pt_attr_leaf_table_offmask(const pt_attr_t * const pt_attr)
370{
371 return pt_attr_twig_offmask(pt_attr);
372}
373
374static inline uintptr_t
375pt_attr_leaf_rw(const pt_attr_t * const pt_attr)
376{
377 return pt_attr->ap_rw;
378}
379
380static inline uintptr_t
381pt_attr_leaf_ro(const pt_attr_t * const pt_attr)
382{
383 return pt_attr->ap_ro;
384}
385
386static inline uintptr_t
387pt_attr_leaf_rona(const pt_attr_t * const pt_attr)
388{
389 return pt_attr->ap_rona;
390}
391
392static inline uintptr_t
393pt_attr_leaf_rwna(const pt_attr_t * const pt_attr)
394{
395 return pt_attr->ap_rwna;
396}
397
398static inline uintptr_t
399pt_attr_leaf_xn(const pt_attr_t * const pt_attr)
400{
401 return pt_attr->ap_xn;
402}
403
404static inline uintptr_t
405pt_attr_leaf_x(const pt_attr_t * const pt_attr)
406{
407 return pt_attr->ap_x;
408}
409
410#else /* (__ARM_VMSA__ > 7) */
411
412static inline unsigned int
413pt_attr_twig_level(__unused const pt_attr_t * const pt_attr)
414{
415 return PMAP_TT_L1_LEVEL;
416}
417
418static inline uint64_t
419pt_attr_twig_size(__unused const pt_attr_t * const pt_attr)
420{
421 return ARM_TT_TWIG_SIZE;
422}
423
424static inline uint64_t
425pt_attr_twig_offmask(__unused const pt_attr_t * const pt_attr)
426{
427 return ARM_TT_TWIG_OFFMASK;
428}
429
430static inline uint64_t
431pt_attr_twig_shift(__unused const pt_attr_t * const pt_attr)
432{
433 return ARM_TT_TWIG_SHIFT;
434}
435
436static __unused inline uint64_t
437pt_attr_twig_index_mask(__unused const pt_attr_t * const pt_attr)
438{
439 return ARM_TT_TWIG_INDEX_MASK;
440}
441
442__unused static inline uint64_t
443pt_attr_leaf_size(__unused const pt_attr_t * const pt_attr)
444{
445 return ARM_TT_LEAF_SIZE;
446}
447
448__unused static inline uint64_t
449pt_attr_leaf_offmask(__unused const pt_attr_t * const pt_attr)
450{
451 return ARM_TT_LEAF_OFFMASK;
452}
453
454static inline uint64_t
455pt_attr_leaf_shift(__unused const pt_attr_t * const pt_attr)
456{
457 return ARM_TT_LEAF_SHIFT;
458}
459
460static __unused inline uint64_t
461pt_attr_leaf_index_mask(__unused const pt_attr_t * const pt_attr)
462{
463 return ARM_TT_LEAF_INDEX_MASK;
464}
465
466static inline uint64_t
467pt_attr_leaf_table_size(__unused const pt_attr_t * const pt_attr)
468{
469 return ARM_TT_L1_PT_SIZE;
470}
471
472static inline uint64_t
473pt_attr_leaf_table_offmask(__unused const pt_attr_t * const pt_attr)
474{
475 return ARM_TT_L1_PT_OFFMASK;
476}
477
478static inline uintptr_t
479pt_attr_leaf_rw(__unused const pt_attr_t * const pt_attr)
480{
481 return ARM_PTE_AP(AP_RWRW);
482}
483
484static inline uintptr_t
485pt_attr_leaf_ro(__unused const pt_attr_t * const pt_attr)
486{
487 return ARM_PTE_AP(AP_RORO);
488}
489
490static inline uintptr_t
491pt_attr_leaf_rona(__unused const pt_attr_t * const pt_attr)
492{
493 return ARM_PTE_AP(AP_RONA);
494}
495
496static inline uintptr_t
497pt_attr_leaf_rwna(__unused const pt_attr_t * const pt_attr)
498{
499 return ARM_PTE_AP(AP_RWNA);
500}
501
502static inline uintptr_t
503pt_attr_leaf_xn(__unused const pt_attr_t * const pt_attr)
504{
505 return ARM_PTE_NX;
506}
507
508#endif /* (__ARM_VMSA__ > 7) */
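/*
 * Illustrative sketch (not part of the original source): a common use of the
 * accessors above is to clip a VA range to the current "twig", i.e. the
 * region mapped by a single leaf page-table page, so that work can be
 * batched one leaf table at a time.  Assuming start < end:
 */
__unused static inline vm_map_address_t
pmap_clip_to_twig_example(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	/* first VA beyond the twig containing "start" */
	vm_map_address_t twig_end = (start & ~pt_attr_twig_offmask(pt_attr)) + pt_attr_twig_size(pt_attr);
	return (twig_end < end) ? twig_end : end;
}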
509
510static inline void
511pmap_sync_tlb(bool strong __unused)
512{
513 sync_tlb_flush();
514}
5ba3f43e 515
#if MACH_ASSERT
int vm_footprint_suspend_allowed = 1;

extern int pmap_ledgers_panic;
extern int pmap_ledgers_panic_leeway;

int pmap_stats_assert = 1;
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...) \
	MACRO_BEGIN \
	if (pmap_stats_assert && (pmap)->pmap_stats_assert) \
	        assertf(cond, fmt, ##__VA_ARGS__); \
	MACRO_END
#else /* MACH_ASSERT */
#define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
#endif /* MACH_ASSERT */

#if DEVELOPMENT || DEBUG
#define PMAP_FOOTPRINT_SUSPENDED(pmap) \
	(current_thread()->pmap_footprint_suspended)
#else /* DEVELOPMENT || DEBUG */
#define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
#endif /* DEVELOPMENT || DEBUG */
538
539
c6bf4f31
A
540#if XNU_MONITOR
541/*
542 * PPL External References.
543 */
544extern vm_offset_t segPPLDATAB;
545extern unsigned long segSizePPLDATA;
546extern vm_offset_t segPPLTEXTB;
547extern unsigned long segSizePPLTEXT;
548#if __APRR_SUPPORTED__
549extern vm_offset_t segPPLTRAMPB;
550extern unsigned long segSizePPLTRAMP;
551extern void ppl_trampoline_start;
552extern void ppl_trampoline_end;
553#endif
554extern vm_offset_t segPPLDATACONSTB;
555extern unsigned long segSizePPLDATACONST;
556
557
558/*
559 * PPL Global Variables
560 */
561
562#if (DEVELOPMENT || DEBUG)
563/* Indicates if the PPL will enforce mapping policies; set by -unsafe_kernel_text */
564SECURITY_READ_ONLY_LATE(boolean_t) pmap_ppl_disable = FALSE;
565#else
566const boolean_t pmap_ppl_disable = FALSE;
567#endif
568
569/* Indicates if the PPL has started applying APRR. */
570boolean_t pmap_ppl_locked_down MARK_AS_PMAP_DATA = FALSE;
571
572/*
573 * The PPL cannot invoke the kernel in order to allocate memory, so we must
574 * maintain a list of free pages that the PPL owns. The kernel can give the PPL
575 * additional pages.
576 */
577decl_simple_lock_data(, pmap_ppl_free_page_lock MARK_AS_PMAP_DATA);
578void ** pmap_ppl_free_page_list MARK_AS_PMAP_DATA = NULL;
579uint64_t pmap_ppl_free_page_count MARK_AS_PMAP_DATA = 0;
580uint64_t pmap_ppl_pages_returned_to_kernel_count_total = 0;
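/*
 * Illustrative sketch (not part of the original source): one plausible shape
 * for consuming the free list declared above, assuming the list is threaded
 * through the first word of each free page and protected by
 * pmap_ppl_free_page_lock.  The matching "push" would be symmetric.
 */
__unused static inline void *
pmap_ppl_free_page_pop_example(void)
{
	void *page_va = NULL;
	pmap_simple_lock(&pmap_ppl_free_page_lock);
	if (pmap_ppl_free_page_count > 0) {
		page_va = (void *)pmap_ppl_free_page_list;
		pmap_ppl_free_page_list = (void **)*pmap_ppl_free_page_list;    /* next link lives in the page itself */
		pmap_ppl_free_page_count--;
	}
	pmap_simple_unlock(&pmap_ppl_free_page_lock);
	return page_va;
}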
581
582struct pmap_cpu_data_array_entry pmap_cpu_data_array[MAX_CPUS] MARK_AS_PMAP_DATA;
583
584#ifdef CPU_CLUSTER_OFFSETS
585const uint64_t pmap_cluster_offsets[] = CPU_CLUSTER_OFFSETS;
586_Static_assert((sizeof(pmap_cluster_offsets) / sizeof(pmap_cluster_offsets[0])) == __ARM_CLUSTER_COUNT__,
587 "pmap_cluster_offsets[] count does not match __ARM_CLUSTER_COUNT__");
588#endif
589
590extern void *pmap_stacks_start;
591extern void *pmap_stacks_end;
592SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_start_pa = 0;
593SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_end_pa = 0;
594SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_start = 0;
595SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_end = 0;
596
597/* Allocation data/locks for pmap structures. */
598decl_simple_lock_data(, pmap_free_list_lock MARK_AS_PMAP_DATA);
599SECURITY_READ_ONLY_LATE(unsigned long) pmap_array_count = 0;
600SECURITY_READ_ONLY_LATE(void *) pmap_array_begin = NULL;
601SECURITY_READ_ONLY_LATE(void *) pmap_array_end = NULL;
602SECURITY_READ_ONLY_LATE(pmap_t) pmap_array = NULL;
603pmap_t pmap_free_list MARK_AS_PMAP_DATA = NULL;
604
605/* Allocation data/locks/structs for task ledger structures. */
606#define PMAP_LEDGER_DATA_BYTES \
607 (((sizeof(task_ledgers) / sizeof(int)) * sizeof(struct ledger_entry)) + sizeof(struct ledger))
608
/*
 * The maximum number of ledgers is the maximum number of tasks allowed on
 * the system plus some slack, i.e. roughly 10% of total tasks (200).
 */
#define MAX_PMAP_LEDGERS (MAX_ASID + 200)
614
615typedef struct pmap_ledger_data {
616 char pld_data[PMAP_LEDGER_DATA_BYTES];
617} pmap_ledger_data_t;
618
619typedef struct pmap_ledger {
620 union {
621 struct pmap_ledger_data ple_data;
622 struct pmap_ledger * next;
623 };
624
625 struct pmap_ledger ** back_ptr;
626} pmap_ledger_t;
627
628SECURITY_READ_ONLY_LATE(bool) pmap_ledger_alloc_initialized = false;
629decl_simple_lock_data(, pmap_ledger_lock MARK_AS_PMAP_DATA);
630SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_begin = NULL;
631SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_end = NULL;
632SECURITY_READ_ONLY_LATE(os_refcnt_t *) pmap_ledger_refcnt = NULL;
633SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_begin = NULL;
634SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_end = NULL;
635SECURITY_READ_ONLY_LATE(pmap_ledger_t * *) pmap_ledger_ptr_array = NULL;
636uint64_t pmap_ledger_ptr_array_free_index MARK_AS_PMAP_DATA = 0;
637pmap_ledger_t * pmap_ledger_free_list MARK_AS_PMAP_DATA = NULL;
638
639#define pmap_ledger_debit(p, e, a) ledger_debit_nocheck((p)->ledger, e, a)
640#define pmap_ledger_credit(p, e, a) ledger_credit_nocheck((p)->ledger, e, a)
641
642static inline void
643pmap_check_ledger_fields(ledger_t ledger)
644{
645 if (ledger == NULL) {
646 return;
647 }
648
649 thread_t cur_thread = current_thread();
650 ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting);
651 ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting_compressed);
652 ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal);
653 ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal_compressed);
654 ledger_check_new_balance(cur_thread, ledger, task_ledgers.page_table);
655 ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_footprint);
656 ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_mem);
657 ledger_check_new_balance(cur_thread, ledger, task_ledgers.tkm_private);
658 ledger_check_new_balance(cur_thread, ledger, task_ledgers.wired_mem);
659}
660
661#define pmap_ledger_check_balance(p) pmap_check_ledger_fields((p)->ledger)
662
663#else /* XNU_MONITOR */
5ba3f43e 664
d9a64523
A
665#define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
666#define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
667
c6bf4f31 668#endif /* !XNU_MONITOR */
d9a64523 669
5ba3f43e
A
670#if DEVELOPMENT || DEBUG
671int panic_on_unsigned_execute = 0;
672#endif /* DEVELOPMENT || DEBUG */
673
674
675/* Virtual memory region for early allocation */
0a7de745 676#if (__ARM_VMSA__ == 7)
cb323159 677#define VREGION1_HIGH_WINDOW (0)
5ba3f43e 678#else
0a7de745 679#define VREGION1_HIGH_WINDOW (PE_EARLY_BOOT_VA)
5ba3f43e 680#endif
cb323159 681#define VREGION1_START ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
0a7de745 682#define VREGION1_SIZE (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
5ba3f43e
A
683
684extern unsigned int not_in_kdp;
685
686extern vm_offset_t first_avail;
687
688extern pmap_paddr_t avail_start;
689extern pmap_paddr_t avail_end;
690
0a7de745
A
691extern vm_offset_t virtual_space_start; /* Next available kernel VA */
692extern vm_offset_t virtual_space_end; /* End of kernel address space */
d9a64523 693extern vm_offset_t static_memory_end;
5ba3f43e 694
cb323159 695extern int maxproc, hard_maxproc;
5ba3f43e
A
696
697#if (__ARM_VMSA__ > 7)
698/* The number of address bits one TTBR can cover. */
699#define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
700
701/*
702 * The bounds on our TTBRs. These are for sanity checking that
703 * an address is accessible by a TTBR before we attempt to map it.
704 */
705#define ARM64_TTBR0_MIN_ADDR (0ULL)
706#define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
707#define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
708#define ARM64_TTBR1_MAX_ADDR (~0ULL)
709
710/* The level of the root of a page table. */
711const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));
712
713/* The number of entries in the root TT of a page table. */
714const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
715#else
716const uint64_t arm64_root_pgtable_level = 0;
717const uint64_t arm64_root_pgtable_num_ttes = 0;
718#endif
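/*
 * Worked example (illustrative, not part of the original source): assume
 * 16K pages with T0SZ_BOOT == 25 and eight-byte TTEs, i.e.
 * PGTABLE_ADDR_BITS == 39, ARM_PGSHIFT == 14, TTE_SHIFT == 3.  Then
 * (PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) == 24 and (ARM_PGSHIFT - TTE_SHIFT) == 11,
 * so arm64_root_pgtable_level == 3 - (24 / 11) == 1 and
 * arm64_root_pgtable_num_ttes == 2 << (24 % 11) == 8: translation starts at
 * an L1 table holding 8 entries, each covering a 36-bit slice of the 39-bit
 * address space.
 */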
719
720struct pmap kernel_pmap_store MARK_AS_PMAP_DATA;
721SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;
722
0a7de745 723struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* store pt pages */
5ba3f43e
A
724vm_object_t pmap_object = &pmap_object_store;
725
0a7de745 726static struct zone *pmap_zone; /* zone of pmap structures */
5ba3f43e 727
cb323159
A
728decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA);
729decl_simple_lock_data(, tt1_lock MARK_AS_PMAP_DATA);
0a7de745
A
730unsigned int pmap_stamp MARK_AS_PMAP_DATA;
731queue_head_t map_pmap_list MARK_AS_PMAP_DATA;
5ba3f43e 732
cb323159 733decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA);
0a7de745 734queue_head_t pt_page_list MARK_AS_PMAP_DATA; /* pt page ptd entries list */
5ba3f43e 735
cb323159 736decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA);
5ba3f43e
A
737
738typedef struct page_free_entry {
0a7de745 739 struct page_free_entry *next;
5ba3f43e
A
740} page_free_entry_t;
741
0a7de745 742#define PAGE_FREE_ENTRY_NULL ((page_free_entry_t *) 0)
5ba3f43e 743
0a7de745
A
744page_free_entry_t *pmap_pages_reclaim_list MARK_AS_PMAP_DATA; /* Reclaimed pt page list */
745unsigned int pmap_pages_request_count MARK_AS_PMAP_DATA; /* Pending requests to reclaim pt page */
746unsigned long long pmap_pages_request_acum MARK_AS_PMAP_DATA;
5ba3f43e
A
747
748
749typedef struct tt_free_entry {
0a7de745 750 struct tt_free_entry *next;
5ba3f43e
A
751} tt_free_entry_t;
752
0a7de745 753#define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
5ba3f43e 754
0a7de745
A
755tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
756unsigned int free_page_size_tt_count MARK_AS_PMAP_DATA;
757unsigned int free_page_size_tt_max MARK_AS_PMAP_DATA;
758#define FREE_PAGE_SIZE_TT_MAX 4
759tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
760unsigned int free_two_page_size_tt_count MARK_AS_PMAP_DATA;
761unsigned int free_two_page_size_tt_max MARK_AS_PMAP_DATA;
762#define FREE_TWO_PAGE_SIZE_TT_MAX 4
763tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
764unsigned int free_tt_count MARK_AS_PMAP_DATA;
765unsigned int free_tt_max MARK_AS_PMAP_DATA;
5ba3f43e 766
0a7de745 767#define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
5ba3f43e
A
768
769boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
770boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
771boolean_t pmap_gc_allowed_by_time_throttle = TRUE;
772
0a7de745
A
773unsigned int inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
774unsigned int inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf user pagetable pages, in units of PAGE_SIZE */
775unsigned int inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0; /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
5ba3f43e
A
776unsigned int inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
777unsigned int inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
0a7de745
A
778unsigned int inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0; /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
779unsigned int inuse_pmap_pages_count = 0; /* debugging */
5ba3f43e
A
780
781SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte = 0;
782SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;
783
0a7de745
A
784SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte = 0; /* set by arm_vm_init() - keep out of bss */
785SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0; /* set by arm_vm_init() - phys tte addr */
5ba3f43e
A
786
787#if DEVELOPMENT || DEBUG
0a7de745
A
788int nx_enabled = 1; /* enable no-execute protection */
789int allow_data_exec = 0; /* No apps may execute data */
790int allow_stack_exec = 0; /* No apps may execute from the stack */
cb323159 791unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
5ba3f43e 792#else /* DEVELOPMENT || DEBUG */
0a7de745
A
793const int nx_enabled = 1; /* enable no-execute protection */
794const int allow_data_exec = 0; /* No apps may execute data */
795const int allow_stack_exec = 0; /* No apps may execute from the stack */
5ba3f43e
A
796#endif /* DEVELOPMENT || DEBUG */
797
/*
 * pv_entry_t - structure to track the active mappings for a given page
 */
typedef struct pv_entry {
	struct pv_entry *pve_next;      /* next alias */
	pt_entry_t      *pve_ptep;      /* page table entry */
}
#if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
/* For the newer ARMv7k ABI where 64-bit types are 64-bit aligned, but pointers
 * are 32-bit:
 * Since pt_desc is 64-bit aligned and we cast often from pv_entry to
 * pt_desc.
 */
__attribute__ ((aligned(8))) pv_entry_t;
#else
pv_entry_t;
#endif

#define PV_ENTRY_NULL   ((pv_entry_t *) 0)

/*
 * PMAP LEDGERS:
 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
 * as a marker for pages mapped through an "alternate accounting" mapping.
 * These macros set, clear and test for this marker and extract the actual
 * value of the "pve_next" pointer.
 */
#define PVE_NEXT_ALTACCT        ((uintptr_t) 0x1)
#define PVE_NEXT_SET_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
	    PVE_NEXT_ALTACCT)
#define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
	*(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
	    ~PVE_NEXT_ALTACCT)
#define PVE_NEXT_IS_ALTACCT(pve_next) \
	((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
#define PVE_NEXT_PTR(pve_next) \
	((struct pv_entry *)(((uintptr_t) (pve_next)) & \
	    ~PVE_NEXT_ALTACCT))
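/*
 * Illustrative sketch (not part of the original source): because the low bit
 * of "pve_next" doubles as the alternate-accounting marker, the chain must
 * always be followed through PVE_NEXT_PTR() rather than by dereferencing the
 * raw pointer:
 */
__unused static inline struct pv_entry *
pve_next_example(struct pv_entry *pvep, boolean_t *is_altacct)
{
	*is_altacct = PVE_NEXT_IS_ALTACCT(pvep->pve_next);
	return PVE_NEXT_PTR(pvep->pve_next);    /* strip the marker bit to recover the real pointer */
}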
5ba3f43e
A
837#if MACH_ASSERT
838static void pmap_check_ledgers(pmap_t pmap);
839#else
0a7de745
A
840static inline void
841pmap_check_ledgers(__unused pmap_t pmap)
842{
843}
5ba3f43e
A
844#endif /* MACH_ASSERT */
845
0a7de745 846SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table; /* array of pv entry pointers */
5ba3f43e 847
0a7de745
A
848pv_entry_t *pv_free_list MARK_AS_PMAP_DATA;
849pv_entry_t *pv_kern_free_list MARK_AS_PMAP_DATA;
cb323159
A
850decl_simple_lock_data(, pv_free_list_lock MARK_AS_PMAP_DATA);
851decl_simple_lock_data(, pv_kern_free_list_lock MARK_AS_PMAP_DATA);
5ba3f43e 852
cb323159 853decl_simple_lock_data(, phys_backup_lock);
5ba3f43e
A
854
855/*
856 * pt_desc - structure to keep info on page assigned to page tables
857 */
858#if (__ARM_VMSA__ == 7)
0a7de745 859#define PT_INDEX_MAX 1
5ba3f43e
A
860#else
861#if (ARM_PGSHIFT == 14)
0a7de745 862#define PT_INDEX_MAX 1
5ba3f43e 863#else
0a7de745 864#define PT_INDEX_MAX 4
5ba3f43e
A
865#endif
866#endif
867
0a7de745
A
868#define PT_DESC_REFCOUNT 0x4000U
869#define PT_DESC_IOMMU_REFCOUNT 0x8000U
5ba3f43e
A
870
871typedef struct pt_desc {
0a7de745 872 queue_chain_t pt_page;
cb323159
A
873 union {
874 struct pmap *pmap;
875 };
876 /*
877 * Locate this struct towards the end of the pt_desc; our long term
878 * goal is to make this a VLA to avoid wasting memory if we don't need
879 * multiple entries.
880 */
5ba3f43e 881 struct {
d9a64523
A
882 /*
883 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
884 * For leaf pagetables, should reflect the number of non-empty PTEs
885 * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
886 */
0a7de745 887 unsigned short refcnt;
d9a64523
A
888 /*
889 * For non-leaf pagetables, should be 0
890 * For leaf pagetables, should reflect the number of wired entries
891 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
892 */
0a7de745 893 unsigned short wiredcnt;
0a7de745 894 vm_offset_t va;
cb323159 895 } ptd_info[PT_INDEX_MAX];
5ba3f43e
A
896} pt_desc_t;
897
898
0a7de745 899#define PTD_ENTRY_NULL ((pt_desc_t *) 0)
5ba3f43e
A
900
901SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;
902
0a7de745 903pt_desc_t *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
5ba3f43e 904SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
0a7de745 905unsigned int ptd_free_count MARK_AS_PMAP_DATA = 0;
cb323159 906decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA);
5ba3f43e
A
907
908/*
909 * physical page attribute
910 */
0a7de745 911typedef u_int16_t pp_attr_t;
5ba3f43e 912
0a7de745
A
913#define PP_ATTR_WIMG_MASK 0x003F
914#define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
5ba3f43e 915
0a7de745
A
916#define PP_ATTR_REFERENCED 0x0040
917#define PP_ATTR_MODIFIED 0x0080
5ba3f43e 918
0a7de745
A
919#define PP_ATTR_INTERNAL 0x0100
920#define PP_ATTR_REUSABLE 0x0200
921#define PP_ATTR_ALTACCT 0x0400
922#define PP_ATTR_NOENCRYPT 0x0800
5ba3f43e 923
0a7de745
A
924#define PP_ATTR_REFFAULT 0x1000
925#define PP_ATTR_MODFAULT 0x2000
5ba3f43e 926
c6bf4f31
A
927#if XNU_MONITOR
928/*
929 * Denotes that a page is owned by the PPL. This is modified/checked with the
930 * PVH lock held, to avoid ownership related races. This does not need to be a
931 * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
932 * put the bit.
933 */
934#define PP_ATTR_MONITOR 0x4000
935
936/*
937 * Denotes that a page *cannot* be owned by the PPL. This is required in order
938 * to temporarily 'pin' kernel pages that are used to store PPL output parameters.
939 * Otherwise a malicious or buggy caller could pass PPL-owned memory for these
940 * parameters and in so doing stage a write gadget against the PPL.
941 */
942#define PP_ATTR_NO_MONITOR 0x8000
943
944/*
945 * All of the bits owned by the PPL; kernel requests to set or clear these bits
946 * are illegal.
947 */
948#define PP_ATTR_PPL_OWNED_BITS (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
949#endif
5ba3f43e 950
0a7de745 951SECURITY_READ_ONLY_LATE(pp_attr_t*) pp_attr_table;
5ba3f43e 952
0a7de745 953typedef struct pmap_io_range {
d9a64523 954 uint64_t addr;
cb323159
A
955 uint64_t len;
956 #define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31) // Strong DSB required for pages in this range
4ba76501 957 #define PMAP_IO_RANGE_CARVEOUT (1UL << 30) // Corresponds to memory carved out by bootloader
cb323159
A
958 uint32_t wimg; // lower 16 bits treated as pp_attr_t, upper 16 bits contain additional mapping flags
959 uint32_t signature; // 4CC
d9a64523 960} __attribute__((packed)) pmap_io_range_t;
5ba3f43e 961
0a7de745 962SECURITY_READ_ONLY_LATE(pmap_io_range_t*) io_attr_table;
5ba3f43e 963
0a7de745
A
964SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_first_phys = (pmap_paddr_t) 0;
965SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_last_phys = (pmap_paddr_t) 0;
5ba3f43e 966
0a7de745 967SECURITY_READ_ONLY_LATE(unsigned int) num_io_rgns = 0;
5ba3f43e 968
0a7de745 969SECURITY_READ_ONLY_LATE(boolean_t) pmap_initialized = FALSE; /* Has pmap_init completed? */
5ba3f43e
A
970
971SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
972SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;
973
974SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default = 0x0;
975#if defined(__arm64__)
976SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
977#endif
978
cb323159
A
979#define PMAP_MAX_SW_ASID ((MAX_ASID + MAX_HW_ASID - 1) / MAX_HW_ASID)
980_Static_assert(PMAP_MAX_SW_ASID <= (UINT8_MAX + 1),
981 "VASID bits can't be represented by an 8-bit integer");
982
983decl_simple_lock_data(, asid_lock MARK_AS_PMAP_DATA);
984static bitmap_t asid_bitmap[BITMAP_LEN(MAX_ASID)] MARK_AS_PMAP_DATA;
985
5ba3f43e 986
0a7de745 987#if (__ARM_VMSA__ > 7)
5c9f4661 988SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
5ba3f43e
A
989#endif
990
c6bf4f31
A
991#if XNU_MONITOR
992/*
993 * We define our target as 8 pages; enough for 2 page table pages, a PTD page,
994 * and a PV page; in essence, twice as many pages as may be necessary to satisfy
995 * a single pmap_enter request.
996 */
997#define PMAP_MIN_FREE_PPL_PAGES 8
998#endif
5ba3f43e 999
#define pa_index(pa) \
	(atop((pa) - vm_first_phys))

#define pai_to_pvh(pai) \
	(&pv_head_table[pai])

#define pa_valid(x) \
	((x) >= vm_first_phys && (x) < vm_last_phys)

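/*
 * Illustrative sketch (not part of the original source): a physical address
 * is only tracked by the PV machinery if it lies in [vm_first_phys,
 * vm_last_phys); when it does, its pv_head_table slot is reached by
 * converting the address to a physical page index:
 */
__unused static inline pv_entry_t **
pa_to_pvh_example(pmap_paddr_t pa)
{
	if (!pa_valid(pa)) {
		return NULL;            /* not a managed page, no PV head entry */
	}
	return pai_to_pvh(pa_index(pa));
}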
1009/* PTE Define Macros */
1010
0a7de745 1011#define pte_is_wired(pte) \
5ba3f43e
A
1012 (((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)
1013
0a7de745
A
1014#define pte_set_wired(ptep, wired) \
1015 do { \
1016 SInt16 *ptd_wiredcnt_ptr; \
cb323159 1017 ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->ptd_info[ARM_PT_DESC_INDEX(ptep)].wiredcnt); \
0a7de745
A
1018 if (wired) { \
1019 *ptep |= ARM_PTE_WIRED; \
1020 OSAddAtomic16(1, ptd_wiredcnt_ptr); \
1021 } else { \
1022 *ptep &= ~ARM_PTE_WIRED; \
1023 OSAddAtomic16(-1, ptd_wiredcnt_ptr); \
1024 } \
5ba3f43e
A
1025 } while(0)
1026
0a7de745 1027#define pte_was_writeable(pte) \
5ba3f43e
A
1028 (((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)
1029
0a7de745
A
1030#define pte_set_was_writeable(pte, was_writeable) \
1031 do { \
1032 if ((was_writeable)) { \
1033 (pte) |= ARM_PTE_WRITEABLE; \
1034 } else { \
1035 (pte) &= ~ARM_PTE_WRITEABLE; \
1036 } \
5ba3f43e
A
1037 } while(0)
1038
1039/* PVE Define Macros */
1040
0a7de745 1041#define pve_next(pve) \
5ba3f43e
A
1042 ((pve)->pve_next)
1043
0a7de745 1044#define pve_link_field(pve) \
5ba3f43e
A
1045 (&pve_next(pve))
1046
0a7de745 1047#define pve_link(pp, e) \
5ba3f43e
A
1048 ((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))
1049
0a7de745 1050#define pve_unlink(pp, e) \
5ba3f43e
A
1051 (pve_next(pp) = pve_next(e))
1052
1053/* bits held in the ptep pointer field */
1054
0a7de745 1055#define pve_get_ptep(pve) \
5ba3f43e
A
1056 ((pve)->pve_ptep)
1057
0a7de745
A
1058#define pve_set_ptep(pve, ptep_new) \
1059 do { \
1060 (pve)->pve_ptep = (ptep_new); \
5ba3f43e
A
1061 } while (0)
1062
1063/* PTEP Define Macros */
1064
cb323159
A
1065/* mask for page descriptor index */
1066#define ARM_TT_PT_INDEX_MASK ARM_PGMASK
5ba3f43e 1067
cb323159 1068#if (__ARM_VMSA__ == 7)
0a7de745
A
1069#define ARM_PT_DESC_INDEX_MASK 0x00000
1070#define ARM_PT_DESC_INDEX_SHIFT 0
5ba3f43e 1071
0a7de745
A
1072/*
1073 * Shift value used for reconstructing the virtual address for a PTE.
1074 */
1075#define ARM_TT_PT_ADDR_SHIFT (10U)
5ba3f43e 1076
0a7de745 1077#define ptep_get_va(ptep) \
cb323159 1078 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
5ba3f43e 1079
0a7de745 1080#define ptep_get_pmap(ptep) \
cb323159 1081 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
5ba3f43e
A
1082
1083#else
1084
1085#if (ARM_PGSHIFT == 12)
0a7de745
A
1086#define ARM_PT_DESC_INDEX_MASK ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
1087#define ARM_PT_DESC_INDEX_SHIFT ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
0a7de745
A
1088/*
1089 * Shift value used for reconstructing the virtual address for a PTE.
1090 */
1091#define ARM_TT_PT_ADDR_SHIFT (9ULL)
5ba3f43e
A
1092#else
1093
0a7de745
A
1094#define ARM_PT_DESC_INDEX_MASK (0x00000)
1095#define ARM_PT_DESC_INDEX_SHIFT (0)
0a7de745
A
1096/*
1097 * Shift value used for reconstructing the virtual address for a PTE.
1098 */
1099#define ARM_TT_PT_ADDR_SHIFT (11ULL)
5ba3f43e
A
1100#endif
1101
cb323159 1102
0a7de745 1103#define ARM_PT_DESC_INDEX(ptep) \
5ba3f43e
A
1104 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
1105
0a7de745 1106#define ptep_get_va(ptep) \
cb323159 1107 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
5ba3f43e 1108
0a7de745 1109#define ptep_get_pmap(ptep) \
cb323159 1110 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
5ba3f43e
A
1111
1112#endif
1113
0a7de745 1114#define ARM_PT_DESC_INDEX(ptep) \
d9a64523
A
1115 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
1116
0a7de745 1117#define ptep_get_ptd(ptep) \
d9a64523
A
1118 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
1119
5ba3f43e
A
1120
1121/* PVH Define Macros */
1122
1123/* pvhead type */
0a7de745
A
1124#define PVH_TYPE_NULL 0x0UL
1125#define PVH_TYPE_PVEP 0x1UL
1126#define PVH_TYPE_PTEP 0x2UL
1127#define PVH_TYPE_PTDP 0x3UL
5ba3f43e 1128
d9a64523 1129#define PVH_TYPE_MASK (0x3UL)
5ba3f43e 1130
0a7de745 1131#ifdef __arm64__
5ba3f43e 1132
cb323159
A
1133/* All flags listed below are stored in the PV head pointer unless otherwise noted */
1134#define PVH_FLAG_IOMMU 0x4UL /* Stored in each PTE, or in PV head for single-PTE PV heads */
1135#define PVH_FLAG_IOMMU_TABLE (1ULL << 63) /* Stored in each PTE, or in PV head for single-PTE PV heads */
d9a64523
A
1136#define PVH_FLAG_CPU (1ULL << 62)
1137#define PVH_LOCK_BIT 61
1138#define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
1139#define PVH_FLAG_EXEC (1ULL << 60)
1140#define PVH_FLAG_LOCKDOWN (1ULL << 59)
1141#define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN)
1142
1143#else /* !__arm64__ */
1144
1145#define PVH_LOCK_BIT 31
1146#define PVH_FLAG_LOCK (1UL << PVH_LOCK_BIT)
1147#define PVH_HIGH_FLAGS PVH_FLAG_LOCK
5ba3f43e 1148
5ba3f43e
A
1149#endif
1150
0a7de745 1151#define PVH_LIST_MASK (~PVH_TYPE_MASK)
d9a64523 1152
0a7de745 1153#define pvh_test_type(h, b) \
5ba3f43e
A
1154 ((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))
1155
0a7de745 1156#define pvh_ptep(h) \
d9a64523 1157 ((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
5ba3f43e 1158
0a7de745 1159#define pvh_list(h) \
d9a64523 1160 ((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
5ba3f43e 1161
0a7de745 1162#define pvh_get_flags(h) \
d9a64523 1163 (*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)
5ba3f43e 1164
#define pvh_set_flags(h, f) \
	do { \
		os_atomic_store((vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
		    relaxed); \
	} while (0)

#define pvh_update_head(h, e, t) \
	do { \
		assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK); \
		os_atomic_store((vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
		    relaxed); \
	} while (0)
5ba3f43e 1177
0a7de745
A
1178#define pvh_update_head_unlocked(h, e, t) \
1179 do { \
1180 assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK)); \
1181 *(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK; \
5ba3f43e
A
1182 } while (0)
1183
0a7de745
A
1184#define pvh_add(h, e) \
1185 do { \
1186 assert(!pvh_test_type((h), PVH_TYPE_PTEP)); \
1187 pve_next(e) = pvh_list(h); \
1188 pvh_update_head((h), (e), PVH_TYPE_PVEP); \
d9a64523
A
1189 } while (0)
1190
0a7de745
A
1191#define pvh_remove(h, p, e) \
1192 do { \
1193 assert(!PVE_NEXT_IS_ALTACCT(pve_next((e)))); \
1194 if ((p) == (h)) { \
1195 if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
1196 pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
1197 } else { \
1198 pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
1199 } \
1200 } else { \
1201 /* \
1202 * PMAP LEDGERS: \
1203 * preserve the "alternate accounting" bit \
1204 * when updating "p" (the previous entry's \
1205 * "pve_next"). \
1206 */ \
1207 boolean_t __is_altacct; \
1208 __is_altacct = PVE_NEXT_IS_ALTACCT(*(p)); \
1209 *(p) = PVE_NEXT_PTR(pve_next((e))); \
1210 if (__is_altacct) { \
1211 PVE_NEXT_SET_ALTACCT((p)); \
1212 } else { \
1213 PVE_NEXT_CLR_ALTACCT((p)); \
1214 } \
1215 } \
5ba3f43e
A
1216 } while (0)
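/*
 * Illustrative sketch (not part of the original source): consumers of a PV
 * head first dispatch on its type -- a single mapping stores the PTE pointer
 * directly (PVH_TYPE_PTEP), while multiple mappings hang off a pv_entry
 * chain (PVH_TYPE_PVEP).  With the PVH lock held, counting the mappings of a
 * page might look like:
 */
__unused static inline unsigned int
pvh_count_mappings_example(pv_entry_t **pv_h)
{
	unsigned int count = 0;
	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		count = 1;                              /* exactly one PTE maps this page */
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		for (pv_entry_t *pvep = pvh_list(pv_h); pvep != PV_ENTRY_NULL;
		    pvep = PVE_NEXT_PTR(pve_next(pvep))) {      /* strip the ALTACCT marker bit */
			count++;
		}
	}
	return count;
}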
1217
1218
1219/* PPATTR Define Macros */
1220
0a7de745
A
1221#define ppattr_set_bits(h, b) \
1222 do { \
1223 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
5ba3f43e
A
1224 } while (0)
1225
0a7de745
A
1226#define ppattr_clear_bits(h, b) \
1227 do { \
1228 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
5ba3f43e
A
1229 } while (0)
1230
0a7de745 1231#define ppattr_test_bits(h, b) \
5ba3f43e
A
1232 ((*(pp_attr_t *)(h) & (b)) == (b))
1233
0a7de745
A
1234#define pa_set_bits(x, b) \
1235 do { \
1236 if (pa_valid(x)) \
1237 ppattr_set_bits(&pp_attr_table[pa_index(x)], \
1238 (b)); \
5ba3f43e
A
1239 } while (0)
1240
0a7de745 1241#define pa_test_bits(x, b) \
5ba3f43e 1242 (pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
0a7de745 1243 (b)) : FALSE)
5ba3f43e 1244
0a7de745
A
1245#define pa_clear_bits(x, b) \
1246 do { \
1247 if (pa_valid(x)) \
1248 ppattr_clear_bits(&pp_attr_table[pa_index(x)], \
1249 (b)); \
5ba3f43e
A
1250 } while (0)
1251
0a7de745 1252#define pa_set_modify(x) \
5ba3f43e
A
1253 pa_set_bits(x, PP_ATTR_MODIFIED)
1254
0a7de745 1255#define pa_clear_modify(x) \
5ba3f43e
A
1256 pa_clear_bits(x, PP_ATTR_MODIFIED)
1257
0a7de745 1258#define pa_set_reference(x) \
5ba3f43e
A
1259 pa_set_bits(x, PP_ATTR_REFERENCED)
1260
0a7de745 1261#define pa_clear_reference(x) \
5ba3f43e
A
1262 pa_clear_bits(x, PP_ATTR_REFERENCED)
1263
c6bf4f31
A
1264#if XNU_MONITOR
1265#define pa_set_monitor(x) \
1266 pa_set_bits((x), PP_ATTR_MONITOR)
1267
1268#define pa_clear_monitor(x) \
1269 pa_clear_bits((x), PP_ATTR_MONITOR)
1270
1271#define pa_test_monitor(x) \
1272 pa_test_bits((x), PP_ATTR_MONITOR)
1273
1274#define pa_set_no_monitor(x) \
1275 pa_set_bits((x), PP_ATTR_NO_MONITOR)
1276
1277#define pa_clear_no_monitor(x) \
1278 pa_clear_bits((x), PP_ATTR_NO_MONITOR)
1279
1280#define pa_test_no_monitor(x) \
1281 pa_test_bits((x), PP_ATTR_NO_MONITOR)
1282#endif
5ba3f43e
A
1283
1284#define IS_INTERNAL_PAGE(pai) \
1285 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1286#define SET_INTERNAL_PAGE(pai) \
1287 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1288#define CLR_INTERNAL_PAGE(pai) \
1289 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1290
1291#define IS_REUSABLE_PAGE(pai) \
1292 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1293#define SET_REUSABLE_PAGE(pai) \
1294 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1295#define CLR_REUSABLE_PAGE(pai) \
1296 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1297
0a7de745
A
1298#define IS_ALTACCT_PAGE(pai, pve_p) \
1299 (((pve_p) == NULL) \
1300 ? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
5ba3f43e 1301 : PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
0a7de745
A
1302#define SET_ALTACCT_PAGE(pai, pve_p) \
1303 if ((pve_p) == NULL) { \
1304 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1305 } else { \
1306 PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
5ba3f43e 1307 }
0a7de745
A
1308#define CLR_ALTACCT_PAGE(pai, pve_p) \
1309 if ((pve_p) == NULL) { \
1310 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1311 } else { \
1312 PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
5ba3f43e
A
1313 }
1314
1315#define IS_REFFAULT_PAGE(pai) \
1316 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1317#define SET_REFFAULT_PAGE(pai) \
1318 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1319#define CLR_REFFAULT_PAGE(pai) \
1320 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1321
1322#define IS_MODFAULT_PAGE(pai) \
1323 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1324#define SET_MODFAULT_PAGE(pai) \
1325 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1326#define CLR_MODFAULT_PAGE(pai) \
1327 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1328
0a7de745 1329#define tte_get_ptd(tte) \
d9a64523
A
1330 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))
1331
5ba3f43e 1332
0a7de745 1333#if (__ARM_VMSA__ == 7)
5ba3f43e 1334
cb323159 1335#define tte_index(pmap, pt_attr, addr) \
5ba3f43e
A
1336 ttenum((addr))
1337
cb323159
A
1338#define pte_index(pmap, pt_attr, addr) \
1339 ptenum((addr))
1340
5ba3f43e
A
1341#else
1342
cb323159
A
1343#define ttn_index(pmap, pt_attr, addr, pt_level) \
1344 (((addr) & (pt_attr)->pta_level_info[(pt_level)].index_mask) >> (pt_attr)->pta_level_info[(pt_level)].shift)
1345
1346#define tt0_index(pmap, pt_attr, addr) \
1347 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L0_LEVEL)
5ba3f43e 1348
cb323159
A
1349#define tt1_index(pmap, pt_attr, addr) \
1350 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L1_LEVEL)
5ba3f43e 1351
cb323159
A
1352#define tt2_index(pmap, pt_attr, addr) \
1353 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L2_LEVEL)
5ba3f43e 1354
cb323159
A
1355#define tt3_index(pmap, pt_attr, addr) \
1356 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L3_LEVEL)
5ba3f43e 1357
cb323159
A
1358#define tte_index(pmap, pt_attr, addr) \
1359 tt2_index((pmap), (pt_attr), (addr))
1360
1361#define pte_index(pmap, pt_attr, addr) \
1362 tt3_index((pmap), (pt_attr), (addr))
5ba3f43e 1363
5ba3f43e
A
1364#endif
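/*
 * Illustrative sketch (not part of the original source): the macros above
 * decompose a VA into per-level table indices.  The twig index selects the
 * TTE whose target page holds the leaf PTE found at the leaf index:
 */
__unused static inline void
pmap_va_indices_example(pmap_t pmap, vm_map_address_t va,
    unsigned int *twig_index, unsigned int *leaf_index)
{
	*twig_index = (unsigned int)tte_index(pmap, pmap_get_pt_attr(pmap), va);
	*leaf_index = (unsigned int)pte_index(pmap, pmap_get_pt_attr(pmap), va);
}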
1365
1366/*
1367 * Lock on pmap system
1368 */
1369
0a7de745
A
1370lck_grp_t pmap_lck_grp;
1371
1372#define PMAP_LOCK_INIT(pmap) { \
1373 simple_lock_init(&(pmap)->lock, 0); \
1374 }
5ba3f43e 1375
0a7de745
A
1376#define PMAP_LOCK(pmap) { \
1377 pmap_simple_lock(&(pmap)->lock); \
5ba3f43e
A
1378}
1379
0a7de745
A
1380#define PMAP_UNLOCK(pmap) { \
1381 pmap_simple_unlock(&(pmap)->lock); \
5ba3f43e
A
1382}
1383
1384#if MACH_ASSERT
0a7de745
A
1385#define PMAP_ASSERT_LOCKED(pmap) { \
1386 simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED); \
5ba3f43e
A
1387}
1388#else
1389#define PMAP_ASSERT_LOCKED(pmap)
1390#endif
1391
d9a64523
A
1392#if defined(__arm64__)
1393#define PVH_LOCK_WORD 1 /* Assumes little-endian */
1394#else
1395#define PVH_LOCK_WORD 0
1396#endif
5ba3f43e 1397
0a7de745
A
1398#define ASSERT_PVH_LOCKED(index) \
1399 do { \
1400 assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK); \
d9a64523 1401 } while (0)
5ba3f43e 1402
0a7de745
A
1403#define LOCK_PVH(index) \
1404 do { \
1405 pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
d9a64523
A
1406 } while (0)
1407
0a7de745
A
1408#define UNLOCK_PVH(index) \
1409 do { \
1410 ASSERT_PVH_LOCKED(index); \
1411 pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
d9a64523 1412 } while (0)
5ba3f43e 1413
#define PMAP_UPDATE_TLBS(pmap, s, e, strong) { \
	pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (unsigned)(e - s), pmap); \
	pmap_sync_tlb(strong); \
}
1418
0a7de745 1419#define FLUSH_PTE_RANGE(spte, epte) \
d9a64523 1420 __builtin_arm_dmb(DMB_ISH);
5ba3f43e 1421
0a7de745 1422#define FLUSH_PTE(pte_p) \
d9a64523 1423 __builtin_arm_dmb(DMB_ISH);
5ba3f43e 1424
0a7de745 1425#define FLUSH_PTE_STRONG(pte_p) \
d9a64523
A
1426 __builtin_arm_dsb(DSB_ISH);
1427
0a7de745 1428#define FLUSH_PTE_RANGE_STRONG(spte, epte) \
d9a64523 1429 __builtin_arm_dsb(DSB_ISH);
5ba3f43e 1430
#define WRITE_PTE_FAST(pte_p, pte_entry) \
	__unreachable_ok_push \
	if (TEST_PAGE_RATIO_4) { \
	        if (((unsigned)(pte_p)) & 0x1f) { \
	                panic("%s: WRITE_PTE_FAST is unaligned, " \
	                      "pte_p=%p, pte_entry=%p", \
	                      __FUNCTION__, \
	                      pte_p, (void*)pte_entry); \
	        } \
	        if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
	                *(pte_p) = (pte_entry); \
	                *((pte_p)+1) = (pte_entry); \
	                *((pte_p)+2) = (pte_entry); \
	                *((pte_p)+3) = (pte_entry); \
	        } else { \
	                *(pte_p) = (pte_entry); \
	                *((pte_p)+1) = (pte_entry) | 0x1000; \
	                *((pte_p)+2) = (pte_entry) | 0x2000; \
	                *((pte_p)+3) = (pte_entry) | 0x3000; \
	        } \
	} else { \
	        *(pte_p) = (pte_entry); \
	} \
	__unreachable_ok_pop

#define WRITE_PTE(pte_p, pte_entry) \
	WRITE_PTE_FAST(pte_p, pte_entry); \
	FLUSH_PTE(pte_p);

#define WRITE_PTE_STRONG(pte_p, pte_entry) \
	WRITE_PTE_FAST(pte_p, pte_entry); \
	FLUSH_PTE_STRONG(pte_p);

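/*
 * Illustrative sketch (not part of the original source): the usual update
 * sequence is to publish the new PTE with one of the WRITE_PTE variants
 * (which include the barrier) and only then invalidate stale TLB entries for
 * the affected VA range; "strong" is forwarded to pmap_sync_tlb().
 */
__unused static inline void
pmap_update_pte_example(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t new_pte,
    vm_map_address_t va, bool strong)
{
	WRITE_PTE(pte_p, new_pte);                              /* store + DMB so table walkers see the new entry */
	PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE, strong);     /* then shoot down any stale translations */
}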
1464/*
1465 * Other useful macros.
1466 */
0a7de745 1467#define current_pmap() \
5ba3f43e
A
1468 (vm_map_pmap(current_thread()->map))
1469
c6bf4f31
A
1470#if XNU_MONITOR
1471/*
1472 * PPL-related macros.
1473 */
1474#define ARRAY_ELEM_PTR_IS_VALID(_ptr_, _elem_size_, _array_begin_, _array_end_) \
1475 (((_ptr_) >= (typeof(_ptr_))_array_begin_) && \
1476 ((_ptr_) < (typeof(_ptr_))_array_end_) && \
1477 !((((void *)(_ptr_)) - ((void *)_array_begin_)) % (_elem_size_)))
1478
1479#define PMAP_PTR_IS_VALID(x) ARRAY_ELEM_PTR_IS_VALID(x, sizeof(struct pmap), pmap_array_begin, pmap_array_end)
1480
1481#define USER_PMAP_IS_VALID(x) (PMAP_PTR_IS_VALID(x) && (os_atomic_load(&(x)->ref_count, relaxed) > 0))
1482
1483#define VALIDATE_USER_PMAP(x) \
1484 if (__improbable(!USER_PMAP_IS_VALID(x))) \
1485 panic("%s: invalid pmap %p", __func__, (x));
1486
1487#define VALIDATE_PMAP(x) \
1488 if (__improbable(((x) != kernel_pmap) && !USER_PMAP_IS_VALID(x))) \
1489 panic("%s: invalid pmap %p", __func__, (x));
1490
1491#define VALIDATE_LEDGER_PTR(x) \
1492 if (__improbable(!ARRAY_ELEM_PTR_IS_VALID(x, sizeof(void *), pmap_ledger_ptr_array_begin, pmap_ledger_ptr_array_end))) \
1493 panic("%s: invalid ledger ptr %p", __func__, (x));
1494
1495#define ARRAY_ELEM_INDEX(x, _elem_size_, _array_begin_) ((uint64_t)((((void *)(x)) - (_array_begin_)) / (_elem_size_)))
1496
1497static uint64_t
1498pmap_ledger_validate(void * ledger)
1499{
1500 uint64_t array_index;
1501 pmap_ledger_t ** ledger_ptr_array_ptr = ((pmap_ledger_t*)ledger)->back_ptr;
1502 VALIDATE_LEDGER_PTR(ledger_ptr_array_ptr);
1503 array_index = ARRAY_ELEM_INDEX(ledger_ptr_array_ptr, sizeof(pmap_ledger_t *), pmap_ledger_ptr_array_begin);
1504
1505 if (array_index >= MAX_PMAP_LEDGERS) {
1506 panic("%s: ledger %p array index invalid, index was %#llx", __func__, ledger, array_index);
1507 }
1508
1509 pmap_ledger_t *ledger_ptr = *ledger_ptr_array_ptr;
1510
1511 if (__improbable(ledger_ptr != ledger)) {
1512 panic("%s: ledger pointer mismatch, %p != %p", __func__, ledger, ledger_ptr);
1513 }
1514
1515 return array_index;
1516}
1517
1518#else /* XNU_MONITOR */
5ba3f43e 1519
d9a64523
A
1520#define VALIDATE_USER_PMAP(x)
1521#define VALIDATE_PMAP(x)
1522#define VALIDATE_LEDGER(x)
1523
c6bf4f31 1524#endif
d9a64523
A
1525
1526#if DEVELOPMENT || DEBUG
1527
0a7de745 1528/*
d9a64523
A
1529 * Trace levels are controlled by a bitmask in which each
1530 * level can be enabled/disabled by the (1<<level) position
1531 * in the boot arg
1532 * Level 1: pmap lifecycle (create/destroy/switch)
1533 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
1534 * Level 3: internal state management (tte/attributes/fast-fault)
1535 */
1536
1537SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;
5ba3f43e 1538
#define PMAP_TRACE(level, ...) \
	if (__improbable((1 << (level)) & pmap_trace_mask)) { \
		KDBG_RELEASE(__VA_ARGS__); \
	}
1543#else
d9a64523
A
1544
1545#define PMAP_TRACE(level, ...)
1546
5ba3f43e
A
1547#endif
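/*
 * Illustrative sketch (not part of the original source): a level-2 (mapping
 * lifecycle) trace point would be emitted as below.  It compiles away
 * entirely outside DEVELOPMENT/DEBUG builds, and otherwise only fires when
 * bit (1 << 2) is set in pmap_trace_mask; the event code used here is only
 * an assumed example.
 */
__unused static inline void
pmap_trace_example(pmap_t pmap, vm_map_address_t va)
{
	PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), va);
}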
1548
5ba3f43e
A
1549
1550/*
1551 * Internal function prototypes (forward declarations).
1552 */
1553
1554static void pv_init(
0a7de745 1555 void);
5ba3f43e
A
1556
1557static boolean_t pv_alloc(
0a7de745
A
1558 pmap_t pmap,
1559 unsigned int pai,
1560 pv_entry_t **pvepp);
5ba3f43e
A
1561
1562static void pv_free(
0a7de745 1563 pv_entry_t *pvep);
5ba3f43e
A
1564
1565static void pv_list_free(
0a7de745
A
1566 pv_entry_t *pvehp,
1567 pv_entry_t *pvetp,
1568 unsigned int cnt);
5ba3f43e
A
1569
1570static void ptd_bootstrap(
0a7de745 1571 pt_desc_t *ptdp, unsigned int ptd_cnt);
5ba3f43e 1572
0a7de745 1573static inline pt_desc_t *ptd_alloc_unlinked(bool reclaim);
5ba3f43e 1574
0a7de745 1575static pt_desc_t *ptd_alloc(pmap_t pmap, bool reclaim);
d9a64523
A
1576
1577static void ptd_deallocate(pt_desc_t *ptdp);
5ba3f43e
A
1578
1579static void ptd_init(
0a7de745 1580 pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);
5ba3f43e 1581
0a7de745
A
1582static void pmap_zone_init(
1583 void);
5ba3f43e 1584
0a7de745
A
1585static void pmap_set_reference(
1586 ppnum_t pn);
5ba3f43e 1587
0a7de745
A
1588ppnum_t pmap_vtophys(
1589 pmap_t pmap, addr64_t va);
5ba3f43e
A
1590
1591void pmap_switch_user_ttb(
0a7de745 1592 pmap_t pmap);
5ba3f43e 1593
5ba3f43e 1594static kern_return_t pmap_expand(
0a7de745 1595 pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
5ba3f43e
A
1596
1597static int pmap_remove_range(
0a7de745 1598 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);
5ba3f43e
A
1599
1600static int pmap_remove_range_options(
cb323159 1601 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, bool *, int);
5ba3f43e
A
1602
1603static tt_entry_t *pmap_tt1_allocate(
0a7de745 1604 pmap_t, vm_size_t, unsigned int);
5ba3f43e 1605
0a7de745 1606#define PMAP_TT_ALLOCATE_NOWAIT 0x1
5ba3f43e
A
1607
1608static void pmap_tt1_deallocate(
0a7de745 1609 pmap_t, tt_entry_t *, vm_size_t, unsigned int);
5ba3f43e 1610
0a7de745 1611#define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
5ba3f43e
A
1612
1613static kern_return_t pmap_tt_allocate(
0a7de745 1614 pmap_t, tt_entry_t **, unsigned int, unsigned int);
5ba3f43e 1615
0a7de745 1616#define PMAP_TT_ALLOCATE_NOWAIT 0x1
5ba3f43e
A
1617
1618static void pmap_tte_deallocate(
0a7de745 1619 pmap_t, tt_entry_t *, unsigned int);
5ba3f43e 1620
5ba3f43e
A
1621#ifdef __ARM64_PMAP_SUBPAGE_L1__
1622#if (__ARM_VMSA__ <= 7)
1623#error This is not supported for old-style page tables
94ff46dc 1624#endif /* (__ARM_VMSA__ <= 7) */
5ba3f43e 1625#define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
94ff46dc
A
1626#else /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
1627#if (__ARM_VMSA__ <= 7)
1628#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
1629#else /* (__ARM_VMSA__ > 7) */
5ba3f43e 1630#define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
94ff46dc
A
1631#endif /* (__ARM_VMSA__ > 7) */
1632#endif /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
5ba3f43e
A
1633
1634const unsigned int arm_hardware_page_size = ARM_PGBYTES;
1635const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
1636const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;
1637
0a7de745 1638#define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
5ba3f43e 1639
0a7de745 1640#if (__ARM_VMSA__ > 7)
5ba3f43e
A
1641
1642static inline tt_entry_t *pmap_tt1e(
0a7de745 1643 pmap_t, vm_map_address_t);
5ba3f43e
A
1644
1645static inline tt_entry_t *pmap_tt2e(
0a7de745 1646 pmap_t, vm_map_address_t);
5ba3f43e
A
1647
1648static inline pt_entry_t *pmap_tt3e(
0a7de745 1649 pmap_t, vm_map_address_t);
5ba3f43e 1650
cb323159
A
1651static inline pt_entry_t *pmap_ttne(
1652 pmap_t, unsigned int, vm_map_address_t);
1653
5c9f4661 1654static void pmap_unmap_sharedpage(
0a7de745 1655 pmap_t pmap);
5ba3f43e 1656
5ba3f43e 1657static boolean_t
0a7de745 1658pmap_is_64bit(pmap_t);
5ba3f43e
A
1659
1660
1661#endif
1662static inline tt_entry_t *pmap_tte(
0a7de745 1663 pmap_t, vm_map_address_t);
5ba3f43e
A
1664
1665static inline pt_entry_t *pmap_pte(
0a7de745 1666 pmap_t, vm_map_address_t);
5ba3f43e
A
1667
1668static void pmap_update_cache_attributes_locked(
0a7de745 1669 ppnum_t, unsigned);
5ba3f43e
A
1670
1671boolean_t arm_clear_fast_fault(
0a7de745
A
1672 ppnum_t ppnum,
1673 vm_prot_t fault_type);
5ba3f43e 1674
0a7de745
A
1675static pmap_paddr_t pmap_pages_reclaim(
1676 void);
5ba3f43e
A
1677
1678static kern_return_t pmap_pages_alloc(
0a7de745
A
1679 pmap_paddr_t *pa,
1680 unsigned size,
1681 unsigned option);
5ba3f43e 1682
0a7de745
A
1683#define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1684#define PMAP_PAGES_RECLAIM_NOWAIT 0x2
5ba3f43e
A
1685
1686static void pmap_pages_free(
0a7de745
A
1687 pmap_paddr_t pa,
1688 unsigned size);
5ba3f43e 1689
d9a64523
A
1690static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);
1691
1692static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);
1693
d9a64523
A
1694static void pmap_trim_self(pmap_t pmap);
1695static void pmap_trim_subord(pmap_t subord);
5ba3f43e 1696
c6bf4f31
A
1697#if __APRR_SUPPORTED__
1698static uint64_t pte_to_xprr_perm(pt_entry_t pte);
1699static pt_entry_t xprr_perm_to_pte(uint64_t perm);
1700#endif /* __APRR_SUPPORTED__*/
1701
1702#if XNU_MONITOR
1703static pmap_paddr_t pmap_alloc_page_for_kern(void);
1704static void pmap_alloc_page_for_ppl(void);
1705
1706
1707/*
1708 * This macro generates prototypes for the *_internal functions, which
1709 * represent the PPL interface. When the PPL is enabled, this will also
1710 * generate prototypes for the PPL entrypoints (*_ppl), as well as generating
1711 * the entrypoints.
1712 */
1713#define GEN_ASM_NAME(__function_name) _##__function_name##_ppl
1714
1715#define PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
1716 static __return_type __function_name##_internal __function_args; \
1717 extern __return_type __function_name##_ppl __function_args; \
1718 __asm__ (".text \n" \
1719 ".align 2 \n" \
1720 ".globl " #__assembly_function_name "\n" \
1721 #__assembly_function_name ":\n" \
1722 "mov x15, " #__function_index "\n" \
1723 "b _aprr_ppl_enter\n")
1724
1725#define PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
1726 PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name)
cb323159 1727
1728#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
1729 PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, GEN_ASM_NAME(__function_name))
1730#else /* XNU_MONITOR */
5ba3f43e 1731#define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
cb323159 1732 static __return_type __function_name##_internal __function_args
c6bf4f31 1733#endif /* XNU_MONITOR */
1734
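/*
 * Illustrative expansion (not in the original source): for the first use
 * below,
 *
 *	PMAP_SUPPORT_PROTOTYPES(kern_return_t, arm_fast_fault, (...), ARM_FAST_FAULT_INDEX)
 *
 * declares "static kern_return_t arm_fast_fault_internal(...)". With
 * XNU_MONITOR it additionally declares "extern kern_return_t
 * arm_fast_fault_ppl(...)" and emits the assembly stub _arm_fast_fault_ppl,
 * which loads ARM_FAST_FAULT_INDEX into x15 and branches to _aprr_ppl_enter,
 * presumably so the PPL side can dispatch through ppl_handler_table[] below.
 */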
1735PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1736 kern_return_t,
1737 arm_fast_fault, (pmap_t pmap,
1738 vm_map_address_t va,
1739 vm_prot_t fault_type,
cb323159
A
1740 bool was_af_fault,
1741 bool from_user), ARM_FAST_FAULT_INDEX);
5ba3f43e
A
1742
1743
1744PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1745 boolean_t,
1746 arm_force_fast_fault, (ppnum_t ppnum,
1747 vm_prot_t allow_mode,
1748 int options), ARM_FORCE_FAST_FAULT_INDEX);
5ba3f43e
A
1749
1750PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1751 kern_return_t,
1752 mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);
5ba3f43e
A
1753
1754PMAP_SUPPORT_PROTOTYPES(
0a7de745 1755 kern_return_t,
4ba76501 1756 mapping_replenish, (uint32_t kern_target_count, uint32_t user_target_count), MAPPING_REPLENISH_INDEX);
5ba3f43e
A
1757
1758PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1759 boolean_t,
1760 pmap_batch_set_cache_attributes, (ppnum_t pn,
1761 unsigned int cacheattr,
1762 unsigned int page_cnt,
1763 unsigned int page_index,
1764 boolean_t doit,
1765 unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);
5ba3f43e
A
1766
1767PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1768 void,
1769 pmap_change_wiring, (pmap_t pmap,
1770 vm_map_address_t v,
1771 boolean_t wired), PMAP_CHANGE_WIRING_INDEX);
5ba3f43e
A
1772
1773PMAP_SUPPORT_PROTOTYPES(
0a7de745 1774 pmap_t,
cb323159 1775 pmap_create_options, (ledger_t ledger,
0a7de745 1776 vm_map_size_t size,
cb323159 1777 unsigned int flags), PMAP_CREATE_INDEX);
5ba3f43e
A
1778
1779PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1780 void,
1781 pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
5ba3f43e 1782
5ba3f43e 1783PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1784 kern_return_t,
1785 pmap_enter_options, (pmap_t pmap,
1786 vm_map_address_t v,
1787 ppnum_t pn,
1788 vm_prot_t prot,
1789 vm_prot_t fault_type,
1790 unsigned int flags,
1791 boolean_t wired,
1792 unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
5ba3f43e
A
1793
1794PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1795 vm_offset_t,
1796 pmap_extract, (pmap_t pmap,
1797 vm_map_address_t va), PMAP_EXTRACT_INDEX);
5ba3f43e
A
1798
1799PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1800 ppnum_t,
1801 pmap_find_phys, (pmap_t pmap,
1802 addr64_t va), PMAP_FIND_PHYS_INDEX);
5ba3f43e
A
1803
1804#if (__ARM_VMSA__ > 7)
1805PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1806 kern_return_t,
1807 pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
5ba3f43e
A
1808#endif
1809
1810
1811PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1812 boolean_t,
1813 pmap_is_empty, (pmap_t pmap,
1814 vm_map_offset_t va_start,
1815 vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
5ba3f43e
A
1816
1817
1818PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1819 unsigned int,
1820 pmap_map_cpu_windows_copy, (ppnum_t pn,
1821 vm_prot_t prot,
1822 unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);
1823
1824PMAP_SUPPORT_PROTOTYPES(
1825 kern_return_t,
1826 pmap_nest, (pmap_t grand,
1827 pmap_t subord,
1828 addr64_t vstart,
1829 addr64_t nstart,
1830 uint64_t size), PMAP_NEST_INDEX);
5ba3f43e
A
1831
1832PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1833 void,
1834 pmap_page_protect_options, (ppnum_t ppnum,
1835 vm_prot_t prot,
1836 unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
5ba3f43e
A
1837
1838PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1839 void,
1840 pmap_protect_options, (pmap_t pmap,
1841 vm_map_address_t start,
1842 vm_map_address_t end,
1843 vm_prot_t prot,
1844 unsigned int options,
1845 void *args), PMAP_PROTECT_OPTIONS_INDEX);
5ba3f43e
A
1846
1847PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1848 kern_return_t,
1849 pmap_query_page_info, (pmap_t pmap,
1850 vm_map_offset_t va,
1851 int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);
5ba3f43e
A
1852
1853PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1854 mach_vm_size_t,
1855 pmap_query_resident, (pmap_t pmap,
1856 vm_map_address_t start,
1857 vm_map_address_t end,
1858 mach_vm_size_t * compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);
5ba3f43e
A
1859
1860PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1861 void,
1862 pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
5ba3f43e
A
1863
1864PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1865 int,
1866 pmap_remove_options, (pmap_t pmap,
1867 vm_map_address_t start,
1868 vm_map_address_t end,
1869 int options), PMAP_REMOVE_OPTIONS_INDEX);
5ba3f43e
A
1870
1871PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1872 kern_return_t,
1873 pmap_return, (boolean_t do_panic,
1874 boolean_t do_recurse), PMAP_RETURN_INDEX);
5ba3f43e
A
1875
1876PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1877 void,
1878 pmap_set_cache_attributes, (ppnum_t pn,
1879 unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);
5ba3f43e
A
1880
1881PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1882 void,
1883 pmap_update_compressor_page, (ppnum_t pn,
1884 unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);
5ba3f43e
A
1885
1886PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1887 void,
1888 pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
5ba3f43e 1889
c6bf4f31 1890#if MACH_ASSERT || XNU_MONITOR
5ba3f43e 1891PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1892 void,
1893 pmap_set_process, (pmap_t pmap,
1894 int pid,
1895 char *procname), PMAP_SET_PROCESS_INDEX);
5ba3f43e
A
1896#endif
1897
5ba3f43e 1898PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1899 void,
1900 pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);
5ba3f43e
A
1901
1902PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1903 kern_return_t,
1904 pmap_unnest_options, (pmap_t grand,
1905 addr64_t vaddr,
1906 uint64_t size,
1907 unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
5ba3f43e 1908
c6bf4f31
A
1909#if XNU_MONITOR
1910PMAP_SUPPORT_PROTOTYPES(
1911 void,
1912 pmap_cpu_data_init, (unsigned int cpu_number), PMAP_CPU_DATA_INIT_INDEX);
1913#endif
5ba3f43e
A
1914
1915PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1916 void,
1917 phys_attribute_set, (ppnum_t pn,
1918 unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
5ba3f43e 1919
c6bf4f31
A
1920#if XNU_MONITOR
1921PMAP_SUPPORT_PROTOTYPES(
1922 void,
1923 pmap_mark_page_as_ppl_page, (pmap_paddr_t pa), PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX);
1924#endif
5ba3f43e
A
1925
1926PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1927 void,
1928 phys_attribute_clear, (ppnum_t pn,
1929 unsigned int bits,
1930 int options,
1931 void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);
5ba3f43e
A
1932
1933PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1934 void,
1935 pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);
5ba3f43e
A
1936
1937PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1938 void,
1939 pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);
5ba3f43e 1940
d9a64523 1941PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1942 void,
1943 pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);
d9a64523 1944
c6bf4f31
A
1945#if XNU_MONITOR
1946PMAP_SUPPORT_PROTOTYPES(
1947 uint64_t,
1948 pmap_release_ppl_pages_to_kernel, (void), PMAP_RELEASE_PAGES_TO_KERNEL_INDEX);
1949#endif
d9a64523
A
1950
1951PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1952 void,
1953 pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);
d9a64523
A
1954
1955PMAP_SUPPORT_PROTOTYPES(
0a7de745
A
1956 void,
1957 pmap_trim, (pmap_t grand,
1958 pmap_t subord,
1959 addr64_t vstart,
1960 addr64_t nstart,
1961 uint64_t size), PMAP_TRIM_INDEX);
d9a64523 1962
c6bf4f31
A
1963#if HAS_APPLE_PAC && XNU_MONITOR
1964PMAP_SUPPORT_PROTOTYPES(
1965 void *,
1966 pmap_sign_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator), PMAP_SIGN_USER_PTR);
1967PMAP_SUPPORT_PROTOTYPES(
1968 void *,
1969 pmap_auth_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator), PMAP_AUTH_USER_PTR);
1970#endif /* HAS_APPLE_PAC && XNU_MONITOR */
d9a64523
A
1971
1972
5ba3f43e
A
1973
1974
c6bf4f31
A
1975#if XNU_MONITOR
1976static void pmap_mark_page_as_ppl_page(pmap_paddr_t pa);
1977#endif
cb323159 1978
0a7de745
A
1979void pmap_footprint_suspend(vm_map_t map,
1980 boolean_t suspend);
5ba3f43e
A
1981PMAP_SUPPORT_PROTOTYPES(
1982 void,
1983 pmap_footprint_suspend, (vm_map_t map,
0a7de745 1984 boolean_t suspend),
5ba3f43e
A
1985 PMAP_FOOTPRINT_SUSPEND_INDEX);
1986
c6bf4f31
A
1987#if XNU_MONITOR
1988PMAP_SUPPORT_PROTOTYPES(
1989 void,
1990 pmap_ledger_alloc_init, (size_t),
1991 PMAP_LEDGER_ALLOC_INIT_INDEX);
1992
1993PMAP_SUPPORT_PROTOTYPES(
1994 ledger_t,
1995 pmap_ledger_alloc, (void),
1996 PMAP_LEDGER_ALLOC_INDEX);
1997
1998PMAP_SUPPORT_PROTOTYPES(
1999 void,
2000 pmap_ledger_free, (ledger_t),
2001 PMAP_LEDGER_FREE_INDEX);
2002#endif
d9a64523 2003
5ba3f43e
A
2004#if CONFIG_PGTRACE
2005boolean_t pgtrace_enabled = 0;
2006
2007typedef struct {
0a7de745
A
2008 queue_chain_t chain;
2009
2010 /*
2011 * pmap - pmap for below addresses
2012 * ova - original va page address
2013 * cva - clone va addresses for pre, target and post pages
2014 * cva_spte - clone saved ptes
2015 * range - trace range in this map
2016 * cloned - has been cloned or not
2017 */
2018 pmap_t pmap;
2019 vm_map_offset_t ova;
2020 vm_map_offset_t cva[3];
2021 pt_entry_t cva_spte[3];
2022 struct {
2023 pmap_paddr_t start;
2024 pmap_paddr_t end;
2025 } range;
2026 bool cloned;
5ba3f43e
A
2027} pmap_pgtrace_map_t;
2028
2029static void pmap_pgtrace_init(void);
2030static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
2031static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
2032static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
2033#endif
2034
0a7de745 2035#if (__ARM_VMSA__ > 7)
5ba3f43e
A
2036/*
2037 * The low global vector page is mapped at a fixed alias.
2038 * Since the page size is 16k for H8 and newer, we map the globals to a 16k
2039 * aligned address. Readers of the globals (e.g. lldb, panic server) need
2040 * to check both addresses anyway for backward compatibility. So for now
2041 * we leave H6 and H7 where they were.
2042 */
2043#if (ARM_PGSHIFT == 14)
0a7de745 2044#define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
5ba3f43e 2045#else
0a7de745 2046#define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
5ba3f43e
A
2047#endif
2048
2049#else
0a7de745 2050#define LOWGLOBAL_ALIAS (0xFFFF1000)
5ba3f43e
A
2051#endif
2052
2053long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
2054long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
2055long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
2056long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;
2057
0a7de745 2058int pt_fake_zone_index = -1; /* index of pmap fake zone */
5ba3f43e 2059
c6bf4f31
A
2060#if XNU_MONITOR
2061/*
2062 * Table of function pointers used for PPL dispatch.
2063 */
2064const void * const ppl_handler_table[PMAP_COUNT] = {
2065 [ARM_FAST_FAULT_INDEX] = arm_fast_fault_internal,
2066 [ARM_FORCE_FAST_FAULT_INDEX] = arm_force_fast_fault_internal,
2067 [MAPPING_FREE_PRIME_INDEX] = mapping_free_prime_internal,
2068 [MAPPING_REPLENISH_INDEX] = mapping_replenish_internal,
2069 [PHYS_ATTRIBUTE_CLEAR_INDEX] = phys_attribute_clear_internal,
2070 [PHYS_ATTRIBUTE_SET_INDEX] = phys_attribute_set_internal,
2071 [PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX] = pmap_batch_set_cache_attributes_internal,
2072 [PMAP_CHANGE_WIRING_INDEX] = pmap_change_wiring_internal,
2073 [PMAP_CREATE_INDEX] = pmap_create_options_internal,
2074 [PMAP_DESTROY_INDEX] = pmap_destroy_internal,
2075 [PMAP_ENTER_OPTIONS_INDEX] = pmap_enter_options_internal,
2076 [PMAP_EXTRACT_INDEX] = pmap_extract_internal,
2077 [PMAP_FIND_PHYS_INDEX] = pmap_find_phys_internal,
2078 [PMAP_INSERT_SHAREDPAGE_INDEX] = pmap_insert_sharedpage_internal,
2079 [PMAP_IS_EMPTY_INDEX] = pmap_is_empty_internal,
2080 [PMAP_MAP_CPU_WINDOWS_COPY_INDEX] = pmap_map_cpu_windows_copy_internal,
2081 [PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX] = pmap_mark_page_as_ppl_page_internal,
2082 [PMAP_NEST_INDEX] = pmap_nest_internal,
2083 [PMAP_PAGE_PROTECT_OPTIONS_INDEX] = pmap_page_protect_options_internal,
2084 [PMAP_PROTECT_OPTIONS_INDEX] = pmap_protect_options_internal,
2085 [PMAP_QUERY_PAGE_INFO_INDEX] = pmap_query_page_info_internal,
2086 [PMAP_QUERY_RESIDENT_INDEX] = pmap_query_resident_internal,
2087 [PMAP_REFERENCE_INDEX] = pmap_reference_internal,
2088 [PMAP_REMOVE_OPTIONS_INDEX] = pmap_remove_options_internal,
2089 [PMAP_RETURN_INDEX] = pmap_return_internal,
2090 [PMAP_SET_CACHE_ATTRIBUTES_INDEX] = pmap_set_cache_attributes_internal,
2091 [PMAP_UPDATE_COMPRESSOR_PAGE_INDEX] = pmap_update_compressor_page_internal,
2092 [PMAP_SET_NESTED_INDEX] = pmap_set_nested_internal,
2093 [PMAP_SET_PROCESS_INDEX] = pmap_set_process_internal,
2094 [PMAP_SWITCH_INDEX] = pmap_switch_internal,
2095 [PMAP_SWITCH_USER_TTB_INDEX] = pmap_switch_user_ttb_internal,
2096 [PMAP_CLEAR_USER_TTB_INDEX] = pmap_clear_user_ttb_internal,
2097 [PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX] = pmap_unmap_cpu_windows_copy_internal,
2098 [PMAP_UNNEST_OPTIONS_INDEX] = pmap_unnest_options_internal,
2099 [PMAP_FOOTPRINT_SUSPEND_INDEX] = pmap_footprint_suspend_internal,
2100 [PMAP_CPU_DATA_INIT_INDEX] = pmap_cpu_data_init_internal,
2101 [PMAP_RELEASE_PAGES_TO_KERNEL_INDEX] = pmap_release_ppl_pages_to_kernel_internal,
2102 [PMAP_SET_JIT_ENTITLED_INDEX] = pmap_set_jit_entitled_internal,
2103 [PMAP_TRIM_INDEX] = pmap_trim_internal,
2104 [PMAP_LEDGER_ALLOC_INIT_INDEX] = pmap_ledger_alloc_init_internal,
2105 [PMAP_LEDGER_ALLOC_INDEX] = pmap_ledger_alloc_internal,
2106 [PMAP_LEDGER_FREE_INDEX] = pmap_ledger_free_internal,
2107#if HAS_APPLE_PAC && XNU_MONITOR
2108 [PMAP_SIGN_USER_PTR] = pmap_sign_user_ptr_internal,
2109 [PMAP_AUTH_USER_PTR] = pmap_auth_user_ptr_internal,
2110#endif /* HAS_APPLE_PAC && XNU_MONITOR */
2111};
2112
2113static uint64_t
2114pmap_get_ppl_cpu_id(void)
2115{
2116 uint64_t mpidr_el1_value = 0;
2117
2118 /* We identify the CPU based on the constant bits of MPIDR_EL1. */
2119 MRS(mpidr_el1_value, "MPIDR_EL1");
2120
2121#ifdef CPU_CLUSTER_OFFSETS
2122 uint64_t cluster_id = (mpidr_el1_value & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT;
2123 assert(cluster_id < (sizeof(pmap_cluster_offsets) / sizeof(pmap_cluster_offsets[0])));
2124
2125 /* For multi-cluster configurations, AFF0 reflects the core number within the cluster. */
2126 mpidr_el1_value = (mpidr_el1_value & MPIDR_AFF0_MASK) + pmap_cluster_offsets[cluster_id];
2127#else
2128 /*
2129 * AFF2 is not constant (it can change for e-core versus p-core on H9),
2130 * so mask it out.
2131 */
2132 mpidr_el1_value &= MPIDR_AFF0_MASK;
2133#endif
2134
2135 if (mpidr_el1_value > MAX_CPUS) {
2136 panic("%s: mpidr_el1_value=%#llx > MAX_CPUS=%#x",
2137 __FUNCTION__, mpidr_el1_value, MAX_CPUS);
2138 }
2139
2140 return mpidr_el1_value;
2141}
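/*
 * Worked example (not in the original source; the cluster offsets are an
 * assumption): MPIDR_EL1.Aff0 occupies bits [7:0] and Aff1 bits [15:8]. On a
 * hypothetical two-cluster part with pmap_cluster_offsets[] = { 0, 4 }, a core
 * reporting Aff1 = 1, Aff0 = 2 yields cluster_id = 1 and a PPL CPU id of
 * (2 & MPIDR_AFF0_MASK) + pmap_cluster_offsets[1] == 6.
 */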
2142
2143
2144#endif
2145
2146
2147/*
2148 * Allocates and initializes a per-CPU data structure for the pmap.
2149 */
d9a64523 2150MARK_AS_PMAP_TEXT static void
5ba3f43e
A
2151pmap_cpu_data_init_internal(unsigned int cpu_number)
2152{
d9a64523 2153 pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();
5ba3f43e 2154
c6bf4f31
A
2155#if XNU_MONITOR
2156 /* Verify cacheline-aligned */
2157 assert(((vm_offset_t)pmap_cpu_data & ((1 << L2_CLINE) - 1)) == 0);
2158 if (pmap_cpu_data->cpu_number != PMAP_INVALID_CPU_NUM) {
2159 panic("%s: pmap_cpu_data->cpu_number=%u, "
2160 "cpu_number=%u",
2161 __FUNCTION__, pmap_cpu_data->cpu_number,
2162 cpu_number);
2163 }
2164#endif
5ba3f43e
A
2165 pmap_cpu_data->cpu_number = cpu_number;
2166}
2167
2168void
2169pmap_cpu_data_init(void)
2170{
c6bf4f31
A
2171#if XNU_MONITOR
2172 pmap_cpu_data_init_ppl(cpu_number());
2173#else
5ba3f43e 2174 pmap_cpu_data_init_internal(cpu_number());
c6bf4f31 2175#endif
5ba3f43e
A
2176}
2177
2178static void
2179pmap_cpu_data_array_init(void)
2180{
c6bf4f31
A
2181#if XNU_MONITOR
2182 unsigned int i = 0;
2183 pmap_paddr_t ppl_cpu_save_area_cur = 0;
2184 pt_entry_t template, *pte_p;
2185 vm_offset_t stack_va = (vm_offset_t)pmap_stacks_start + ARM_PGBYTES;
2186 assert((pmap_stacks_start != NULL) && (pmap_stacks_end != NULL));
2187 pmap_stacks_start_pa = avail_start;
2188
2189 for (i = 0; i < MAX_CPUS; i++) {
2190 for (vm_offset_t cur_va = stack_va; cur_va < (stack_va + PPL_STACK_SIZE); cur_va += ARM_PGBYTES) {
2191 assert(cur_va < (vm_offset_t)pmap_stacks_end);
2192 pte_p = pmap_pte(kernel_pmap, cur_va);
2193 assert(*pte_p == ARM_PTE_EMPTY);
2194 template = pa_to_pte(avail_start) | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE |
2195 ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) | xprr_perm_to_pte(XPRR_PPL_RW_PERM);
2196#if __ARM_KERNEL_PROTECT__
2197 template |= ARM_PTE_NG;
2198#endif /* __ARM_KERNEL_PROTECT__ */
2199 WRITE_PTE(pte_p, template);
2200 __builtin_arm_isb(ISB_SY);
2201 avail_start += ARM_PGBYTES;
2202 }
2203#if KASAN
2204 kasan_map_shadow(stack_va, PPL_STACK_SIZE, false);
2205#endif
2206 pmap_cpu_data_array[i].cpu_data.cpu_id = i;
2207 pmap_cpu_data_array[i].cpu_data.cpu_number = PMAP_INVALID_CPU_NUM;
2208 pmap_cpu_data_array[i].cpu_data.ppl_state = PPL_STATE_KERNEL;
2209 pmap_cpu_data_array[i].cpu_data.ppl_stack = (void*)(stack_va + PPL_STACK_SIZE);
2210 stack_va += (PPL_STACK_SIZE + ARM_PGBYTES);
2211 }
2212 sync_tlb_flush();
2213 pmap_stacks_end_pa = avail_start;
2214
2215 ppl_cpu_save_area_start = avail_start;
2216 ppl_cpu_save_area_end = ppl_cpu_save_area_start;
2217 ppl_cpu_save_area_cur = ppl_cpu_save_area_start;
2218
2219 for (i = 0; i < MAX_CPUS; i++) {
2220 while ((ppl_cpu_save_area_end - ppl_cpu_save_area_cur) < sizeof(arm_context_t)) {
2221 avail_start += PAGE_SIZE;
2222 ppl_cpu_save_area_end = avail_start;
2223 }
2224
2225 pmap_cpu_data_array[i].cpu_data.save_area = (arm_context_t *)phystokv(ppl_cpu_save_area_cur);
2226 ppl_cpu_save_area_cur += sizeof(arm_context_t);
2227 }
2228#endif
5ba3f43e
A
2229
2230 pmap_cpu_data_init();
2231}
2232
2233pmap_cpu_data_t *
2234pmap_get_cpu_data(void)
2235{
2236 pmap_cpu_data_t * pmap_cpu_data = NULL;
2237
c6bf4f31
A
2238#if XNU_MONITOR
2239 uint64_t cpu_id = 0;
2240
2241 cpu_id = pmap_get_ppl_cpu_id();
2242 pmap_cpu_data = &pmap_cpu_data_array[cpu_id].cpu_data;
2243
2244 if (pmap_cpu_data->cpu_id != cpu_id) {
2245 panic("%s: CPU ID mismatch, cpu_id=0x%#llx, pmap_cpu_data->cpu_id=%#llx",
2246 __FUNCTION__, cpu_id, pmap_cpu_data->cpu_id);
2247 }
2248#else
5ba3f43e 2249 pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
c6bf4f31 2250#endif
5ba3f43e
A
2251
2252 return pmap_cpu_data;
2253}
2254
c6bf4f31
A
2255#if XNU_MONITOR
2256/*
2257 * pmap_set_range_xprr_perm takes a range (specified using start and end) that
2258 * falls within the physical aperture. All mappings within this range have
2259 * their protections changed from those specified by the expected_perm to those
2260 * specified by the new_perm.
2261 */
2262static void
2263pmap_set_range_xprr_perm(vm_address_t start,
2264 vm_address_t end,
2265 unsigned int expected_perm,
2266 unsigned int new_perm)
2267{
2268#if (__ARM_VMSA__ == 7)
2269#error This function is not supported on older ARM hardware
2270#else
2271 pmap_t pmap = NULL;
2272
2273 vm_address_t va = 0;
2274 vm_address_t tte_start = 0;
2275 vm_address_t tte_end = 0;
2276
2277 tt_entry_t *tte_p = NULL;
2278 pt_entry_t *pte_p = NULL;
2279 pt_entry_t *cpte_p = NULL;
2280 pt_entry_t *bpte_p = NULL;
2281 pt_entry_t *epte_p = NULL;
2282
2283 tt_entry_t tte = 0;
2284 pt_entry_t cpte = 0;
2285 pt_entry_t template = 0;
2286
2287 pmap = kernel_pmap;
2288
2289 va = start;
2290
2291 /*
2292 * Validate our arguments; any invalid argument will be grounds for a
2293 * panic.
2294 */
2295 if ((start | end) % ARM_PGBYTES) {
2296 panic("%s: start or end not page aligned, "
2297 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2298 __FUNCTION__,
2299 (void *)start, (void *)end, new_perm, expected_perm);
2300 }
2301
2302 if (start > end) {
2303 panic("%s: start > end, "
2304 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2305 __FUNCTION__,
2306 (void *)start, (void *)end, new_perm, expected_perm);
2307 }
2308
2309 if (start < gVirtBase) {
2310 panic("%s: start is before physical aperture, "
2311 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2312 __FUNCTION__,
2313 (void *)start, (void *)end, new_perm, expected_perm);
2314 }
2315
2316 if (end > static_memory_end) {
2317 panic("%s: end is after physical aperture, "
2318 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2319 __FUNCTION__,
2320 (void *)start, (void *)end, new_perm, expected_perm);
2321 }
2322
2323 if ((new_perm > XPRR_MAX_PERM) || (expected_perm > XPRR_MAX_PERM)) {
2324 panic("%s: invalid XPRR index, "
2325 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2326 __FUNCTION__,
2327 (void *)start, (void *)end, new_perm, expected_perm);
2328 }
2329
2330 /*
2331 * Walk over the PTEs for the given range, and set the protections on
2332 * those PTEs.
2333 */
2334 while (va < end) {
2335 tte_start = va;
2336 tte_end = ((va + pt_attr_twig_size(native_pt_attr)) & ~pt_attr_twig_offmask(native_pt_attr));
2337
2338 if (tte_end > end) {
2339 tte_end = end;
2340 }
2341
2342 tte_p = pmap_tte(pmap, va);
2343
2344 /*
2345 * The physical aperture should not have holes.
2346 * The physical aperture should be contiguous.
2347 * Do not make eye contact with the physical aperture.
2348 */
2349 if (tte_p == NULL) {
2350 panic("%s: physical aperture tte is NULL, "
2351 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2352 __FUNCTION__,
2353 (void *)start, (void *)end, new_perm, expected_perm);
2354 }
2355
2356 tte = *tte_p;
2357
2358 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
2359 /*
2360 * Walk over the given L3 page table page and update the
2361 * PTEs.
2362 */
2363 pte_p = (pt_entry_t *)ttetokv(tte);
2364 bpte_p = &pte_p[ptenum(va)];
2365 epte_p = bpte_p + ((tte_end - va) >> pt_attr_leaf_shift(native_pt_attr));
2366
2367 for (cpte_p = bpte_p; cpte_p < epte_p;
2368 cpte_p += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
2369 int pai = (int)pa_index(pte_to_pa(*cpte_p));
2370 LOCK_PVH(pai);
2371 cpte = *cpte_p;
2372
2373 /*
2374 * Every PTE involved should be valid, should
2375 * not have the hint bit set, and should have
2376 * the expected APRR index.
2379 */
2380 if ((cpte & ARM_PTE_TYPE_MASK) ==
2381 ARM_PTE_TYPE_FAULT) {
2382 panic("%s: physical aperture PTE is invalid, va=%p, "
2383 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2384 __FUNCTION__,
2385 (void *)va,
2386 (void *)start, (void *)end, new_perm, expected_perm);
2387 UNLOCK_PVH(pai);
2388 continue;
2389 }
2390
2391 if (cpte & ARM_PTE_HINT_MASK) {
2392 panic("%s: physical aperture PTE has hint bit set, va=%p, cpte=0x%llx, "
2393 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2394 __FUNCTION__,
2395 (void *)va, cpte,
2396 (void *)start, (void *)end, new_perm, expected_perm);
2397 }
2398
2399 if (pte_to_xprr_perm(cpte) != expected_perm) {
2400 panic("%s: perm=%llu does not match expected_perm, cpte=0x%llx, "
2401 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2402 __FUNCTION__,
2403 pte_to_xprr_perm(cpte), cpte,
2404 (void *)start, (void *)end, new_perm, expected_perm);
2405 }
2406
2407 template = cpte;
2408 template &= ~ARM_PTE_XPRR_MASK;
2409 template |= xprr_perm_to_pte(new_perm);
2410
2411 WRITE_PTE_STRONG(cpte_p, template);
2412 UNLOCK_PVH(pai);
2413 }
2414 } else {
2415 panic("%s: tte=0x%llx is not a table type entry, "
2416 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2417 __FUNCTION__,
2418 tte,
2419 (void *)start, (void *)end, new_perm, expected_perm);
2420 }
2421
2422 va = tte_end;
2423 }
2424
2425 PMAP_UPDATE_TLBS(pmap, start, end, false);
2426#endif /* (__ARM_VMSA__ == 7) */
2427}
2428
2429/*
2430 * A convenience function for setting protections on a single page.
2431 */
2432static inline void
2433pmap_set_xprr_perm(vm_address_t page_kva,
2434 unsigned int expected_perm,
2435 unsigned int new_perm)
2436{
2437 pmap_set_range_xprr_perm(page_kva, page_kva + PAGE_SIZE, expected_perm, new_perm);
2438}
2439#endif /* XNU_MONITOR */
5ba3f43e 2440
cb323159 2441
5ba3f43e
A
2442/* TODO */
2443pmap_paddr_t
2444pmap_pages_reclaim(
2445 void)
2446{
0a7de745
A
2447 boolean_t found_page;
2448 unsigned i;
2449 pt_desc_t *ptdp;
5ba3f43e 2450
2451 /*
2452 * pmap_pages_reclaim() returns a page by freeing an active pt page.
2453 * To be eligible, a pt page must be assigned to a user pmap, have no wired pte
2454 * entries, and contain at least one valid pte entry.
2455 *
2456 * In a loop, check for a page in the reclaimed pt page list.
2457 * If one is present, unlink that page and return its physical address.
2458 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
2459 * If one is found, invoke pmap_remove_range() on its pmap and address range, then
2460 * deallocate that pt page. This will end up adding the pt page to the
2461 * reclaimed pt page list.
2462 * If no eligible page is found in the pt page list, panic.
2463 */
2464
d9a64523 2465 pmap_simple_lock(&pmap_pages_lock);
5ba3f43e
A
2466 pmap_pages_request_count++;
2467 pmap_pages_request_acum++;
2468
2469 while (1) {
5ba3f43e 2470 if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
0a7de745 2471 page_free_entry_t *page_entry;
5ba3f43e
A
2472
2473 page_entry = pmap_pages_reclaim_list;
2474 pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
d9a64523 2475 pmap_simple_unlock(&pmap_pages_lock);
5ba3f43e 2476
0a7de745 2477 return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
5ba3f43e
A
2478 }
2479
d9a64523 2480 pmap_simple_unlock(&pmap_pages_lock);
5ba3f43e 2481
d9a64523 2482 pmap_simple_lock(&pt_pages_lock);
5ba3f43e
A
2483 ptdp = (pt_desc_t *)queue_first(&pt_page_list);
2484 found_page = FALSE;
2485
2486 while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
d9a64523
A
2487 if ((ptdp->pmap->nested == FALSE)
2488 && (pmap_simple_lock_try(&ptdp->pmap->lock))) {
d9a64523 2489 assert(ptdp->pmap != kernel_pmap);
5ba3f43e
A
2490 unsigned refcnt_acc = 0;
2491 unsigned wiredcnt_acc = 0;
2492
0a7de745 2493 for (i = 0; i < PT_INDEX_MAX; i++) {
cb323159 2494 if (ptdp->ptd_info[i].refcnt == PT_DESC_REFCOUNT) {
d9a64523 2495 /* Do not attempt to free a page that contains an L2 table */
5ba3f43e
A
2496 refcnt_acc = 0;
2497 break;
2498 }
cb323159
A
2499 refcnt_acc += ptdp->ptd_info[i].refcnt;
2500 wiredcnt_acc += ptdp->ptd_info[i].wiredcnt;
5ba3f43e
A
2501 }
2502 if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
2503 found_page = TRUE;
2504 /* Leave ptdp->pmap locked here. We're about to reclaim
2505 * a tt page from it, so we don't want anyone else messing
2506 * with it while we do that. */
2507 break;
2508 }
d9a64523 2509 pmap_simple_unlock(&ptdp->pmap->lock);
5ba3f43e
A
2510 }
2511 ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
2512 }
2513 if (!found_page) {
cb323159 2514 panic("%s: No eligible page in pt_page_list", __FUNCTION__);
5ba3f43e 2515 } else {
cb323159
A
2516 int remove_count = 0;
2517 bool need_strong_sync = false;
0a7de745 2518 vm_map_address_t va;
cb323159
A
2519 pmap_t pmap;
2520 pt_entry_t *bpte, *epte;
2521 pt_entry_t *pte_p;
2522 tt_entry_t *tte_p;
2523 uint32_t rmv_spte = 0;
5ba3f43e 2524
d9a64523 2525 pmap_simple_unlock(&pt_pages_lock);
5ba3f43e
A
2526 pmap = ptdp->pmap;
2527 PMAP_ASSERT_LOCKED(pmap); // pmap lock should be held from loop above
cb323159
A
2528
2529 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
2530
0a7de745 2531 for (i = 0; i < PT_INDEX_MAX; i++) {
cb323159 2532 va = ptdp->ptd_info[i].va;
5ba3f43e 2533
d9a64523
A
2534 /* If the VA is bogus, this may represent an unallocated region
2535 * or one which is in transition (already being freed or expanded).
2536 * Don't try to remove mappings here. */
0a7de745 2537 if (va == (vm_offset_t)-1) {
d9a64523 2538 continue;
0a7de745 2539 }
d9a64523 2540
5ba3f43e
A
2541 tte_p = pmap_tte(pmap, va);
2542 if ((tte_p != (tt_entry_t *) NULL)
2543 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
5ba3f43e 2544 pte_p = (pt_entry_t *) ttetokv(*tte_p);
cb323159 2545 bpte = &pte_p[pte_index(pmap, pt_attr, va)];
eb6b6ca3 2546 epte = bpte + pt_attr_leaf_size(pt_attr) / sizeof(pt_entry_t);
5ba3f43e
A
2547 /*
2548 * Use PMAP_OPTIONS_REMOVE to clear any
2549 * "compressed" markers and update the
2550 * "compressed" counter in pmap->stats.
2551 * This means that we lose accounting for
2552 * any compressed pages in this range
2553 * but the alternative is to not be able
2554 * to account for their future decompression,
2555 * which could cause the counter to drift
2556 * more and more.
2557 */
2558 remove_count += pmap_remove_range_options(
2559 pmap, va, bpte, epte,
cb323159
A
2560 &rmv_spte, &need_strong_sync, PMAP_OPTIONS_REMOVE);
2561 if (ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt != 0) {
2562 panic("%s: ptdp %p, count %d", __FUNCTION__, ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
0a7de745 2563 }
cb323159
A
2564
2565 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_TWIG_LEVEL);
5ba3f43e
A
2566
2567 if (remove_count > 0) {
cb323159
A
2568 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, (unsigned int)pt_attr_leaf_table_size(pt_attr), pmap);
2569 } else {
2570 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(va, pmap);
5ba3f43e
A
2571 }
2572 }
2573 }
2574 // Undo the lock we grabbed when we found ptdp above
2575 PMAP_UNLOCK(pmap);
cb323159 2576 pmap_sync_tlb(need_strong_sync);
5ba3f43e 2577 }
d9a64523 2578 pmap_simple_lock(&pmap_pages_lock);
5ba3f43e
A
2579 }
2580}
2581
c6bf4f31
A
2582#if XNU_MONITOR
2583/*
2584 * Return a PPL page to the free list.
2585 */
2586static void
2587pmap_give_free_ppl_page(pmap_paddr_t paddr)
2588{
2589 assert((paddr & ARM_PGMASK) == 0);
2590 void ** new_head = (void **)phystokv(paddr);
2591 pmap_simple_lock(&pmap_ppl_free_page_lock);
2592
2593 void * cur_head = pmap_ppl_free_page_list;
2594 *new_head = cur_head;
2595 pmap_ppl_free_page_list = new_head;
2596 pmap_ppl_free_page_count++;
2597
2598 pmap_simple_unlock(&pmap_ppl_free_page_lock);
2599}
2600
2601/*
2602 * Get a PPL page from the free list.
2603 */
2604static pmap_paddr_t
2605pmap_get_free_ppl_page(void)
2606{
2607 pmap_paddr_t result = 0;
2608
2609 pmap_simple_lock(&pmap_ppl_free_page_lock);
2610
2611 if (pmap_ppl_free_page_list != NULL) {
2612 void ** new_head = NULL;
2613 new_head = *((void**)pmap_ppl_free_page_list);
2614 result = kvtophys((vm_offset_t)pmap_ppl_free_page_list);
2615 pmap_ppl_free_page_list = new_head;
2616 pmap_ppl_free_page_count--;
2617 } else {
2618 result = 0L;
2619 }
2620
2621 pmap_simple_unlock(&pmap_ppl_free_page_lock);
2622 assert((result & ARM_PGMASK) == 0);
2623
2624 return result;
2625}
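/*
 * Sketch of the free-list layout used above (not new code): the PPL page free
 * list is intrusive. The first word of each free page, addressed through the
 * physical aperture, stores the previous head, so both operations are O(1)
 * under pmap_ppl_free_page_lock:
 *
 *	push:	*(void **)phystokv(pa) = pmap_ppl_free_page_list;
 *		pmap_ppl_free_page_list = (void **)phystokv(pa);
 *	pop:	next = *(void **)pmap_ppl_free_page_list;
 *		pa = kvtophys((vm_offset_t)pmap_ppl_free_page_list);
 *		pmap_ppl_free_page_list = next;
 */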
2626
2627/*
2628 * pmap_mark_page_as_ppl_page claims a page on behalf of the PPL by marking it
2629 * as PPL-owned and only allowing the PPL to write to it.
2630 */
2631MARK_AS_PMAP_TEXT static void
2632pmap_mark_page_as_ppl_page_internal(pmap_paddr_t pa)
2633{
2634 vm_offset_t kva = 0;
2635 unsigned int pai = 0;
2636 pp_attr_t attr;
2637
2638 /*
2639 * Mark each page that we allocate as belonging to the monitor, as we
2640 * intend to use it for monitor-y stuff (page tables, table pages, that
2641 * sort of thing).
2642 */
2643 assert(!TEST_PAGE_RATIO_4);
2644
2645 if (!pa_valid(pa)) {
2646 panic("%s: bad address, "
2647 "pa=%p",
2648 __func__,
2649 (void *)pa);
2650 }
2651
2652 pai = (unsigned int)pa_index(pa);
2653 LOCK_PVH(pai);
2654
2655 /* A page that the PPL already owns can't be given to the PPL. */
2656 if (pa_test_monitor(pa)) {
2657 panic("%s: page already belongs to PPL, "
2658 "pa=0x%llx",
2659 __FUNCTION__,
2660 pa);
2661 }
2662 /* The page cannot be mapped outside of the physical aperture. */
2663 if (!pmap_verify_free((ppnum_t)atop(pa))) {
2664 panic("%s: page is not free, "
2665 "pa=0x%llx",
2666 __FUNCTION__,
2667 pa);
2668 }
2669
2670 do {
2671 attr = pp_attr_table[pai];
2672 if (attr & PP_ATTR_NO_MONITOR) {
2673 panic("%s: page excluded from PPL, "
2674 "pa=0x%llx",
2675 __FUNCTION__,
2676 pa);
2677 }
2678 } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_MONITOR, &pp_attr_table[pai]));
2679
2680 UNLOCK_PVH(pai);
2681
2682 kva = phystokv(pa);
2683 pmap_set_xprr_perm(kva, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
2684 bzero((void *)(kva & ~PAGE_MASK), PAGE_SIZE);
2685
2686 pmap_give_free_ppl_page(pa);
2687}
2688
2689static void
2690pmap_mark_page_as_ppl_page(pmap_paddr_t pa)
2691{
2692 pmap_mark_page_as_ppl_page_ppl(pa);
2693}
2694
2695static void
2696pmap_mark_page_as_kernel_page(pmap_paddr_t pa)
2697{
2698 vm_offset_t kva = 0;
2699 unsigned int pai = 0;
2700
2701 pai = (unsigned int)pa_index(pa);
2702 LOCK_PVH(pai);
2703
2704 if (!pa_test_monitor(pa)) {
2705 panic("%s: page is not a PPL page, "
2706 "pa=%p",
2707 __FUNCTION__,
2708 (void *)pa);
2709 }
2710
2711 pa_clear_monitor(pa);
2712 UNLOCK_PVH(pai);
2713
2714 kva = phystokv(pa);
2715 pmap_set_xprr_perm(kva, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
2716}
2717
2718MARK_AS_PMAP_TEXT static pmap_paddr_t
2719pmap_release_ppl_pages_to_kernel_internal(void)
2720{
2721 pmap_paddr_t pa = 0;
2722
2723 if (pmap_ppl_free_page_count <= PMAP_MIN_FREE_PPL_PAGES) {
2724 goto done;
2725 }
2726
2727 pa = pmap_get_free_ppl_page();
2728
2729 if (!pa) {
2730 goto done;
2731 }
2732
2733 pmap_mark_page_as_kernel_page(pa);
2734
2735done:
2736 return pa;
2737}
2738
2739static uint64_t
2740pmap_release_ppl_pages_to_kernel(void)
2741{
2742 pmap_paddr_t pa = 0;
2743 vm_page_t m = VM_PAGE_NULL;
2744 vm_page_t local_freeq = VM_PAGE_NULL;
2745 uint64_t pmap_ppl_pages_returned_to_kernel_count = 0;
2746
2747 while (pmap_ppl_free_page_count > PMAP_MIN_FREE_PPL_PAGES) {
2748 pa = pmap_release_ppl_pages_to_kernel_ppl();
2749
2750 if (!pa) {
2751 break;
2752 }
2753
2754 /* If we retrieved a page, add it to the free queue. */
2755 vm_object_lock(pmap_object);
2756 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
2757 assert(m != VM_PAGE_NULL);
2758 assert(VM_PAGE_WIRED(m));
2759
2760 m->vmp_busy = TRUE;
2761 m->vmp_snext = local_freeq;
2762 local_freeq = m;
2763 pmap_ppl_pages_returned_to_kernel_count++;
2764 pmap_ppl_pages_returned_to_kernel_count_total++;
2765
2766 vm_object_unlock(pmap_object);
2767 }
2768
2769 if (local_freeq) {
2770 /* We need to hold the object lock for freeing pages. */
2771 vm_object_lock(pmap_object);
2772 vm_page_free_list(local_freeq, TRUE);
2773 vm_object_unlock(pmap_object);
2774 }
2775
2776 return pmap_ppl_pages_returned_to_kernel_count;
2777}
2778#endif
5ba3f43e
A
2779
2780static kern_return_t
2781pmap_pages_alloc(
0a7de745
A
2782 pmap_paddr_t *pa,
2783 unsigned size,
2784 unsigned option)
5ba3f43e 2785{
c6bf4f31
A
2786#if XNU_MONITOR
2787 if (size != PAGE_SIZE) {
2788 panic("%s: size != PAGE_SIZE, "
2789 "pa=%p, size=%u, option=%u",
2790 __FUNCTION__,
2791 pa, size, option);
2792 }
2793
2794 if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
2795 *pa = pmap_pages_reclaim();
2796 assert(*pa);
2797 return KERN_SUCCESS;
2798 }
2799
2800 assert(option & PMAP_PAGES_ALLOCATE_NOWAIT);
2801
2802 *pa = pmap_get_free_ppl_page();
2803
2804 if (*pa == 0) {
2805 return KERN_RESOURCE_SHORTAGE;
2806 } else {
2807 return KERN_SUCCESS;
2808 }
2809#else
5ba3f43e
A
2810 vm_page_t m = VM_PAGE_NULL, m_prev;
2811
0a7de745 2812 if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
5ba3f43e
A
2813 assert(size == PAGE_SIZE);
2814 *pa = pmap_pages_reclaim();
2815 return KERN_SUCCESS;
2816 }
2817 if (size == PAGE_SIZE) {
2818 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
0a7de745 2819 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
5ba3f43e
A
2820 return KERN_RESOURCE_SHORTAGE;
2821 }
2822
2823 VM_PAGE_WAIT();
2824 }
2825 vm_page_lock_queues();
2826 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
2827 vm_page_unlock_queues();
2828 }
0a7de745 2829 if (size == 2 * PAGE_SIZE) {
5ba3f43e 2830 while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
0a7de745 2831 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
5ba3f43e 2832 return KERN_RESOURCE_SHORTAGE;
0a7de745 2833 }
5ba3f43e
A
2834
2835 VM_PAGE_WAIT();
2836 }
2837 }
2838
2839 *pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
2840
2841 vm_object_lock(pmap_object);
2842 while (m != VM_PAGE_NULL) {
2843 vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
2844 m_prev = m;
2845 m = NEXT_PAGE(m_prev);
2846 *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
2847 }
c6bf4f31
A
2848 vm_object_unlock(pmap_object);
2849
2850 OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
2851 OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);
2852
2853 return KERN_SUCCESS;
2854#endif
2855}
2856
2857#if XNU_MONITOR
2858static pmap_paddr_t
2859pmap_alloc_page_for_kern(void)
2860{
2861 pmap_paddr_t paddr = 0;
2862 vm_page_t m, m_prev;
2863
2864 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
2865 VM_PAGE_WAIT();
2866 }
2867
2868 vm_page_lock_queues();
2869 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
2870 vm_page_unlock_queues();
2871
2872 paddr = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
2873
2874 if (paddr == 0) {
2875 panic("%s: paddr is 0",
2876 __FUNCTION__);
2877 }
2878
2879 vm_object_lock(pmap_object);
2880
2881 while (m != VM_PAGE_NULL) {
2882 vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
2883 m_prev = m;
2884 m = NEXT_PAGE(m_prev);
2885 *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
2886 }
2887
2888 vm_object_unlock(pmap_object);
2889
2890 OSAddAtomic(1, &inuse_pmap_pages_count);
2891 OSAddAtomic64(1, &alloc_pmap_pages_count);
2892
2893 return paddr;
2894}
2895
2896static void
2897pmap_alloc_page_for_ppl(void)
2898{
2899 pmap_mark_page_as_ppl_page(pmap_alloc_page_for_kern());
2900}
2901
2902static pmap_t
2903pmap_alloc_pmap(void)
2904{
2905 pmap_t pmap = PMAP_NULL;
2906
2907 pmap_simple_lock(&pmap_free_list_lock);
2908
2909 if (pmap_free_list != PMAP_NULL) {
2910 pmap = pmap_free_list;
2911 pmap_free_list = *((pmap_t *)pmap);
2912
2913 if (!PMAP_PTR_IS_VALID(pmap)) {
2914 panic("%s: allocated pmap is not valid, pmap=%p",
2915 __FUNCTION__, pmap);
2916 }
2917 }
2918
2919 pmap_simple_unlock(&pmap_free_list_lock);
2920
2921 return pmap;
2922}
2923
2924static void
2925pmap_free_pmap(pmap_t pmap)
2926{
2927 if (!PMAP_PTR_IS_VALID(pmap)) {
2928 panic("%s: pmap is not valid, "
2929 "pmap=%p",
2930 __FUNCTION__,
2931 pmap);
2932 }
5ba3f43e 2933
c6bf4f31
A
2934 pmap_simple_lock(&pmap_free_list_lock);
2935 *((pmap_t *)pmap) = pmap_free_list;
2936 pmap_free_list = pmap;
2937 pmap_simple_unlock(&pmap_free_list_lock);
5ba3f43e
A
2938}
2939
c6bf4f31
A
2940static void
2941pmap_bootstrap_pmap_free_list(void)
2942{
2943 pmap_t cur_head = PMAP_NULL;
2944 unsigned long i = 0;
2945
2946 simple_lock_init(&pmap_free_list_lock, 0);
2947
2948 for (i = 0; i < pmap_array_count; i++) {
2949 *((pmap_t *)(&pmap_array[i])) = cur_head;
2950 cur_head = &pmap_array[i];
2951 }
2952
2953 pmap_free_list = cur_head;
2954}
2955#endif
5ba3f43e
A
2956
2957static void
2958pmap_pages_free(
0a7de745
A
2959 pmap_paddr_t pa,
2960 unsigned size)
5ba3f43e 2961{
d9a64523 2962 pmap_simple_lock(&pmap_pages_lock);
5ba3f43e
A
2963
2964 if (pmap_pages_request_count != 0) {
0a7de745 2965 page_free_entry_t *page_entry;
5ba3f43e
A
2966
2967 pmap_pages_request_count--;
2968 page_entry = (page_free_entry_t *)phystokv(pa);
2969 page_entry->next = pmap_pages_reclaim_list;
2970 pmap_pages_reclaim_list = page_entry;
d9a64523 2971 pmap_simple_unlock(&pmap_pages_lock);
5ba3f43e
A
2972
2973 return;
2974 }
2975
d9a64523 2976 pmap_simple_unlock(&pmap_pages_lock);
5ba3f43e 2977
c6bf4f31
A
2978#if XNU_MONITOR
2979 (void)size;
2980
2981 pmap_give_free_ppl_page(pa);
2982#else
5ba3f43e 2983 vm_page_t m;
0a7de745 2984 pmap_paddr_t pa_max;
5ba3f43e 2985
0a7de745 2986 OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);
5ba3f43e
A
2987
2988 for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
2989 vm_object_lock(pmap_object);
2990 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
2991 assert(m != VM_PAGE_NULL);
2992 assert(VM_PAGE_WIRED(m));
2993 vm_page_lock_queues();
2994 vm_page_free(m);
2995 vm_page_unlock_queues();
2996 vm_object_unlock(pmap_object);
2997 }
c6bf4f31 2998#endif
5ba3f43e
A
2999}
3000
3001static inline void
3002PMAP_ZINFO_PALLOC(
3003 pmap_t pmap, int bytes)
3004{
3005 pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
3006}
3007
3008static inline void
3009PMAP_ZINFO_PFREE(
3010 pmap_t pmap,
3011 int bytes)
3012{
3013 pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
3014}
3015
3016static inline void
3017pmap_tt_ledger_credit(
0a7de745
A
3018 pmap_t pmap,
3019 vm_size_t size)
5ba3f43e
A
3020{
3021 if (pmap != kernel_pmap) {
3022 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
3023 pmap_ledger_credit(pmap, task_ledgers.page_table, size);
3024 }
3025}
3026
3027static inline void
3028pmap_tt_ledger_debit(
0a7de745
A
3029 pmap_t pmap,
3030 vm_size_t size)
5ba3f43e
A
3031{
3032 if (pmap != kernel_pmap) {
3033 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
3034 pmap_ledger_debit(pmap, task_ledgers.page_table, size);
3035 }
3036}
3037
cb323159
A
3038static bool
3039alloc_asid(pmap_t pmap)
5ba3f43e 3040{
cb323159
A
3041 int vasid;
3042 uint16_t hw_asid;
5ba3f43e 3043
cb323159
A
3044 pmap_simple_lock(&asid_lock);
3045 vasid = bitmap_first(&asid_bitmap[0], MAX_ASID);
3046 if (vasid < 0) {
3047 pmap_simple_unlock(&asid_lock);
3048 return false;
5ba3f43e 3049 }
cb323159
A
3050 assert(vasid < MAX_ASID);
3051 bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
3052 pmap_simple_unlock(&asid_lock);
3053 // bitmap_first() returns highest-order bits first, but a 0-based scheme works
3054 // slightly better with the collision detection scheme used by pmap_switch_internal().
3055 vasid = MAX_ASID - 1 - vasid;
3056 hw_asid = vasid % MAX_HW_ASID;
3057 pmap->sw_asid = vasid / MAX_HW_ASID;
3058 hw_asid += 1; // Account for ASID 0, which is reserved for the kernel
3059#if __ARM_KERNEL_PROTECT__
3060 hw_asid <<= 1; // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
3061#endif
3062 pmap->hw_asid = hw_asid;
3063 return true;
5ba3f43e
A
3064}
3065
3066static void
cb323159 3067free_asid(pmap_t pmap)
5ba3f43e 3068{
cb323159
A
3069 unsigned int vasid;
3070 uint16_t hw_asid = pmap->hw_asid;
3071 assert(hw_asid != 0); // Should not try to free kernel ASID
5c9f4661
A
3072
3073#if __ARM_KERNEL_PROTECT__
cb323159
A
3074 hw_asid >>= 1;
3075#endif
3076 hw_asid -= 1;
5c9f4661 3077
cb323159
A
3078 vasid = ((unsigned int)pmap->sw_asid * MAX_HW_ASID) + hw_asid;
3079 vasid = MAX_ASID - 1 - vasid;
3080
3081 pmap_simple_lock(&asid_lock);
3082 assert(!bitmap_test(&asid_bitmap[0], vasid));
3083 bitmap_set(&asid_bitmap[0], vasid);
3084 pmap_simple_unlock(&asid_lock);
5ba3f43e
A
3085}
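/*
 * Worked example (not in the original source; MAX_HW_ASID == 256 is an
 * assumed value): if alloc_asid() ends up with virtual ASID 300 after the
 * bitmap inversion, then hw_asid = 300 % 256 = 44 and sw_asid = 300 / 256 = 1;
 * the subsequent +1 skips hardware ASID 0, which is reserved for the kernel,
 * and __ARM_KERNEL_PROTECT__ doubles the value so EL0 and EL1 get adjacent
 * ASIDs. free_asid() above inverts exactly these steps.
 */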
3086
cb323159 3087
d9a64523
A
3088#ifndef PMAP_PV_LOAD_FACTOR
3089#define PMAP_PV_LOAD_FACTOR 1
3090#endif
3091
3092#define PV_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
3093#define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
3094#define PV_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
3095#define PV_KERN_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
3096#define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
3097#define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
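/*
 * Worked arithmetic (not in the original source): with PMAP_PV_LOAD_FACTOR == 1
 * the defaults above come out to 0x200 (512) entries for each low-water mark
 * and allocation chunk, an initial general-list target of 512 * 5 = 2560
 * pv_entry_t objects, and an initial kernel-list target of 512.
 */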
3098
3099uint32_t pv_free_count MARK_AS_PMAP_DATA = 0;
3100uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
3101uint32_t pv_kern_free_count MARK_AS_PMAP_DATA = 0;
3102
3103uint32_t pv_low_water_mark MARK_AS_PMAP_DATA;
3104uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA;
3105uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA;
3106uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA;
3107
3108thread_t mapping_replenish_thread;
0a7de745 3109event_t mapping_replenish_event;
5ba3f43e
A
3110volatile uint32_t mappingrecurse = 0;
3111
5ba3f43e 3112unsigned pmap_mapping_thread_wakeups;
4ba76501 3113unsigned pmap_reserve_replenish_stat MARK_AS_PMAP_DATA;
5ba3f43e
A
3114unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
3115
3116
3117static void
3118pv_init(
3119 void)
3120{
3121 simple_lock_init(&pv_free_list_lock, 0);
3122 simple_lock_init(&pv_kern_free_list_lock, 0);
3123 pv_free_list = PV_ENTRY_NULL;
3124 pv_free_count = 0x0U;
3125 pv_kern_free_list = PV_ENTRY_NULL;
3126 pv_kern_free_count = 0x0U;
3127}
3128
0a7de745
A
3129static inline void PV_ALLOC(pv_entry_t **pv_ep);
3130static inline void PV_KERN_ALLOC(pv_entry_t **pv_e);
4ba76501 3131static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target);
5ba3f43e
A
3132
3133static boolean_t
3134pv_alloc(
3135 pmap_t pmap,
3136 unsigned int pai,
3137 pv_entry_t **pvepp)
3138{
0a7de745 3139 if (pmap != NULL) {
d9a64523 3140 PMAP_ASSERT_LOCKED(pmap);
0a7de745 3141 }
5ba3f43e
A
3142 ASSERT_PVH_LOCKED(pai);
3143 PV_ALLOC(pvepp);
3144 if (PV_ENTRY_NULL == *pvepp) {
d9a64523 3145 if ((pmap == NULL) || (kernel_pmap == pmap)) {
5ba3f43e
A
3146 PV_KERN_ALLOC(pvepp);
3147
3148 if (PV_ENTRY_NULL == *pvepp) {
0a7de745
A
3149 pv_entry_t *pv_e;
3150 pv_entry_t *pv_eh;
3151 pv_entry_t *pv_et;
3152 int pv_cnt;
3153 unsigned j;
5ba3f43e 3154 pmap_paddr_t pa;
0a7de745 3155 kern_return_t ret;
5ba3f43e
A
3156
3157 UNLOCK_PVH(pai);
0a7de745 3158 if (pmap != NULL) {
d9a64523 3159 PMAP_UNLOCK(pmap);
0a7de745 3160 }
5ba3f43e
A
3161
3162 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
3163
3164 if (ret == KERN_RESOURCE_SHORTAGE) {
3165 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
3166 }
3167
3168 if (ret != KERN_SUCCESS) {
3169 panic("%s: failed to alloc page for kernel, ret=%d, "
0a7de745
A
3170 "pmap=%p, pai=%u, pvepp=%p",
3171 __FUNCTION__, ret,
3172 pmap, pai, pvepp);
5ba3f43e
A
3173 }
3174
3175 pv_page_count++;
3176
3177 pv_e = (pv_entry_t *)phystokv(pa);
3178 pv_cnt = 0;
3179 pv_eh = pv_et = PV_ENTRY_NULL;
3180 *pvepp = pv_e;
3181 pv_e++;
3182
0a7de745 3183 for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
5ba3f43e
A
3184 pv_e->pve_next = pv_eh;
3185 pv_eh = pv_e;
3186
0a7de745 3187 if (pv_et == PV_ENTRY_NULL) {
5ba3f43e 3188 pv_et = pv_e;
0a7de745 3189 }
5ba3f43e
A
3190 pv_cnt++;
3191 pv_e++;
3192 }
4ba76501 3193 PV_FREE_LIST(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);
0a7de745 3194 if (pmap != NULL) {
d9a64523 3195 PMAP_LOCK(pmap);
0a7de745 3196 }
5ba3f43e
A
3197 LOCK_PVH(pai);
3198 return FALSE;
3199 }
3200 } else {
3201 UNLOCK_PVH(pai);
3202 PMAP_UNLOCK(pmap);
5ba3f43e 3203
4ba76501
A
3204 pv_entry_t *pv_e;
3205 pv_entry_t *pv_eh;
3206 pv_entry_t *pv_et;
3207 int pv_cnt;
3208 unsigned j;
3209 pmap_paddr_t pa;
3210 kern_return_t ret;
5ba3f43e 3211
c6bf4f31
A
3212#if XNU_MONITOR
3213 /*
3214 * The PPL has no guarantee that its allocation
3215 * will succeed, so steal pages if necessary to
3216 * ensure that we can free up a PV allocation.
3217 */
3218 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
3219
3220 if (ret == KERN_RESOURCE_SHORTAGE) {
3221 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
3222 }
3223#else
4ba76501 3224 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
c6bf4f31 3225#endif
5ba3f43e 3226
4ba76501
A
3227 if (ret != KERN_SUCCESS) {
3228 panic("%s: failed to alloc page, ret=%d, "
3229 "pmap=%p, pai=%u, pvepp=%p",
3230 __FUNCTION__, ret,
3231 pmap, pai, pvepp);
3232 }
5ba3f43e 3233
4ba76501 3234 pv_page_count++;
5ba3f43e 3235
4ba76501
A
3236 pv_e = (pv_entry_t *)phystokv(pa);
3237 pv_cnt = 0;
3238 pv_eh = pv_et = PV_ENTRY_NULL;
3239 *pvepp = pv_e;
3240 pv_e++;
5ba3f43e 3241
4ba76501
A
3242 for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
3243 pv_e->pve_next = pv_eh;
3244 pv_eh = pv_e;
3245
3246 if (pv_et == PV_ENTRY_NULL) {
3247 pv_et = pv_e;
5ba3f43e 3248 }
4ba76501
A
3249 pv_cnt++;
3250 pv_e++;
5ba3f43e 3251 }
4ba76501
A
3252
3253 PV_FREE_LIST(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);
3254
5ba3f43e
A
3255 PMAP_LOCK(pmap);
3256 LOCK_PVH(pai);
3257 return FALSE;
3258 }
3259 }
3260 assert(PV_ENTRY_NULL != *pvepp);
3261 return TRUE;
3262}
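/*
 * Illustrative arithmetic (not in the original source; the pv_entry_t size is
 * an assumption): pv_alloc() carves a whole page into pv_entry_t objects. If a
 * pv_entry_t were 16 bytes on a 16K page, one allocation would yield
 * 16384 / 16 = 1024 entries: the first is handed back through *pvepp and the
 * remaining 1023 are chained from pv_eh to pv_et and pushed onto a free list
 * with PV_FREE_LIST(), so subsequent allocations are a simple lock-and-pop.
 */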
3263
3264static void
3265pv_free(
3266 pv_entry_t *pvep)
3267{
4ba76501 3268 PV_FREE_LIST(pvep, pvep, 1, pv_kern_low_water_mark);
5ba3f43e
A
3269}
3270
3271static void
3272pv_list_free(
3273 pv_entry_t *pvehp,
3274 pv_entry_t *pvetp,
3275 unsigned int cnt)
3276{
4ba76501 3277 PV_FREE_LIST(pvehp, pvetp, cnt, pv_kern_low_water_mark);
5ba3f43e
A
3278}
3279
d9a64523
A
3280static inline void
3281pv_water_mark_check(void)
3282{
cb323159
A
3283 if (__improbable((pv_free_count < pv_low_water_mark) || (pv_kern_free_count < pv_kern_low_water_mark))) {
3284 if (!mappingrecurse && os_atomic_cmpxchg(&mappingrecurse, 0, 1, acq_rel)) {
d9a64523 3285 thread_wakeup(&mapping_replenish_event);
0a7de745 3286 }
d9a64523
A
3287 }
3288}
5ba3f43e 3289
0a7de745
A
3290static inline void
3291PV_ALLOC(pv_entry_t **pv_ep)
3292{
5ba3f43e 3293 assert(*pv_ep == PV_ENTRY_NULL);
c6bf4f31 3294#if !XNU_MONITOR
4ba76501
A
3295 if (pv_kern_free_count < pv_kern_low_water_mark) {
3296 /*
3297 * If the kernel reserved pool is low, let non-kernel mappings wait for a page
3298 * from the VM.
3299 */
3300 return;
3301 }
c6bf4f31 3302#endif
d9a64523 3303 pmap_simple_lock(&pv_free_list_lock);
4ba76501
A
3304
3305 if ((*pv_ep = pv_free_list) != 0) {
5ba3f43e
A
3306 pv_free_list = (pv_entry_t *)(*pv_ep)->pve_next;
3307 (*pv_ep)->pve_next = PV_ENTRY_NULL;
3308 pv_free_count--;
3309 }
3310
d9a64523 3311 pmap_simple_unlock(&pv_free_list_lock);
5ba3f43e
A
3312}
3313
0a7de745 3314static inline void
4ba76501 3315PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target)
0a7de745 3316{
4ba76501
A
3317 bool use_kernel_list = false;
3318 pmap_simple_lock(&pv_kern_free_list_lock);
3319 if (pv_kern_free_count < kern_target) {
3320 pv_et->pve_next = pv_kern_free_list;
3321 pv_kern_free_list = pv_eh;
3322 pv_kern_free_count += pv_cnt;
3323 use_kernel_list = true;
3324 }
3325 pmap_simple_unlock(&pv_kern_free_list_lock);
3326
3327 if (!use_kernel_list) {
3328 pmap_simple_lock(&pv_free_list_lock);
3329 pv_et->pve_next = (pv_entry_t *)pv_free_list;
3330 pv_free_list = pv_eh;
3331 pv_free_count += pv_cnt;
3332 pmap_simple_unlock(&pv_free_list_lock);
3333 }
3334}
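/*
 * Behavioral note (not in the original source): PV_FREE_LIST() refills the
 * kernel-reserved list first. For example, with kern_target ==
 * pv_kern_low_water_mark == 0x200, a freed chain lands on pv_kern_free_list
 * whenever fewer than 512 reserved entries remain; otherwise it is pushed onto
 * the general pv_free_list that PV_ALLOC() serves.
 */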
3335
0a7de745
A
3336static inline void
3337PV_KERN_ALLOC(pv_entry_t **pv_e)
3338{
5ba3f43e 3339 assert(*pv_e == PV_ENTRY_NULL);
d9a64523 3340 pmap_simple_lock(&pv_kern_free_list_lock);
5ba3f43e
A
3341
3342 if ((*pv_e = pv_kern_free_list) != 0) {
3343 pv_kern_free_list = (pv_entry_t *)(*pv_e)->pve_next;
3344 (*pv_e)->pve_next = PV_ENTRY_NULL;
3345 pv_kern_free_count--;
3346 pmap_kern_reserve_alloc_stat++;
3347 }
3348
d9a64523 3349 pmap_simple_unlock(&pv_kern_free_list_lock);
5ba3f43e
A
3350}
3351
5ba3f43e
A
3352/*
3353 * Creates a target number of free pv_entry_t objects for the kernel free list
3354 * and the general free list.
3355 */
d9a64523 3356MARK_AS_PMAP_TEXT static kern_return_t
5ba3f43e
A
3357mapping_free_prime_internal(void)
3358{
5ba3f43e
A
3359 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_called = FALSE;
3360 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_done = FALSE;
3361
3362 if (mapping_free_prime_internal_done) {
3363 return KERN_FAILURE;
3364 }
3365
3366 if (!mapping_free_prime_internal_called) {
3367 mapping_free_prime_internal_called = TRUE;
3368
3369 pv_low_water_mark = PV_LOW_WATER_MARK_DEFAULT;
3370
3371 /* Alterable via sysctl */
3372 pv_kern_low_water_mark = PV_KERN_LOW_WATER_MARK_DEFAULT;
3373
3374 pv_kern_alloc_chunk = PV_KERN_ALLOC_CHUNK_INITIAL;
3375 pv_alloc_chunk = PV_ALLOC_CHUNK_INITIAL;
3376 }
3377
4ba76501 3378 return mapping_replenish_internal(PV_KERN_ALLOC_INITIAL_TARGET, PV_ALLOC_INITIAL_TARGET);
3379}
3380
3381void
3382mapping_free_prime(void)
3383{
3384 kern_return_t kr = KERN_FAILURE;
3385
3386#if XNU_MONITOR
3387 unsigned int i = 0;
3388
3389 /*
3390 * Allocate the needed PPL pages up front, to minimize the chance that
3391 * we will need to call into the PPL multiple times.
3392 */
3393 for (i = 0; i < PV_ALLOC_INITIAL_TARGET; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
3394 pmap_alloc_page_for_ppl();
3395 }
3396
3397 for (i = 0; i < PV_KERN_ALLOC_INITIAL_TARGET; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
3398 pmap_alloc_page_for_ppl();
3399 }
3400
3401 while ((kr = mapping_free_prime_ppl()) == KERN_RESOURCE_SHORTAGE) {
3402 pmap_alloc_page_for_ppl();
3403 }
3404#else
5ba3f43e 3405 kr = mapping_free_prime_internal();
c6bf4f31 3406#endif
5ba3f43e
A
3407
3408 if (kr != KERN_SUCCESS) {
cb323159
A
3409 panic("%s: failed, kr=%d",
3410 __FUNCTION__, kr);
5ba3f43e
A
3411 }
3412}
3413
3414void mapping_replenish(void);
3415
0a7de745
A
3416void
3417mapping_adjust(void)
3418{
5ba3f43e
A
3419 kern_return_t mres;
3420
3421 mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
3422 if (mres != KERN_SUCCESS) {
cb323159
A
3423 panic("%s: mapping_replenish thread creation failed",
3424 __FUNCTION__);
5ba3f43e
A
3425 }
3426 thread_deallocate(mapping_replenish_thread);
3427}
3428
3429/*
3430 * Fills the kernel and general PV free lists back up to their low watermarks.
3431 */
d9a64523 3432MARK_AS_PMAP_TEXT static kern_return_t
4ba76501 3433mapping_replenish_internal(uint32_t kern_target_count, uint32_t user_target_count)
5ba3f43e
A
3434{
3435 pv_entry_t *pv_e;
3436 pv_entry_t *pv_eh;
3437 pv_entry_t *pv_et;
3438 int pv_cnt;
3439 unsigned j;
3440 pmap_paddr_t pa;
3441 kern_return_t ret = KERN_SUCCESS;
3442
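 /*
 * Allocate whole pages and carve them into pv_entry_t objects until both
 * free lists reach their targets; each page yields
 * PAGE_SIZE / sizeof(pv_entry_t) entries, chained from pv_eh to pv_et.
 */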
4ba76501 3443 while ((pv_free_count < user_target_count) || (pv_kern_free_count < kern_target_count)) {
5ba3f43e
A
3444 pv_cnt = 0;
3445 pv_eh = pv_et = PV_ENTRY_NULL;
3446
c6bf4f31
A
3447#if XNU_MONITOR
3448 if ((ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT)) != KERN_SUCCESS) {
3449 return ret;
3450 }
3451#else
5ba3f43e
A
3452 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
3453 assert(ret == KERN_SUCCESS);
c6bf4f31 3454#endif
5ba3f43e
A
3455
3456 pv_page_count++;
3457
3458 pv_e = (pv_entry_t *)phystokv(pa);
3459
0a7de745 3460 for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
5ba3f43e
A
3461 pv_e->pve_next = pv_eh;
3462 pv_eh = pv_e;
3463
0a7de745 3464 if (pv_et == PV_ENTRY_NULL) {
5ba3f43e 3465 pv_et = pv_e;
0a7de745 3466 }
5ba3f43e
A
3467 pv_cnt++;
3468 pv_e++;
3469 }
4ba76501
A
3470 pmap_reserve_replenish_stat += pv_cnt;
3471 PV_FREE_LIST(pv_eh, pv_et, pv_cnt, kern_target_count);
5ba3f43e
A
3472 }
3473
3474 return ret;
3475}
3476
3477/*
3478 * Continuation function that keeps the PV free lists from running out of free
3479 * elements.
3480 */
3481__attribute__((noreturn))
3482void
3483mapping_replenish(void)
3484{
3485 kern_return_t kr;
3486
3487 /* We qualify for VM privileges...*/
3488 current_thread()->options |= TH_OPT_VMPRIV;
3489
3490 for (;;) {
c6bf4f31
A
3491#if XNU_MONITOR
3492
3493 while ((kr = mapping_replenish_ppl(pv_kern_low_water_mark, pv_low_water_mark)) == KERN_RESOURCE_SHORTAGE) {
3494 pmap_alloc_page_for_ppl();
3495 }
3496#else
4ba76501 3497 kr = mapping_replenish_internal(pv_kern_low_water_mark, pv_low_water_mark);
c6bf4f31 3498#endif
5ba3f43e
A
3499
3500 if (kr != KERN_SUCCESS) {
3501 panic("%s: failed, kr=%d", __FUNCTION__, kr);
3502 }
3503
5ba3f43e
A
3504 /* Check if the kernel pool has been depleted since the
3505 * first pass, to reduce refill latency.
3506 */
0a7de745 3507 if (pv_kern_free_count < pv_kern_low_water_mark) {
5ba3f43e 3508 continue;
0a7de745 3509 }
5ba3f43e
A
3510 /* Block sans continuation to avoid yielding kernel stack */
3511 assert_wait(&mapping_replenish_event, THREAD_UNINT);
3512 mappingrecurse = 0;
3513 thread_block(THREAD_CONTINUE_NULL);
3514 pmap_mapping_thread_wakeups++;
3515 }
3516}
3517
3518
3519static void
3520ptd_bootstrap(
3521 pt_desc_t *ptdp,
3522 unsigned int ptd_cnt)
3523{
3524 simple_lock_init(&ptd_free_list_lock, 0);
3525 while (ptd_cnt != 0) {
3526 (*(void **)ptdp) = (void *)ptd_free_list;
3527 ptd_free_list = ptdp;
3528 ptdp++;
3529 ptd_cnt--;
3530 ptd_free_count++;
3531 }
3532 ptd_preboot = FALSE;
3533}
3534
0a7de745
A
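/*
 * Grab a page-table descriptor from ptd_free_list, refilling the list from a
 * newly allocated page (or from avail_start during early boot) when it is
 * empty; returns NULL if the page allocation fails and reclaim is false.
 */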
3535static pt_desc_t*
3536ptd_alloc_unlinked(bool reclaim)
5ba3f43e 3537{
0a7de745
A
3538 pt_desc_t *ptdp;
3539 unsigned i;
5ba3f43e 3540
0a7de745 3541 if (!ptd_preboot) {
d9a64523 3542 pmap_simple_lock(&ptd_free_list_lock);
0a7de745 3543 }
5ba3f43e
A
3544
3545 if (ptd_free_count == 0) {
3546 unsigned int ptd_cnt;
0a7de745 3547 pt_desc_t *ptdp_next;
5ba3f43e
A
3548
3549 if (ptd_preboot) {
3550 ptdp = (pt_desc_t *)avail_start;
3551 avail_start += ARM_PGBYTES;
3552 ptdp_next = ptdp;
0a7de745 3553 ptd_cnt = ARM_PGBYTES / sizeof(pt_desc_t);
5ba3f43e
A
3554 } else {
3555 pmap_paddr_t pa;
0a7de745 3556 kern_return_t ret;
5ba3f43e 3557
d9a64523 3558 pmap_simple_unlock(&ptd_free_list_lock);
5ba3f43e
A
3559
3560 if (pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
0a7de745
A
3561 if (reclaim) {
3562 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
3563 assert(ret == KERN_SUCCESS);
3564 } else {
3565 return NULL;
3566 }
5ba3f43e
A
3567 }
3568 ptdp = (pt_desc_t *)phystokv(pa);
3569
d9a64523 3570 pmap_simple_lock(&ptd_free_list_lock);
5ba3f43e 3571 ptdp_next = ptdp;
0a7de745 3572 ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
5ba3f43e
A
3573 }
3574
3575 while (ptd_cnt != 0) {
3576 (*(void **)ptdp_next) = (void *)ptd_free_list;
3577 ptd_free_list = ptdp_next;
3578 ptdp_next++;
3579 ptd_cnt--;
3580 ptd_free_count++;
3581 }
3582 }
3583
3584 if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
3585 ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
3586 ptd_free_count--;
3587 } else {
cb323159
A
3588 panic("%s: out of ptd entry",
3589 __FUNCTION__);
5ba3f43e
A
3590 }
3591
0a7de745 3592 if (!ptd_preboot) {
d9a64523 3593 pmap_simple_unlock(&ptd_free_list_lock);
0a7de745 3594 }
5ba3f43e
A
3595
3596 ptdp->pt_page.next = NULL;
3597 ptdp->pt_page.prev = NULL;
d9a64523 3598 ptdp->pmap = NULL;
5ba3f43e 3599
0a7de745 3600 for (i = 0; i < PT_INDEX_MAX; i++) {
cb323159
A
3601 ptdp->ptd_info[i].va = (vm_offset_t)-1;
3602 ptdp->ptd_info[i].refcnt = 0;
3603 ptdp->ptd_info[i].wiredcnt = 0;
5ba3f43e 3604 }
5ba3f43e 3605
0a7de745 3606 return ptdp;
5ba3f43e
A
3607}
3608
d9a64523 3609static inline pt_desc_t*
0a7de745 3610ptd_alloc(pmap_t pmap, bool reclaim)
d9a64523 3611{
0a7de745
A
3612 pt_desc_t *ptdp = ptd_alloc_unlinked(reclaim);
3613
3614 if (ptdp == NULL) {
3615 return NULL;
3616 }
d9a64523
A
3617
3618 ptdp->pmap = pmap;
3619 if (pmap != kernel_pmap) {
3620 /* We should never try to reclaim kernel pagetable pages in
3621 * pmap_pages_reclaim(), so don't enter them into the list. */
3622 pmap_simple_lock(&pt_pages_lock);
3623 queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
3624 pmap_simple_unlock(&pt_pages_lock);
3625 }
3626
3627 pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
3628 return ptdp;
3629}
3630
5ba3f43e 3631static void
d9a64523 3632ptd_deallocate(pt_desc_t *ptdp)
5ba3f43e 3633{
0a7de745 3634 pmap_t pmap = ptdp->pmap;
5ba3f43e
A
3635
3636 if (ptd_preboot) {
cb323159
A
3637 panic("%s: early boot, "
3638 "ptdp=%p",
3639 __FUNCTION__,
3640 ptdp);
5ba3f43e 3641 }
5ba3f43e
A
3642
3643 if (ptdp->pt_page.next != NULL) {
d9a64523 3644 pmap_simple_lock(&pt_pages_lock);
5ba3f43e 3645 queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
d9a64523 3646 pmap_simple_unlock(&pt_pages_lock);
5ba3f43e 3647 }
d9a64523 3648 pmap_simple_lock(&ptd_free_list_lock);
5ba3f43e
A
3649 (*(void **)ptdp) = (void *)ptd_free_list;
3650 ptd_free_list = (pt_desc_t *)ptdp;
3651 ptd_free_count++;
d9a64523 3652 pmap_simple_unlock(&ptd_free_list_lock);
0a7de745 3653 if (pmap != NULL) {
d9a64523 3654 pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
0a7de745 3655 }
5ba3f43e
A
3656}
3657
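/*
 * Record in a page-table page's descriptor the base VA covered by the table at
 * the given level; non-leaf tables also have their refcnt primed with
 * PT_DESC_REFCOUNT.
 */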
3658static void
3659ptd_init(
3660 pt_desc_t *ptdp,
3661 pmap_t pmap,
3662 vm_map_address_t va,
3663 unsigned int level,
3664 pt_entry_t *pte_p)
3665{
0a7de745 3666 if (ptdp->pmap != pmap) {
cb323159
A
3667 panic("%s: pmap mismatch, "
3668 "ptdp=%p, pmap=%p, va=%p, level=%u, pte_p=%p",
3669 __FUNCTION__,
3670 ptdp, pmap, (void*)va, level, pte_p);
0a7de745 3671 }
5ba3f43e 3672
0a7de745 3673#if (__ARM_VMSA__ == 7)
5ba3f43e 3674 assert(level == 2);
cb323159 3675 ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(ARM_TT_L1_PT_OFFMASK);
5ba3f43e 3676#else
cb323159
A
3677 assert(level > pt_attr_root_level(pmap_get_pt_attr(pmap)));
3678 ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(pt_attr_ln_offmask(pmap_get_pt_attr(pmap), level - 1));
5ba3f43e 3679#endif
0a7de745 3680 if (level < PMAP_TT_MAX_LEVEL) {
cb323159 3681 ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt = PT_DESC_REFCOUNT;
0a7de745 3682 }
5ba3f43e
A
3683}
3684
3685
3686boolean_t
3687pmap_valid_address(
3688 pmap_paddr_t addr)
3689{
3690 return pa_valid(addr);
3691}
3692
0a7de745 3693#if (__ARM_VMSA__ == 7)
5ba3f43e
A
3694
3695/*
3696 * Given an offset and a map, compute the address of the
3697 * corresponding translation table entry.
3698 */
3699static inline tt_entry_t *
3700pmap_tte(pmap_t pmap,
0a7de745 3701 vm_map_address_t addr)
5ba3f43e 3702{
cb323159
A
3703 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3704
3705 if (!(tte_index(pmap, pt_attr, addr) < pmap->tte_index_max)) {
5ba3f43e 3706 return (tt_entry_t *)NULL;
0a7de745 3707 }
cb323159 3708 return &pmap->tte[tte_index(pmap, pt_attr, addr)];
5ba3f43e
A
3709}
3710
3711
3712/*
3713 * Given an offset and a map, compute the address of the
3714 * pte. If the address is invalid with respect to the map
3715 * then PT_ENTRY_NULL is returned (and the map may need to grow).
3716 *
3717 * This is only used internally.
3718 */
3719static inline pt_entry_t *
3720pmap_pte(
0a7de745
A
3721 pmap_t pmap,
3722 vm_map_address_t addr)
5ba3f43e
A
3723{
3724 pt_entry_t *ptp;
3725 tt_entry_t *ttp;
3726 tt_entry_t tte;
3727
3728 ttp = pmap_tte(pmap, addr);
0a7de745
A
3729 if (ttp == (tt_entry_t *)NULL) {
3730 return PT_ENTRY_NULL;
3731 }
5ba3f43e 3732 tte = *ttp;
cb323159 3733#if MACH_ASSERT
0a7de745 3734 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
cb323159
A
3735 panic("%s: Attempt to demote L1 block, tte=0x%lx, "
3736 "pmap=%p, addr=%p",
3737 __FUNCTION__, (unsigned long)tte,
3738 pmap, (void*)addr);
0a7de745 3739 }
cb323159 3740#endif
0a7de745
A
3741 if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
3742 return PT_ENTRY_NULL;
3743 }
5ba3f43e 3744 ptp = (pt_entry_t *) ttetokv(tte) + ptenum(addr);
0a7de745 3745 return ptp;
5ba3f43e
A
3746}
3747
cb323159
A
3748__unused static inline tt_entry_t *
3749pmap_ttne(pmap_t pmap,
3750 unsigned int target_level,
3751 vm_map_address_t addr)
3752{
3753 tt_entry_t * ret_ttep = NULL;
3754
3755 switch (target_level) {
3756 case 1:
3757 ret_ttep = pmap_tte(pmap, addr);
3758 break;
3759 case 2:
3760 ret_ttep = (tt_entry_t *)pmap_pte(pmap, addr);
3761 break;
3762 default:
3763 panic("%s: bad level, "
3764 "pmap=%p, target_level=%u, addr=%p",
3765 __FUNCTION__,
3766 pmap, target_level, (void *)addr);
3767 }
3768
3769 return ret_ttep;
3770}
3771
5ba3f43e
A
3772#else
3773
cb323159
A
3774static inline tt_entry_t *
3775pmap_ttne(pmap_t pmap,
3776 unsigned int target_level,
3777 vm_map_address_t addr)
3778{
3779 tt_entry_t * ttp = NULL;
3780 tt_entry_t * ttep = NULL;
3781 tt_entry_t tte = ARM_TTE_EMPTY;
3782 unsigned int cur_level;
3783
3784 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3785
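 /*
 * Walk the translation tables from the root level toward target_level,
 * following valid table descriptors; return a pointer to the entry at
 * target_level, or TT_ENTRY_NULL if an intermediate entry is not a
 * valid table.
 */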
3786 ttp = pmap->tte;
3787
3788 assert(target_level <= pt_attr->pta_max_level);
3789
3790 for (cur_level = pt_attr->pta_root_level; cur_level <= target_level; cur_level++) {
3791 ttep = &ttp[ttn_index(pmap, pt_attr, addr, cur_level)];
3792
3793 if (cur_level == target_level) {
3794 break;
3795 }
3796
3797 tte = *ttep;
3798
3799#if MACH_ASSERT
3800 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
3801 panic("%s: Attempt to demote L%u block, tte=0x%llx, "
3802 "pmap=%p, target_level=%u, addr=%p",
3803 __FUNCTION__, cur_level, tte,
3804 pmap, target_level, (void*)addr);
3805 }
3806#endif
3807 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
3808 return TT_ENTRY_NULL;
3809 }
3810
3811 ttp = (tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
3812 }
3813
3814 return ttep;
3815}
3816
5ba3f43e
A
3817/*
3818 * Given an offset and a map, compute the address of the level 1 translation table entry.
3819 * If the translation is invalid then PT_ENTRY_NULL is returned.
3820 */
3821static inline tt_entry_t *
3822pmap_tt1e(pmap_t pmap,
0a7de745 3823 vm_map_address_t addr)
5ba3f43e 3824{
cb323159 3825 return pmap_ttne(pmap, PMAP_TT_L1_LEVEL, addr);
5ba3f43e
A
3826}
3827
3828/*
3829 * Given an offset and a map, compute the address of the level 2 translation table entry.
3830 * If the translation is invalid then PT_ENTRY_NULL is returned.
3831 */
3832static inline tt_entry_t *
3833pmap_tt2e(pmap_t pmap,
0a7de745 3834 vm_map_address_t addr)
5ba3f43e 3835{
cb323159 3836 return pmap_ttne(pmap, PMAP_TT_L2_LEVEL, addr);
5ba3f43e
A
3837}
3838
3839
3840/*
3841 * Given an offset and a map, compute the address of the level 3 translation table entry.
3842 * If the translation is invalid then PT_ENTRY_NULL is returned.
3843 */
3844static inline pt_entry_t *
3845pmap_tt3e(
0a7de745
A
3846 pmap_t pmap,
3847 vm_map_address_t addr)
5ba3f43e 3848{
cb323159 3849 return (pt_entry_t*)pmap_ttne(pmap, PMAP_TT_L3_LEVEL, addr);
5ba3f43e
A
3850}
3851
5ba3f43e
A
3852static inline tt_entry_t *
3853pmap_tte(
3854 pmap_t pmap,
3855 vm_map_address_t addr)
3856{
0a7de745 3857 return pmap_tt2e(pmap, addr);
5ba3f43e
A
3858}
3859
5ba3f43e
A
3860static inline pt_entry_t *
3861pmap_pte(
0a7de745
A
3862 pmap_t pmap,
3863 vm_map_address_t addr)
5ba3f43e 3864{
0a7de745 3865 return pmap_tt3e(pmap, addr);
5ba3f43e
A
3866}
3867
3868#endif
3869
c6bf4f31
A
3870#if __APRR_SUPPORTED__
3871/*
3872 * Indicates whether the given PTE has special restrictions due to the current
3873 * APRR settings.
3874 */
3875static boolean_t
3876is_pte_aprr_protected(pt_entry_t pte)
3877{
3878 uint64_t aprr_el0_value;
3879 uint64_t aprr_el1_value;
3880 uint64_t aprr_index;
3881
3882 MRS(aprr_el0_value, APRR_EL0);
3883 MRS(aprr_el1_value, APRR_EL1);
3884 aprr_index = PTE_TO_APRR_INDEX(pte);
3885
3886 /* Check to see if this mapping had APRR restrictions. */
3887 if ((APRR_EXTRACT_IDX_ATTR(aprr_el0_value, aprr_index) != APRR_EXTRACT_IDX_ATTR(APRR_EL0_RESET, aprr_index)) ||
3888 (APRR_EXTRACT_IDX_ATTR(aprr_el1_value, aprr_index) != APRR_EXTRACT_IDX_ATTR(APRR_EL1_RESET, aprr_index))
3889 ) {
3890 return TRUE;
3891 }
3892
3893 return FALSE;
3894}
3895#endif /* __APRR_SUPPORTED__ */
3896
5ba3f43e 3897
c6bf4f31
A
3898#if __APRR_SUPPORTED__
3899static boolean_t
3900is_pte_xprr_protected(pt_entry_t pte)
3901{
3902#if __APRR_SUPPORTED__
3903 return is_pte_aprr_protected(pte);
3904#else /* __APRR_SUPPORTED__ */
3905#error "XPRR configuration error"
3906#endif /* __APRR_SUPPORTED__ */
3907}
3908#endif /* __APRR_SUPPORTED__*/
cb323159 3909
c6bf4f31
A
3910#if __APRR_SUPPORTED__
3911static uint64_t
3912__unused pte_to_xprr_perm(pt_entry_t pte)
3913{
3914#if __APRR_SUPPORTED__
3915 switch (PTE_TO_APRR_INDEX(pte)) {
3916 case APRR_FIRM_RX_INDEX: return XPRR_FIRM_RX_PERM;
3917 case APRR_FIRM_RO_INDEX: return XPRR_FIRM_RO_PERM;
3918 case APRR_PPL_RW_INDEX: return XPRR_PPL_RW_PERM;
3919 case APRR_KERN_RW_INDEX: return XPRR_KERN_RW_PERM;
3920 case APRR_FIRM_RW_INDEX: return XPRR_FIRM_RW_PERM;
3921 case APRR_KERN0_RW_INDEX: return XPRR_KERN0_RW_PERM;
3922 case APRR_USER_JIT_INDEX: return XPRR_USER_JIT_PERM;
3923 case APRR_USER_RW_INDEX: return XPRR_USER_RW_PERM;
3924 case APRR_PPL_RX_INDEX: return XPRR_PPL_RX_PERM;
3925 case APRR_KERN_RX_INDEX: return XPRR_KERN_RX_PERM;
ea3f0419 3926 case APRR_USER_XO_INDEX: return XPRR_USER_XO_PERM;
c6bf4f31
A
3927 case APRR_KERN_RO_INDEX: return XPRR_KERN_RO_PERM;
3928 case APRR_KERN0_RX_INDEX: return XPRR_KERN0_RO_PERM;
3929 case APRR_KERN0_RO_INDEX: return XPRR_KERN0_RO_PERM;
3930 case APRR_USER_RX_INDEX: return XPRR_USER_RX_PERM;
3931 case APRR_USER_RO_INDEX: return XPRR_USER_RO_PERM;
3932 default: return XPRR_MAX_PERM;
3933 }
3934#else
3935#error "XPRR configuration error"
3936#endif /**/
3937}
cb323159 3938
c6bf4f31
A
3939#if __APRR_SUPPORTED__
3940static uint64_t
3941xprr_perm_to_aprr_index(uint64_t perm)
3942{
3943 switch (perm) {
3944 case XPRR_FIRM_RX_PERM: return APRR_FIRM_RX_INDEX;
3945 case XPRR_FIRM_RO_PERM: return APRR_FIRM_RO_INDEX;
3946 case XPRR_PPL_RW_PERM: return APRR_PPL_RW_INDEX;
3947 case XPRR_KERN_RW_PERM: return APRR_KERN_RW_INDEX;
3948 case XPRR_FIRM_RW_PERM: return APRR_FIRM_RW_INDEX;
3949 case XPRR_KERN0_RW_PERM: return APRR_KERN0_RW_INDEX;
3950 case XPRR_USER_JIT_PERM: return APRR_USER_JIT_INDEX;
3951 case XPRR_USER_RW_PERM: return APRR_USER_RW_INDEX;
3952 case XPRR_PPL_RX_PERM: return APRR_PPL_RX_INDEX;
3953 case XPRR_KERN_RX_PERM: return APRR_KERN_RX_INDEX;
ea3f0419 3954 case XPRR_USER_XO_PERM: return APRR_USER_XO_INDEX;
c6bf4f31
A
3955 case XPRR_KERN_RO_PERM: return APRR_KERN_RO_INDEX;
3956 case XPRR_KERN0_RX_PERM: return APRR_KERN0_RO_INDEX;
3957 case XPRR_KERN0_RO_PERM: return APRR_KERN0_RO_INDEX;
3958 case XPRR_USER_RX_PERM: return APRR_USER_RX_INDEX;
3959 case XPRR_USER_RO_PERM: return APRR_USER_RO_INDEX;
3960 default: return APRR_MAX_INDEX;
3961 }
3962}
3963#endif /* __APRR_SUPPORTED__ */
3964
3965static pt_entry_t
3966__unused xprr_perm_to_pte(uint64_t perm)
3967{
3968#if __APRR_SUPPORTED__
3969 return APRR_INDEX_TO_PTE(xprr_perm_to_aprr_index(perm));
3970#else
3971#error "XPRR configuration error"
3972#endif /**/
3973}
3974#endif /* __APRR_SUPPORTED__*/
cb323159
A
3975
3976
5ba3f43e
A
3977/*
3978 * Map memory at initialization. The physical addresses being
3979 * mapped are not managed and are never unmapped.
3980 *
3981 * For now, VM is already on; we only need to map the
3982 * specified memory.
3983 */
3984vm_map_address_t
3985pmap_map(
0a7de745
A
3986 vm_map_address_t virt,
3987 vm_offset_t start,
3988 vm_offset_t end,
3989 vm_prot_t prot,
3990 unsigned int flags)
5ba3f43e
A
3991{
3992 kern_return_t kr;
3993 vm_size_t ps;
3994
3995 ps = PAGE_SIZE;
3996 while (start < end) {
3997 kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
0a7de745 3998 prot, VM_PROT_NONE, flags, FALSE);
5ba3f43e
A
3999
4000 if (kr != KERN_SUCCESS) {
4001 panic("%s: failed pmap_enter, "
0a7de745
A
4002 "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
4003 __FUNCTION__,
4004 (void *) virt, (void *) start, (void *) end, prot, flags);
5ba3f43e
A
4005 }
4006
4007 virt += ps;
4008 start += ps;
4009 }
0a7de745 4010 return virt;
5ba3f43e
A
4011}
4012
4013vm_map_address_t
4014pmap_map_bd_with_options(
0a7de745
A
4015 vm_map_address_t virt,
4016 vm_offset_t start,
4017 vm_offset_t end,
4018 vm_prot_t prot,
4019 int32_t options)
5ba3f43e
A
4020{
4021 pt_entry_t tmplate;
4022 pt_entry_t *ptep;
4023 vm_map_address_t vaddr;
4024 vm_offset_t paddr;
0a7de745 4025 pt_entry_t mem_attr;
5ba3f43e
A
4026
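 /*
 * Select the memory-attribute index for the requested mapping type
 * (write-combined, posted variants, or device/uncached by default).
 */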
4027 switch (options & PMAP_MAP_BD_MASK) {
4028 case PMAP_MAP_BD_WCOMB:
4029 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
0a7de745 4030#if (__ARM_VMSA__ > 7)
5ba3f43e
A
4031 mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
4032#else
4033 mem_attr |= ARM_PTE_SH;
4034#endif
4035 break;
4036 case PMAP_MAP_BD_POSTED:
4037 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
4038 break;
cb323159
A
4039 case PMAP_MAP_BD_POSTED_REORDERED:
4040 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
4041 break;
4042 case PMAP_MAP_BD_POSTED_COMBINED_REORDERED:
4043 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
4044 break;
5ba3f43e
A
4045 default:
4046 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
4047 break;
4048 }
4049
4050 tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
0a7de745 4051 mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
5c9f4661
A
4052#if __ARM_KERNEL_PROTECT__
4053 tmplate |= ARM_PTE_NG;
4054#endif /* __ARM_KERNEL_PROTECT__ */
5ba3f43e
A
4055
4056 vaddr = virt;
4057 paddr = start;
4058 while (paddr < end) {
5ba3f43e
A
4059 ptep = pmap_pte(kernel_pmap, vaddr);
4060 if (ptep == PT_ENTRY_NULL) {
cb323159
A
4061 panic("%s: no PTE for vaddr=%p, "
4062 "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
4063 __FUNCTION__, (void*)vaddr,
4064 (void*)virt, (void*)start, (void*)end, prot, options);
5ba3f43e 4065 }
cb323159
A
4066
4067 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
d9a64523 4068 WRITE_PTE_STRONG(ptep, tmplate);
5ba3f43e
A
4069
4070 pte_increment_pa(tmplate);
4071 vaddr += PAGE_SIZE;
4072 paddr += PAGE_SIZE;
4073 }
4074
0a7de745 4075 if (end >= start) {
5ba3f43e 4076 flush_mmu_tlb_region(virt, (unsigned)(end - start));
0a7de745 4077 }
5ba3f43e 4078
0a7de745 4079 return vaddr;
5ba3f43e
A
4080}
4081
4082/*
4083 * Back-door routine for mapping kernel VM at initialization.
4084 * Useful for mapping memory outside the range
4085 * [vm_first_phys, vm_last_phys] (i.e., devices).
4086 * Otherwise like pmap_map.
4087 */
4088vm_map_address_t
4089pmap_map_bd(
4090 vm_map_address_t virt,
4091 vm_offset_t start,
4092 vm_offset_t end,
4093 vm_prot_t prot)
4094{
4095 pt_entry_t tmplate;
0a7de745 4096 pt_entry_t *ptep;
5ba3f43e 4097 vm_map_address_t vaddr;
0a7de745 4098 vm_offset_t paddr;
5ba3f43e
A
4099
4100 /* not cacheable and not buffered */
4101 tmplate = pa_to_pte(start)
0a7de745
A
4102 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
4103 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
4104 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
5c9f4661
A
4105#if __ARM_KERNEL_PROTECT__
4106 tmplate |= ARM_PTE_NG;
4107#endif /* __ARM_KERNEL_PROTECT__ */
5ba3f43e
A
4108
4109 vaddr = virt;
4110 paddr = start;
4111 while (paddr < end) {
5ba3f43e
A
4112 ptep = pmap_pte(kernel_pmap, vaddr);
4113 if (ptep == PT_ENTRY_NULL) {
4114 panic("pmap_map_bd");
4115 }
cb323159 4116 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
d9a64523 4117 WRITE_PTE_STRONG(ptep, tmplate);
5ba3f43e
A
4118
4119 pte_increment_pa(tmplate);
4120 vaddr += PAGE_SIZE;
4121 paddr += PAGE_SIZE;
4122 }
4123
0a7de745 4124 if (end >= start) {
5ba3f43e 4125 flush_mmu_tlb_region(virt, (unsigned)(end - start));
0a7de745 4126 }
5ba3f43e 4127
0a7de745 4128 return vaddr;
5ba3f43e
A
4129}
4130
4131/*
4132 * Back-door routine for mapping kernel VM at initialization.
4133 * Useful for mapping specific physical addresses in early
4134 * boot (i.e., before kernel_map is initialized).
4135 *
4136 * Maps are in the VM_HIGH_KERNEL_WINDOW area.
4137 */
4138
4139vm_map_address_t
4140pmap_map_high_window_bd(
4141 vm_offset_t pa_start,
4142 vm_size_t len,
4143 vm_prot_t prot)
4144{
0a7de745 4145 pt_entry_t *ptep, pte;
5ba3f43e 4146#if (__ARM_VMSA__ == 7)
0a7de745
A
4147 vm_map_address_t va_start = VM_HIGH_KERNEL_WINDOW;
4148 vm_map_address_t va_max = VM_MAX_KERNEL_ADDRESS;
5ba3f43e 4149#else
0a7de745
A
4150 vm_map_address_t va_start = VREGION1_START;
4151 vm_map_address_t va_max = VREGION1_START + VREGION1_SIZE;
5ba3f43e 4152#endif
0a7de745
A
4153 vm_map_address_t va_end;
4154 vm_map_address_t va;
4155 vm_size_t offset;
5ba3f43e
A
4156
4157 offset = pa_start & PAGE_MASK;
4158 pa_start -= offset;
4159 len += offset;
4160
4161 if (len > (va_max - va_start)) {
cb323159
A
4162 panic("%s: area too large, "
4163 "pa_start=%p, len=%p, prot=0x%x",
4164 __FUNCTION__,
4165 (void*)pa_start, (void*)len, prot);
5ba3f43e
A
4166 }
4167
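 /*
 * Scan the high VA window for a run of unmapped (ARM_PTE_TYPE_FAULT)
 * pages large enough to hold len bytes; if a mapped page is found inside
 * a candidate run, restart the scan just past it.
 */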
4168scan:
0a7de745 4169 for (; va_start < va_max; va_start += PAGE_SIZE) {
5ba3f43e 4170 ptep = pmap_pte(kernel_pmap, va_start);
cb323159 4171 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
0a7de745 4172 if (*ptep == ARM_PTE_TYPE_FAULT) {
5ba3f43e 4173 break;
0a7de745 4174 }
5ba3f43e
A
4175 }
4176 if (va_start > va_max) {
cb323159
A
4177 panic("%s: insufficient pages, "
4178 "pa_start=%p, len=%p, prot=0x%x",
4179 __FUNCTION__,
4180 (void*)pa_start, (void*)len, prot);
5ba3f43e
A
4181 }
4182
4183 for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
4184 ptep = pmap_pte(kernel_pmap, va_end);
cb323159 4185 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
5ba3f43e
A
4186 if (*ptep != ARM_PTE_TYPE_FAULT) {
4187 va_start = va_end + PAGE_SIZE;
4188 goto scan;
4189 }
4190 }
4191
4192 for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
4193 ptep = pmap_pte(kernel_pmap, va);
4194 pte = pa_to_pte(pa_start)
0a7de745
A
4195 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
4196 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
4197 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
4198#if (__ARM_VMSA__ > 7)
5ba3f43e
A
4199 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
4200#else
4201 pte |= ARM_PTE_SH;
4202#endif
5c9f4661
A
4203#if __ARM_KERNEL_PROTECT__
4204 pte |= ARM_PTE_NG;
4205#endif /* __ARM_KERNEL_PROTECT__ */
d9a64523 4206 WRITE_PTE_STRONG(ptep, pte);
5ba3f43e 4207 }
cb323159 4208 PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len, false);
5ba3f43e
A
4209#if KASAN
4210 kasan_notify_address(va_start, len);
4211#endif
4212 return va_start;
4213}
4214
4215#define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))
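/*
 * PMAP_ALIGN rounds addr up to the next multiple of align (align must be a
 * power of two). Since '+' binds tighter than '&', this parses as
 * ((addr + (align - 1)) & ~(align - 1)); e.g. PMAP_ALIGN(0x1001, 0x10) == 0x1010.
 */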
4216
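/*
 * Parse the "pmap-io-ranges" property of the /defaults device tree node,
 * panicking if any range is not page-aligned, wraps around, or overlaps
 * managed physical memory; returns the number of bytes needed for
 * io_attr_table.
 */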
d9a64523 4217static vm_size_t
5ba3f43e
A
4218pmap_compute_io_rgns(void)
4219{
4220 DTEntry entry;
4221 pmap_io_range_t *ranges;
d9a64523 4222 uint64_t rgn_end;
5ba3f43e 4223 void *prop = NULL;
0a7de745 4224 int err;
5ba3f43e
A
4225 unsigned int prop_size;
4226
0a7de745
A
4227 err = DTLookupEntry(NULL, "/defaults", &entry);
4228 assert(err == kSuccess);
5ba3f43e 4229
0a7de745 4230 if (kSuccess != DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
5ba3f43e 4231 return 0;
0a7de745 4232 }
5ba3f43e 4233
5ba3f43e
A
4234 ranges = prop;
4235 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
0a7de745 4236 if (ranges[i].addr & PAGE_MASK) {
d9a64523 4237 panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
0a7de745
A
4238 }
4239 if (ranges[i].len & PAGE_MASK) {
cb323159 4240 panic("pmap I/O region %u length 0x%llx is not page-aligned", i, ranges[i].len);
0a7de745
A
4241 }
4242 if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end)) {
cb323159 4243 panic("pmap I/O region %u addr 0x%llx length 0x%llx wraps around", i, ranges[i].addr, ranges[i].len);
0a7de745 4244 }
cb323159
A
4245 if (((ranges[i].addr <= gPhysBase) && (rgn_end > gPhysBase)) ||
4246 ((ranges[i].addr < avail_end) && (rgn_end >= avail_end)) ||
4247 ((ranges[i].addr > gPhysBase) && (rgn_end < avail_end))) {
4248 panic("pmap I/O region %u addr 0x%llx length 0x%llx overlaps physical memory", i, ranges[i].addr, ranges[i].len);
0a7de745 4249 }
5ba3f43e 4250
cb323159 4251 ++num_io_rgns;
0a7de745 4252 }
5ba3f43e 4253
0a7de745 4254 return num_io_rgns * sizeof(*ranges);
d9a64523
A
4255}
4256
4257/*
4258 * return < 0 for a < b
4259 * 0 for a == b
4260 * > 0 for a > b
4261 */
4262typedef int (*cmpfunc_t)(const void *a, const void *b);
4263
4264extern void
4265qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
4266
4267static int
4268cmp_io_rgns(const void *a, const void *b)
4269{
4270 const pmap_io_range_t *range_a = a;
4271 const pmap_io_range_t *range_b = b;
0a7de745 4272 if ((range_b->addr + range_b->len) <= range_a->addr) {
d9a64523 4273 return 1;
0a7de745 4274 } else if ((range_a->addr + range_a->len) <= range_b->addr) {
d9a64523 4275 return -1;
0a7de745 4276 } else {
d9a64523 4277 return 0;
0a7de745 4278 }
5ba3f43e
A
4279}
4280
4281static void
4282pmap_load_io_rgns(void)
4283{
4284 DTEntry entry;
4285 pmap_io_range_t *ranges;
4286 void *prop = NULL;
0a7de745 4287 int err;
5ba3f43e
A
4288 unsigned int prop_size;
4289
0a7de745 4290 if (num_io_rgns == 0) {
5ba3f43e 4291 return;
0a7de745 4292 }
5ba3f43e 4293
d9a64523
A
4294 err = DTLookupEntry(NULL, "/defaults", &entry);
4295 assert(err == kSuccess);
5ba3f43e
A
4296
4297 err = DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
d9a64523 4298 assert(err == kSuccess);
5ba3f43e
A
4299
4300 ranges = prop;
0a7de745 4301 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
d9a64523 4302 io_attr_table[i] = ranges[i];
0a7de745 4303 }
d9a64523
A
4304
4305 qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
4306}
4307
4308#if __arm64__
4309/*
4310 * pmap_get_arm64_prot
4311 *
4312 * Return the effective ARMv8 VMSA block protections, including
4313 * table AP/PXN/XN overrides, of a pmap entry.
4314 *
4315 */
4316
4317uint64_t
4318pmap_get_arm64_prot(
4319 pmap_t pmap,
4320 vm_offset_t addr)
4321{
cb323159
A
4322 tt_entry_t tte = 0;
4323 unsigned int level = 0;
4324 uint64_t tte_type = 0;
4325 uint64_t effective_prot_bits = 0;
4326 uint64_t aggregate_tte = 0;
4327 uint64_t table_ap_bits = 0, table_xn = 0, table_pxn = 0;
4328 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
d9a64523 4329
cb323159
A
4330 for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
4331 tte = *pmap_ttne(pmap, level, addr);
d9a64523 4332
cb323159
A
4333 if (!(tte & ARM_TTE_VALID)) {
4334 return 0;
4335 }
d9a64523 4336
cb323159 4337 tte_type = tte & ARM_TTE_TYPE_MASK;
d9a64523 4338
cb323159
A
4339 if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
4340 (level == pt_attr->pta_max_level)) {
4341 /* Block or page mapping; both have the same protection bit layout. */
4342 break;
4343 } else if (tte_type == ARM_TTE_TYPE_TABLE) {
4344 /* All of the table bits we care about are overrides, so just OR them together. */
4345 aggregate_tte |= tte;
4346 }
d9a64523
A
4347 }
4348
cb323159
A
4349 table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
4350 table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
4351 table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);
d9a64523 4352
cb323159
A
4353 /* Start with the PTE bits. */
4354 effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);
d9a64523 4355
cb323159
A
4356 /* Table AP bits mask out block/page AP bits */
4357 effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));
d9a64523 4358
cb323159
A
4359 /* XN/PXN bits can be OR'd in. */
4360 effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
4361 effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);
d9a64523 4362
cb323159 4363 return effective_prot_bits;
5ba3f43e 4364}
d9a64523 4365#endif /* __arm64__ */
5ba3f43e
A
4366
4367
4368/*
4369 * Bootstrap the system enough to run with virtual memory.
4370 *
4371 * The early VM initialization code has already allocated
4372 * the first CPU's translation table and made entries for
4373 * all the one-to-one mappings to be found there.
4374 *
4375 * We must set up the kernel pmap structures and the
4376 * physical-to-virtual translation lookup tables for the
4377 * physical memory to be managed (between avail_start and
4378 * avail_end).
0a7de745 4379 *
5ba3f43e
A
4380 * Map the kernel's code and data, and allocate the system page table.
4381 * Page_size must already be set.
4382 *
4383 * Parameters:
4384 * first_avail first available physical page -
4385 * after kernel page tables
4386 * avail_start PA of first managed physical page
4387 * avail_end PA of last managed physical page
4388 */
4389
4390void
4391pmap_bootstrap(
4392 vm_offset_t vstart)
4393{
0a7de745 4394 pmap_paddr_t pmap_struct_start;
5ba3f43e 4395 vm_size_t pv_head_size;
0a7de745 4396 vm_size_t ptd_root_table_size;
5ba3f43e 4397 vm_size_t pp_attr_table_size;
0a7de745 4398 vm_size_t io_attr_table_size;
5ba3f43e 4399 unsigned int npages;
0a7de745
A
4400 vm_map_offset_t maxoffset;
4401
4402 lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);
5ba3f43e 4403
c6bf4f31
A
4404#if XNU_MONITOR
4405
4406#if DEVELOPMENT || DEBUG
4407 PE_parse_boot_argn("-unsafe_kernel_text", &pmap_ppl_disable, sizeof(pmap_ppl_disable));
4408#endif
4409
4410 simple_lock_init(&pmap_ppl_free_page_lock, 0);
4411
4412#if __APRR_SUPPORTED__
4413 if (((uintptr_t)(&ppl_trampoline_start)) % PAGE_SIZE) {
4414 panic("%s: ppl_trampoline_start is not page aligned, "
4415 "vstart=%#lx",
4416 __FUNCTION__,
4417 vstart);
4418 }
4419
4420 if (((uintptr_t)(&ppl_trampoline_end)) % PAGE_SIZE) {
4421 panic("%s: ppl_trampoline_end is not page aligned, "
4422 "vstart=%#lx",
4423 __FUNCTION__,
4424 vstart);
4425 }
4426#endif /* __APRR_SUPPORTED__ */
4427#endif /* XNU_MONITOR */
5ba3f43e 4428
d9a64523 4429#if DEVELOPMENT || DEBUG
0a7de745 4430 if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
5ba3f43e
A
4431 kprintf("Kernel traces for pmap operations enabled\n");
4432 }
4433#endif
4434
4435 /*
4436 * Initialize the kernel pmap.
4437 */
4438 pmap_stamp = 1;
cb323159
A
4439#if ARM_PARAMETERIZED_PMAP
4440 kernel_pmap->pmap_pt_attr = native_pt_attr;
4441#endif /* ARM_PARAMETERIZED_PMAP */
4442#if HAS_APPLE_PAC
4443 kernel_pmap->disable_jop = 0;
4444#endif /* HAS_APPLE_PAC */
5ba3f43e
A
4445 kernel_pmap->tte = cpu_tte;
4446 kernel_pmap->ttep = cpu_ttep;
4447#if (__ARM_VMSA__ > 7)
4448 kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
4449#else
4450 kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
4451#endif
4452 kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
cb323159 4453 os_atomic_init(&kernel_pmap->ref_count, 1);
5ba3f43e
A
4454 kernel_pmap->gc_status = 0;
4455 kernel_pmap->nx_enabled = TRUE;
0a7de745 4456#ifdef __arm64__
5ba3f43e
A
4457 kernel_pmap->is_64bit = TRUE;
4458#else
4459 kernel_pmap->is_64bit = FALSE;
4460#endif
cb323159 4461 kernel_pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);
5ba3f43e
A
4462
4463 kernel_pmap->nested_region_grand_addr = 0x0ULL;
4464 kernel_pmap->nested_region_subord_addr = 0x0ULL;
4465 kernel_pmap->nested_region_size = 0x0ULL;
4466 kernel_pmap->nested_region_asid_bitmap = NULL;
4467 kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;
4468
4469#if (__ARM_VMSA__ == 7)
0a7de745 4470 kernel_pmap->tte_index_max = 4 * NTTES;
5ba3f43e 4471#endif
cb323159
A
4472 kernel_pmap->hw_asid = 0;
4473 kernel_pmap->sw_asid = 0;
5ba3f43e
A
4474
4475 PMAP_LOCK_INIT(kernel_pmap);
5ba3f43e
A
4476 memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));
4477
4478 /* allocate space for and initialize the bookkeeping structures */
d9a64523 4479 io_attr_table_size = pmap_compute_io_rgns();
5ba3f43e
A
4480 npages = (unsigned int)atop(mem_size);
4481 pp_attr_table_size = npages * sizeof(pp_attr_t);
5ba3f43e 4482 pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
0a7de745
A
4483 // allocate enough initial PTDs to map twice the available physical memory
4484 ptd_root_table_size = sizeof(pt_desc_t) * (mem_size / ((PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES)) * 2;
5ba3f43e
A
4485
4486 pmap_struct_start = avail_start;
4487
4488 pp_attr_table = (pp_attr_t *) phystokv(avail_start);
4489 avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
d9a64523
A
4490 io_attr_table = (pmap_io_range_t *) phystokv(avail_start);
4491 avail_start = PMAP_ALIGN(avail_start + io_attr_table_size, __alignof(pv_entry_t*));
5ba3f43e
A
4492 pv_head_table = (pv_entry_t **) phystokv(avail_start);
4493 avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
4494 ptd_root_table = (pt_desc_t *)phystokv(avail_start);
4495 avail_start = round_page(avail_start + ptd_root_table_size);
4496
4497 memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);
4498
4499 pmap_load_io_rgns();
0a7de745 4500 ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));
5ba3f43e 4501
c6bf4f31
A
4502#if XNU_MONITOR
4503 pmap_array_begin = (void *)phystokv(avail_start);
4504 pmap_array = pmap_array_begin;
4505 avail_start += round_page(MAX_ASID * sizeof(struct pmap));
4506 pmap_array_end = (void *)phystokv(avail_start);
4507
4508 pmap_array_count = ((pmap_array_end - pmap_array_begin) / sizeof(struct pmap));
4509
4510 pmap_bootstrap_pmap_free_list();
4511
4512 pmap_ledger_ptr_array_begin = (void *)phystokv(avail_start);
4513 pmap_ledger_ptr_array = pmap_ledger_ptr_array_begin;
4514 avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(void*));
4515 pmap_ledger_ptr_array_end = (void *)phystokv(avail_start);
4516
4517 pmap_ledger_refcnt_begin = (void *)phystokv(avail_start);
4518 pmap_ledger_refcnt = pmap_ledger_refcnt_begin;
4519 avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(os_refcnt_t));
4520 pmap_ledger_refcnt_end = (void *)phystokv(avail_start);
4521
4522 simple_lock_init(&pmap_ledger_lock, 0);
4523#endif
5ba3f43e
A
4524 pmap_cpu_data_array_init();
4525
4526 vm_first_phys = gPhysBase;
4527 vm_last_phys = trunc_page(avail_end);
4528
4529 simple_lock_init(&pmaps_lock, 0);
cb323159
A
4530 simple_lock_init(&asid_lock, 0);
4531 simple_lock_init(&tt1_lock, 0);
5ba3f43e
A
4532 queue_init(&map_pmap_list);
4533 queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
5ba3f43e
A
4534 free_page_size_tt_list = TT_FREE_ENTRY_NULL;
4535 free_page_size_tt_count = 0;
4536 free_page_size_tt_max = 0;
4537 free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
4538 free_two_page_size_tt_count = 0;
4539 free_two_page_size_tt_max = 0;
4540 free_tt_list = TT_FREE_ENTRY_NULL;
4541 free_tt_count = 0;
4542 free_tt_max = 0;
4543
4544 simple_lock_init(&pt_pages_lock, 0);
4545 queue_init(&pt_page_list);
4546
4547 simple_lock_init(&pmap_pages_lock, 0);
4548 pmap_pages_request_count = 0;
4549 pmap_pages_request_acum = 0;
4550 pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;
4551
4552 virtual_space_start = vstart;
4553 virtual_space_end = VM_MAX_KERNEL_ADDRESS;
4554
cb323159 4555 bitmap_full(&asid_bitmap[0], MAX_ASID);
5ba3f43e 4556
5ba3f43e 4557
d9a64523 4558
0a7de745 4559 if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
5ba3f43e
A
4560 maxoffset = trunc_page(maxoffset);
4561 if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
4562 && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
0a7de745 4563 arm_pmap_max_offset_default = maxoffset;
5ba3f43e
A
4564 }
4565 }
4566#if defined(__arm64__)
0a7de745 4567 if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
5ba3f43e
A
4568 maxoffset = trunc_page(maxoffset);
4569 if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
4570 && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
0a7de745 4571 arm64_pmap_max_offset_default = maxoffset;
5ba3f43e
A
4572 }
4573 }
4574#endif
4575
4576#if DEVELOPMENT || DEBUG
0a7de745 4577 PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute, sizeof(panic_on_unsigned_execute));
5ba3f43e
A
4578#endif /* DEVELOPMENT || DEBUG */
4579
4580 pmap_nesting_size_min = ARM_NESTING_SIZE_MIN;
4581 pmap_nesting_size_max = ARM_NESTING_SIZE_MAX;
4582
4583 simple_lock_init(&phys_backup_lock, 0);
5c9f4661 4584
d9a64523 4585
5c9f4661
A
4586#if MACH_ASSERT
4587 PE_parse_boot_argn("pmap_stats_assert",
0a7de745
A
4588 &pmap_stats_assert,
4589 sizeof(pmap_stats_assert));
d9a64523 4590 PE_parse_boot_argn("vm_footprint_suspend_allowed",
0a7de745
A
4591 &vm_footprint_suspend_allowed,
4592 sizeof(vm_footprint_suspend_allowed));
5c9f4661 4593#endif /* MACH_ASSERT */
a39ff7e2
A
4594
4595#if KASAN
4596 /* Shadow the CPU copy windows, as they fall outside of the physical aperture */
4597 kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
4598#endif /* KASAN */
5ba3f43e
A
4599}
4600
c6bf4f31
A
4601#if XNU_MONITOR
4602
4603static inline void
4604pa_set_range_monitor(pmap_paddr_t start_pa, pmap_paddr_t end_pa)
4605{
4606 pmap_paddr_t cur_pa;
4607 for (cur_pa = start_pa; cur_pa < end_pa; cur_pa += ARM_PGBYTES) {
4608 assert(pa_valid(cur_pa));
4609 pa_set_monitor(cur_pa);
4610 }
4611}
4612
4613static void
4614pa_set_range_xprr_perm(pmap_paddr_t start_pa,
4615 pmap_paddr_t end_pa,
4616 unsigned int expected_perm,
4617 unsigned int new_perm)
4618{
4619 vm_offset_t start_va = phystokv(start_pa);
4620 vm_offset_t end_va = start_va + (end_pa - start_pa);
4621
4622 pa_set_range_monitor(start_pa, end_pa);
4623 pmap_set_range_xprr_perm(start_va, end_va, expected_perm, new_perm);
4624}
4625
4626void
4627pmap_static_allocations_done(void)
4628{
4629 pmap_paddr_t monitor_start_pa;
4630 pmap_paddr_t monitor_end_pa;
4631
4632 /*
4633 * We allocate memory for bootstrap starting at topOfKernelData (which
4634 * is at the end of the device tree and ramdisk data, if applicable).
4635 * We use avail_start as a pointer to the first address that has not
4636 * been reserved for bootstrap, so we know which pages to give to the
4637 * virtual memory layer.
4638 *
4639 * These bootstrap allocations will be used primarily for page tables.
4640 * If we wish to secure the page tables, we need to start by marking
4641 * these bootstrap allocations as pages that we want to protect.
4642 */
4643 monitor_start_pa = BootArgs->topOfKernelData;
4644 monitor_end_pa = BootArgs->topOfKernelData + BOOTSTRAP_TABLE_SIZE;
4645
ea3f0419
A
4646 /*
4647 * The bootstrap page tables are mapped RO at bootstrap.
4648 *
4649 * Note that this function call requests switching XPRR permissions from
4650 * XPRR_KERN_RO_PERM to XPRR_KERN_RO_PERM. Whilst this may seem redundant,
4651 * pa_set_range_xprr_perm() does other things too, such as calling
4652 * pa_set_range_monitor() on the requested address range and performing a number
4653 * of integrity checks on the PTEs. We should still
4654 * call this function for all PPL-owned memory, regardless of whether
4655 * permissions are required to be changed or not.
4656 */
4657 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
c6bf4f31
A
4658
4659 monitor_start_pa = BootArgs->topOfKernelData + BOOTSTRAP_TABLE_SIZE;
4660 monitor_end_pa = avail_start;
4661
4662 /* The other bootstrap allocations are mapped RW at bootstrap. */
4663 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
4664
ea3f0419
A
4665 /*
4666 * The RO page tables are mapped RW at bootstrap and remain RW after the call
4667 * to pa_set_range_xprr_perm(). We do this, as opposed to using XPRR_PPL_RW_PERM,
4668 * to work around a functional issue on H11 devices where CTRR shifts the APRR
4669 * lookup table index to USER_XO before APRR is applied, hence causing the hardware
4670 * to believe we are dealing with a user XO page upon performing a translation.
4671 *
4672 * Note that this workaround does not pose a security risk, because the RO
4673 * page tables still remain read-only, due to KTRR/CTRR, and further protecting
bca245ac 4674 * them would be unnecessary.
ea3f0419 4675 */
c6bf4f31
A
4676 monitor_start_pa = kvtophys((vm_offset_t)&ropagetable_begin);
4677 monitor_end_pa = monitor_start_pa + ((vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin);
ea3f0419 4678 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_KERN_RW_PERM);
c6bf4f31
A
4679
4680 monitor_start_pa = kvtophys(segPPLDATAB);
4681 monitor_end_pa = monitor_start_pa + segSizePPLDATA;
4682
4683 /* PPL data is RW for the PPL, RO for the kernel. */
4684 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
4685
4686 monitor_start_pa = kvtophys(segPPLTEXTB);
4687 monitor_end_pa = monitor_start_pa + segSizePPLTEXT;
4688
4689 /* PPL text is RX for the PPL, RO for the kernel. */
4690 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
4691
4692#if __APRR_SUPPORTED__
4693 monitor_start_pa = kvtophys(segPPLTRAMPB);
4694 monitor_end_pa = monitor_start_pa + segSizePPLTRAMP;
4695
4696 /*
4697 * The PPLTRAMP pages will be a mix of PPL RX/kernel RO and
4698 * PPL RX/kernel RX. However, all of these pages belong to the PPL.
4699 */
4700 pa_set_range_monitor(monitor_start_pa, monitor_end_pa);
4701#endif
4702
4703 /*
4704 * In order to support DTrace, the save areas for the PPL must be
4705 * writable. This is due to the fact that DTrace will try to update
4706 * register state.
4707 */
4708 if (pmap_ppl_disable) {
4709 vm_offset_t monitor_start_va = phystokv(ppl_cpu_save_area_start);
4710 vm_offset_t monitor_end_va = monitor_start_va + (ppl_cpu_save_area_end - ppl_cpu_save_area_start);
4711
4712 pmap_set_range_xprr_perm(monitor_start_va, monitor_end_va, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
4713 }
4714
4715#if __APRR_SUPPORTED__
4716 /* The trampoline must also be specially protected. */
4717 pmap_set_range_xprr_perm((vm_offset_t)&ppl_trampoline_start, (vm_offset_t)&ppl_trampoline_end, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
4718#endif
4719
4720 if (segSizePPLDATACONST > 0) {
4721 monitor_start_pa = kvtophys(segPPLDATACONSTB);
4722 monitor_end_pa = monitor_start_pa + segSizePPLDATACONST;
4723
ea3f0419 4724 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
c6bf4f31
A
4725 }
4726
4727 /*
4728 * Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
4729 * precaution. The real RW mappings are at a different location with guard pages.
4730 */
ea3f0419 4731 pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_KERN_RO_PERM);
c6bf4f31
A
4732}
4733
4734
4735void
4736pmap_lockdown_ppl(void)
4737{
4738 /* Mark the PPL as being locked down. */
4739
4740#if __APRR_SUPPORTED__
4741 pmap_ppl_locked_down = TRUE;
4742 /* Force a trap into to the PPL to update APRR_EL1. */
4743 pmap_return(FALSE, FALSE);
4744#else
4745#error "XPRR configuration error"
4746#endif /* __APRR_SUPPORTED__ */
4747
4748}
4749#endif /* XNU_MONITOR */
5ba3f43e
A
4750
4751void
4752pmap_virtual_space(
0a7de745
A
4753 vm_offset_t *startp,
4754 vm_offset_t *endp
4755 )
5ba3f43e
A
4756{
4757 *startp = virtual_space_start;
4758 *endp = virtual_space_end;
4759}
4760
4761
4762boolean_t
4763pmap_virtual_region(
4764 unsigned int region_select,
4765 vm_map_offset_t *startp,
4766 vm_map_size_t *size
0a7de745 4767 )
5ba3f43e 4768{
0a7de745
A
4769 boolean_t ret = FALSE;
4770#if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
5ba3f43e
A
4771 if (region_select == 0) {
4772 /*
4773 * In this config, the bootstrap mappings should occupy their own L2
4774 * TTs, as they should be immutable after boot. Having the associated
4775 * TTEs and PTEs in their own pages allows us to lock down those pages,
4776 * while allowing the rest of the kernel address range to be remapped.
4777 */
0a7de745 4778#if (__ARM_VMSA__ > 7)
5ba3f43e
A
4779 *startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
4780#else
4781#error Unsupported configuration
4782#endif
4783 *size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
4784 ret = TRUE;
4785 }
4786#else
4787#if (__ARM_VMSA__ > 7)
4788 unsigned long low_global_vr_mask = 0;
4789 vm_map_size_t low_global_vr_size = 0;
4790#endif
4791
4792 if (region_select == 0) {
0a7de745 4793#if (__ARM_VMSA__ == 7)
5ba3f43e 4794 *startp = gVirtBase & 0xFFC00000;
0a7de745 4795 *size = ((virtual_space_start - (gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
5ba3f43e
A
4796#else
4797 /* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
4798 if (!TEST_PAGE_SIZE_4K) {
4799 *startp = gVirtBase & 0xFFFFFFFFFE000000;
0a7de745 4800 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
5ba3f43e
A
4801 } else {
4802 *startp = gVirtBase & 0xFFFFFFFFFF800000;
0a7de745 4803 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
5ba3f43e
A
4804 }
4805#endif
4806 ret = TRUE;
4807 }
4808 if (region_select == 1) {
4809 *startp = VREGION1_START;
4810 *size = VREGION1_SIZE;
4811 ret = TRUE;
4812 }
0a7de745 4813#if (__ARM_VMSA__ > 7)
5ba3f43e
A
4814 /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
4815 if (!TEST_PAGE_SIZE_4K) {
4816 low_global_vr_mask = 0xFFFFFFFFFE000000;
4817 low_global_vr_size = 0x2000000;
4818 } else {
4819 low_global_vr_mask = 0xFFFFFFFFFF800000;
4820 low_global_vr_size = 0x800000;
4821 }
4822
0a7de745 4823 if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
5ba3f43e
A
4824 *startp = LOW_GLOBAL_BASE_ADDRESS;
4825 *size = low_global_vr_size;
4826 ret = TRUE;
4827 }
4828
4829 if (region_select == 3) {
4830 /* In this config, we allow the bootstrap mappings to occupy the same
4831 * page table pages as the heap.
4832 */
4833 *startp = VM_MIN_KERNEL_ADDRESS;
4834 *size = LOW_GLOBAL_BASE_ADDRESS - *startp;
4835 ret = TRUE;
4836 }
4837#endif
4838#endif
4839 return ret;
4840}
4841
4842unsigned int
4843pmap_free_pages(
4844 void)
4845{
4846 return (unsigned int)atop(avail_end - first_avail);
4847}
4848
4849
4850boolean_t
4851pmap_next_page_hi(
cb323159
A
4852 ppnum_t * pnum,
4853 __unused boolean_t might_free)
5ba3f43e
A
4854{
4855 return pmap_next_page(pnum);
4856}
4857
4858
4859boolean_t
4860pmap_next_page(
4861 ppnum_t *pnum)
4862{
4863 if (first_avail != avail_end) {
4864 *pnum = (ppnum_t)atop(first_avail);
4865 first_avail += PAGE_SIZE;
4866 return TRUE;
4867 }
4868 return FALSE;
4869}
4870
4871
4872/*
4873 * Initialize the pmap module.
4874 * Called by vm_init, to initialize any structures that the pmap
4875 * system needs to map virtual memory.
4876 */
4877void
4878pmap_init(
4879 void)
4880{
4881 /*
4882 * Protect page zero in the kernel map.
4883 * (can be overruled by permanent translation
4884 * table entries at page zero - see arm_vm_init).
4885 */
4886 vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);
4887
4888 pmap_initialized = TRUE;
4889
4890 pmap_zone_init();
4891
4892
4893 /*
4894 * Initialize the pmap object (used for tracking the vm_page_t
4895 * structures for pages we allocate to be page tables in
4896 * pmap_expand()).
4897 */
4898 _vm_object_allocate(mem_size, pmap_object);
4899 pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
4900
4901 pv_init();
4902
4903 /*
cb323159
A
4904 * The values of [hard_]maxproc may have been scaled; make sure
4905 * they do not exceed MAX_ASID.
5ba3f43e 4906 */
cb323159
A
4907 if (maxproc > MAX_ASID) {
4908 maxproc = MAX_ASID;
4909 }
4910 if (hard_maxproc > MAX_ASID) {
4911 hard_maxproc = MAX_ASID;
4912 }
5ba3f43e
A
4913
4914#if CONFIG_PGTRACE
d9a64523 4915 pmap_pgtrace_init();
5ba3f43e
A
4916#endif
4917}
4918
4919boolean_t
4920pmap_verify_free(
4921 ppnum_t ppnum)
4922{
0a7de745 4923 pv_entry_t **pv_h;
5ba3f43e 4924 int pai;
5ba3f43e
A
4925 pmap_paddr_t phys = ptoa(ppnum);
4926
4927 assert(phys != vm_page_fictitious_addr);
4928
0a7de745
A
4929 if (!pa_valid(phys)) {
4930 return FALSE;
4931 }
5ba3f43e
A
4932
4933 pai = (int)pa_index(phys);
4934 pv_h = pai_to_pvh(pai);
4935
0a7de745
A
4936 return pvh_test_type(pv_h, PVH_TYPE_NULL);
4937}
4938
4939#if MACH_ASSERT
4940void
4941pmap_assert_free(ppnum_t ppnum)
4942{
4943 assertf(pmap_verify_free(ppnum), "page = 0x%x", ppnum);
4944 (void)ppnum;
5ba3f43e 4945}
0a7de745 4946#endif
5ba3f43e
A
4947
4948
4949/*
4950 * Initialize zones used by pmap.
4951 */
4952static void
4953pmap_zone_init(
4954 void)
4955{
4956 /*
4957 * Create the zone of physical maps
4958 * (physical-to-virtual entries are allocated separately, not from this zone).
4959 */
0a7de745
A
4960 pmap_zone = zinit((vm_size_t) sizeof(struct pmap), (vm_size_t) sizeof(struct pmap) * 256,
4961 PAGE_SIZE, "pmap");
5ba3f43e
A
4962}
4963
c6bf4f31
A
4964#if XNU_MONITOR
4965MARK_AS_PMAP_TEXT static void
4966pmap_ledger_alloc_init_internal(size_t size)
4967{
4968 pmap_simple_lock(&pmap_ledger_lock);
4969
4970 if (pmap_ledger_alloc_initialized) {
4971 panic("%s: already initialized, "
4972 "size=%lu",
4973 __func__,
4974 size);
4975 }
4976
4977 if (size != sizeof(pmap_ledger_data_t)) {
4978 panic("%s: size mismatch, expected %lu, "
4979 "size=%lu",
4980 __func__, PMAP_LEDGER_DATA_BYTES,
4981 size);
4982 }
4983
4984 pmap_ledger_alloc_initialized = true;
4985
4986 pmap_simple_unlock(&pmap_ledger_lock);
4987}
4988
4989MARK_AS_PMAP_TEXT static ledger_t
4990pmap_ledger_alloc_internal(void)
4991{
4992 pmap_paddr_t paddr;
4993 uint64_t vaddr, vstart, vend;
4994 uint64_t index;
4995
4996 ledger_t new_ledger;
4997 uint64_t array_index;
4998
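 /*
 * If the ledger free list is empty, carve a fresh PPL page into
 * pmap_ledger_t slots, record each slot in pmap_ledger_ptr_array, and
 * remap the page so the kernel can write to it.
 */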
4999 pmap_simple_lock(&pmap_ledger_lock);
5000 if (pmap_ledger_free_list == NULL) {
5001 paddr = pmap_get_free_ppl_page();
5002
5003 if (paddr == 0) {
5004 pmap_simple_unlock(&pmap_ledger_lock);
5005 return NULL;
5006 }
5007
5008 vstart = phystokv(paddr);
5009 vend = vstart + PAGE_SIZE;
5010
5011 for (vaddr = vstart; (vaddr < vend) && ((vaddr + sizeof(pmap_ledger_t)) <= vend); vaddr += sizeof(pmap_ledger_t)) {
5012 pmap_ledger_t *free_ledger;
5013
5014 index = pmap_ledger_ptr_array_free_index++;
5015
5016 if (index >= MAX_PMAP_LEDGERS) {
5017 panic("%s: pmap_ledger_ptr_array is full, index=%llu",
5018 __func__, index);
5019 }
5020
5021 free_ledger = (pmap_ledger_t*)vaddr;
5022
5023 pmap_ledger_ptr_array[index] = free_ledger;
5024 free_ledger->back_ptr = &pmap_ledger_ptr_array[index];
5025
5026 free_ledger->next = pmap_ledger_free_list;
5027 pmap_ledger_free_list = free_ledger;
5028 }
5029
5030 pa_set_range_xprr_perm(paddr, paddr + PAGE_SIZE, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
5031 }
5032
5033 new_ledger = (ledger_t)pmap_ledger_free_list;
5034 pmap_ledger_free_list = pmap_ledger_free_list->next;
5035
5036 array_index = pmap_ledger_validate(new_ledger);
5037 os_ref_init(&pmap_ledger_refcnt[array_index], NULL);
5038
5039 pmap_simple_unlock(&pmap_ledger_lock);
5040
5041 return new_ledger;
5042}
5043
5044MARK_AS_PMAP_TEXT static void
5045pmap_ledger_free_internal(ledger_t ledger)
5046{
5047 pmap_ledger_t* free_ledger;
5048
5049 free_ledger = (pmap_ledger_t*)ledger;
5050
5051 pmap_simple_lock(&pmap_ledger_lock);
5052 uint64_t array_index = pmap_ledger_validate(ledger);
5053
5054 if (os_ref_release(&pmap_ledger_refcnt[array_index]) != 0) {
5055 panic("%s: ledger still referenced, "
5056 "ledger=%p",
5057 __func__,
5058 ledger);
5059 }
5060
5061 free_ledger->next = pmap_ledger_free_list;
5062 pmap_ledger_free_list = free_ledger;
5063 pmap_simple_unlock(&pmap_ledger_lock);
5064}
5065
5066
5067static void
5068pmap_ledger_retain(ledger_t ledger)
5069{
5070 pmap_simple_lock(&pmap_ledger_lock);
5071 uint64_t array_index = pmap_ledger_validate(ledger);
5072 os_ref_retain(&pmap_ledger_refcnt[array_index]);
5073 pmap_simple_unlock(&pmap_ledger_lock);
5074}
5075
5076static void
5077pmap_ledger_release(ledger_t ledger)
5078{
5079 pmap_simple_lock(&pmap_ledger_lock);
5080 uint64_t array_index = pmap_ledger_validate(ledger);
5081 os_ref_release_live(&pmap_ledger_refcnt[array_index]);
5082 pmap_simple_unlock(&pmap_ledger_lock);
5083}
5084
5085void
5086pmap_ledger_alloc_init(size_t size)
5087{
5088 pmap_ledger_alloc_init_ppl(size);
5089}
5090
5091ledger_t
5092pmap_ledger_alloc(void)
5093{
5094 ledger_t retval = NULL;
5095
5096 while ((retval = pmap_ledger_alloc_ppl()) == NULL) {
5097 pmap_alloc_page_for_ppl();
5098 }
5099
5100 return retval;
5101}
5102
5103void
5104pmap_ledger_free(ledger_t ledger)
5105{
5106 pmap_ledger_free_ppl(ledger);
5107}
5108#else /* XNU_MONITOR */
cb323159 5109__dead2
d9a64523
A
5110void
5111pmap_ledger_alloc_init(size_t size)
5112{
5113 panic("%s: unsupported, "
0a7de745
A
5114 "size=%lu",
5115 __func__, size);
d9a64523
A
5116}
5117
cb323159 5118__dead2
d9a64523
A
5119ledger_t
5120pmap_ledger_alloc(void)
5121{
d9a64523 5122 panic("%s: unsupported",
0a7de745 5123 __func__);
d9a64523
A
5124}
5125
cb323159 5126__dead2
d9a64523
A
5127void
5128pmap_ledger_free(ledger_t ledger)
5129{
5130 panic("%s: unsupported, "
0a7de745
A
5131 "ledger=%p",
5132 __func__, ledger);
d9a64523 5133}
c6bf4f31 5134#endif /* XNU_MONITOR */
d9a64523 5135
5ba3f43e
A
5136/*
5137 * Create and return a physical map.
5138 *
5139 * If the size specified for the map
5140 * is zero, the map is an actual physical
5141 * map, and may be referenced by the
5142 * hardware.
5143 *
5144 * If the size specified is non-zero,
5145 * the map will be used in software only, and
5146 * is bounded by that size.
5147 */
MARK_AS_PMAP_TEXT static pmap_t
pmap_create_options_internal(
	ledger_t ledger,
	vm_map_size_t size,
	unsigned int flags)
{
	unsigned i;
	unsigned tte_index_max;
	pmap_t p;
	bool is_64bit = flags & PMAP_CREATE_64BIT;
#if defined(HAS_APPLE_PAC)
	bool disable_jop = flags & PMAP_CREATE_DISABLE_JOP;
#endif /* defined(HAS_APPLE_PAC) */

	/*
	 *	A software use-only map doesn't even need a pmap.
	 */
	if (size != 0) {
		return PMAP_NULL;
	}

#if XNU_MONITOR
	if ((p = pmap_alloc_pmap()) == PMAP_NULL) {
		return PMAP_NULL;
	}

	if (ledger) {
		pmap_ledger_validate(ledger);
		pmap_ledger_retain(ledger);
	}
#else
	/*
	 *	Allocate a pmap struct from the pmap_zone.  Then allocate
	 *	the translation table of the right size for the pmap.
	 */
	if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
		return PMAP_NULL;
	}
#endif

	p->ledger = ledger;

	if (flags & PMAP_CREATE_64BIT) {
		p->min = MACH_VM_MIN_ADDRESS;
		p->max = MACH_VM_MAX_ADDRESS;
	} else {
		p->min = VM_MIN_ADDRESS;
		p->max = VM_MAX_ADDRESS;
	}

#if defined(HAS_APPLE_PAC)
	p->disable_jop = disable_jop;
#endif /* defined(HAS_APPLE_PAC) */

	p->nested_region_true_start = 0;
	p->nested_region_true_end = ~0;

	os_atomic_init(&p->ref_count, 1);
	p->gc_status = 0;
	p->stamp = os_atomic_inc(&pmap_stamp, relaxed);
	p->nx_enabled = TRUE;
	p->is_64bit = is_64bit;
	p->nested = FALSE;
	p->nested_pmap = PMAP_NULL;

#if ARM_PARAMETERIZED_PMAP
	p->pmap_pt_attr = native_pt_attr;
#endif /* ARM_PARAMETERIZED_PMAP */

	if (!pmap_get_pt_ops(p)->alloc_id(p)) {
		goto id_alloc_fail;
	}


	PMAP_LOCK_INIT(p);
	memset((void *) &p->stats, 0, sizeof(p->stats));

	p->tt_entry_free = (tt_entry_t *)0;
	tte_index_max = PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t);

#if (__ARM_VMSA__ == 7)
	p->tte_index_max = tte_index_max;
#endif

#if XNU_MONITOR
	p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, PMAP_TT_ALLOCATE_NOWAIT);
#else
	p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
#endif
	if (!(p->tte)) {
		goto tt1_alloc_fail;
	}

	p->ttep = ml_static_vtop((vm_offset_t)p->tte);
	PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);

	/* nullify the translation table */
	for (i = 0; i < tte_index_max; i++) {
		p->tte[i] = ARM_TTE_TYPE_FAULT;
	}

	FLUSH_PTE_RANGE(p->tte, p->tte + tte_index_max);

	/*
	 *  initialize the rest of the structure
	 */
	p->nested_region_grand_addr = 0x0ULL;
	p->nested_region_subord_addr = 0x0ULL;
	p->nested_region_size = 0x0ULL;
	p->nested_region_asid_bitmap = NULL;
	p->nested_region_asid_bitmap_size = 0x0UL;

	p->nested_has_no_bounds_ref = false;
	p->nested_no_bounds_refcnt = 0;
	p->nested_bounds_set = false;


#if MACH_ASSERT
	p->pmap_stats_assert = TRUE;
	p->pmap_pid = 0;
	strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
#endif /* MACH_ASSERT */
#if DEVELOPMENT || DEBUG
	p->footprint_was_suspended = FALSE;
#endif /* DEVELOPMENT || DEBUG */

	pmap_simple_lock(&pmaps_lock);
	queue_enter(&map_pmap_list, p, pmap_t, pmaps);
	pmap_simple_unlock(&pmaps_lock);

	return p;

tt1_alloc_fail:
	pmap_get_pt_ops(p)->free_id(p);
id_alloc_fail:
#if XNU_MONITOR
	pmap_free_pmap(p);

	if (ledger) {
		pmap_ledger_release(ledger);
	}
#else
	zfree(pmap_zone, p);
#endif
	return PMAP_NULL;
}
5295
pmap_t
pmap_create_options(
	ledger_t ledger,
	vm_map_size_t size,
	unsigned int flags)
{
	pmap_t pmap;

	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);

	ledger_reference(ledger);

#if XNU_MONITOR
	/*
	 * TODO: It should be valid for pmap_create_options_internal to fail; we could
	 * be out of ASIDs.
	 */
	while ((pmap = pmap_create_options_ppl(ledger, size, flags)) == PMAP_NULL) {
		pmap_alloc_page_for_ppl();
	}
#else
	pmap = pmap_create_options_internal(ledger, size, flags);
#endif

	if (pmap == PMAP_NULL) {
		ledger_dereference(ledger);
	}

	PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);

	return pmap;
}
5328
#if XNU_MONITOR
/*
 * This symbol remains in place when the PPL is enabled so that the dispatch
 * table does not change from development to release configurations.
 */
#endif
#if MACH_ASSERT || XNU_MONITOR
d9a64523 5336MARK_AS_PMAP_TEXT static void
5ba3f43e
A
5337pmap_set_process_internal(
5338 __unused pmap_t pmap,
5339 __unused int pid,
5340 __unused char *procname)
5341{
5342#if MACH_ASSERT
5343 if (pmap == NULL) {
5344 return;
5345 }
5346
d9a64523
A
5347 VALIDATE_PMAP(pmap);
5348
5ba3f43e 5349 pmap->pmap_pid = pid;
0a7de745 5350 strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
d9a64523 5351 if (pmap_ledgers_panic_leeway) {
5c9f4661
A
5352 /*
5353 * XXX FBDP
d9a64523
A
5354 * Some processes somehow trigger some issues that make
5355 * the pmap stats and ledgers go off track, causing
5c9f4661 5356 * some assertion failures and ledger panics.
d9a64523
A
5357 * Turn off the sanity checks if we allow some ledger leeway
5358 * because of that. We'll still do a final check in
5359 * pmap_check_ledgers() for discrepancies larger than the
5360 * allowed leeway after the address space has been fully
5361 * cleaned up.
5c9f4661
A
5362 */
5363 pmap->pmap_stats_assert = FALSE;
5364 ledger_disable_panic_on_negative(pmap->ledger,
0a7de745 5365 task_ledgers.phys_footprint);
5c9f4661 5366 ledger_disable_panic_on_negative(pmap->ledger,
0a7de745 5367 task_ledgers.internal);
5c9f4661 5368 ledger_disable_panic_on_negative(pmap->ledger,
0a7de745 5369 task_ledgers.internal_compressed);
5c9f4661 5370 ledger_disable_panic_on_negative(pmap->ledger,
0a7de745 5371 task_ledgers.iokit_mapped);
5c9f4661 5372 ledger_disable_panic_on_negative(pmap->ledger,
0a7de745 5373 task_ledgers.alternate_accounting);
5c9f4661 5374 ledger_disable_panic_on_negative(pmap->ledger,
0a7de745 5375 task_ledgers.alternate_accounting_compressed);
5c9f4661
A
5376 }
5377#endif /* MACH_ASSERT */
5ba3f43e 5378}
c6bf4f31 5379#endif /* MACH_ASSERT || XNU_MONITOR */
5ba3f43e
A
5380
5381#if MACH_ASSERT
5382void
5383pmap_set_process(
5384 pmap_t pmap,
5385 int pid,
5386 char *procname)
5387{
c6bf4f31
A
5388#if XNU_MONITOR
5389 pmap_set_process_ppl(pmap, pid, procname);
5390#else
5ba3f43e 5391 pmap_set_process_internal(pmap, pid, procname);
c6bf4f31 5392#endif
5ba3f43e 5393}
cb323159 5394#endif /* MACH_ASSERT */
5ba3f43e
A
5395
/*
 * We maintain stats and ledgers so that a task's physical footprint is:
 * phys_footprint = ((internal - alternate_accounting)
 *                   + (internal_compressed - alternate_accounting_compressed)
 *                   + iokit_mapped
 *                   + purgeable_nonvolatile
 *                   + purgeable_nonvolatile_compressed
 *                   + page_table)
 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
 */
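/*
 * Worked example (illustrative numbers only): with internal = 10 pages,
 * alternate_accounting = 2, internal_compressed = 4,
 * alternate_accounting_compressed = 1, iokit_mapped = 3,
 * purgeable_nonvolatile = 1, purgeable_nonvolatile_compressed = 0 and
 * page_table = 2, the formula above gives
 * phys_footprint = (10 - 2) + (4 - 1) + 3 + 1 + 0 + 2 = 17 pages.
 */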
5406
5ba3f43e
A
5407
5408/*
5409 * Retire the given physical map from service.
5410 * Should only be called if the map contains
5411 * no valid mappings.
5412 */
d9a64523 5413MARK_AS_PMAP_TEXT static void
5ba3f43e
A
5414pmap_destroy_internal(
5415 pmap_t pmap)
5416{
0a7de745 5417 if (pmap == PMAP_NULL) {
5ba3f43e 5418 return;
0a7de745 5419 }
5ba3f43e 5420
d9a64523
A
5421 VALIDATE_PMAP(pmap);
5422
cb323159
A
5423 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5424
5425 int32_t ref_count = os_atomic_dec(&pmap->ref_count, relaxed);
0a7de745 5426 if (ref_count > 0) {
5ba3f43e 5427 return;
0a7de745 5428 } else if (ref_count < 0) {
d9a64523 5429 panic("pmap %p: refcount underflow", pmap);
0a7de745 5430 } else if (pmap == kernel_pmap) {
d9a64523 5431 panic("pmap %p: attempt to destroy kernel pmap", pmap);
0a7de745 5432 }
5ba3f43e 5433
d9a64523 5434 pt_entry_t *ttep;
cb323159
A
5435
5436#if (__ARM_VMSA__ > 7)
5437 pmap_unmap_sharedpage(pmap);
5438#endif /* (__ARM_VMSA__ > 7) */
5ba3f43e 5439
d9a64523 5440 pmap_simple_lock(&pmaps_lock);
5ba3f43e
A
5441 while (pmap->gc_status & PMAP_GC_INFLIGHT) {
5442 pmap->gc_status |= PMAP_GC_WAIT;
0a7de745 5443 assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
d9a64523 5444 pmap_simple_unlock(&pmaps_lock);
0a7de745 5445 (void) thread_block(THREAD_CONTINUE_NULL);
d9a64523 5446 pmap_simple_lock(&pmaps_lock);
5ba3f43e 5447 }
5ba3f43e 5448 queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
d9a64523
A
5449 pmap_simple_unlock(&pmaps_lock);
5450
d9a64523 5451 pmap_trim_self(pmap);
5ba3f43e
A
5452
5453 /*
5454 * Free the memory maps, then the
5455 * pmap structure.
5456 */
cb323159
A
5457#if (__ARM_VMSA__ == 7)
5458 unsigned int i = 0;
5459
5ba3f43e
A
5460 PMAP_LOCK(pmap);
5461 for (i = 0; i < pmap->tte_index_max; i++) {
5462 ttep = &pmap->tte[i];
5463 if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5464 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
5ba3f43e
A
5465 }
5466 }
5467 PMAP_UNLOCK(pmap);
cb323159
A
5468#else /* (__ARM_VMSA__ == 7) */
5469 vm_map_address_t c;
5470 unsigned int level;
5471
5472 for (level = pt_attr->pta_max_level - 1; level >= pt_attr->pta_root_level; level--) {
5473 for (c = pmap->min; c < pmap->max; c += pt_attr_ln_size(pt_attr, level)) {
5474 ttep = pmap_ttne(pmap, level, c);
5475
5476 if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5477 PMAP_LOCK(pmap);
5478 pmap_tte_deallocate(pmap, ttep, level);
5479 PMAP_UNLOCK(pmap);
5480 }
5481 }
5482 }
5483#endif /* (__ARM_VMSA__ == 7) */
5484
5485
5ba3f43e 5486
d9a64523 5487 if (pmap->tte) {
cb323159 5488#if (__ARM_VMSA__ == 7)
0a7de745 5489 pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
cb323159
A
5490 pmap->tte_index_max = 0;
5491#else /* (__ARM_VMSA__ == 7) */
5492 pmap_tt1_deallocate(pmap, pmap->tte, PMAP_ROOT_ALLOC_SIZE, 0);
5493#endif /* (__ARM_VMSA__ == 7) */
d9a64523
A
5494 pmap->tte = (tt_entry_t *) NULL;
5495 pmap->ttep = 0;
d9a64523 5496 }
cb323159 5497
d9a64523 5498 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
5ba3f43e 5499
cb323159
A
5500 pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
5501 sync_tlb_flush();
5502
d9a64523 5503 /* return its asid to the pool */
cb323159 5504 pmap_get_pt_ops(pmap)->free_id(pmap);
d9a64523 5505 pmap_check_ledgers(pmap);
5ba3f43e 5506
d9a64523 5507 if (pmap->nested_region_asid_bitmap) {
c6bf4f31
A
5508#if XNU_MONITOR
5509 pmap_pages_free(kvtophys((vm_offset_t)(pmap->nested_region_asid_bitmap)), PAGE_SIZE);
5510#else
0a7de745 5511 kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
c6bf4f31 5512#endif
d9a64523 5513 }
5ba3f43e 5514
c6bf4f31
A
5515#if XNU_MONITOR
5516 if (pmap->ledger) {
5517 pmap_ledger_release(pmap->ledger);
5518 }
5519
5520 pmap_free_pmap(pmap);
5521#else
d9a64523 5522 zfree(pmap_zone, pmap);
c6bf4f31 5523#endif
5ba3f43e
A
5524}
5525
5526void
5527pmap_destroy(
5528 pmap_t pmap)
5529{
d9a64523
A
5530 ledger_t ledger;
5531
cb323159 5532 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
d9a64523
A
5533
5534 ledger = pmap->ledger;
5ba3f43e 5535
c6bf4f31
A
5536#if XNU_MONITOR
5537 pmap_destroy_ppl(pmap);
5538
5539 pmap_check_ledger_fields(ledger);
5540#else
5ba3f43e 5541 pmap_destroy_internal(pmap);
c6bf4f31 5542#endif
5ba3f43e 5543
d9a64523
A
5544 ledger_dereference(ledger);
5545
5546 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
5ba3f43e
A
5547}
5548
5549
5550/*
5551 * Add a reference to the specified pmap.
5552 */
d9a64523 5553MARK_AS_PMAP_TEXT static void
5ba3f43e
A
5554pmap_reference_internal(
5555 pmap_t pmap)
5556{
5557 if (pmap != PMAP_NULL) {
d9a64523 5558 VALIDATE_PMAP(pmap);
cb323159 5559 os_atomic_inc(&pmap->ref_count, relaxed);
5ba3f43e
A
5560 }
5561}
5562
5563void
5564pmap_reference(
5565 pmap_t pmap)
5566{
c6bf4f31
A
5567#if XNU_MONITOR
5568 pmap_reference_ppl(pmap);
5569#else
5ba3f43e 5570 pmap_reference_internal(pmap);
c6bf4f31 5571#endif
5ba3f43e
A
5572}
5573
5574static tt_entry_t *
5575pmap_tt1_allocate(
0a7de745
A
5576 pmap_t pmap,
5577 vm_size_t size,
5578 unsigned option)
5ba3f43e 5579{
cb323159 5580 tt_entry_t *tt1 = NULL;
0a7de745
A
5581 tt_free_entry_t *tt1_free;
5582 pmap_paddr_t pa;
5583 vm_address_t va;
5584 vm_address_t va_end;
5585 kern_return_t ret;
5ba3f43e 5586
cb323159 5587 pmap_simple_lock(&tt1_lock);
5ba3f43e 5588 if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
0a7de745
A
5589 free_page_size_tt_count--;
5590 tt1 = (tt_entry_t *)free_page_size_tt_list;
5591 free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
cb323159 5592 } else if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
0a7de745
A
5593 free_two_page_size_tt_count--;
5594 tt1 = (tt_entry_t *)free_two_page_size_tt_list;
5595 free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
cb323159 5596 } else if ((size < PAGE_SIZE) && (free_tt_count != 0)) {
0a7de745
A
5597 free_tt_count--;
5598 tt1 = (tt_entry_t *)free_tt_list;
5599 free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
cb323159
A
5600 }
5601
5602 pmap_simple_unlock(&tt1_lock);
5603
5604 if (tt1 != NULL) {
0a7de745
A
5605 pmap_tt_ledger_credit(pmap, size);
5606 return (tt_entry_t *)tt1;
5ba3f43e
A
5607 }
5608
5ba3f43e
A
5609 ret = pmap_pages_alloc(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));
5610
0a7de745 5611 if (ret == KERN_RESOURCE_SHORTAGE) {
5ba3f43e 5612 return (tt_entry_t *)0;
0a7de745 5613 }
5ba3f43e 5614
c6bf4f31
A
5615#if XNU_MONITOR
5616 assert(pa);
5617#endif
5ba3f43e
A
5618
5619 if (size < PAGE_SIZE) {
cb323159
A
5620 va = phystokv(pa) + size;
5621 tt_free_entry_t *local_free_list = (tt_free_entry_t*)va;
5622 tt_free_entry_t *next_free = NULL;
5623 for (va_end = phystokv(pa) + PAGE_SIZE; va < va_end; va = va + size) {
5ba3f43e 5624 tt1_free = (tt_free_entry_t *)va;
cb323159
A
5625 tt1_free->next = next_free;
5626 next_free = tt1_free;
5ba3f43e 5627 }
cb323159
A
5628 pmap_simple_lock(&tt1_lock);
5629 local_free_list->next = free_tt_list;
5630 free_tt_list = next_free;
5631 free_tt_count += ((PAGE_SIZE / size) - 1);
0a7de745 5632 if (free_tt_count > free_tt_max) {
5ba3f43e 5633 free_tt_max = free_tt_count;
0a7de745 5634 }
cb323159 5635 pmap_simple_unlock(&tt1_lock);
5ba3f43e
A
5636 }
5637
5638 /* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
5639 * Depending on the device, this can vary between 512b and 16K. */
5640 OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
5641 OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
5642 pmap_tt_ledger_credit(pmap, size);
5643
5644 return (tt_entry_t *) phystokv(pa);
5645}
5646
5647static void
5648pmap_tt1_deallocate(
5649 pmap_t pmap,
5650 tt_entry_t *tt,
5651 vm_size_t size,
5652 unsigned option)
5653{
0a7de745 5654 tt_free_entry_t *tt_entry;
5ba3f43e
A
5655
5656 tt_entry = (tt_free_entry_t *)tt;
cb323159
A
5657 assert(not_in_kdp);
5658 pmap_simple_lock(&tt1_lock);
5ba3f43e 5659
0a7de745 5660 if (size < PAGE_SIZE) {
5ba3f43e 5661 free_tt_count++;
0a7de745 5662 if (free_tt_count > free_tt_max) {
5ba3f43e 5663 free_tt_max = free_tt_count;
0a7de745 5664 }
5ba3f43e
A
5665 tt_entry->next = free_tt_list;
5666 free_tt_list = tt_entry;
5667 }
5668
5669 if (size == PAGE_SIZE) {
5670 free_page_size_tt_count++;
0a7de745 5671 if (free_page_size_tt_count > free_page_size_tt_max) {
5ba3f43e 5672 free_page_size_tt_max = free_page_size_tt_count;
0a7de745 5673 }
5ba3f43e
A
5674 tt_entry->next = free_page_size_tt_list;
5675 free_page_size_tt_list = tt_entry;
5676 }
5677
0a7de745 5678 if (size == 2 * PAGE_SIZE) {
5ba3f43e 5679 free_two_page_size_tt_count++;
0a7de745 5680 if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
5ba3f43e 5681 free_two_page_size_tt_max = free_two_page_size_tt_count;
0a7de745 5682 }
5ba3f43e
A
5683 tt_entry->next = free_two_page_size_tt_list;
5684 free_two_page_size_tt_list = tt_entry;
5685 }
5686
cb323159
A
5687 if (option & PMAP_TT_DEALLOCATE_NOBLOCK) {
5688 pmap_simple_unlock(&tt1_lock);
5ba3f43e
A
5689 pmap_tt_ledger_debit(pmap, size);
5690 return;
5691 }
5692
5693 while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
5ba3f43e
A
5694 free_page_size_tt_count--;
5695 tt = (tt_entry_t *)free_page_size_tt_list;
5696 free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
5697
cb323159 5698 pmap_simple_unlock(&tt1_lock);
5ba3f43e
A
5699
5700 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);
5701
5702 OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
5703
cb323159 5704 pmap_simple_lock(&tt1_lock);
5ba3f43e
A
5705 }
5706
5707 while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
5708 free_two_page_size_tt_count--;
5709 tt = (tt_entry_t *)free_two_page_size_tt_list;
5710 free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
5711
cb323159 5712 pmap_simple_unlock(&tt1_lock);
5ba3f43e 5713
0a7de745 5714 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);
5ba3f43e
A
5715
5716 OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
5717
cb323159 5718 pmap_simple_lock(&tt1_lock);
5ba3f43e 5719 }
cb323159 5720 pmap_simple_unlock(&tt1_lock);
5ba3f43e
A
5721 pmap_tt_ledger_debit(pmap, size);
5722}
5723
5724static kern_return_t
5725pmap_tt_allocate(
5726 pmap_t pmap,
5727 tt_entry_t **ttp,
5728 unsigned int level,
5729 unsigned int options)
5730{
5731 pmap_paddr_t pa;
5732 *ttp = NULL;
5733
5734 PMAP_LOCK(pmap);
0a7de745 5735 if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
5ba3f43e
A
5736 tt_free_entry_t *tt_free_next;
5737
5738 tt_free_next = ((tt_free_entry_t *)pmap->tt_entry_free)->next;
5739 *ttp = (tt_entry_t *)pmap->tt_entry_free;
5740 pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
5741 }
5742 PMAP_UNLOCK(pmap);
5743
5744 if (*ttp == NULL) {
0a7de745 5745 pt_desc_t *ptdp;
5ba3f43e
A
5746
5747 /*
5748 * Allocate a VM page for the level x page table entries.
5749 */
5750 while (pmap_pages_alloc(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
0a7de745
A
5751 if (options & PMAP_OPTIONS_NOWAIT) {
5752 return KERN_RESOURCE_SHORTAGE;
5753 }
5754 VM_PAGE_WAIT();
5755 }
5756
5757 while ((ptdp = ptd_alloc(pmap, false)) == NULL) {
5758 if (options & PMAP_OPTIONS_NOWAIT) {
5759 pmap_pages_free(pa, PAGE_SIZE);
5ba3f43e
A
5760 return KERN_RESOURCE_SHORTAGE;
5761 }
5762 VM_PAGE_WAIT();
5763 }
5764
5765 if (level < PMAP_TT_MAX_LEVEL) {
5766 OSAddAtomic64(1, &alloc_ttepages_count);
5767 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
5768 } else {
5769 OSAddAtomic64(1, &alloc_ptepages_count);
5770 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
5771 }
5772
5773 pmap_tt_ledger_credit(pmap, PAGE_SIZE);
5774
5775 PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);
5776
d9a64523 5777 pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);
5ba3f43e
A
5778
5779 __unreachable_ok_push
5780 if (TEST_PAGE_RATIO_4) {
0a7de745
A
5781 vm_address_t va;
5782 vm_address_t va_end;
5ba3f43e
A
5783
5784 PMAP_LOCK(pmap);
5785
0a7de745 5786 for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + ARM_PGBYTES; va < va_end; va = va + ARM_PGBYTES) {
5ba3f43e
A
5787 ((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
5788 pmap->tt_entry_free = (tt_entry_t *)va;
5789 }
5790 PMAP_UNLOCK(pmap);
5791 }
5792 __unreachable_ok_pop
5793
5794 *ttp = (tt_entry_t *)phystokv(pa);
5795 }
5796
c6bf4f31
A
5797#if XNU_MONITOR
5798 assert(*ttp);
5799#endif
5ba3f43e
A
5800
5801 return KERN_SUCCESS;
5802}
5803
5804
5805static void
5806pmap_tt_deallocate(
5807 pmap_t pmap,
5808 tt_entry_t *ttp,
5809 unsigned int level)
5810{
5811 pt_desc_t *ptdp;
5812 unsigned pt_acc_cnt;
5813 unsigned i, max_pt_index = PAGE_RATIO;
0a7de745 5814 vm_offset_t free_page = 0;
5ba3f43e
A
5815
5816 PMAP_LOCK(pmap);
5817
5818 ptdp = ptep_get_ptd((vm_offset_t)ttp);
5819
cb323159 5820 ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].va = (vm_offset_t)-1;
5ba3f43e 5821
cb323159
A
5822 if ((level < PMAP_TT_MAX_LEVEL) && (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt == PT_DESC_REFCOUNT)) {
5823 ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
0a7de745 5824 }
5ba3f43e 5825
cb323159
A
5826 if (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt != 0) {
5827 panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt);
0a7de745 5828 }
5ba3f43e 5829
cb323159 5830 ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
d9a64523 5831
0a7de745 5832 for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
cb323159 5833 pt_acc_cnt += ptdp->ptd_info[i].refcnt;
0a7de745 5834 }
5ba3f43e
A
5835
5836 if (pt_acc_cnt == 0) {
5837 tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
5838 unsigned pt_free_entry_cnt = 1;
5839
5840 while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
5841 tt_free_entry_t *tt_free_list_next;
5842
5843 tt_free_list_next = tt_free_list->next;
5844 if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
5845 pt_free_entry_cnt++;
5846 }
5847 tt_free_list = tt_free_list_next;
5848 }
5849 if (pt_free_entry_cnt == max_pt_index) {
5850 tt_free_entry_t *tt_free_list_cur;
5851
5852 free_page = (vm_offset_t)ttp & ~PAGE_MASK;
5853 tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
5854 tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;
5855
5856 while (tt_free_list_cur) {
5857 tt_free_entry_t *tt_free_list_next;
5858
5859 tt_free_list_next = tt_free_list_cur->next;
5860 if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
5861 tt_free_list->next = tt_free_list_next->next;
5862 } else {
5863 tt_free_list = tt_free_list_next;
5864 }
5865 tt_free_list_cur = tt_free_list_next;
5866 }
5867 } else {
5868 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
5869 pmap->tt_entry_free = ttp;
5870 }
5871 } else {
5872 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
5873 pmap->tt_entry_free = ttp;
5874 }
5875
5876 PMAP_UNLOCK(pmap);
5877
5878 if (free_page != 0) {
5ba3f43e
A
5879 ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
5880 *(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
5881 pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
0a7de745 5882 if (level < PMAP_TT_MAX_LEVEL) {
5ba3f43e 5883 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
0a7de745 5884 } else {
5ba3f43e 5885 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
0a7de745 5886 }
5ba3f43e
A
5887 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
5888 pmap_tt_ledger_debit(pmap, PAGE_SIZE);
5889 }
5890}
5891
5892static void
d9a64523 5893pmap_tte_remove(
5ba3f43e
A
5894 pmap_t pmap,
5895 tt_entry_t *ttep,
5896 unsigned int level)
5897{
d9a64523 5898 tt_entry_t tte = *ttep;
5ba3f43e
A
5899
5900 if (tte == 0) {
5901 panic("pmap_tte_deallocate(): null tt_entry ttep==%p\n", ttep);
5902 }
5903
cb323159 5904 if (((level + 1) == PMAP_TT_MAX_LEVEL) && (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt != 0)) {
5ba3f43e 5905 panic("pmap_tte_deallocate(): pmap=%p ttep=%p ptd=%p refcnt=0x%x \n", pmap, ttep,
cb323159 5906 tte_get_ptd(tte), (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt));
5ba3f43e
A
5907 }
5908
0a7de745 5909#if (__ARM_VMSA__ == 7)
5ba3f43e
A
5910 {
5911 tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
5912 unsigned i;
5913
0a7de745 5914 for (i = 0; i < 4; i++, ttep_4M++) {
5ba3f43e 5915 *ttep_4M = (tt_entry_t) 0;
0a7de745 5916 }
d9a64523 5917 FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
5ba3f43e
A
5918 }
5919#else
5920 *ttep = (tt_entry_t) 0;
d9a64523 5921 FLUSH_PTE_STRONG(ttep);
5ba3f43e 5922#endif
d9a64523 5923}
5ba3f43e 5924
d9a64523
A
5925static void
5926pmap_tte_deallocate(
5927 pmap_t pmap,
5928 tt_entry_t *ttep,
5929 unsigned int level)
5930{
5931 pmap_paddr_t pa;
5932 tt_entry_t tte;
5933
5934 PMAP_ASSERT_LOCKED(pmap);
5935
5936 tte = *ttep;
5937
5938#if MACH_ASSERT
5939 if (tte_get_ptd(tte)->pmap != pmap) {
5940 panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
0a7de745 5941 tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
d9a64523 5942 }
5ba3f43e 5943#endif
d9a64523
A
5944
5945 pmap_tte_remove(pmap, ttep, level);
5946
5ba3f43e 5947 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
0a7de745 5948#if MACH_ASSERT
5ba3f43e 5949 {
0a7de745
A
5950 pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~ARM_PGMASK));
5951 unsigned i;
5ba3f43e 5952
0a7de745 5953 for (i = 0; i < (ARM_PGBYTES / sizeof(*pte_p)); i++, pte_p++) {
cb323159 5954 if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
5ba3f43e 5955 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
0a7de745 5956 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
5ba3f43e
A
5957 } else if (((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT) {
5958 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
0a7de745 5959 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
5ba3f43e
A
5960 }
5961 }
5962 }
5963#endif
5964 PMAP_UNLOCK(pmap);
5965
5966 /* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
5967 * aligned on 1K boundaries. We clear the surrounding "chunk" of 4 TTEs above. */
5968 pa = tte_to_pa(tte) & ~ARM_PGMASK;
0a7de745 5969 pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
5ba3f43e
A
5970 PMAP_LOCK(pmap);
5971 }
5972}
5973
/*
 *	Remove a range of hardware page-table entries.
 *	The entries given are the first (inclusive)
 *	and last (exclusive) entries for the VM pages.
 *	The virtual address is the va for the first pte.
 *
 *	The pmap must be locked.
 *	If the pmap is not the kernel pmap, the range must lie
 *	entirely within one pte-page.  This is NOT checked.
 *	Assumes that the pte-page exists.
 *
 *	Returns the number of PTEs changed, and sets *rmv_cnt
 *	to the number of SPTEs changed.
 */
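/*
 * For example, pmap_remove_options_internal() below derives the two bounds
 * from a [start, end) VA range as roughly:
 *
 *	bpte = &pte_p[ptenum(start)];
 *	epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));
 *
 * so epte points one past the last PTE to be removed.
 */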
5988static int
5989pmap_remove_range(
5990 pmap_t pmap,
5991 vm_map_address_t va,
5992 pt_entry_t *bpte,
5993 pt_entry_t *epte,
5994 uint32_t *rmv_cnt)
5995{
cb323159
A
5996 bool need_strong_sync = false;
5997 int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
5998 &need_strong_sync, PMAP_OPTIONS_REMOVE);
5999 if (num_changed > 0) {
6000 PMAP_UPDATE_TLBS(pmap, va, va + (PAGE_SIZE * (epte - bpte)), need_strong_sync);
6001 }
6002 return num_changed;
5ba3f43e
A
6003}
6004
d9a64523
A
6005
6006#ifdef PVH_FLAG_EXEC
6007
/*
 * Update the access protection bits of the physical aperture mapping for a page.
 * This is useful, for example, in guaranteeing that a verified executable page
 * has no writable mappings anywhere in the system, including the physical
 * aperture.  flush_tlb_async can be set to true to avoid unnecessary TLB
 * synchronization overhead in cases where the call to this function is
 * guaranteed to be followed by other TLB operations.
 */
6016static void
6017pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
6018{
6019#if __ARM_PTE_PHYSMAP__
6020 ASSERT_PVH_LOCKED(pai);
6021 vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
6022 pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
6023
6024 pt_entry_t tmplate = *pte_p;
0a7de745 6025 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
d9a64523 6026 return;
0a7de745 6027 }
d9a64523
A
6028 tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
6029#if (__ARM_VMSA__ > 7)
6030 if (tmplate & ARM_PTE_HINT_MASK) {
6031 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
0a7de745 6032 __func__, pte_p, (void *)kva, tmplate);
d9a64523
A
6033 }
6034#endif
6035 WRITE_PTE_STRONG(pte_p, tmplate);
6036 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
0a7de745 6037 if (!flush_tlb_async) {
d9a64523 6038 sync_tlb_flush();
0a7de745 6039 }
d9a64523
A
6040#endif
6041}
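/*
 * For example, pmap_remove_pv() restores the aperture mapping to AP_RWNA with
 * flush_tlb_async == FALSE once the last mapping of an executable page goes
 * away, so the TLB is synchronized immediately, while
 * pmap_page_protect_options_internal() passes its tlb_flush_needed flag and
 * issues a single sync_tlb_flush() after processing the whole PV list.
 */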
6042
6043#endif /* defined(PVH_FLAG_EXEC) */
6044
6045static void
6046pmap_remove_pv(
6047 pmap_t pmap,
6048 pt_entry_t *cpte,
6049 int pai,
6050 int *num_internal,
6051 int *num_alt_internal,
6052 int *num_reusable,
6053 int *num_external)
6054{
6055 pv_entry_t **pv_h, **pve_pp;
6056 pv_entry_t *pve_p;
6057
6058 ASSERT_PVH_LOCKED(pai);
6059 pv_h = pai_to_pvh(pai);
6060 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
6061
c6bf4f31
A
6062#if XNU_MONITOR
6063 if (pvh_flags & PVH_FLAG_LOCKDOWN) {
6064 panic("%d is locked down (%#lx), cannot remove", pai, pvh_flags);
6065 }
6066#endif
d9a64523
A
6067
6068 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
0a7de745 6069 if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0)) {
d9a64523 6070 panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
0a7de745 6071 }
d9a64523
A
6072 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
6073 assert(IS_INTERNAL_PAGE(pai));
6074 (*num_internal)++;
6075 (*num_alt_internal)++;
6076 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
6077 } else if (IS_INTERNAL_PAGE(pai)) {
6078 if (IS_REUSABLE_PAGE(pai)) {
6079 (*num_reusable)++;
6080 } else {
6081 (*num_internal)++;
6082 }
6083 } else {
6084 (*num_external)++;
6085 }
6086 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
6087 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
d9a64523
A
6088 pve_pp = pv_h;
6089 pve_p = pvh_list(pv_h);
6090
6091 while (pve_p != PV_ENTRY_NULL &&
0a7de745 6092 (pve_get_ptep(pve_p) != cpte)) {
d9a64523
A
6093 pve_pp = pve_link_field(pve_p);
6094 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
6095 }
6096
0a7de745 6097 if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0)) {
d9a64523 6098 panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
0a7de745 6099 }
d9a64523 6100
5ba3f43e 6101#if MACH_ASSERT
d9a64523
A
6102 if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
6103 pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
6104 while (check_pve_p != PV_ENTRY_NULL) {
6105 if (pve_get_ptep(check_pve_p) == cpte) {
6106 panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
6107 __func__, cpte, pmap, pv_h, pve_p, pai);
6108 }
6109 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
6110 }
6111 }
6112#endif
6113
6114 if (IS_ALTACCT_PAGE(pai, pve_p)) {
6115 assert(IS_INTERNAL_PAGE(pai));
6116 (*num_internal)++;
6117 (*num_alt_internal)++;
6118 CLR_ALTACCT_PAGE(pai, pve_p);
6119 } else if (IS_INTERNAL_PAGE(pai)) {
6120 if (IS_REUSABLE_PAGE(pai)) {
6121 (*num_reusable)++;
6122 } else {
6123 (*num_internal)++;
6124 }
6125 } else {
6126 (*num_external)++;
6127 }
6128
6129 pvh_remove(pv_h, pve_pp, pve_p);
6130 pv_free(pve_p);
0a7de745 6131 if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
d9a64523 6132 pvh_set_flags(pv_h, pvh_flags);
0a7de745 6133 }
d9a64523
A
6134 } else {
6135 panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
0a7de745 6136 __func__, *pv_h, cpte, pmap, pv_h, pai);
d9a64523
A
6137 }
6138
6139#ifdef PVH_FLAG_EXEC
0a7de745 6140 if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
d9a64523 6141 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
0a7de745 6142 }
d9a64523
A
6143#endif
6144}
5ba3f43e
A
6145
6146static int
6147pmap_remove_range_options(
6148 pmap_t pmap,
6149 vm_map_address_t va,
6150 pt_entry_t *bpte,
6151 pt_entry_t *epte,
6152 uint32_t *rmv_cnt,
cb323159 6153 bool *need_strong_sync __unused,
5ba3f43e
A
6154 int options)
6155{
6156 pt_entry_t *cpte;
6157 int num_removed, num_unwired;
6158 int num_pte_changed;
6159 int pai = 0;
6160 pmap_paddr_t pa;
0a7de745
A
6161 int num_external, num_internal, num_reusable;
6162 int num_alt_internal;
6163 uint64_t num_compressed, num_alt_compressed;
5ba3f43e
A
6164
6165 PMAP_ASSERT_LOCKED(pmap);
6166
eb6b6ca3
A
6167 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6168 uint64_t pmap_page_size = pt_attr_leaf_size(pt_attr);
6169
6170 if (__improbable((uintptr_t)epte > (((uintptr_t)bpte + pmap_page_size) & ~(pmap_page_size - 1)))) {
6171 panic("%s: PTE range [%p, %p) in pmap %p crosses page table boundary", __func__, bpte, epte, pmap);
6172 }
6173
5ba3f43e
A
6174 num_removed = 0;
6175 num_unwired = 0;
6176 num_pte_changed = 0;
6177 num_external = 0;
6178 num_internal = 0;
6179 num_reusable = 0;
6180 num_compressed = 0;
6181 num_alt_internal = 0;
6182 num_alt_compressed = 0;
6183
6184 for (cpte = bpte; cpte < epte;
0a7de745 6185 cpte += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
5ba3f43e 6186 pt_entry_t spte;
0a7de745 6187 boolean_t managed = FALSE;
5ba3f43e
A
6188
6189 spte = *cpte;
6190
6191#if CONFIG_PGTRACE
0a7de745 6192 if (pgtrace_enabled) {
d9a64523 6193 pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
0a7de745 6194 }
5ba3f43e
A
6195#endif
6196
6197 while (!managed) {
6198 if (pmap != kernel_pmap &&
6199 (options & PMAP_OPTIONS_REMOVE) &&
cb323159 6200 (ARM_PTE_IS_COMPRESSED(spte, cpte))) {
5ba3f43e
A
6201 /*
6202 * "pmap" must be locked at this point,
6203 * so this should not race with another
6204 * pmap_remove_range() or pmap_enter().
6205 */
6206
6207 /* one less "compressed"... */
6208 num_compressed++;
6209 if (spte & ARM_PTE_COMPRESSED_ALT) {
6210 /* ... but it used to be "ALTACCT" */
6211 num_alt_compressed++;
6212 }
6213
6214 /* clear marker */
6215 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
6216 /*
6217 * "refcnt" also accounts for
6218 * our "compressed" markers,
6219 * so let's update it here.
6220 */
cb323159 6221 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
5ba3f43e 6222 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
0a7de745 6223 }
5ba3f43e
A
6224 spte = *cpte;
6225 }
6226 /*
6227 * It may be possible for the pte to transition from managed
6228 * to unmanaged in this timeframe; for now, elide the assert.
6229 * We should break out as a consequence of checking pa_valid.
6230 */
6231 //assert(!ARM_PTE_IS_COMPRESSED(spte));
6232 pa = pte_to_pa(spte);
6233 if (!pa_valid(pa)) {
c6bf4f31
A
6234#if XNU_MONITOR || HAS_MILD_DSB
6235 unsigned int cacheattr = pmap_cache_attributes((ppnum_t)atop(pa));
6236#endif
6237#if XNU_MONITOR
6238 if (!pmap_ppl_disable && (cacheattr & PP_ATTR_MONITOR)) {
6239 panic("%s: attempt to remove mapping of PPL-protected I/O address 0x%llx", __func__, (uint64_t)pa);
6240 }
6241#endif
5ba3f43e
A
6242 break;
6243 }
6244 pai = (int)pa_index(pa);
6245 LOCK_PVH(pai);
6246 spte = *cpte;
6247 pa = pte_to_pa(spte);
6248 if (pai == (int)pa_index(pa)) {
0a7de745 6249 managed = TRUE;
5ba3f43e
A
6250 break; // Leave pai locked as we will unlock it after we free the PV entry
6251 }
6252 UNLOCK_PVH(pai);
6253 }
6254
cb323159 6255 if (ARM_PTE_IS_COMPRESSED(*cpte, cpte)) {
5ba3f43e
A
6256 /*
6257 * There used to be a valid mapping here but it
6258 * has already been removed when the page was
6259 * sent to the VM compressor, so nothing left to
6260 * remove now...
6261 */
6262 continue;
6263 }
6264
6265 /* remove the translation, do not flush the TLB */
6266 if (*cpte != ARM_PTE_TYPE_FAULT) {
cb323159
A
6267 assertf(!ARM_PTE_IS_COMPRESSED(*cpte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
6268 assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
5ba3f43e
A
6269#if MACH_ASSERT
6270 if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
6271 panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
0a7de745 6272 cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
5ba3f43e
A
6273 }
6274#endif
6275 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
6276 num_pte_changed++;
6277 }
6278
6279 if ((spte != ARM_PTE_TYPE_FAULT) &&
6280 (pmap != kernel_pmap)) {
cb323159
A
6281 assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
6282 assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
6283 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
5ba3f43e 6284 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
0a7de745
A
6285 }
6286 if (rmv_cnt) {
6287 (*rmv_cnt)++;
6288 }
5ba3f43e
A
6289 }
6290
6291 if (pte_is_wired(spte)) {
6292 pte_set_wired(cpte, 0);
6293 num_unwired++;
6294 }
6295 /*
6296 * if not managed, we're done
6297 */
0a7de745 6298 if (!managed) {
5ba3f43e 6299 continue;
0a7de745 6300 }
5ba3f43e
A
6301 /*
6302 * find and remove the mapping from the chain for this
6303 * physical address.
6304 */
5ba3f43e 6305
d9a64523 6306 pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);
5ba3f43e
A
6307
6308 UNLOCK_PVH(pai);
6309 num_removed++;
6310 }
6311
6312 /*
6313 * Update the counts
6314 */
6315 OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
6316 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
6317
6318 if (pmap != kernel_pmap) {
6319 /* sanity checks... */
6320#if MACH_ASSERT
6321 if (pmap->stats.internal < num_internal) {
0a7de745
A
6322 if ((!pmap_stats_assert ||
6323 !pmap->pmap_stats_assert)) {
d9a64523 6324 printf("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
0a7de745
A
6325 pmap->pmap_pid,
6326 pmap->pmap_procname,
6327 pmap,
6328 (uint64_t) va,
6329 bpte,
6330 epte,
6331 options,
6332 num_internal,
6333 num_removed,
6334 num_unwired,
6335 num_external,
6336 num_reusable,
6337 num_compressed,
6338 num_alt_internal,
6339 num_alt_compressed,
6340 num_pte_changed,
6341 pmap->stats.internal,
6342 pmap->stats.reusable);
5ba3f43e 6343 } else {
d9a64523 6344 panic("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
0a7de745
A
6345 pmap->pmap_pid,
6346 pmap->pmap_procname,
6347 pmap,
6348 (uint64_t) va,
6349 bpte,
6350 epte,
6351 options,
6352 num_internal,
6353 num_removed,
6354 num_unwired,
6355 num_external,
6356 num_reusable,
6357 num_compressed,
6358 num_alt_internal,
6359 num_alt_compressed,
6360 num_pte_changed,
6361 pmap->stats.internal,
6362 pmap->stats.reusable);
5ba3f43e
A
6363 }
6364 }
6365#endif /* MACH_ASSERT */
5c9f4661 6366 PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
0a7de745
A
6367 pmap,
6368 "pmap=%p num_external=%d stats.external=%d",
6369 pmap, num_external, pmap->stats.external);
5c9f4661 6370 PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
0a7de745
A
6371 pmap,
6372 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
6373 pmap,
6374 num_internal, pmap->stats.internal,
6375 num_reusable, pmap->stats.reusable);
5c9f4661 6376 PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
0a7de745
A
6377 pmap,
6378 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
6379 pmap,
6380 num_internal, pmap->stats.internal,
6381 num_reusable, pmap->stats.reusable);
5c9f4661 6382 PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
0a7de745
A
6383 pmap,
6384 "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
6385 pmap, num_compressed, num_alt_compressed,
6386 pmap->stats.compressed);
5ba3f43e
A
6387
6388 /* update pmap stats... */
6389 OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
0a7de745 6390 if (num_external) {
5ba3f43e 6391 OSAddAtomic(-num_external, &pmap->stats.external);
0a7de745
A
6392 }
6393 if (num_internal) {
5ba3f43e 6394 OSAddAtomic(-num_internal, &pmap->stats.internal);
0a7de745
A
6395 }
6396 if (num_reusable) {
5ba3f43e 6397 OSAddAtomic(-num_reusable, &pmap->stats.reusable);
0a7de745
A
6398 }
6399 if (num_compressed) {
5ba3f43e 6400 OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
0a7de745 6401 }
5ba3f43e
A
6402 /* ... and ledgers */
6403 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
6404 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
6405 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
6406 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
6407 pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
6408 /* make needed adjustments to phys_footprint */
6409 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
0a7de745
A
6410 machine_ptob((num_internal -
6411 num_alt_internal) +
6412 (num_compressed -
6413 num_alt_compressed)));
5ba3f43e
A
6414 }
6415
6416 /* flush the ptable entries we have written */
0a7de745 6417 if (num_pte_changed > 0) {
d9a64523 6418 FLUSH_PTE_RANGE_STRONG(bpte, epte);
0a7de745 6419 }
5ba3f43e
A
6420
6421 return num_pte_changed;
6422}
6423
6424
6425/*
6426 * Remove the given range of addresses
6427 * from the specified map.
6428 *
6429 * It is assumed that the start and end are properly
6430 * rounded to the hardware page size.
6431 */
6432void
6433pmap_remove(
6434 pmap_t pmap,
6435 vm_map_address_t start,
6436 vm_map_address_t end)
6437{
6438 pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
6439}
6440
d9a64523
A
6441MARK_AS_PMAP_TEXT static int
6442pmap_remove_options_internal(
6443 pmap_t pmap,
6444 vm_map_address_t start,
6445 vm_map_address_t end,
6446 int options)
5ba3f43e 6447{
cb323159 6448 int remove_count = 0;
5ba3f43e
A
6449 pt_entry_t *bpte, *epte;
6450 pt_entry_t *pte_p;
6451 tt_entry_t *tte_p;
0a7de745 6452 uint32_t rmv_spte = 0;
cb323159
A
6453 bool need_strong_sync = false;
6454 bool flush_tte = false;
5ba3f43e 6455
0a7de745 6456 if (__improbable(end < start)) {
d9a64523 6457 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
0a7de745 6458 }
d9a64523
A
6459
6460 VALIDATE_PMAP(pmap);
cb323159
A
6461
6462 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6463
5ba3f43e
A
6464 PMAP_LOCK(pmap);
6465
6466 tte_p = pmap_tte(pmap, start);
6467
6468 if (tte_p == (tt_entry_t *) NULL) {
6469 goto done;
6470 }
6471
6472 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
6473 pte_p = (pt_entry_t *) ttetokv(*tte_p);
6474 bpte = &pte_p[ptenum(start)];
cb323159 6475 epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));
5ba3f43e
A
6476
6477 remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
cb323159 6478 &rmv_spte, &need_strong_sync, options);
5ba3f43e 6479
cb323159 6480 if (rmv_spte && (ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
0a7de745 6481 (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
cb323159
A
6482 pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
6483 flush_tte = true;
5ba3f43e 6484 }
5ba3f43e
A
6485 }
6486
6487done:
6488 PMAP_UNLOCK(pmap);
cb323159
A
6489
6490 if (remove_count > 0) {
6491 PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
6492 } else if (flush_tte > 0) {
6493 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(start, pmap);
6494 sync_tlb_flush();
6495 }
5ba3f43e
A
6496 return remove_count;
6497}
6498
6499void
6500pmap_remove_options(
6501 pmap_t pmap,
6502 vm_map_address_t start,
6503 vm_map_address_t end,
6504 int options)
6505{
6506 int remove_count = 0;
6507 vm_map_address_t va;
6508
0a7de745 6509 if (pmap == PMAP_NULL) {
5ba3f43e 6510 return;
0a7de745 6511 }
5ba3f43e 6512
cb323159
A
6513 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6514
d9a64523 6515 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
0a7de745
A
6516 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
6517 VM_KERNEL_ADDRHIDE(end));
5ba3f43e
A
6518
6519#if MACH_ASSERT
0a7de745 6520 if ((start | end) & PAGE_MASK) {
5ba3f43e 6521 panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
0a7de745 6522 pmap, (uint64_t)start, (uint64_t)end);
5ba3f43e
A
6523 }
6524 if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
6525 panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
0a7de745 6526 pmap, (uint64_t)start, (uint64_t)end);
5ba3f43e
A
6527 }
6528#endif
6529
6530 /*
6531 * Invalidate the translation buffer first
6532 */
6533 va = start;
6534 while (va < end) {
6535 vm_map_address_t l;
6536
cb323159 6537 l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
0a7de745 6538 if (l > end) {
5ba3f43e 6539 l = end;
0a7de745 6540 }
5ba3f43e 6541
c6bf4f31
A
6542#if XNU_MONITOR
6543 remove_count += pmap_remove_options_ppl(pmap, va, l, options);
6544
6545 pmap_ledger_check_balance(pmap);
6546#else
5ba3f43e 6547 remove_count += pmap_remove_options_internal(pmap, va, l, options);
c6bf4f31 6548#endif
5ba3f43e
A
6549
6550 va = l;
6551 }
6552
d9a64523 6553 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
5ba3f43e
A
6554}
6555
6556
6557/*
6558 * Remove phys addr if mapped in specified map
6559 */
6560void
6561pmap_remove_some_phys(
6562 __unused pmap_t map,
6563 __unused ppnum_t pn)
6564{
6565 /* Implement to support working set code */
6566}
6567
5ba3f43e
A
6568void
6569pmap_set_pmap(
6570 pmap_t pmap,
0a7de745 6571#if !__ARM_USER_PROTECT__
5ba3f43e
A
6572 __unused
6573#endif
0a7de745 6574 thread_t thread)
5ba3f43e
A
6575{
6576 pmap_switch(pmap);
6577#if __ARM_USER_PROTECT__
6578 if (pmap->tte_index_max == NTTES) {
6579 thread->machine.uptw_ttc = 2;
5ba3f43e 6580 } else {
cb323159 6581 thread->machine.uptw_ttc = 1;
5ba3f43e 6582 }
cb323159
A
6583 thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
6584 thread->machine.asid = pmap->hw_asid;
5ba3f43e
A
6585#endif
6586}
6587
6588static void
6589pmap_flush_core_tlb_asid(pmap_t pmap)
6590{
6591#if (__ARM_VMSA__ == 7)
cb323159 6592 flush_core_tlb_asid(pmap->hw_asid);
5ba3f43e 6593#else
cb323159 6594 flush_core_tlb_asid(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
5ba3f43e
A
6595#endif
6596}
6597
d9a64523 6598MARK_AS_PMAP_TEXT static void
5ba3f43e
A
6599pmap_switch_internal(
6600 pmap_t pmap)
6601{
d9a64523 6602 VALIDATE_PMAP(pmap);
5ba3f43e 6603 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
cb323159
A
6604 uint16_t asid_index = pmap->hw_asid;
6605 boolean_t do_asid_flush = FALSE;
6606
6607#if __ARM_KERNEL_PROTECT__
6608 asid_index >>= 1;
6609#endif
5ba3f43e 6610
94ff46dc 6611#if (__ARM_VMSA__ > 7)
d9a64523 6612 pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
5ba3f43e
A
6613#endif
6614
cb323159
A
6615#if MAX_ASID > MAX_HW_ASID
6616 if (asid_index > 0) {
6617 asid_index -= 1;
6618 /* Paranoia. */
6619 assert(asid_index < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));
5ba3f43e 6620
cb323159
A
6621 /* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
6622 uint8_t asid_high_bits = pmap->sw_asid;
6623 uint8_t last_asid_high_bits = cpu_data_ptr->cpu_asid_high_bits[asid_index];
5ba3f43e 6624
cb323159
A
6625 if (asid_high_bits != last_asid_high_bits) {
			/*
			 * If the virtual ASID of the new pmap does not match the virtual ASID
			 * last seen on this CPU for the physical ASID (that was a mouthful),
			 * then this switch runs the risk of aliasing.  We need to flush the
			 * TLB for this physical ASID in this case.
			 */
6632 cpu_data_ptr->cpu_asid_high_bits[asid_index] = asid_high_bits;
6633 do_asid_flush = TRUE;
6634 }
5ba3f43e 6635 }
cb323159 6636#endif /* MAX_ASID > MAX_HW_ASID */
5ba3f43e 6637
5ba3f43e
A
6638 pmap_switch_user_ttb_internal(pmap);
6639
0a7de745 6640#if (__ARM_VMSA__ > 7)
d9a64523
A
6641 /* If we're switching to a different nested pmap (i.e. shared region), we'll need
6642 * to flush the userspace mappings for that region. Those mappings are global
6643 * and will not be protected by the ASID. It should also be cheaper to flush the
6644 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
0a7de745 6645 if ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap)) {
d9a64523 6646 flush_core_tlb();
0a7de745 6647 } else
d9a64523 6648#endif
0a7de745 6649 if (do_asid_flush) {
5ba3f43e 6650 pmap_flush_core_tlb_asid(pmap);
cb323159
A
6651#if DEVELOPMENT || DEBUG
6652 os_atomic_inc(&pmap_asid_flushes, relaxed);
6653#endif
0a7de745 6654 }
5ba3f43e
A
6655}
6656
6657void
6658pmap_switch(
6659 pmap_t pmap)
6660{
cb323159 6661 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
c6bf4f31
A
6662#if XNU_MONITOR
6663 pmap_switch_ppl(pmap);
6664#else
5ba3f43e 6665 pmap_switch_internal(pmap);
c6bf4f31 6666#endif
d9a64523 6667 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
5ba3f43e
A
6668}
6669
6670void
6671pmap_page_protect(
6672 ppnum_t ppnum,
6673 vm_prot_t prot)
6674{
6675 pmap_page_protect_options(ppnum, prot, 0, NULL);
6676}
6677
6678/*
6679 * Routine: pmap_page_protect_options
6680 *
6681 * Function:
6682 * Lower the permission for all mappings to a given
6683 * page.
6684 */
d9a64523 6685MARK_AS_PMAP_TEXT static void
5ba3f43e
A
6686pmap_page_protect_options_internal(
6687 ppnum_t ppnum,
6688 vm_prot_t prot,
6689 unsigned int options)
6690{
6691 pmap_paddr_t phys = ptoa(ppnum);
6692 pv_entry_t **pv_h;
d9a64523 6693 pv_entry_t **pve_pp;
5ba3f43e
A
6694 pv_entry_t *pve_p;
6695 pv_entry_t *pveh_p;
6696 pv_entry_t *pvet_p;
6697 pt_entry_t *pte_p;
d9a64523
A
6698 pv_entry_t *new_pve_p;
6699 pt_entry_t *new_pte_p;
6700 vm_offset_t pvh_flags;
5ba3f43e
A
6701 int pai;
6702 boolean_t remove;
6703 boolean_t set_NX;
0a7de745
A
6704 boolean_t tlb_flush_needed = FALSE;
6705 unsigned int pvh_cnt = 0;
5ba3f43e
A
6706
6707 assert(ppnum != vm_page_fictitious_addr);
6708
6709 /* Only work with managed pages. */
6710 if (!pa_valid(phys)) {
6711 return;
6712 }
6713
6714 /*
6715 * Determine the new protection.
6716 */
6717 switch (prot) {
6718 case VM_PROT_ALL:
0a7de745 6719 return; /* nothing to do */
5ba3f43e
A
6720 case VM_PROT_READ:
6721 case VM_PROT_READ | VM_PROT_EXECUTE:
6722 remove = FALSE;
6723 break;
6724 default:
6725 remove = TRUE;
6726 break;
6727 }
6728
6729 pai = (int)pa_index(phys);
6730 LOCK_PVH(pai);
6731 pv_h = pai_to_pvh(pai);
d9a64523
A
6732 pvh_flags = pvh_get_flags(pv_h);
6733
c6bf4f31
A
6734#if XNU_MONITOR
6735 if (remove && (pvh_flags & PVH_FLAG_LOCKDOWN)) {
6736 panic("%d is locked down (%#llx), cannot remove", pai, pvh_get_flags(pv_h));
6737 }
6738#endif
5ba3f43e
A
6739
6740 pte_p = PT_ENTRY_NULL;
6741 pve_p = PV_ENTRY_NULL;
d9a64523 6742 pve_pp = pv_h;
5ba3f43e
A
6743 pveh_p = PV_ENTRY_NULL;
6744 pvet_p = PV_ENTRY_NULL;
d9a64523
A
6745 new_pve_p = PV_ENTRY_NULL;
6746 new_pte_p = PT_ENTRY_NULL;
5ba3f43e
A
6747 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
6748 pte_p = pvh_ptep(pv_h);
0a7de745 6749 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
5ba3f43e
A
6750 pve_p = pvh_list(pv_h);
6751 pveh_p = pve_p;
6752 }
6753
6754 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
6755 vm_map_address_t va;
6756 pmap_t pmap;
6757 pt_entry_t tmplate;
6758 boolean_t update = FALSE;
6759
0a7de745 6760 if (pve_p != PV_ENTRY_NULL) {
5ba3f43e 6761 pte_p = pve_get_ptep(pve_p);
0a7de745 6762 }
5ba3f43e 6763
d9a64523
A
6764#ifdef PVH_FLAG_IOMMU
6765 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
c6bf4f31
A
6766#if XNU_MONITOR
6767 if (pvh_flags & PVH_FLAG_LOCKDOWN) {
6768 panic("pmap_page_protect: ppnum 0x%x locked down, cannot be owned by iommu 0x%llx, pve_p=%p",
6769 ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
6770 }
6771#endif
d9a64523
A
6772 if (remove) {
6773 if (options & PMAP_OPTIONS_COMPRESSOR) {
6774 panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
0a7de745 6775 ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
d9a64523
A
6776 }
6777 if (pve_p != PV_ENTRY_NULL) {
6778 pv_entry_t *temp_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
6779 pvh_remove(pv_h, pve_pp, pve_p);
6780 pveh_p = pvh_list(pv_h);
6781 pve_next(pve_p) = new_pve_p;
6782 new_pve_p = pve_p;
6783 pve_p = temp_pve_p;
6784 continue;
6785 } else {
6786 new_pte_p = pte_p;
6787 break;
6788 }
6789 }
6790 goto protect_skip_pve;
6791 }
6792#endif
5ba3f43e
A
6793 pmap = ptep_get_pmap(pte_p);
6794 va = ptep_get_va(pte_p);
6795
6796 if (pte_p == PT_ENTRY_NULL) {
6797 panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
0a7de745 6798 pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
5ba3f43e
A
6799 } else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
6800#if MACH_ASSERT
6801 if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
5ba3f43e
A
6802 pv_entry_t *check_pve_p = pveh_p;
6803 while (check_pve_p != PV_ENTRY_NULL) {
6804 if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
6805 panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
6806 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
6807 }
6808 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
6809 }
6810 }
6811#endif
6812 panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
6813 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
6814 }
6815
6816#if DEVELOPMENT || DEBUG
6817 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
6818#else
6819 if ((prot & VM_PROT_EXECUTE))
6820#endif
0a7de745 6821 { set_NX = FALSE;} else {
5ba3f43e 6822 set_NX = TRUE;
0a7de745 6823 }
5ba3f43e
A
6824
6825 /* Remove the mapping if new protection is NONE */
6826 if (remove) {
6827 boolean_t is_altacct = FALSE;
6828
6829 if (IS_ALTACCT_PAGE(pai, pve_p)) {
6830 is_altacct = TRUE;
6831 } else {
6832 is_altacct = FALSE;
6833 }
6834
6835 if (pte_is_wired(*pte_p)) {
6836 pte_set_wired(pte_p, 0);
6837 if (pmap != kernel_pmap) {
6838 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6839 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
6840 }
6841 }
6842
6843 if (*pte_p != ARM_PTE_TYPE_FAULT &&
6844 pmap != kernel_pmap &&
6845 (options & PMAP_OPTIONS_COMPRESSOR) &&
6846 IS_INTERNAL_PAGE(pai)) {
cb323159 6847 assert(!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p));
5ba3f43e
A
6848 /* mark this PTE as having been "compressed" */
6849 tmplate = ARM_PTE_COMPRESSED;
6850 if (is_altacct) {
6851 tmplate |= ARM_PTE_COMPRESSED_ALT;
6852 is_altacct = TRUE;
6853 }
6854 } else {
6855 tmplate = ARM_PTE_TYPE_FAULT;
6856 }
6857
6858 if ((*pte_p != ARM_PTE_TYPE_FAULT) &&
6859 tmplate == ARM_PTE_TYPE_FAULT &&
6860 (pmap != kernel_pmap)) {
cb323159 6861 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt)) <= 0) {
5ba3f43e 6862 panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
0a7de745 6863 }
5ba3f43e
A
6864 }
6865
6866 if (*pte_p != tmplate) {
d9a64523 6867 WRITE_PTE_STRONG(pte_p, tmplate);
5ba3f43e
A
6868 update = TRUE;
6869 }
6870 pvh_cnt++;
6871 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
6872 OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
6873
6874#if MACH_ASSERT
6875 /*
6876 * We only ever compress internal pages.
6877 */
6878 if (options & PMAP_OPTIONS_COMPRESSOR) {
6879 assert(IS_INTERNAL_PAGE(pai));
6880 }
6881#endif
6882
6883 if (pmap != kernel_pmap) {
6884 if (IS_REUSABLE_PAGE(pai) &&
6885 IS_INTERNAL_PAGE(pai) &&
6886 !is_altacct) {
5c9f4661 6887 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
5ba3f43e
A
6888 OSAddAtomic(-1, &pmap->stats.reusable);
6889 } else if (IS_INTERNAL_PAGE(pai)) {
5c9f4661 6890 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
5ba3f43e
A
6891 OSAddAtomic(-1, &pmap->stats.internal);
6892 } else {
5c9f4661 6893 PMAP_STATS_ASSERTF(pmap->stats.external > 0, pmap, "stats.external %d", pmap->stats.external);
5ba3f43e
A
6894 OSAddAtomic(-1, &pmap->stats.external);
6895 }
6896 if ((options & PMAP_OPTIONS_COMPRESSOR) &&
6897 IS_INTERNAL_PAGE(pai)) {
6898 /* adjust "compressed" stats */
6899 OSAddAtomic64(+1, &pmap->stats.compressed);
6900 PMAP_STATS_PEAK(pmap->stats.compressed);
6901 pmap->stats.compressed_lifetime++;
6902 }
6903
6904 if (IS_ALTACCT_PAGE(pai, pve_p)) {
6905 assert(IS_INTERNAL_PAGE(pai));
6906 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
6907 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
6908 if (options & PMAP_OPTIONS_COMPRESSOR) {
6909 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
6910 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
6911 }
6912
6913 /*
6914 * Cleanup our marker before
6915 * we free this pv_entry.
6916 */
6917 CLR_ALTACCT_PAGE(pai, pve_p);
5ba3f43e
A
6918 } else if (IS_REUSABLE_PAGE(pai)) {
6919 assert(IS_INTERNAL_PAGE(pai));
6920 if (options & PMAP_OPTIONS_COMPRESSOR) {
6921 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
6922 /* was not in footprint, but is now */
6923 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6924 }
5ba3f43e
A
6925 } else if (IS_INTERNAL_PAGE(pai)) {
6926 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
6927
6928 /*
6929 * Update all stats related to physical footprint, which only
6930 * deals with internal pages.
6931 */
6932 if (options & PMAP_OPTIONS_COMPRESSOR) {
6933 /*
6934 * This removal is only being done so we can send this page to
6935 * the compressor; therefore it mustn't affect total task footprint.
6936 */
6937 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
6938 } else {
6939 /*
6940 * This internal page isn't going to the compressor, so adjust stats to keep
6941 * phys_footprint up to date.
6942 */
6943 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6944 }
6945 } else {
6946 /* external page: no impact on ledgers */
6947 }
6948 }
6949
6950 if (pve_p != PV_ENTRY_NULL) {
6951 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
6952 }
5ba3f43e
A
6953 } else {
6954 pt_entry_t spte;
cb323159 6955 const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
5ba3f43e
A
6956
6957 spte = *pte_p;
6958
0a7de745 6959 if (pmap == kernel_pmap) {
5ba3f43e 6960 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
0a7de745 6961 } else {
cb323159 6962 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
0a7de745 6963 }
5ba3f43e 6964
0a7de745 6965 pte_set_was_writeable(tmplate, false);
cb323159
A
6966 /*
6967 * While the naive implementation of this would serve to add execute
6968 * permission, this is not how the VM uses this interface, or how
6969 * x86_64 implements it. So ignore requests to add execute permissions.
6970 */
0a7de745 6971 if (set_NX) {
cb323159 6972 tmplate |= pt_attr_leaf_xn(pt_attr);
5ba3f43e 6973 }
5ba3f43e 6974
c6bf4f31
A
6975#if __APRR_SUPPORTED__
6976 if (__improbable(is_pte_xprr_protected(spte))) {
6977 panic("pmap_page_protect: modifying an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx ppnum: 0x%x",
6978 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)spte, (uint64_t)tmplate, (uint64_t)va, ppnum);
6979 }
6980
6981 if (__improbable(is_pte_xprr_protected(tmplate))) {
6982 panic("pmap_page_protect: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx ppnum: 0x%x",
6983 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)spte, (uint64_t)tmplate, (uint64_t)va, ppnum);
6984 }
6985#endif /* __APRR_SUPPORTED__*/
5ba3f43e
A
6986
6987 if (*pte_p != ARM_PTE_TYPE_FAULT &&
cb323159 6988 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p) &&
5ba3f43e 6989 *pte_p != tmplate) {
d9a64523 6990 WRITE_PTE_STRONG(pte_p, tmplate);
5ba3f43e
A
6991 update = TRUE;
6992 }
6993 }
6994
6995 /* Invalidate TLBs for all CPUs using it */
d9a64523
A
6996 if (update) {
6997 tlb_flush_needed = TRUE;
cb323159 6998 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
d9a64523 6999 }
5ba3f43e 7000
d9a64523 7001#ifdef PVH_FLAG_IOMMU
0a7de745 7002protect_skip_pve:
d9a64523 7003#endif
5ba3f43e
A
7004 pte_p = PT_ENTRY_NULL;
7005 pvet_p = pve_p;
7006 if (pve_p != PV_ENTRY_NULL) {
5ba3f43e
A
7007 if (remove) {
7008 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
7009 }
d9a64523 7010 pve_pp = pve_link_field(pve_p);
5ba3f43e
A
7011 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
7012 }
7013 }
7014
d9a64523 7015#ifdef PVH_FLAG_EXEC
0a7de745 7016 if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
d9a64523 7017 pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
0a7de745 7018 }
d9a64523 7019#endif
0a7de745 7020 if (tlb_flush_needed) {
d9a64523 7021 sync_tlb_flush();
0a7de745 7022 }
d9a64523 7023
5ba3f43e
A
7024 /* if we removed a bunch of entries, take care of them now */
7025 if (remove) {
d9a64523
A
7026 if (new_pve_p != PV_ENTRY_NULL) {
7027 pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
7028 pvh_set_flags(pv_h, pvh_flags);
7029 } else if (new_pte_p != PT_ENTRY_NULL) {
7030 pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
7031 pvh_set_flags(pv_h, pvh_flags);
7032 } else {
7033 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
7034 }
5ba3f43e
A
7035 }
7036
7037 UNLOCK_PVH(pai);
7038
d9a64523 7039 if (remove && (pvet_p != PV_ENTRY_NULL)) {
5ba3f43e
A
7040 pv_list_free(pveh_p, pvet_p, pvh_cnt);
7041 }
7042}
7043
7044void
7045pmap_page_protect_options(
7046 ppnum_t ppnum,
7047 vm_prot_t prot,
7048 unsigned int options,
7049 __unused void *arg)
7050{
7051 pmap_paddr_t phys = ptoa(ppnum);
7052
7053 assert(ppnum != vm_page_fictitious_addr);
7054
7055 /* Only work with managed pages. */
0a7de745 7056 if (!pa_valid(phys)) {
5ba3f43e 7057 return;
0a7de745 7058 }
5ba3f43e
A
7059
7060 /*
7061 * Determine the new protection.
7062 */
7063 if (prot == VM_PROT_ALL) {
0a7de745 7064 return; /* nothing to do */
5ba3f43e
A
7065 }
7066
d9a64523 7067 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
5ba3f43e 7068
c6bf4f31
A
7069#if XNU_MONITOR
7070 pmap_page_protect_options_ppl(ppnum, prot, options);
7071#else
5ba3f43e 7072 pmap_page_protect_options_internal(ppnum, prot, options);
c6bf4f31 7073#endif
5ba3f43e 7074
d9a64523 7075 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
5ba3f43e
A
7076}
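/*
 * Illustrative sketch (not part of the build): typical uses of this entry
 * point by the VM layer. A read-only protection downgrades every existing
 * mapping of the physical page; VM_PROT_NONE disconnects the page entirely.
 * The trailing argument is unused on this architecture.
 *
 *	pmap_page_protect_options(pn, VM_PROT_READ, 0, NULL);	// write-protect all mappings
 *	pmap_page_protect_options(pn, VM_PROT_NONE, 0, NULL);	// remove all mappings
 */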
7077
7078/*
7079 * Indicates if the pmap layer enforces some additional restrictions on the
7080 * given set of protections.
7081 */
0a7de745
A
7082bool
7083pmap_has_prot_policy(__unused vm_prot_t prot)
5ba3f43e
A
7084{
7085 return FALSE;
7086}
7087
7088/*
7089 * Set the physical protection on the
7090 * specified range of this map as requested.
7091 * VERY IMPORTANT: Will not increase permissions.
7092 * VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
7093 */
7094void
7095pmap_protect(
7096 pmap_t pmap,
7097 vm_map_address_t b,
7098 vm_map_address_t e,
7099 vm_prot_t prot)
7100{
7101 pmap_protect_options(pmap, b, e, prot, 0, NULL);
7102}
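/*
 * Illustrative sketch (not compiled; 'upmap', 'start' and 'len' are
 * placeholder names): because this interface can only narrow permissions,
 * write-protecting a range is done directly, while restoring write access
 * later happens through pmap_enter() at fault time. Note that a
 * VM_PROT_READ | VM_PROT_WRITE request is treated as a no-op by the
 * switch in pmap_protect_options() below.
 *
 *	pmap_protect(upmap, start, start + len, VM_PROT_READ);
 */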
7103
d9a64523 7104MARK_AS_PMAP_TEXT static void
0a7de745
A
7105pmap_protect_options_internal(
7106 pmap_t pmap,
5ba3f43e
A
7107 vm_map_address_t start,
7108 vm_map_address_t end,
7109 vm_prot_t prot,
7110 unsigned int options,
7111 __unused void *args)
7112{
cb323159
A
7113 const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
7114 tt_entry_t *tte_p;
7115 pt_entry_t *bpte_p, *epte_p;
7116 pt_entry_t *pte_p;
7117 boolean_t set_NX = TRUE;
5ba3f43e 7118#if (__ARM_VMSA__ > 7)
cb323159 7119 boolean_t set_XO = FALSE;
5ba3f43e 7120#endif
cb323159
A
7121 boolean_t should_have_removed = FALSE;
7122 bool need_strong_sync = false;
5ba3f43e 7123
eb6b6ca3
A
7124 if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
7125 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
0a7de745 7126 }
d9a64523 7127
5ba3f43e
A
7128#if DEVELOPMENT || DEBUG
7129 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
7130 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
7131 should_have_removed = TRUE;
7132 }
7133 } else
7134#endif
7135 {
7136 /* Determine the new protection. */
7137 switch (prot) {
7138#if (__ARM_VMSA__ > 7)
7139 case VM_PROT_EXECUTE:
7140 set_XO = TRUE;
7141 /* fall through */
7142#endif
7143 case VM_PROT_READ:
7144 case VM_PROT_READ | VM_PROT_EXECUTE:
7145 break;
7146 case VM_PROT_READ | VM_PROT_WRITE:
7147 case VM_PROT_ALL:
0a7de745 7148 return; /* nothing to do */
5ba3f43e
A
7149 default:
7150 should_have_removed = TRUE;
7151 }
7152 }
7153
7154 if (should_have_removed) {
7155 panic("%s: should have been a remove operation, "
0a7de745
A
7156 "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
7157 __FUNCTION__,
7158 pmap, (void *)start, (void *)end, prot, options, args);
5ba3f43e
A
7159 }
7160
7161#if DEVELOPMENT || DEBUG
7162 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
7163#else
7164 if ((prot & VM_PROT_EXECUTE))
7165#endif
7166 {
7167 set_NX = FALSE;
7168 } else {
7169 set_NX = TRUE;
7170 }
7171
d9a64523 7172 VALIDATE_PMAP(pmap);
5ba3f43e
A
7173 PMAP_LOCK(pmap);
7174 tte_p = pmap_tte(pmap, start);
7175
7176 if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
7177 bpte_p = (pt_entry_t *) ttetokv(*tte_p);
7178 bpte_p = &bpte_p[ptenum(start)];
7179 epte_p = bpte_p + arm_atop(end - start);
7180 pte_p = bpte_p;
7181
7182 for (pte_p = bpte_p;
0a7de745
A
7183 pte_p < epte_p;
7184 pte_p += PAGE_SIZE / ARM_PGBYTES) {
5ba3f43e
A
7185 pt_entry_t spte;
7186#if DEVELOPMENT || DEBUG
7187 boolean_t force_write = FALSE;
7188#endif
7189
7190 spte = *pte_p;
7191
7192 if ((spte == ARM_PTE_TYPE_FAULT) ||
cb323159 7193 ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
5ba3f43e
A
7194 continue;
7195 }
7196
0a7de745
A
7197 pmap_paddr_t pa;
7198 int pai = 0;
7199 boolean_t managed = FALSE;
5ba3f43e
A
7200
7201 while (!managed) {
7202 /*
7203 * It may be possible for the pte to transition from managed
7204 * to unmanaged in this timeframe; for now, elide the assert.
7205 * We should break out as a consequence of checking pa_valid.
7206 */
7207 // assert(!ARM_PTE_IS_COMPRESSED(spte));
7208 pa = pte_to_pa(spte);
0a7de745 7209 if (!pa_valid(pa)) {
5ba3f43e 7210 break;
0a7de745 7211 }
5ba3f43e
A
7212 pai = (int)pa_index(pa);
7213 LOCK_PVH(pai);
7214 spte = *pte_p;
7215 pa = pte_to_pa(spte);
7216 if (pai == (int)pa_index(pa)) {
0a7de745 7217 managed = TRUE;
5ba3f43e
A
7218 break; // Leave the PVH locked as we will unlock it after we free the PTE
7219 }
7220 UNLOCK_PVH(pai);
7221 }
7222
7223 if ((spte == ARM_PTE_TYPE_FAULT) ||
cb323159 7224 ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
5ba3f43e
A
7225 continue;
7226 }
7227
7228 pt_entry_t tmplate;
7229
7230 if (pmap == kernel_pmap) {
7231#if DEVELOPMENT || DEBUG
7232 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
7233 force_write = TRUE;
7234 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
7235 } else
7236#endif
7237 {
7238 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
7239 }
7240 } else {
7241#if DEVELOPMENT || DEBUG
7242 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
7243 force_write = TRUE;
cb323159 7244 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pt_attr));
5ba3f43e
A
7245 } else
7246#endif
7247 {
cb323159 7248 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
5ba3f43e
A
7249 }
7250 }
7251
7252 /*
7253 * XXX Removing "NX" would
7254 * grant "execute" access
7255 * immediately, bypassing any
7256 * checks VM might want to do
7257 * in its soft fault path.
7258 * pmap_protect() and co. are
7259 * not allowed to increase
7260 * access permissions.
7261 */
0a7de745 7262 if (set_NX) {
cb323159 7263 tmplate |= pt_attr_leaf_xn(pt_attr);
0a7de745 7264 } else {
cb323159 7265#if (__ARM_VMSA__ > 7)
5ba3f43e 7266 if (pmap == kernel_pmap) {
cb323159 7267 /* do NOT clear "PNX"! */
5ba3f43e
A
7268 tmplate |= ARM_PTE_NX;
7269 } else {
7270 /* do NOT clear "NX"! */
cb323159 7271 tmplate |= pt_attr_leaf_x(pt_attr);
5ba3f43e
A
7272 if (set_XO) {
7273 tmplate &= ~ARM_PTE_APMASK;
cb323159 7274 tmplate |= pt_attr_leaf_rona(pt_attr);
5ba3f43e
A
7275 }
7276 }
5ba3f43e 7277#endif
cb323159 7278 }
5ba3f43e
A
7279
7280#if DEVELOPMENT || DEBUG
7281 if (force_write) {
7282 /*
7283 * TODO: Run CS/Monitor checks here.
7284 */
7285 if (managed) {
7286 /*
7287 * We are marking the page as writable,
7288 * so we consider it to be modified and
7289 * referenced.
7290 */
7291 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
7292 tmplate |= ARM_PTE_AF;
7293
7294 if (IS_REFFAULT_PAGE(pai)) {
7295 CLR_REFFAULT_PAGE(pai);
7296 }
7297
7298 if (IS_MODFAULT_PAGE(pai)) {
7299 CLR_MODFAULT_PAGE(pai);
7300 }
7301 }
7302 } else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
7303 /*
7304 * An immediate request for anything other than
7305 * write should still mark the page as
7306 * referenced if managed.
7307 */
7308 if (managed) {
7309 pa_set_bits(pa, PP_ATTR_REFERENCED);
7310 tmplate |= ARM_PTE_AF;
7311
7312 if (IS_REFFAULT_PAGE(pai)) {
7313 CLR_REFFAULT_PAGE(pai);
7314 }
7315 }
7316 }
7317#endif
7318
 7319 /* We do not expect to write-fast-fault the entry. */
0a7de745 7320 pte_set_was_writeable(tmplate, false);
5ba3f43e 7321
c6bf4f31 7322#if __APRR_SUPPORTED__
ea3f0419
A
7323 if (__improbable(is_pte_xprr_protected(spte) && (pte_to_xprr_perm(spte) != XPRR_USER_JIT_PERM)
7324 && (pte_to_xprr_perm(spte) != XPRR_USER_XO_PERM))) {
c6bf4f31
A
7325 /* Only test for PPL protection here, User-JIT mappings may be mutated by this function. */
7326 panic("%s: modifying a PPL mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
7327 __func__, pte_p, pmap, prot, options, (uint64_t)spte, (uint64_t)tmplate);
7328 }
7329
ea3f0419 7330 if (__improbable(is_pte_xprr_protected(tmplate) && (pte_to_xprr_perm(tmplate) != XPRR_USER_XO_PERM))) {
c6bf4f31
A
7331 panic("%s: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
7332 __func__, pte_p, pmap, prot, options, (uint64_t)spte, (uint64_t)tmplate);
7333 }
7334#endif /* __APRR_SUPPORTED__*/
5ba3f43e
A
7335 WRITE_PTE_FAST(pte_p, tmplate);
7336
7337 if (managed) {
7338 ASSERT_PVH_LOCKED(pai);
7339 UNLOCK_PVH(pai);
7340 }
7341 }
d9a64523 7342 FLUSH_PTE_RANGE_STRONG(bpte_p, epte_p);
cb323159 7343 PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
5ba3f43e
A
7344 }
7345
7346 PMAP_UNLOCK(pmap);
7347}
7348
7349void
7350pmap_protect_options(
7351 pmap_t pmap,
7352 vm_map_address_t b,
7353 vm_map_address_t e,
7354 vm_prot_t prot,
7355 unsigned int options,
7356 __unused void *args)
7357{
7358 vm_map_address_t l, beg;
7359
cb323159
A
7360 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7361
0a7de745 7362 if ((b | e) & PAGE_MASK) {
5ba3f43e 7363 panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
0a7de745 7364 pmap, (uint64_t)b, (uint64_t)e);
5ba3f43e
A
7365 }
7366
7367#if DEVELOPMENT || DEBUG
7368 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
7369 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
7370 pmap_remove_options(pmap, b, e, options);
7371 return;
7372 }
7373 } else
7374#endif
7375 {
7376 /* Determine the new protection. */
7377 switch (prot) {
7378 case VM_PROT_EXECUTE:
7379 case VM_PROT_READ:
7380 case VM_PROT_READ | VM_PROT_EXECUTE:
7381 break;
7382 case VM_PROT_READ | VM_PROT_WRITE:
7383 case VM_PROT_ALL:
0a7de745 7384 return; /* nothing to do */
5ba3f43e
A
7385 default:
7386 pmap_remove_options(pmap, b, e, options);
7387 return;
7388 }
7389 }
7390
d9a64523 7391 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
0a7de745
A
7392 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
7393 VM_KERNEL_ADDRHIDE(e));
5ba3f43e
A
7394
7395 beg = b;
7396
7397 while (beg < e) {
cb323159 7398 l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
5ba3f43e 7399
0a7de745 7400 if (l > e) {
5ba3f43e 7401 l = e;
0a7de745 7402 }
5ba3f43e 7403
c6bf4f31
A
7404#if XNU_MONITOR
7405 pmap_protect_options_ppl(pmap, beg, l, prot, options, args);
7406#else
5ba3f43e 7407 pmap_protect_options_internal(pmap, beg, l, prot, options, args);
c6bf4f31 7408#endif
5ba3f43e
A
7409
7410 beg = l;
7411 }
7412
d9a64523 7413 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
5ba3f43e
A
7414}
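/*
 * Illustrative sketch (not compiled): the loop above carves [b, e) into
 * chunks that never cross a twig boundary, so each call into the internal
 * (or PPL) routine only touches PTEs belonging to a single last-level page
 * table. Assuming a 4KB translation granule, where one twig maps 2MB
 * (twig_size = 0x200000, twig_offmask = 0x1FFFFF):
 *
 *	beg = 0x10003000
 *	l   = (beg + 0x200000) & ~0x1FFFFF   = 0x10200000
 *
 * so the first chunk stops at the next 2MB-aligned address and later
 * iterations advance one full twig at a time.
 */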
7415
7416/* Map a (possibly) autogenned block */
7417kern_return_t
7418pmap_map_block(
7419 pmap_t pmap,
7420 addr64_t va,
7421 ppnum_t pa,
7422 uint32_t size,
7423 vm_prot_t prot,
7424 int attr,
7425 __unused unsigned int flags)
7426{
7427 kern_return_t kr;
7428 addr64_t original_va = va;
7429 uint32_t page;
7430
7431 for (page = 0; page < size; page++) {
7432 kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
7433
7434 if (kr != KERN_SUCCESS) {
7435 /*
7436 * This will panic for now, as it is unclear that
7437 * removing the mappings is correct.
7438 */
7439 panic("%s: failed pmap_enter, "
0a7de745
A
7440 "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
7441 __FUNCTION__,
7442 pmap, va, pa, size, prot, flags);
5ba3f43e
A
7443
7444 pmap_remove(pmap, original_va, va - original_va);
7445 return kr;
7446 }
7447
7448 va += PAGE_SIZE;
7449 pa++;
7450 }
7451
7452 return KERN_SUCCESS;
7453}
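/*
 * Illustrative sketch (not compiled; 'buf_va', 'buf_ppnum' and 'buf_len'
 * are placeholder names): mapping a physically contiguous buffer. Note
 * that 'size' is a page count rather than a byte count, the mapping is
 * entered wired, and a failed pmap_enter() currently panics (see above).
 *
 *	uint32_t npages = (uint32_t)atop(buf_len);	// bytes -> pages
 *	kern_return_t kr = pmap_map_block(kernel_pmap, buf_va, buf_ppnum,
 *	    npages, VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_WCOMB, 0);
 *	assert(kr == KERN_SUCCESS);
 */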
7454
7455/*
7456 * Insert the given physical page (p) at
7457 * the specified virtual address (v) in the
7458 * target physical map with the protection requested.
7459 *
7460 * If specified, the page will be wired down, meaning
 7461 * that the related pte cannot be reclaimed.
7462 *
7463 * NB: This is the only routine which MAY NOT lazy-evaluate
7464 * or lose information. That is, this routine must actually
7465 * insert this page into the given map eventually (must make
7466 * forward progress eventually.
7467 */
7468kern_return_t
7469pmap_enter(
7470 pmap_t pmap,
7471 vm_map_address_t v,
7472 ppnum_t pn,
7473 vm_prot_t prot,
7474 vm_prot_t fault_type,
7475 unsigned int flags,
7476 boolean_t wired)
7477{
7478 return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
7479}
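/*
 * Illustrative sketch (not compiled; 'va' and 'pn' are supplied by the
 * caller): establishing a single wired, read/write, non-executable mapping
 * with the default cacheability. Requesting both write and execute on the
 * kernel pmap would panic (see the WX check in
 * pmap_enter_options_internal() below).
 *
 *	kern_return_t kr;
 *	kr = pmap_enter(kernel_pmap, va, pn,
 *	    VM_PROT_READ | VM_PROT_WRITE,	// prot
 *	    VM_PROT_NONE,			// fault_type
 *	    VM_WIMG_USE_DEFAULT,		// flags
 *	    TRUE);				// wired
 *	assert(kr == KERN_SUCCESS);
 */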
7480
7481
0a7de745
A
7482static inline void
7483pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
5ba3f43e 7484{
0a7de745 7485 if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
cb323159 7486 SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
5ba3f43e
A
7487 if (pte & ARM_PTE_WIRED) {
7488 OSAddAtomic16(1, ptd_wiredcnt_ptr);
7489 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
7490 OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
7491 } else {
7492 OSAddAtomic16(-1, ptd_wiredcnt_ptr);
7493 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
7494 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
7495 }
7496 }
7497 if (*pte_p != ARM_PTE_TYPE_FAULT &&
cb323159 7498 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
d9a64523 7499 WRITE_PTE_STRONG(pte_p, pte);
cb323159 7500 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE, false);
5ba3f43e
A
7501 } else {
7502 WRITE_PTE(pte_p, pte);
d9a64523 7503 __builtin_arm_isb(ISB_SY);
5ba3f43e 7504 }
d9a64523
A
7505
7506 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + PAGE_SIZE), pte);
5ba3f43e
A
7507}
7508
cb323159 7509MARK_AS_PMAP_TEXT static pt_entry_t
5ba3f43e
A
7510wimg_to_pte(unsigned int wimg)
7511{
7512 pt_entry_t pte;
7513
7514 switch (wimg & (VM_WIMG_MASK)) {
0a7de745
A
7515 case VM_WIMG_IO:
7516 case VM_WIMG_RT:
7517 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
7518 pte |= ARM_PTE_NX | ARM_PTE_PNX;
7519 break;
7520 case VM_WIMG_POSTED:
7521 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
7522 pte |= ARM_PTE_NX | ARM_PTE_PNX;
7523 break;
cb323159
A
7524 case VM_WIMG_POSTED_REORDERED:
7525 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
7526 pte |= ARM_PTE_NX | ARM_PTE_PNX;
7527 break;
7528 case VM_WIMG_POSTED_COMBINED_REORDERED:
7529 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
7530 pte |= ARM_PTE_NX | ARM_PTE_PNX;
7531 break;
0a7de745
A
7532 case VM_WIMG_WCOMB:
7533 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
7534 pte |= ARM_PTE_NX | ARM_PTE_PNX;
7535 break;
7536 case VM_WIMG_WTHRU:
7537 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
7538#if (__ARM_VMSA__ > 7)
7539 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5ba3f43e 7540#else
0a7de745 7541 pte |= ARM_PTE_SH;
5ba3f43e 7542#endif
0a7de745
A
7543 break;
7544 case VM_WIMG_COPYBACK:
7545 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
7546#if (__ARM_VMSA__ > 7)
7547 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5ba3f43e 7548#else
0a7de745 7549 pte |= ARM_PTE_SH;
5ba3f43e 7550#endif
0a7de745
A
7551 break;
7552 case VM_WIMG_INNERWBACK:
7553 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
7554#if (__ARM_VMSA__ > 7)
7555 pte |= ARM_PTE_SH(SH_INNER_MEMORY);
5ba3f43e 7556#else
0a7de745 7557 pte |= ARM_PTE_SH;
5ba3f43e 7558#endif
0a7de745
A
7559 break;
7560 default:
7561 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
7562#if (__ARM_VMSA__ > 7)
7563 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5ba3f43e 7564#else
0a7de745 7565 pte |= ARM_PTE_SH;
5ba3f43e
A
7566#endif
7567 }
7568
7569 return pte;
7570}
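/*
 * Illustrative sketch (not compiled): callers fold the result into an
 * existing PTE template by first clearing the memory-attribute and
 * shareability fields, as pmap_enter_options_internal() does below:
 *
 *	pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
 *	pte |= wimg_to_pte(VM_WIMG_IO);		// device memory; NX and PNX set
 */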
7571
d9a64523
A
7572static boolean_t
7573pmap_enter_pv(
7574 pmap_t pmap,
7575 pt_entry_t *pte_p,
7576 int pai,
7577 unsigned int options,
7578 pv_entry_t **pve_p,
7579 boolean_t *is_altacct)
7580{
7581 pv_entry_t **pv_h;
7582 pv_h = pai_to_pvh(pai);
7583 boolean_t first_cpu_mapping;
7584
7585 ASSERT_PVH_LOCKED(pai);
7586
7587 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
7588
c6bf4f31
A
7589#if XNU_MONITOR
7590 if (pvh_flags & PVH_FLAG_LOCKDOWN) {
7591 panic("%d is locked down (%#lx), cannot enter", pai, pvh_flags);
7592 }
7593#endif
d9a64523
A
7594
7595#ifdef PVH_FLAG_CPU
7596 /* An IOMMU mapping may already be present for a page that hasn't yet
7597 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
7598 * if this is the first CPU mapping. We base internal/reusable
7599 * accounting on the options specified for the first CPU mapping.
7600 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
7601 * as there are *any* mappings of the page. The accounting for a
7602 * page should not need to change until the page is recycled by the
7603 * VM layer, and we assert that there are no mappings when a page
7604 * is recycled. An IOMMU mapping of a freed/recycled page is
7605 * considered a security violation & potential DMA corruption path.*/
7606 first_cpu_mapping = ((pmap != NULL) && !(pvh_flags & PVH_FLAG_CPU));
0a7de745 7607 if (first_cpu_mapping) {
d9a64523 7608 pvh_flags |= PVH_FLAG_CPU;
0a7de745 7609 }
d9a64523
A
7610#else
7611 first_cpu_mapping = pvh_test_type(pv_h, PVH_TYPE_NULL);
7612#endif
7613
7614 if (first_cpu_mapping) {
7615 if (options & PMAP_OPTIONS_INTERNAL) {
7616 SET_INTERNAL_PAGE(pai);
7617 } else {
7618 CLR_INTERNAL_PAGE(pai);
7619 }
7620 if ((options & PMAP_OPTIONS_INTERNAL) &&
7621 (options & PMAP_OPTIONS_REUSABLE)) {
7622 SET_REUSABLE_PAGE(pai);
7623 } else {
7624 CLR_REUSABLE_PAGE(pai);
7625 }
7626 }
0a7de745 7627 if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
d9a64523
A
7628 pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
7629 if (pmap != NULL && pmap != kernel_pmap &&
7630 ((options & PMAP_OPTIONS_ALT_ACCT) ||
0a7de745 7631 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
d9a64523
A
7632 IS_INTERNAL_PAGE(pai)) {
7633 /*
7634 * Make a note to ourselves that this mapping is using alternative
7635 * accounting. We'll need this in order to know which ledger to
7636 * debit when the mapping is removed.
7637 *
7638 * The altacct bit must be set while the pv head is locked. Defer
7639 * the ledger accounting until after we've dropped the lock.
7640 */
7641 SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
7642 *is_altacct = TRUE;
7643 } else {
7644 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
7645 }
7646 } else {
7647 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
0a7de745 7648 pt_entry_t *pte1_p;
d9a64523
A
7649
7650 /*
7651 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
7652 */
7653 pte1_p = pvh_ptep(pv_h);
7654 pvh_set_flags(pv_h, pvh_flags);
0a7de745 7655 if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
d9a64523 7656 return FALSE;
0a7de745 7657 }
d9a64523
A
7658
7659 pve_set_ptep(*pve_p, pte1_p);
7660 (*pve_p)->pve_next = PV_ENTRY_NULL;
7661
7662 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
7663 /*
7664 * transfer "altacct" from
7665 * pp_attr to this pve
7666 */
7667 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
7668 SET_ALTACCT_PAGE(pai, *pve_p);
7669 }
7670 pvh_update_head(pv_h, *pve_p, PVH_TYPE_PVEP);
7671 *pve_p = PV_ENTRY_NULL;
7672 } else if (!pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7673 panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
0a7de745 7674 __func__, *pv_h, pte_p, pmap, pv_h);
d9a64523
A
7675 }
7676 /*
7677 * Set up pv_entry for this new mapping and then
7678 * add it to the list for this physical page.
7679 */
7680 pvh_set_flags(pv_h, pvh_flags);
0a7de745 7681 if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
d9a64523 7682 return FALSE;
0a7de745 7683 }
d9a64523
A
7684
7685 pve_set_ptep(*pve_p, pte_p);
7686 (*pve_p)->pve_next = PV_ENTRY_NULL;
7687
7688 pvh_add(pv_h, *pve_p);
7689
7690 if (pmap != NULL && pmap != kernel_pmap &&
7691 ((options & PMAP_OPTIONS_ALT_ACCT) ||
0a7de745 7692 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
d9a64523
A
7693 IS_INTERNAL_PAGE(pai)) {
7694 /*
7695 * Make a note to ourselves that this
7696 * mapping is using alternative
7697 * accounting. We'll need this in order
7698 * to know which ledger to debit when
7699 * the mapping is removed.
7700 *
7701 * The altacct bit must be set while
7702 * the pv head is locked. Defer the
7703 * ledger accounting until after we've
7704 * dropped the lock.
7705 */
7706 SET_ALTACCT_PAGE(pai, *pve_p);
7707 *is_altacct = TRUE;
7708 }
7709
7710 *pve_p = PV_ENTRY_NULL;
7711 }
7712
7713 pvh_set_flags(pv_h, pvh_flags);
7714
7715 return TRUE;
0a7de745 7716}
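/*
 * Illustrative summary (not compiled): the pv head for a physical page
 * moves through three representations as CPU mappings are added above:
 *
 *	PVH_TYPE_NULL  -> PVH_TYPE_PTEP		first mapping, bare PTE pointer
 *	PVH_TYPE_PTEP  -> PVH_TYPE_PVEP		second mapping, pv_entry list
 *	PVH_TYPE_PVEP  -> PVH_TYPE_PVEP		further mappings are prepended
 *
 * A FALSE return indicates pv_alloc() did not hand back a pv_entry (it may
 * have had to drop the locks to get memory); the caller retries from the
 * Pmap_enter_loop label in pmap_enter_options_internal() below.
 */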
d9a64523
A
7717
7718MARK_AS_PMAP_TEXT static kern_return_t
5ba3f43e
A
7719pmap_enter_options_internal(
7720 pmap_t pmap,
7721 vm_map_address_t v,
7722 ppnum_t pn,
7723 vm_prot_t prot,
7724 vm_prot_t fault_type,
7725 unsigned int flags,
7726 boolean_t wired,
7727 unsigned int options)
7728{
a39ff7e2
A
7729 pmap_paddr_t pa = ptoa(pn);
7730 pt_entry_t pte;
7731 pt_entry_t spte;
7732 pt_entry_t *pte_p;
7733 pv_entry_t *pve_p;
7734 boolean_t set_NX;
7735 boolean_t set_XO = FALSE;
7736 boolean_t refcnt_updated;
7737 boolean_t wiredcnt_updated;
7738 unsigned int wimg_bits;
7739 boolean_t was_compressed, was_alt_compressed;
d9a64523
A
7740 kern_return_t kr = KERN_SUCCESS;
7741
7742 VALIDATE_PMAP(pmap);
5ba3f43e 7743
cb323159
A
7744 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7745
5ba3f43e
A
7746 if ((v) & PAGE_MASK) {
7747 panic("pmap_enter_options() pmap %p v 0x%llx\n",
0a7de745 7748 pmap, (uint64_t)v);
5ba3f43e
A
7749 }
7750
7751 if ((prot & VM_PROT_EXECUTE) && (prot & VM_PROT_WRITE) && (pmap == kernel_pmap)) {
7752 panic("pmap_enter_options(): WX request on kernel_pmap");
7753 }
7754
7755#if DEVELOPMENT || DEBUG
7756 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
7757#else
7758 if ((prot & VM_PROT_EXECUTE))
7759#endif
0a7de745 7760 { set_NX = FALSE; } else {
5ba3f43e 7761 set_NX = TRUE;
0a7de745 7762 }
5ba3f43e
A
7763
7764#if (__ARM_VMSA__ > 7)
7765 if (prot == VM_PROT_EXECUTE) {
7766 set_XO = TRUE;
7767 }
7768#endif
7769
7770 assert(pn != vm_page_fictitious_addr);
7771
7772 refcnt_updated = FALSE;
a39ff7e2 7773 wiredcnt_updated = FALSE;
5ba3f43e
A
7774 pve_p = PV_ENTRY_NULL;
7775 was_compressed = FALSE;
7776 was_alt_compressed = FALSE;
7777
7778 PMAP_LOCK(pmap);
7779
7780 /*
7781 * Expand pmap to include this pte. Assume that
7782 * pmap is always expanded to include enough hardware
7783 * pages to map one VM page.
7784 */
7785 while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
7786 /* Must unlock to expand the pmap. */
7787 PMAP_UNLOCK(pmap);
7788
d9a64523 7789 kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);
5ba3f43e 7790
0a7de745 7791 if (kr != KERN_SUCCESS) {
5ba3f43e 7792 return kr;
0a7de745 7793 }
5ba3f43e
A
7794
7795 PMAP_LOCK(pmap);
7796 }
7797
7798 if (options & PMAP_OPTIONS_NOENTER) {
7799 PMAP_UNLOCK(pmap);
7800 return KERN_SUCCESS;
7801 }
7802
7803Pmap_enter_retry:
7804
7805 spte = *pte_p;
7806
cb323159 7807 if (ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
5ba3f43e
A
7808 /*
7809 * "pmap" should be locked at this point, so this should
7810 * not race with another pmap_enter() or pmap_remove_range().
7811 */
7812 assert(pmap != kernel_pmap);
7813
7814 /* one less "compressed" */
7815 OSAddAtomic64(-1, &pmap->stats.compressed);
7816 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
0a7de745 7817 PAGE_SIZE);
5ba3f43e
A
7818
7819 was_compressed = TRUE;
7820 if (spte & ARM_PTE_COMPRESSED_ALT) {
7821 was_alt_compressed = TRUE;
d9a64523 7822 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
5ba3f43e
A
7823 } else {
7824 /* was part of the footprint */
7825 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
7826 }
7827
7828 /* clear "compressed" marker */
 7829 /* XXX is it necessary since we're about to overwrite it? */
7830 WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
7831 spte = ARM_PTE_TYPE_FAULT;
7832
7833 /*
7834 * We're replacing a "compressed" marker with a valid PTE,
7835 * so no change for "refcnt".
7836 */
7837 refcnt_updated = TRUE;
7838 }
7839
7840 if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
7841 pmap_remove_range(pmap, v, pte_p, pte_p + 1, 0);
5ba3f43e
A
7842 }
7843
7844 pte = pa_to_pte(pa) | ARM_PTE_TYPE;
7845
7846 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
7847 * wired memory statistics for user pmaps, but kernel PTEs are assumed
7848 * to be wired in nearly all cases. For VM layer functionality, the wired
7849 * count in vm_page_t is sufficient. */
0a7de745 7850 if (wired && pmap != kernel_pmap) {
5ba3f43e 7851 pte |= ARM_PTE_WIRED;
0a7de745 7852 }
5ba3f43e 7853
0a7de745 7854 if (set_NX) {
cb323159 7855 pte |= pt_attr_leaf_xn(pt_attr);
0a7de745 7856 } else {
cb323159 7857#if (__ARM_VMSA__ > 7)
5ba3f43e
A
7858 if (pmap == kernel_pmap) {
7859 pte |= ARM_PTE_NX;
7860 } else {
cb323159 7861 pte |= pt_attr_leaf_x(pt_attr);
5ba3f43e 7862 }
5ba3f43e 7863#endif
cb323159 7864 }
5ba3f43e 7865
5ba3f43e 7866 if (pmap == kernel_pmap) {
5c9f4661
A
7867#if __ARM_KERNEL_PROTECT__
7868 pte |= ARM_PTE_NG;
7869#endif /* __ARM_KERNEL_PROTECT__ */
5ba3f43e
A
7870 if (prot & VM_PROT_WRITE) {
7871 pte |= ARM_PTE_AP(AP_RWNA);
7872 pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
7873 } else {
7874 pte |= ARM_PTE_AP(AP_RONA);
7875 pa_set_bits(pa, PP_ATTR_REFERENCED);
7876 }
0a7de745
A
7877#if (__ARM_VMSA__ == 7)
7878 if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH)) {
5ba3f43e 7879 pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
0a7de745 7880 }
5ba3f43e
A
7881#endif
7882 } else {
cb323159 7883 if (!pmap->nested) {
5ba3f43e
A
7884 pte |= ARM_PTE_NG;
7885 } else if ((pmap->nested_region_asid_bitmap)
0a7de745
A
7886 && (v >= pmap->nested_region_subord_addr)
7887 && (v < (pmap->nested_region_subord_addr + pmap->nested_region_size))) {
cb323159 7888 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));
5ba3f43e
A
7889
7890 if ((pmap->nested_region_asid_bitmap)
0a7de745 7891 && testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
5ba3f43e 7892 pte |= ARM_PTE_NG;
0a7de745 7893 }
5ba3f43e
A
7894 }
7895#if MACH_ASSERT
7896 if (pmap->nested_pmap != NULL) {
7897 vm_map_address_t nest_vaddr;
0a7de745 7898 pt_entry_t *nest_pte_p;
5ba3f43e
A
7899
7900 nest_vaddr = v - pmap->nested_region_grand_addr + pmap->nested_region_subord_addr;
7901
7902 if ((nest_vaddr >= pmap->nested_region_subord_addr)
0a7de745
A
7903 && (nest_vaddr < (pmap->nested_region_subord_addr + pmap->nested_region_size))
7904 && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
7905 && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
cb323159 7906 && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p, nest_pte_p))
0a7de745 7907 && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
cb323159 7908 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));
5ba3f43e
A
7909
7910 if ((pmap->nested_pmap->nested_region_asid_bitmap)
0a7de745 7911 && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
5ba3f43e 7912 panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
0a7de745 7913 nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
5ba3f43e
A
7914 }
7915 }
5ba3f43e
A
7916 }
7917#endif
7918 if (prot & VM_PROT_WRITE) {
5ba3f43e
A
7919 if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
7920 if (fault_type & VM_PROT_WRITE) {
0a7de745 7921 if (set_XO) {
cb323159 7922 pte |= pt_attr_leaf_rwna(pt_attr);
0a7de745 7923 } else {
cb323159 7924 pte |= pt_attr_leaf_rw(pt_attr);
0a7de745 7925 }
5ba3f43e
A
7926 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
7927 } else {
0a7de745 7928 if (set_XO) {
cb323159 7929 pte |= pt_attr_leaf_rona(pt_attr);
0a7de745 7930 } else {
cb323159 7931 pte |= pt_attr_leaf_ro(pt_attr);
0a7de745 7932 }
5ba3f43e 7933 pa_set_bits(pa, PP_ATTR_REFERENCED);
0a7de745 7934 pte_set_was_writeable(pte, true);
5ba3f43e
A
7935 }
7936 } else {
0a7de745 7937 if (set_XO) {
cb323159 7938 pte |= pt_attr_leaf_rwna(pt_attr);
0a7de745 7939 } else {
cb323159 7940 pte |= pt_attr_leaf_rw(pt_attr);
0a7de745 7941 }
5ba3f43e
A
7942 pa_set_bits(pa, PP_ATTR_REFERENCED);
7943 }
7944 } else {
0a7de745 7945 if (set_XO) {
cb323159 7946 pte |= pt_attr_leaf_rona(pt_attr);
0a7de745 7947 } else {
cb323159 7948 pte |= pt_attr_leaf_ro(pt_attr);
0a7de745 7949 }
5ba3f43e
A
7950 pa_set_bits(pa, PP_ATTR_REFERENCED);
7951 }
7952 }
7953
7954 pte |= ARM_PTE_AF;
7955
7956 volatile uint16_t *refcnt = NULL;
a39ff7e2 7957 volatile uint16_t *wiredcnt = NULL;
5ba3f43e 7958 if (pmap != kernel_pmap) {
cb323159
A
7959 refcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
7960 wiredcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
a39ff7e2 7961 /* Bump the wired count to keep the PTE page from being reclaimed. We need this because
5ba3f43e 7962 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
a39ff7e2
A
7963 * a new PV entry. */
7964 if (!wiredcnt_updated) {
7965 OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
7966 wiredcnt_updated = TRUE;
7967 }
5ba3f43e
A
7968 if (!refcnt_updated) {
7969 OSAddAtomic16(1, (volatile int16_t*)refcnt);
7970 refcnt_updated = TRUE;
7971 }
7972 }
7973
7974 if (pa_valid(pa)) {
d9a64523
A
7975 int pai;
7976 boolean_t is_altacct, is_internal;
5ba3f43e
A
7977
7978 is_internal = FALSE;
7979 is_altacct = FALSE;
7980
7981 pai = (int)pa_index(pa);
5ba3f43e
A
7982
7983 LOCK_PVH(pai);
0a7de745 7984
5ba3f43e 7985Pmap_enter_loop:
0a7de745 7986 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
d9a64523 7987 wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
0a7de745 7988 } else {
d9a64523 7989 wimg_bits = pmap_cache_attributes(pn);
0a7de745 7990 }
d9a64523
A
7991
7992 /* We may be retrying this operation after dropping the PVH lock.
7993 * Cache attributes for the physical page may have changed while the lock
7994 * was dropped, so clear any cache attributes we may have previously set
7995 * in the PTE template. */
7996 pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
cb323159 7997 pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
d9a64523 7998
c6bf4f31
A
7999#if XNU_MONITOR
8000 /* The regular old kernel is not allowed to remap PPL pages. */
8001 if (pa_test_monitor(pa)) {
8002 panic("%s: page belongs to PPL, "
8003 "pmap=%p, v=0x%llx, pn=%u, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
8004 __FUNCTION__,
8005 pmap, v, pn, prot, fault_type, flags, wired, options);
8006 }
8007
8008 if (pvh_get_flags(pai_to_pvh(pai)) & PVH_FLAG_LOCKDOWN) {
8009 panic("%s: page locked down, "
8010 "pmap=%p, v=0x%llx, pn=%u, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
8011 __FUNCTION__,
8012 pmap, v, pn, prot, fault_type, flags, wired, options);
8013 }
8014#endif
d9a64523 8015
5ba3f43e
A
8016
8017 if (pte == *pte_p) {
8018 /*
 8019 * This pmap_enter operation has been completed by another thread;
 8020 * undo the refcnt on the page table and return.
8021 */
5ba3f43e 8022 UNLOCK_PVH(pai);
d9a64523 8023 goto Pmap_enter_cleanup;
5ba3f43e 8024 } else if (pte_to_pa(*pte_p) == pa) {
5ba3f43e
A
8025 pmap_enter_pte(pmap, pte_p, pte, v);
8026 UNLOCK_PVH(pai);
d9a64523 8027 goto Pmap_enter_cleanup;
5ba3f43e
A
8028 } else if (*pte_p != ARM_PTE_TYPE_FAULT) {
8029 /*
 8030 * The PTE has been modified by another thread;
 8031 * hold the refcnt on the page table and retry the pmap_enter operation.
8032 */
8033 UNLOCK_PVH(pai);
8034 goto Pmap_enter_retry;
8035 }
0a7de745 8036 if (!pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct)) {
d9a64523 8037 goto Pmap_enter_loop;
0a7de745 8038 }
5ba3f43e
A
8039
8040 pmap_enter_pte(pmap, pte_p, pte, v);
8041
8042 if (pmap != kernel_pmap) {
8043 if (IS_REUSABLE_PAGE(pai) &&
8044 !is_altacct) {
8045 assert(IS_INTERNAL_PAGE(pai));
8046 OSAddAtomic(+1, &pmap->stats.reusable);
8047 PMAP_STATS_PEAK(pmap->stats.reusable);
8048 } else if (IS_INTERNAL_PAGE(pai)) {
8049 OSAddAtomic(+1, &pmap->stats.internal);
8050 PMAP_STATS_PEAK(pmap->stats.internal);
8051 is_internal = TRUE;
8052 } else {
8053 OSAddAtomic(+1, &pmap->stats.external);
8054 PMAP_STATS_PEAK(pmap->stats.external);
8055 }
8056 }
8057
8058 UNLOCK_PVH(pai);
8059
8060 if (pmap != kernel_pmap) {
8061 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
8062
8063 if (is_internal) {
8064 /*
8065 * Make corresponding adjustments to
8066 * phys_footprint statistics.
8067 */
8068 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
8069 if (is_altacct) {
8070 /*
8071 * If this page is internal and
8072 * in an IOKit region, credit
8073 * the task's total count of
8074 * dirty, internal IOKit pages.
8075 * It should *not* count towards
8076 * the task's total physical
8077 * memory footprint, because
8078 * this entire region was
8079 * already billed to the task
8080 * at the time the mapping was
8081 * created.
8082 *
8083 * Put another way, this is
8084 * internal++ and
8085 * alternate_accounting++, so
8086 * net effect on phys_footprint
8087 * is 0. That means: don't
8088 * touch phys_footprint here.
8089 */
8090 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
0a7de745 8091 } else {
5ba3f43e
A
8092 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
8093 }
8094 }
8095 }
8096
8097 OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
0a7de745 8098 if (pmap->stats.resident_count > pmap->stats.resident_max) {
5ba3f43e 8099 pmap->stats.resident_max = pmap->stats.resident_count;
0a7de745 8100 }
5ba3f43e 8101 } else {
d9a64523
A
8102 if (prot & VM_PROT_EXECUTE) {
8103 kr = KERN_FAILURE;
8104 goto Pmap_enter_cleanup;
8105 }
8106
8107 wimg_bits = pmap_cache_attributes(pn);
0a7de745 8108 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
d9a64523 8109 wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
0a7de745 8110 }
d9a64523 8111
cb323159 8112 pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
d9a64523 8113
c6bf4f31
A
8114#if XNU_MONITOR
8115 if (!pmap_ppl_disable && (wimg_bits & PP_ATTR_MONITOR)) {
8116 uint64_t xprr_perm = pte_to_xprr_perm(pte);
c6bf4f31
A
8117 switch (xprr_perm) {
8118 case XPRR_KERN_RO_PERM:
c6bf4f31
A
8119 break;
8120 case XPRR_KERN_RW_PERM:
ea3f0419 8121 pte &= ~ARM_PTE_XPRR_MASK;
c6bf4f31
A
8122 pte |= xprr_perm_to_pte(XPRR_PPL_RW_PERM);
8123 break;
8124 default:
8125 panic("Unsupported xPRR perm %llu for pte 0x%llx", xprr_perm, (uint64_t)pte);
8126 }
8127 }
8128#endif
5ba3f43e
A
8129 pmap_enter_pte(pmap, pte_p, pte, v);
8130 }
8131
d9a64523
A
8132 goto Pmap_enter_return;
8133
8134Pmap_enter_cleanup:
8135
8136 if (refcnt != NULL) {
8137 assert(refcnt_updated);
0a7de745 8138 if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
d9a64523 8139 panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
0a7de745 8140 }
d9a64523
A
8141 }
8142
5ba3f43e
A
8143Pmap_enter_return:
8144
8145#if CONFIG_PGTRACE
a39ff7e2
A
8146 if (pgtrace_enabled) {
8147 // Clone and invalidate original mapping if eligible
8148 for (int i = 0; i < PAGE_RATIO; i++) {
0a7de745 8149 pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES * i, 0, 0);
a39ff7e2
A
8150 }
8151 }
5ba3f43e
A
8152#endif
8153
0a7de745 8154 if (pve_p != PV_ENTRY_NULL) {
5ba3f43e 8155 pv_free(pve_p);
0a7de745 8156 }
5ba3f43e 8157
0a7de745 8158 if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
a39ff7e2 8159 panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
0a7de745 8160 }
a39ff7e2 8161
5ba3f43e
A
8162 PMAP_UNLOCK(pmap);
8163
d9a64523 8164 return kr;
5ba3f43e
A
8165}
8166
8167kern_return_t
8168pmap_enter_options(
8169 pmap_t pmap,
8170 vm_map_address_t v,
8171 ppnum_t pn,
8172 vm_prot_t prot,
8173 vm_prot_t fault_type,
8174 unsigned int flags,
8175 boolean_t wired,
8176 unsigned int options,
0a7de745 8177 __unused void *arg)
5ba3f43e
A
8178{
8179 kern_return_t kr = KERN_FAILURE;
8180
d9a64523 8181 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
0a7de745 8182 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);
5ba3f43e 8183
c6bf4f31
A
8184#if XNU_MONITOR
8185 if (options & PMAP_OPTIONS_NOWAIT) {
8186 /* If NOWAIT was requested, just return the result. */
8187 kr = pmap_enter_options_ppl(pmap, v, pn, prot, fault_type, flags, wired, options);
8188 } else {
8189 /*
8190 * If NOWAIT was not requested, loop until the enter does not
8191 * fail due to lack of resources.
8192 */
8193 while ((kr = pmap_enter_options_ppl(pmap, v, pn, prot, fault_type, flags, wired, options | PMAP_OPTIONS_NOWAIT)) == KERN_RESOURCE_SHORTAGE) {
8194 pv_water_mark_check();
8195 pmap_alloc_page_for_ppl();
8196 }
8197 }
8198
8199 pmap_ledger_check_balance(pmap);
8200#else
5ba3f43e 8201 kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);
c6bf4f31 8202#endif
d9a64523 8203 pv_water_mark_check();
5ba3f43e 8204
d9a64523 8205 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
5ba3f43e
A
8206
8207 return kr;
8208}
8209
8210/*
8211 * Routine: pmap_change_wiring
8212 * Function: Change the wiring attribute for a map/virtual-address
8213 * pair.
8214 * In/out conditions:
8215 * The mapping must already exist in the pmap.
8216 */
d9a64523 8217MARK_AS_PMAP_TEXT static void
5ba3f43e
A
8218pmap_change_wiring_internal(
8219 pmap_t pmap,
8220 vm_map_address_t v,
8221 boolean_t wired)
8222{
8223 pt_entry_t *pte_p;
8224 pmap_paddr_t pa;
8225
8226 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
8227 * wired memory statistics for user pmaps, but kernel PTEs are assumed
8228 * to be wired in nearly all cases. For VM layer functionality, the wired
8229 * count in vm_page_t is sufficient. */
8230 if (pmap == kernel_pmap) {
8231 return;
8232 }
d9a64523 8233 VALIDATE_USER_PMAP(pmap);
5ba3f43e
A
8234
8235 PMAP_LOCK(pmap);
8236 pte_p = pmap_pte(pmap, v);
8237 assert(pte_p != PT_ENTRY_NULL);
8238 pa = pte_to_pa(*pte_p);
cb323159
A
8239
8240 while (pa_valid(pa)) {
8241 pmap_paddr_t new_pa;
8242
5ba3f43e 8243 LOCK_PVH((int)pa_index(pa));
cb323159
A
8244 new_pa = pte_to_pa(*pte_p);
8245
8246 if (pa == new_pa) {
8247 break;
8248 }
8249
8250 UNLOCK_PVH((int)pa_index(pa));
8251 pa = new_pa;
0a7de745 8252 }
5ba3f43e
A
8253
8254 if (wired && !pte_is_wired(*pte_p)) {
8255 pte_set_wired(pte_p, wired);
8256 OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
8257 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
8258 } else if (!wired && pte_is_wired(*pte_p)) {
0a7de745 8259 PMAP_STATS_ASSERTF(pmap->stats.wired_count >= 1, pmap, "stats.wired_count %d", pmap->stats.wired_count);
5ba3f43e
A
8260 pte_set_wired(pte_p, wired);
8261 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
8262 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
8263 }
8264
0a7de745 8265 if (pa_valid(pa)) {
5ba3f43e 8266 UNLOCK_PVH((int)pa_index(pa));
0a7de745 8267 }
5ba3f43e
A
8268
8269 PMAP_UNLOCK(pmap);
8270}
8271
8272void
8273pmap_change_wiring(
8274 pmap_t pmap,
8275 vm_map_address_t v,
8276 boolean_t wired)
8277{
c6bf4f31
A
8278#if XNU_MONITOR
8279 pmap_change_wiring_ppl(pmap, v, wired);
8280
8281 pmap_ledger_check_balance(pmap);
8282#else
5ba3f43e 8283 pmap_change_wiring_internal(pmap, v, wired);
c6bf4f31 8284#endif
5ba3f43e
A
8285}
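/*
 * Illustrative sketch (not compiled; 'upmap' and 'va' are placeholders):
 * the VM layer calls this after the mapping already exists, when the page
 * is wired or unwired. The call is a no-op for the kernel pmap, since
 * kernel PTEs are treated as wired (see pmap_change_wiring_internal above).
 *
 *	pmap_change_wiring(upmap, va, TRUE);	// wire an existing mapping
 *	pmap_change_wiring(upmap, va, FALSE);	// unwire it again
 */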
8286
d9a64523 8287MARK_AS_PMAP_TEXT static ppnum_t
5ba3f43e
A
8288pmap_find_phys_internal(
8289 pmap_t pmap,
8290 addr64_t va)
8291{
0a7de745 8292 ppnum_t ppn = 0;
5ba3f43e 8293
d9a64523
A
8294 VALIDATE_PMAP(pmap);
8295
5ba3f43e
A
8296 if (pmap != kernel_pmap) {
8297 PMAP_LOCK(pmap);
8298 }
8299
8300 ppn = pmap_vtophys(pmap, va);
8301
8302 if (pmap != kernel_pmap) {
8303 PMAP_UNLOCK(pmap);
8304 }
8305
8306 return ppn;
8307}
8308
8309ppnum_t
8310pmap_find_phys(
8311 pmap_t pmap,
8312 addr64_t va)
8313{
0a7de745 8314 pmap_paddr_t pa = 0;
5ba3f43e 8315
0a7de745 8316 if (pmap == kernel_pmap) {
5ba3f43e 8317 pa = mmu_kvtop(va);
0a7de745 8318 } else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
5ba3f43e 8319 pa = mmu_uvtop(va);
0a7de745 8320 }
5ba3f43e 8321
0a7de745
A
8322 if (pa) {
8323 return (ppnum_t)(pa >> PAGE_SHIFT);
8324 }
5ba3f43e
A
8325
8326 if (not_in_kdp) {
c6bf4f31
A
8327#if XNU_MONITOR
8328 return pmap_find_phys_ppl(pmap, va);
8329#else
5ba3f43e 8330 return pmap_find_phys_internal(pmap, va);
c6bf4f31 8331#endif
5ba3f43e
A
8332 } else {
8333 return pmap_vtophys(pmap, va);
8334 }
8335}
8336
8337pmap_paddr_t
8338kvtophys(
8339 vm_offset_t va)
8340{
8341 pmap_paddr_t pa;
8342
8343 pa = mmu_kvtop(va);
0a7de745
A
8344 if (pa) {
8345 return pa;
8346 }
5ba3f43e 8347 pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va)) << PAGE_SHIFT;
0a7de745 8348 if (pa) {
5ba3f43e 8349 pa |= (va & PAGE_MASK);
0a7de745 8350 }
5ba3f43e 8351
0a7de745 8352 return (pmap_paddr_t)pa;
5ba3f43e
A
8353}
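/*
 * Illustrative sketch (not compiled): kvtophys() returns a byte-accurate
 * physical address by re-attaching the in-page offset that the page-number
 * based pmap_vtophys() path discards. For example, with 16KB pages
 * (PAGE_MASK == 0x3FFF):
 *
 *	va = 0xFFFFFFF007654321
 *	pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va) << PAGE_SHIFT)
 *	   | (va & PAGE_MASK)			// low bits: 0x0321
 *
 * A return value of zero means the address is not currently mapped.
 */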
8354
8355ppnum_t
8356pmap_vtophys(
8357 pmap_t pmap,
8358 addr64_t va)
8359{
8360 if ((va < pmap->min) || (va >= pmap->max)) {
8361 return 0;
8362 }
8363
0a7de745 8364#if (__ARM_VMSA__ == 7)
5ba3f43e
A
8365 tt_entry_t *tte_p, tte;
8366 pt_entry_t *pte_p;
8367 ppnum_t ppn;
8368
8369 tte_p = pmap_tte(pmap, va);
0a7de745 8370 if (tte_p == (tt_entry_t *) NULL) {
5ba3f43e 8371 return (ppnum_t) 0;
0a7de745 8372 }
5ba3f43e
A
8373
8374 tte = *tte_p;
8375 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
8376 pte_p = (pt_entry_t *) ttetokv(tte) + ptenum(va);
8377 ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
8378#if DEVELOPMENT || DEBUG
8379 if (ppn != 0 &&
cb323159 8380 ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
5ba3f43e 8381 panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
0a7de745 8382 pmap, va, pte_p, (uint64_t) (*pte_p), ppn);
5ba3f43e
A
8383 }
8384#endif /* DEVELOPMENT || DEBUG */
0a7de745
A
8385 } else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
8386 if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER) {
5ba3f43e 8387 ppn = (ppnum_t) atop(suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK));
0a7de745 8388 } else {
5ba3f43e 8389 ppn = (ppnum_t) atop(sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK));
0a7de745
A
8390 }
8391 } else {
5ba3f43e 8392 ppn = 0;
0a7de745 8393 }
5ba3f43e 8394#else
0a7de745
A
8395 tt_entry_t *ttp;
8396 tt_entry_t tte;
8397 ppnum_t ppn = 0;
5ba3f43e 8398
cb323159
A
8399 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
8400
5ba3f43e
A
8401 /* Level 0 currently unused */
8402
5ba3f43e
A
8403 /* Get first-level (1GB) entry */
8404 ttp = pmap_tt1e(pmap, va);
8405 tte = *ttp;
0a7de745
A
8406 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
8407 return ppn;
8408 }
5ba3f43e 8409
cb323159
A
8410 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, va)];
8411
0a7de745
A
8412 if ((tte & ARM_TTE_VALID) != (ARM_TTE_VALID)) {
8413 return ppn;
8414 }
5ba3f43e
A
8415
8416 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
0a7de745
A
8417 ppn = (ppnum_t) atop((tte & ARM_TTE_BLOCK_L2_MASK) | (va & ARM_TT_L2_OFFMASK));
8418 return ppn;
5ba3f43e 8419 }
cb323159 8420 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, va)];
0a7de745 8421 ppn = (ppnum_t) atop((tte & ARM_PTE_MASK) | (va & ARM_TT_L3_OFFMASK));
5ba3f43e
A
8422#endif
8423
8424 return ppn;
8425}
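/*
 * Illustrative sketch (not compiled): the walk above peels one level of
 * the translation table per step. Assuming a 4KB granule, the indices it
 * consumes from the virtual address look like:
 *
 *	L1 index = (va >> 30) & 0x1FF	// 1GB per entry
 *	L2 index = (va >> 21) & 0x1FF	// 2MB per entry (block or table)
 *	L3 index = (va >> 12) & 0x1FF	// 4KB per entry (page)
 *
 * The real shifts and masks come from the pmap's pt_attr via
 * tt2_index()/tt3_index(), so a 16KB-granule configuration uses different
 * constants.
 */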
8426
d9a64523 8427MARK_AS_PMAP_TEXT static vm_offset_t
5ba3f43e
A
8428pmap_extract_internal(
8429 pmap_t pmap,
8430 vm_map_address_t va)
8431{
0a7de745
A
8432 pmap_paddr_t pa = 0;
8433 ppnum_t ppn = 0;
5ba3f43e
A
8434
8435 if (pmap == NULL) {
8436 return 0;
8437 }
8438
d9a64523
A
8439 VALIDATE_PMAP(pmap);
8440
5ba3f43e
A
8441 PMAP_LOCK(pmap);
8442
8443 ppn = pmap_vtophys(pmap, va);
8444
0a7de745
A
8445 if (ppn != 0) {
8446 pa = ptoa(ppn) | ((va) & PAGE_MASK);
8447 }
5ba3f43e
A
8448
8449 PMAP_UNLOCK(pmap);
8450
8451 return pa;
8452}
8453
8454/*
8455 * Routine: pmap_extract
8456 * Function:
8457 * Extract the physical page address associated
8458 * with the given map/virtual_address pair.
8459 *
8460 */
8461vm_offset_t
8462pmap_extract(
8463 pmap_t pmap,
8464 vm_map_address_t va)
8465{
0a7de745 8466 pmap_paddr_t pa = 0;
5ba3f43e 8467
0a7de745 8468 if (pmap == kernel_pmap) {
5ba3f43e 8469 pa = mmu_kvtop(va);
0a7de745 8470 } else if (pmap == vm_map_pmap(current_thread()->map)) {
5ba3f43e 8471 pa = mmu_uvtop(va);
0a7de745 8472 }
5ba3f43e 8473
0a7de745
A
8474 if (pa) {
8475 return pa;
8476 }
5ba3f43e 8477
c6bf4f31
A
8478#if XNU_MONITOR
8479 return pmap_extract_ppl(pmap, va);
8480#else
5ba3f43e 8481 return pmap_extract_internal(pmap, va);
c6bf4f31 8482#endif
5ba3f43e
A
8483}
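/*
 * Illustrative sketch (not compiled): pmap_find_phys() and pmap_extract()
 * answer the same question at different granularities.
 *
 *	ppnum_t     pn = pmap_find_phys(pmap, va);	// page number, 0 if unmapped
 *	vm_offset_t pa = pmap_extract(pmap, va);	// full PA, page offset kept
 *
 *	assert(pn == 0 || (ppnum_t)atop(pa) == pn);
 */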
8484
8485/*
8486 * pmap_init_pte_page - Initialize a page table page.
8487 */
8488void
8489pmap_init_pte_page(
8490 pmap_t pmap,
8491 pt_entry_t *pte_p,
8492 vm_offset_t va,
8493 unsigned int ttlevel,
cb323159
A
8494 boolean_t alloc_ptd,
8495 boolean_t clear)
5ba3f43e 8496{
d9a64523
A
8497 pt_desc_t *ptdp = NULL;
8498 vm_offset_t *pvh;
5ba3f43e 8499
d9a64523 8500 pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));
5ba3f43e 8501
d9a64523 8502 if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
5ba3f43e
A
8503 if (alloc_ptd) {
8504 /*
8505 * This path should only be invoked from arm_vm_init. If we are emulating 16KB pages
8506 * on 4KB hardware, we may already have allocated a page table descriptor for a
8507 * bootstrap request, so we check for an existing PTD here.
8508 */
0a7de745 8509 ptdp = ptd_alloc(pmap, true);
d9a64523 8510 pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
5ba3f43e 8511 } else {
d9a64523 8512 panic("pmap_init_pte_page(): pte_p %p", pte_p);
5ba3f43e 8513 }
d9a64523
A
8514 } else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
8515 ptdp = (pt_desc_t*)(pvh_list(pvh));
8516 } else {
8517 panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
5ba3f43e
A
8518 }
8519
cb323159
A
8520 if (clear) {
8521 bzero(pte_p, ARM_PGBYTES);
 8522 // The barrier below ensures the page zeroing is visible to the PTW
 8523 // before the page is linked into the PTE of the previous level.
8524 __builtin_arm_dmb(DMB_ISHST);
8525 }
d9a64523 8526 ptd_init(ptdp, pmap, va, ttlevel, pte_p);
5ba3f43e
A
8527}
8528
5ba3f43e
A
8529/*
8530 * Routine: pmap_expand
8531 *
8532 * Expands a pmap to be able to map the specified virtual address.
8533 *
8534 * Allocates new memory for the default (COARSE) translation table
8535 * entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
8536 * also allocates space for the corresponding pv entries.
8537 *
8538 * Nothing should be locked.
8539 */
8540static kern_return_t
8541pmap_expand(
8542 pmap_t pmap,
8543 vm_map_address_t v,
8544 unsigned int options,
8545 unsigned int level)
8546{
cb323159
A
8547 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
8548
0a7de745 8549#if (__ARM_VMSA__ == 7)
5ba3f43e 8550 vm_offset_t pa;
0a7de745
A
8551 tt_entry_t *tte_p;
8552 tt_entry_t *tt_p;
8553 unsigned int i;
5ba3f43e 8554
94ff46dc
A
8555#if DEVELOPMENT || DEBUG
8556 /*
8557 * We no longer support root level expansion; panic in case something
8558 * still attempts to trigger it.
8559 */
8560 i = tte_index(pmap, pt_attr, v);
d9a64523 8561
94ff46dc
A
8562 if (i >= pmap->tte_index_max) {
8563 panic("%s: index out of range, index=%u, max=%u, "
8564 "pmap=%p, addr=%p, options=%u, level=%u",
8565 __func__, i, pmap->tte_index_max,
8566 pmap, (void *)v, options, level);
5ba3f43e 8567 }
94ff46dc 8568#endif /* DEVELOPMENT || DEBUG */
5ba3f43e 8569
0a7de745
A
8570 if (level == 1) {
8571 return KERN_SUCCESS;
8572 }
5ba3f43e
A
8573
8574 {
8575 tt_entry_t *tte_next_p;
8576
8577 PMAP_LOCK(pmap);
8578 pa = 0;
8579 if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
8580 PMAP_UNLOCK(pmap);
0a7de745 8581 return KERN_SUCCESS;
5ba3f43e
A
8582 }
8583 tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
0a7de745 8584 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
5ba3f43e
A
8585 if (tte_to_pa(*tte_next_p)) {
8586 pa = tte_to_pa(*tte_next_p);
8587 break;
8588 }
8589 tte_next_p++;
8590 }
8591 pa = pa & ~PAGE_MASK;
8592 if (pa) {
8593 tte_p = &pmap->tte[ttenum(v)];
8594 *tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
d9a64523
A
8595 FLUSH_PTE(tte_p);
8596 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
8597 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
5ba3f43e 8598 PMAP_UNLOCK(pmap);
0a7de745 8599 return KERN_SUCCESS;
5ba3f43e
A
8600 }
8601 PMAP_UNLOCK(pmap);
8602 }
8603 v = v & ~ARM_TT_L1_PT_OFFMASK;
8604
8605
8606 while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
8607 /*
8608 * Allocate a VM page for the level 2 page table entries.
8609 */
8610 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
0a7de745 8611 if (options & PMAP_OPTIONS_NOWAIT) {
5ba3f43e
A
8612 return KERN_RESOURCE_SHORTAGE;
8613 }
8614 VM_PAGE_WAIT();
8615 }
8616
8617 PMAP_LOCK(pmap);
8618 /*
8619 * See if someone else expanded us first
8620 */
8621 if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
8622 tt_entry_t *tte_next_p;
8623
cb323159 8624 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE, TRUE);
5ba3f43e 8625 pa = kvtophys((vm_offset_t)tt_p);
5ba3f43e 8626 tte_p = &pmap->tte[ttenum(v)];
0a7de745 8627 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
5ba3f43e 8628 *tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
d9a64523
A
8629 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
8630 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
5ba3f43e 8631 tte_next_p++;
0a7de745 8632 pa = pa + 0x400;
5ba3f43e 8633 }
d9a64523
A
8634 FLUSH_PTE_RANGE(tte_p, tte_p + 4);
8635
5ba3f43e
A
8636 pa = 0x0ULL;
8637 tt_p = (tt_entry_t *)NULL;
8638 }
8639 PMAP_UNLOCK(pmap);
8640 if (tt_p != (tt_entry_t *)NULL) {
8641 pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
8642 tt_p = (tt_entry_t *)NULL;
8643 }
8644 }
0a7de745 8645 return KERN_SUCCESS;
5ba3f43e 8646#else
0a7de745 8647 pmap_paddr_t pa;
cb323159 8648 unsigned int ttlevel = pt_attr_root_level(pt_attr);
0a7de745
A
8649 tt_entry_t *tte_p;
8650 tt_entry_t *tt_p;
5ba3f43e
A
8651
8652 pa = 0x0ULL;
8653 tt_p = (tt_entry_t *)NULL;
8654
8655 for (; ttlevel < level; ttlevel++) {
5ba3f43e
A
8656 PMAP_LOCK(pmap);
8657
cb323159
A
8658 if (pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL) {
8659 PMAP_UNLOCK(pmap);
8660 while (pmap_tt_allocate(pmap, &tt_p, ttlevel + 1, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
8661 if (options & PMAP_OPTIONS_NOWAIT) {
8662 return KERN_RESOURCE_SHORTAGE;
5ba3f43e 8663 }
c6bf4f31
A
8664#if XNU_MONITOR
8665 panic("%s: failed to allocate tt, "
8666 "pmap=%p, v=%p, options=0x%x, level=%u",
8667 __FUNCTION__,
8668 pmap, (void *)v, options, level);
8669#else
cb323159 8670 VM_PAGE_WAIT();
c6bf4f31 8671#endif
5ba3f43e 8672 }
cb323159
A
8673 PMAP_LOCK(pmap);
8674 if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
8675 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE, TRUE);
8676 pa = kvtophys((vm_offset_t)tt_p);
8677 tte_p = pmap_ttne(pmap, ttlevel, v);
8678 *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
8679 PMAP_TRACE(ttlevel + 1, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
8680 VM_KERNEL_ADDRHIDE((v & ~pt_attr_ln_offmask(pt_attr, ttlevel)) + pt_attr_ln_size(pt_attr, ttlevel)), *tte_p);
8681 pa = 0x0ULL;
8682 tt_p = (tt_entry_t *)NULL;
5ba3f43e
A
8683 }
8684 }
8685
8686 PMAP_UNLOCK(pmap);
8687
8688 if (tt_p != (tt_entry_t *)NULL) {
0a7de745 8689 pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
5ba3f43e
A
8690 tt_p = (tt_entry_t *)NULL;
8691 }
8692 }
8693
0a7de745 8694 return KERN_SUCCESS;
5ba3f43e
A
8695#endif
8696}
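/*
 * Illustrative sketch (not compiled): the expected caller pattern, as used
 * by pmap_enter_options_internal() above. The pmap lock is dropped around
 * the expansion and the PTE lookup is repeated afterwards, because another
 * thread may have expanded the same region in the meantime:
 *
 *	while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
 *		PMAP_UNLOCK(pmap);
 *		kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);
 *		if (kr != KERN_SUCCESS) {
 *			return kr;
 *		}
 *		PMAP_LOCK(pmap);
 *	}
 */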
8697
8698/*
8699 * Routine: pmap_collect
8700 * Function:
8701 * Garbage collects the physical map system for
8702 * pages which are no longer used.
8703 * Success need not be guaranteed -- that is, there
8704 * may well be pages which are not referenced, but
8705 * others may be collected.
8706 */
8707void
8708pmap_collect(pmap_t pmap)
8709{
0a7de745 8710 if (pmap == PMAP_NULL) {
5ba3f43e 8711 return;
0a7de745 8712 }
5ba3f43e
A
8713
8714#if 0
8715 PMAP_LOCK(pmap);
8716 if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
8717 /* TODO: Scan for vm page assigned to top level page tables with no reference */
8718 }
8719 PMAP_UNLOCK(pmap);
8720#endif
8721
8722 return;
8723}
8724
8725/*
8726 * Routine: pmap_gc
8727 * Function:
0a7de745 8728 * Pmap garbage collection
5ba3f43e
A
8729 * Called by the pageout daemon when pages are scarce.
8730 *
8731 */
8732void
8733pmap_gc(
8734 void)
8735{
c6bf4f31
A
8736#if XNU_MONITOR
8737 /*
8738 * We cannot invoke the scheduler from the PPL, so for now we elide the
8739 * GC logic if the PPL is enabled.
8740 */
8741#endif
8742#if !XNU_MONITOR
0a7de745
A
8743 pmap_t pmap, pmap_next;
8744 boolean_t gc_wait;
5ba3f43e
A
8745
8746 if (pmap_gc_allowed &&
8747 (pmap_gc_allowed_by_time_throttle ||
0a7de745 8748 pmap_gc_forced)) {
5ba3f43e
A
8749 pmap_gc_forced = FALSE;
8750 pmap_gc_allowed_by_time_throttle = FALSE;
d9a64523 8751 pmap_simple_lock(&pmaps_lock);
5ba3f43e
A
8752 pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
8753 while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
0a7de745 8754 if (!(pmap->gc_status & PMAP_GC_INFLIGHT)) {
5ba3f43e 8755 pmap->gc_status |= PMAP_GC_INFLIGHT;
0a7de745 8756 }
d9a64523 8757 pmap_simple_unlock(&pmaps_lock);
5ba3f43e
A
8758
8759 pmap_collect(pmap);
8760
d9a64523 8761 pmap_simple_lock(&pmaps_lock);
5ba3f43e 8762 gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
0a7de745 8763 pmap->gc_status &= ~(PMAP_GC_INFLIGHT | PMAP_GC_WAIT);
5ba3f43e
A
8764 pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
8765 if (gc_wait) {
0a7de745 8766 if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next)) {
5ba3f43e 8767 pmap_next->gc_status |= PMAP_GC_INFLIGHT;
0a7de745 8768 }
d9a64523 8769 pmap_simple_unlock(&pmaps_lock);
0a7de745 8770 thread_wakeup((event_t) &pmap->gc_status);
d9a64523 8771 pmap_simple_lock(&pmaps_lock);
5ba3f43e
A
8772 }
8773 pmap = pmap_next;
8774 }
d9a64523 8775 pmap_simple_unlock(&pmaps_lock);
5ba3f43e 8776 }
c6bf4f31 8777#endif
5ba3f43e
A
8778}
8779
8780/*
8781 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
8782 */
d9a64523 8783uint64_t
5ba3f43e
A
8784pmap_release_pages_fast(void)
8785{
c6bf4f31
A
8786#if XNU_MONITOR
8787 return pmap_release_ppl_pages_to_kernel();
8788#else /* XNU_MONITOR */
d9a64523 8789 return 0;
c6bf4f31 8790#endif
5ba3f43e
A
8791}
8792
8793/*
8794 * By default, don't attempt pmap GC more frequently
8795 * than once per minute.
8796 */
8797
8798void
8799compute_pmap_gc_throttle(
8800 void *arg __unused)
8801{
8802 pmap_gc_allowed_by_time_throttle = TRUE;
8803}
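
/*
 * Illustrative sketch, not part of the original source: pmap_gc() above only
 * does work when pmap_gc_allowed is set and either the periodic throttle has
 * re-armed (compute_pmap_gc_throttle()) or a caller forces a pass.  A
 * hypothetical low-memory path could therefore request an immediate pass:
 */
#if 0	/* illustrative only */
static void
example_force_pmap_gc(void)
{
	pmap_gc_forced = TRUE;	/* bypass the once-per-minute throttle */
	pmap_gc();
}
#endif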
8804
8805/*
8806 * pmap_attribute_cache_sync(ppnum_t pp, vm_size_t size, ...)
8807 *
8808 * Invalidates all of the instruction cache on a physical page and
8809 * pushes any dirty data from the data cache for the same physical page
8810 */
8811
8812kern_return_t
8813pmap_attribute_cache_sync(
8814 ppnum_t pp,
8815 vm_size_t size,
8816 __unused vm_machine_attribute_t attribute,
8817 __unused vm_machine_attribute_val_t * value)
8818{
8819 if (size > PAGE_SIZE) {
8820 panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
0a7de745 8821 } else {
5ba3f43e 8822 cache_sync_page(pp);
0a7de745 8823 }
5ba3f43e
A
8824
8825 return KERN_SUCCESS;
8826}
8827
8828/*
8829 * pmap_sync_page_data_phys(ppnum_t pp)
8830 *
8831 * Invalidates all of the instruction cache on a physical page and
8832 * pushes any dirty data from the data cache for the same physical page
8833 */
8834void
8835pmap_sync_page_data_phys(
8836 ppnum_t pp)
8837{
8838 cache_sync_page(pp);
8839}
8840
8841/*
8842 * pmap_sync_page_attributes_phys(ppnum_t pp)
8843 *
8844 * Write back and invalidate all cachelines on a physical page.
8845 */
8846void
8847pmap_sync_page_attributes_phys(
8848 ppnum_t pp)
8849{
8850 flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
8851}
8852
8853#if CONFIG_COREDUMP
8854/* temporary workaround */
8855boolean_t
8856coredumpok(
8857 vm_map_t map,
8858 vm_offset_t va)
8859{
8860 pt_entry_t *pte_p;
8861 pt_entry_t spte;
8862
8863 pte_p = pmap_pte(map->pmap, va);
0a7de745 8864 if (0 == pte_p) {
5ba3f43e 8865 return FALSE;
0a7de745 8866 }
5ba3f43e 8867 spte = *pte_p;
0a7de745 8868 return (spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
5ba3f43e
A
8869}
8870#endif
8871
8872void
8873fillPage(
8874 ppnum_t pn,
8875 unsigned int fill)
8876{
8877 unsigned int *addr;
8878 int count;
8879
8880 addr = (unsigned int *) phystokv(ptoa(pn));
8881 count = PAGE_SIZE / sizeof(unsigned int);
0a7de745 8882 while (count--) {
5ba3f43e 8883 *addr++ = fill;
0a7de745 8884 }
5ba3f43e
A
8885}
8886
8887extern void mapping_set_mod(ppnum_t pn);
8888
8889void
8890mapping_set_mod(
8891 ppnum_t pn)
8892{
8893 pmap_set_modify(pn);
8894}
8895
8896extern void mapping_set_ref(ppnum_t pn);
8897
8898void
8899mapping_set_ref(
8900 ppnum_t pn)
8901{
8902 pmap_set_reference(pn);
8903}
8904
8905/*
d9a64523 8906 * Clear specified attribute bits.
5ba3f43e 8907 *
d9a64523
A
8908 * Try to force an arm_fast_fault() for all mappings of
8909 * the page - to force attributes to be set again at fault time.
8910 * If the forcing succeeds, clear the cached bits at the head.
8911 * Otherwise, something must have been wired, so leave the cached
8912 * attributes alone.
5ba3f43e 8913 */
d9a64523 8914MARK_AS_PMAP_TEXT static void
5ba3f43e 8915phys_attribute_clear_internal(
0a7de745
A
8916 ppnum_t pn,
8917 unsigned int bits,
8918 int options,
8919 void *arg)
5ba3f43e
A
8920{
8921 pmap_paddr_t pa = ptoa(pn);
8922 vm_prot_t allow_mode = VM_PROT_ALL;
8923
c6bf4f31
A
8924#if XNU_MONITOR
8925 if (bits & PP_ATTR_PPL_OWNED_BITS) {
8926 panic("%s: illegal request, "
8927 "pn=%u, bits=%#x, options=%#x, arg=%p",
8928 __FUNCTION__,
8929 pn, bits, options, arg);
8930 }
8931#endif
5ba3f43e
A
8932
8933 if ((bits & PP_ATTR_MODIFIED) &&
8934 (options & PMAP_OPTIONS_NOFLUSH) &&
8935 (arg == NULL)) {
8936 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
0a7de745
A
8937 "should not clear 'modified' without flushing TLBs\n",
8938 pn, bits, options, arg);
5ba3f43e
A
8939 }
8940
8941 assert(pn != vm_page_fictitious_addr);
d9a64523
A
8942
8943 if (options & PMAP_OPTIONS_CLEAR_WRITE) {
8944 assert(bits == PP_ATTR_MODIFIED);
0a7de745 8945
d9a64523
A
8946 pmap_page_protect_options_internal(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0);
8947 /*
8948 * We short circuit this case; it should not need to
8949 * invoke arm_force_fast_fault, so just clear the modified bit.
8950 * pmap_page_protect has taken care of resetting
8951 * the state so that we'll see the next write as a fault to
8952 * the VM (i.e. we don't want a fast fault).
8953 */
8954 pa_clear_bits(pa, bits);
8955 return;
8956 }
0a7de745 8957 if (bits & PP_ATTR_REFERENCED) {
5ba3f43e 8958 allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
0a7de745
A
8959 }
8960 if (bits & PP_ATTR_MODIFIED) {
5ba3f43e 8961 allow_mode &= ~VM_PROT_WRITE;
0a7de745 8962 }
5ba3f43e
A
8963
8964 if (bits == PP_ATTR_NOENCRYPT) {
8965 /*
8966 * We short circuit this case; it should not need to
8967 * invoke arm_force_fast_fault, so just clear and
8968 * return. On ARM, this bit is just a debugging aid.
8969 */
8970 pa_clear_bits(pa, bits);
8971 return;
8972 }
8973
0a7de745 8974 if (arm_force_fast_fault_internal(pn, allow_mode, options)) {
5ba3f43e 8975 pa_clear_bits(pa, bits);
0a7de745 8976 }
5ba3f43e
A
8977 return;
8978}
8979
8980static void
8981phys_attribute_clear(
0a7de745
A
8982 ppnum_t pn,
8983 unsigned int bits,
8984 int options,
8985 void *arg)
5ba3f43e
A
8986{
8987 /*
8988 * Do we really want this tracepoint? It will be extremely chatty.
8989 * Also, should we have a corresponding trace point for the set path?
8990 */
d9a64523 8991 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
5ba3f43e 8992
c6bf4f31
A
8993#if XNU_MONITOR
8994 phys_attribute_clear_ppl(pn, bits, options, arg);
8995#else
5ba3f43e 8996 phys_attribute_clear_internal(pn, bits, options, arg);
c6bf4f31 8997#endif
5ba3f43e 8998
d9a64523 8999 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
5ba3f43e
A
9000}
9001
9002/*
9003 * Set specified attribute bits.
9004 *
9005 * Set cached value in the pv head because we have
9006 * no per-mapping hardware support for referenced and
9007 * modify bits.
9008 */
d9a64523 9009MARK_AS_PMAP_TEXT static void
5ba3f43e
A
9010phys_attribute_set_internal(
9011 ppnum_t pn,
9012 unsigned int bits)
9013{
9014 pmap_paddr_t pa = ptoa(pn);
9015 assert(pn != vm_page_fictitious_addr);
9016
c6bf4f31
A
9017#if XNU_MONITOR
9018 if (bits & PP_ATTR_PPL_OWNED_BITS) {
9019 panic("%s: illegal request, "
9020 "pn=%u, bits=%#x",
9021 __FUNCTION__,
9022 pn, bits);
9023 }
9024#endif
5ba3f43e
A
9025
9026 pa_set_bits(pa, bits);
9027
9028 return;
9029}
9030
9031static void
9032phys_attribute_set(
9033 ppnum_t pn,
9034 unsigned int bits)
9035{
c6bf4f31
A
9036#if XNU_MONITOR
9037 phys_attribute_set_ppl(pn, bits);
9038#else
5ba3f43e 9039 phys_attribute_set_internal(pn, bits);
c6bf4f31 9040#endif
5ba3f43e
A
9041}
9042
9043
9044/*
9045 * Check specified attribute bits.
9046 *
9047 * use the software cached bits (since no hw support).
9048 */
9049static boolean_t
9050phys_attribute_test(
9051 ppnum_t pn,
9052 unsigned int bits)
9053{
9054 pmap_paddr_t pa = ptoa(pn);
9055 assert(pn != vm_page_fictitious_addr);
9056 return pa_test_bits(pa, bits);
9057}
9058
9059
9060/*
9061 * Set the modify/reference bits on the specified physical page.
9062 */
9063void
9064pmap_set_modify(ppnum_t pn)
9065{
9066 phys_attribute_set(pn, PP_ATTR_MODIFIED);
9067}
9068
9069
9070/*
9071 * Clear the modify bits on the specified physical page.
9072 */
9073void
9074pmap_clear_modify(
9075 ppnum_t pn)
9076{
9077 phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
9078}
9079
9080
9081/*
9082 * pmap_is_modified:
9083 *
9084 * Return whether or not the specified physical page is modified
9085 * by any physical maps.
9086 */
9087boolean_t
9088pmap_is_modified(
9089 ppnum_t pn)
9090{
9091 return phys_attribute_test(pn, PP_ATTR_MODIFIED);
9092}
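
/*
 * The referenced/modified bits are kept in the pv head rather than in the
 * hardware PTEs, so the accessors above behave like a software cache.
 * A minimal sketch, illustrative only, assuming 'pn' is a managed page:
 */
#if 0	/* illustrative only */
static void
example_modify_tracking(ppnum_t pn)
{
	pmap_set_modify(pn);			/* cache the modified bit */
	assert(pmap_is_modified(pn));		/* answered from the cached bit, no PTE scan */
	pmap_clear_modify(pn);			/* may force fast faults to re-arm tracking */
	assert(!pmap_is_modified(pn));
}
#endif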
9093
9094
9095/*
9096 * Set the reference bit on the specified physical page.
9097 */
9098static void
9099pmap_set_reference(
9100 ppnum_t pn)
9101{
9102 phys_attribute_set(pn, PP_ATTR_REFERENCED);
9103}
9104
9105/*
9106 * Clear the reference bits on the specified physical page.
9107 */
9108void
9109pmap_clear_reference(
9110 ppnum_t pn)
9111{
9112 phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
9113}
9114
9115
9116/*
9117 * pmap_is_referenced:
9118 *
9119 * Return whether or not the specified physical page is referenced
9120 * by any physical maps.
9121 */
9122boolean_t
9123pmap_is_referenced(
9124 ppnum_t pn)
9125{
9126 return phys_attribute_test(pn, PP_ATTR_REFERENCED);
9127}
9128
9129/*
9130 * pmap_get_refmod(phys)
9131 * returns the referenced and modified bits of the specified
9132 * physical page.
9133 */
9134unsigned int
9135pmap_get_refmod(
9136 ppnum_t pn)
9137{
0a7de745
A
9138 return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
9139 | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
5ba3f43e
A
9140}
9141
9142/*
9143 * pmap_clear_refmod(phys, mask)
9144 * clears the referenced and modified bits as specified by the mask
9145 * of the specified physical page.
9146 */
9147void
9148pmap_clear_refmod_options(
0a7de745
A
9149 ppnum_t pn,
9150 unsigned int mask,
9151 unsigned int options,
9152 void *arg)
5ba3f43e
A
9153{
9154 unsigned int bits;
9155
9156 bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
0a7de745 9157 ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
5ba3f43e
A
9158 phys_attribute_clear(pn, bits, options, arg);
9159}
9160
9161void
9162pmap_clear_refmod(
9163 ppnum_t pn,
9164 unsigned int mask)
9165{
9166 pmap_clear_refmod_options(pn, mask, 0, NULL);
9167}
9168
9169unsigned int
9170pmap_disconnect_options(
9171 ppnum_t pn,
9172 unsigned int options,
9173 void *arg)
9174{
9175 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
9176 /*
9177 * On ARM, the "modified" bit is managed by software, so
9178 * we know up-front if the physical page is "modified",
9179 * without having to scan all the PTEs pointing to it.
9180	 * The caller should have made the VM page "busy" so no one
9181 * should be able to establish any new mapping and "modify"
9182 * the page behind us.
9183 */
9184 if (pmap_is_modified(pn)) {
9185 /*
9186 * The page has been modified and will be sent to
9187 * the VM compressor.
9188 */
9189 options |= PMAP_OPTIONS_COMPRESSOR;
9190 } else {
9191 /*
9192 * The page hasn't been modified and will be freed
9193 * instead of compressed.
9194 */
9195 }
9196 }
9197
9198 /* disconnect the page */
9199 pmap_page_protect_options(pn, 0, options, arg);
9200
9201 /* return ref/chg status */
0a7de745 9202 return pmap_get_refmod(pn);
5ba3f43e
A
9203}
9204
9205/*
9206 * Routine:
9207 * pmap_disconnect
9208 *
9209 * Function:
9210 * Disconnect all mappings for this page and return reference and change status
9211 * in generic format.
9212 *
9213 */
9214unsigned int
9215pmap_disconnect(
9216 ppnum_t pn)
9217{
0a7de745
A
9218 pmap_page_protect(pn, 0); /* disconnect the page */
9219 return pmap_get_refmod(pn); /* return ref/chg status */
5ba3f43e
A
9220}
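
/*
 * The return value packs the cached attribute bits into the generic
 * VM_MEM_* format, so a caller can decide what to do with the page once all
 * mappings are gone.  A sketch of a hypothetical reclaim path (illustrative
 * only, not part of the original source):
 */
#if 0	/* illustrative only */
static void
example_reclaim_page(ppnum_t pn)
{
	unsigned int refmod = pmap_disconnect(pn);	/* unmap everywhere */

	if (refmod & VM_MEM_MODIFIED) {
		/* Dirty: the contents must be preserved (compressed or paged out). */
	} else if (refmod & VM_MEM_REFERENCED) {
		/* Clean but recently used: a reclaim policy might keep it resident. */
	}
}
#endif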
9221
9222boolean_t
9223pmap_has_managed_page(ppnum_t first, ppnum_t last)
9224{
0a7de745
A
9225 if (ptoa(first) >= vm_last_phys) {
9226 return FALSE;
9227 }
9228 if (ptoa(last) < vm_first_phys) {
9229 return FALSE;
9230 }
5ba3f43e 9231
0a7de745 9232 return TRUE;
5ba3f43e
A
9233}
9234
9235/*
9236 * The state maintained by the noencrypt functions is used as a
9237 * debugging aid on ARM. This incurs some overhead on the part
9238 * of the caller. A special case check in phys_attribute_clear
9239 * (the most expensive path) currently minimizes this overhead,
9240 * but stubbing these functions out on RELEASE kernels yields
9241 * further wins.
9242 */
9243boolean_t
9244pmap_is_noencrypt(
9245 ppnum_t pn)
9246{
9247#if DEVELOPMENT || DEBUG
9248 boolean_t result = FALSE;
9249
0a7de745
A
9250 if (!pa_valid(ptoa(pn))) {
9251 return FALSE;
9252 }
5ba3f43e
A
9253
9254 result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));
9255
9256 return result;
9257#else
9258#pragma unused(pn)
9259 return FALSE;
9260#endif
9261}
9262
9263void
9264pmap_set_noencrypt(
9265 ppnum_t pn)
9266{
9267#if DEVELOPMENT || DEBUG
0a7de745
A
9268 if (!pa_valid(ptoa(pn))) {
9269 return;
9270 }
5ba3f43e
A
9271
9272 phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
9273#else
9274#pragma unused(pn)
9275#endif
9276}
9277
9278void
9279pmap_clear_noencrypt(
9280 ppnum_t pn)
9281{
9282#if DEVELOPMENT || DEBUG
0a7de745
A
9283 if (!pa_valid(ptoa(pn))) {
9284 return;
9285 }
5ba3f43e
A
9286
9287 phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
9288#else
9289#pragma unused(pn)
9290#endif
9291}
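
/*
 * On DEVELOPMENT || DEBUG kernels the three routines above simply round-trip
 * the PP_ATTR_NOENCRYPT bit for managed pages; on RELEASE they are stubs.
 * Sketch, illustrative only, assuming pa_valid(ptoa(pn)):
 */
#if 0	/* illustrative only */
static void
example_noencrypt_roundtrip(ppnum_t pn)
{
	pmap_set_noencrypt(pn);
	assert(pmap_is_noencrypt(pn));		/* TRUE only on DEVELOPMENT || DEBUG */
	pmap_clear_noencrypt(pn);
	assert(!pmap_is_noencrypt(pn));
}
#endif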
9292
c6bf4f31
A
9293#if XNU_MONITOR
9294boolean_t
9295pmap_is_monitor(ppnum_t pn)
9296{
9297 assert(pa_valid(ptoa(pn)));
9298 return phys_attribute_test(pn, PP_ATTR_MONITOR);
9299}
9300#endif
5ba3f43e
A
9301
9302void
9303pmap_lock_phys_page(ppnum_t pn)
9304{
c6bf4f31 9305#if !XNU_MONITOR
5ba3f43e 9306 int pai;
0a7de745 9307 pmap_paddr_t phys = ptoa(pn);
5ba3f43e
A
9308
9309 if (pa_valid(phys)) {
9310 pai = (int)pa_index(phys);
9311 LOCK_PVH(pai);
9312 } else
c6bf4f31
A
9313#else
9314 (void)pn;
9315#endif
0a7de745 9316 { simple_lock(&phys_backup_lock, LCK_GRP_NULL);}
5ba3f43e
A
9317}
9318
9319
9320void
9321pmap_unlock_phys_page(ppnum_t pn)
9322{
c6bf4f31 9323#if !XNU_MONITOR
5ba3f43e 9324 int pai;
0a7de745 9325 pmap_paddr_t phys = ptoa(pn);
5ba3f43e
A
9326
9327 if (pa_valid(phys)) {
9328 pai = (int)pa_index(phys);
9329 UNLOCK_PVH(pai);
9330 } else
c6bf4f31
A
9331#else
9332 (void)pn;
9333#endif
0a7de745 9334 { simple_unlock(&phys_backup_lock);}
5ba3f43e
A
9335}
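
/*
 * pmap_lock_phys_page()/pmap_unlock_phys_page() let code outside the pmap
 * hold the pv-head lock (or the global phys_backup_lock for non-managed
 * pages) around a short critical section.  Sketch, illustrative only:
 */
#if 0	/* illustrative only */
static void
example_phys_page_lock(ppnum_t pn)
{
	pmap_lock_phys_page(pn);
	/* ... examine or update per-page state that must not race with the pmap ... */
	pmap_unlock_phys_page(pn);
}
#endif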
9336
d9a64523 9337MARK_AS_PMAP_TEXT static void
5ba3f43e
A
9338pmap_switch_user_ttb_internal(
9339 pmap_t pmap)
9340{
d9a64523 9341 VALIDATE_PMAP(pmap);
0a7de745 9342 pmap_cpu_data_t *cpu_data_ptr;
5ba3f43e
A
9343 cpu_data_ptr = pmap_get_cpu_data();
9344
0a7de745 9345#if (__ARM_VMSA__ == 7)
5ba3f43e
A
9346 cpu_data_ptr->cpu_user_pmap = pmap;
9347 cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
5ba3f43e 9348
0a7de745 9349#if MACH_ASSERT && __ARM_USER_PROTECT__
5ba3f43e
A
9350 {
9351 unsigned int ttbr0_val, ttbr1_val;
0a7de745
A
9352 __asm__ volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
9353 __asm__ volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
5ba3f43e
A
9354 if (ttbr0_val != ttbr1_val) {
9355 panic("Misaligned ttbr0 %08X\n", ttbr0_val);
9356 }
9357 }
9358#endif
9359 if (pmap->tte_index_max == NTTES) {
9360 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x40000000 */
0a7de745 9361 __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(2));
d9a64523 9362 __builtin_arm_isb(ISB_SY);
5ba3f43e
A
9363#if !__ARM_USER_PROTECT__
9364 set_mmu_ttb(pmap->ttep);
9365#endif
9366 } else {
9367#if !__ARM_USER_PROTECT__
9368 set_mmu_ttb(pmap->ttep);
9369#endif
9370 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x80000000 */
0a7de745 9371 __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(1));
d9a64523 9372 __builtin_arm_isb(ISB_SY);
0a7de745 9373#if MACH_ASSERT && __ARM_USER_PROTECT__
5ba3f43e
A
9374 if (pmap->ttep & 0x1000) {
9375 panic("Misaligned ttbr0 %08X\n", pmap->ttep);
9376 }
9377#endif
9378 }
9379
9380#if !__ARM_USER_PROTECT__
cb323159 9381 set_context_id(pmap->hw_asid);
5ba3f43e 9382#endif
5ba3f43e 9383
d9a64523
A
9384#else /* (__ARM_VMSA__ == 7) */
9385
0a7de745
A
9386 if (pmap != kernel_pmap) {
9387 cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
9388 }
5ba3f43e 9389
5ba3f43e 9390 if (pmap == kernel_pmap) {
d9a64523 9391 pmap_clear_user_ttb_internal();
5ba3f43e 9392 } else {
cb323159 9393 set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->hw_asid) << TTBR_ASID_SHIFT));
5ba3f43e 9394 }
cb323159
A
9395
9396#if defined(HAS_APPLE_PAC) && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__)
9397 if (!(BootArgs->bootFlags & kBootFlagsDisableJOP) && !(BootArgs->bootFlags & kBootFlagsDisableUserJOP)) {
9398 uint64_t sctlr = __builtin_arm_rsr64("SCTLR_EL1");
9399 bool jop_enabled = sctlr & SCTLR_JOP_KEYS_ENABLED;
9400 if (!jop_enabled && !pmap->disable_jop) {
9401 // turn on JOP
9402 sctlr |= SCTLR_JOP_KEYS_ENABLED;
9403 __builtin_arm_wsr64("SCTLR_EL1", sctlr);
9404 // no ISB necessary because this won't take effect until eret returns to EL0
9405 } else if (jop_enabled && pmap->disable_jop) {
9406 // turn off JOP
9407 sctlr &= ~SCTLR_JOP_KEYS_ENABLED;
9408 __builtin_arm_wsr64("SCTLR_EL1", sctlr);
9409 }
9410 }
9411#endif /* HAS_APPLE_PAC && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__) */
9412#endif /* (__ARM_VMSA__ == 7) */
5ba3f43e
A
9413}
9414
9415void
9416pmap_switch_user_ttb(
9417 pmap_t pmap)
9418{
cb323159 9419 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
c6bf4f31
A
9420#if XNU_MONITOR
9421 pmap_switch_user_ttb_ppl(pmap);
9422#else
5ba3f43e 9423 pmap_switch_user_ttb_internal(pmap);
c6bf4f31 9424#endif
d9a64523 9425 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
5ba3f43e
A
9426}
9427
d9a64523
A
9428MARK_AS_PMAP_TEXT static void
9429pmap_clear_user_ttb_internal(void)
5ba3f43e 9430{
d9a64523
A
9431#if (__ARM_VMSA__ > 7)
9432 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
9433#else
9434 set_mmu_ttb(kernel_pmap->ttep);
9435#endif
9436}
5ba3f43e 9437
d9a64523
A
9438void
9439pmap_clear_user_ttb(void)
9440{
c6bf4f31
A
9441#if XNU_MONITOR
9442 pmap_clear_user_ttb_ppl();
9443#else
d9a64523 9444 pmap_clear_user_ttb_internal();
c6bf4f31 9445#endif
5ba3f43e 9446}
5ba3f43e
A
9447
9448/*
9449 * Routine: arm_force_fast_fault
9450 *
9451 * Function:
9452 * Force all mappings for this page to fault according
9453 * to the access modes allowed, so we can gather ref/modify
9454 * bits again.
9455 */
d9a64523 9456MARK_AS_PMAP_TEXT static boolean_t
5ba3f43e 9457arm_force_fast_fault_internal(
0a7de745
A
9458 ppnum_t ppnum,
9459 vm_prot_t allow_mode,
9460 int options)
5ba3f43e 9461{
cb323159
A
9462 pmap_paddr_t phys = ptoa(ppnum);
9463 pv_entry_t *pve_p;
9464 pt_entry_t *pte_p;
9465 int pai;
9466 boolean_t result;
9467 pv_entry_t **pv_h;
9468 boolean_t is_reusable, is_internal;
9469 boolean_t tlb_flush_needed = FALSE;
9470 boolean_t ref_fault;
9471 boolean_t mod_fault;
5ba3f43e
A
9472
9473 assert(ppnum != vm_page_fictitious_addr);
9474
9475 if (!pa_valid(phys)) {
0a7de745 9476 return FALSE; /* Not a managed page. */
5ba3f43e
A
9477 }
9478
9479 result = TRUE;
9480 ref_fault = FALSE;
9481 mod_fault = FALSE;
9482 pai = (int)pa_index(phys);
9483 LOCK_PVH(pai);
9484 pv_h = pai_to_pvh(pai);
9485
9486 pte_p = PT_ENTRY_NULL;
9487 pve_p = PV_ENTRY_NULL;
0a7de745 9488 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
5ba3f43e 9489 pte_p = pvh_ptep(pv_h);
0a7de745 9490 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
5ba3f43e
A
9491 pve_p = pvh_list(pv_h);
9492 }
9493
9494 is_reusable = IS_REUSABLE_PAGE(pai);
9495 is_internal = IS_INTERNAL_PAGE(pai);
9496
9497 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
9498 vm_map_address_t va;
d9a64523
A
9499 pt_entry_t spte;
9500 pt_entry_t tmplate;
9501 pmap_t pmap;
9502 boolean_t update_pte;
5ba3f43e 9503
0a7de745 9504 if (pve_p != PV_ENTRY_NULL) {
5ba3f43e 9505 pte_p = pve_get_ptep(pve_p);
0a7de745 9506 }
5ba3f43e
A
9507
9508 if (pte_p == PT_ENTRY_NULL) {
9509 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
9510 }
d9a64523
A
9511#ifdef PVH_FLAG_IOMMU
9512 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
9513 goto fff_skip_pve;
0a7de745 9514 }
d9a64523 9515#endif
5ba3f43e
A
9516 if (*pte_p == ARM_PTE_EMPTY) {
9517 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
9518 }
cb323159 9519 if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
5ba3f43e
A
9520 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
9521 }
9522
9523 pmap = ptep_get_pmap(pte_p);
9524 va = ptep_get_va(pte_p);
9525
9526 assert(va >= pmap->min && va < pmap->max);
9527
9528 if (pte_is_wired(*pte_p) || pmap == kernel_pmap) {
9529 result = FALSE;
9530 break;
9531 }
9532
9533 spte = *pte_p;
9534 tmplate = spte;
9535 update_pte = FALSE;
9536
9537 if ((allow_mode & VM_PROT_READ) != VM_PROT_READ) {
9538 /* read protection sets the pte to fault */
9539 tmplate = tmplate & ~ARM_PTE_AF;
9540 update_pte = TRUE;
9541 ref_fault = TRUE;
9542 }
9543 if ((allow_mode & VM_PROT_WRITE) != VM_PROT_WRITE) {
9544 /* take away write permission if set */
9545 if (pmap == kernel_pmap) {
9546 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) {
9547 tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
0a7de745
A
9548 pte_set_was_writeable(tmplate, true);
9549 update_pte = TRUE;
9550 mod_fault = TRUE;
5ba3f43e
A
9551 }
9552 } else {
9553 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWRW)) {
cb323159 9554 tmplate = ((tmplate & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pmap_get_pt_attr(pmap)));
0a7de745
A
9555 pte_set_was_writeable(tmplate, true);
9556 update_pte = TRUE;
9557 mod_fault = TRUE;
5ba3f43e
A
9558 }
9559 }
5ba3f43e
A
9560 }
9561
c6bf4f31
A
9562#if MACH_ASSERT && XNU_MONITOR
9563 if (is_pte_xprr_protected(spte)) {
9564 if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
9565 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
9566 "ppnum=0x%x, options=0x%x, allow_mode=0x%x",
9567 __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
9568 ppnum, options, allow_mode);
9569 }
9570 }
9571#endif /* MACH_ASSERT && XNU_MONITOR */
5ba3f43e
A
9572
9573 if (update_pte) {
9574 if (*pte_p != ARM_PTE_TYPE_FAULT &&
cb323159 9575 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
d9a64523 9576 WRITE_PTE_STRONG(pte_p, tmplate);
cb323159 9577 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
d9a64523 9578 tlb_flush_needed = TRUE;
5ba3f43e
A
9579 } else {
9580 WRITE_PTE(pte_p, tmplate);
d9a64523 9581 __builtin_arm_isb(ISB_SY);
5ba3f43e
A
9582 }
9583 }
9584
9585 /* update pmap stats and ledgers */
9586 if (IS_ALTACCT_PAGE(pai, pve_p)) {
9587 /*
9588 * We do not track "reusable" status for
9589 * "alternate accounting" mappings.
9590 */
9591 } else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
0a7de745
A
9592 is_reusable &&
9593 is_internal &&
9594 pmap != kernel_pmap) {
5ba3f43e 9595 /* one less "reusable" */
5c9f4661 9596 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
5ba3f43e
A
9597 OSAddAtomic(-1, &pmap->stats.reusable);
9598 /* one more "internal" */
9599 OSAddAtomic(+1, &pmap->stats.internal);
9600 PMAP_STATS_PEAK(pmap->stats.internal);
5c9f4661 9601 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
d9a64523 9602 pmap_ledger_credit(pmap, task_ledgers.internal, machine_ptob(1));
5ba3f43e
A
9603 assert(!IS_ALTACCT_PAGE(pai, pve_p));
9604 assert(IS_INTERNAL_PAGE(pai));
d9a64523 9605 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
5ba3f43e
A
9606
9607 /*
9608 * Avoid the cost of another trap to handle the fast
9609 * fault when we next write to this page: let's just
9610 * handle that now since we already have all the
9611 * necessary information.
9612 */
9613 {
9614 arm_clear_fast_fault(ppnum, VM_PROT_WRITE);
9615 }
9616 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
0a7de745
A
9617 !is_reusable &&
9618 is_internal &&
9619 pmap != kernel_pmap) {
5ba3f43e
A
9620 /* one more "reusable" */
9621 OSAddAtomic(+1, &pmap->stats.reusable);
9622 PMAP_STATS_PEAK(pmap->stats.reusable);
5c9f4661 9623 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
5ba3f43e 9624 /* one less "internal" */
5c9f4661 9625 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
5ba3f43e 9626 OSAddAtomic(-1, &pmap->stats.internal);
d9a64523 9627 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(1));
5ba3f43e
A
9628 assert(!IS_ALTACCT_PAGE(pai, pve_p));
9629 assert(IS_INTERNAL_PAGE(pai));
d9a64523 9630 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
5ba3f43e
A
9631 }
9632
d9a64523 9633#ifdef PVH_FLAG_IOMMU
0a7de745 9634fff_skip_pve:
d9a64523 9635#endif
5ba3f43e 9636 pte_p = PT_ENTRY_NULL;
0a7de745 9637 if (pve_p != PV_ENTRY_NULL) {
5ba3f43e 9638 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
0a7de745 9639 }
5ba3f43e
A
9640 }
9641
0a7de745 9642 if (tlb_flush_needed) {
d9a64523 9643 sync_tlb_flush();
0a7de745 9644 }
d9a64523 9645
5ba3f43e
A
9646 /* update global "reusable" status for this page */
9647 if (is_internal) {
9648 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
9649 is_reusable) {
9650 CLR_REUSABLE_PAGE(pai);
9651 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
0a7de745 9652 !is_reusable) {
5ba3f43e
A
9653 SET_REUSABLE_PAGE(pai);
9654 }
9655 }
9656
9657 if (mod_fault) {
9658 SET_MODFAULT_PAGE(pai);
9659 }
9660 if (ref_fault) {
9661 SET_REFFAULT_PAGE(pai);
9662 }
9663
9664 UNLOCK_PVH(pai);
9665 return result;
9666}
9667
9668boolean_t
9669arm_force_fast_fault(
0a7de745
A
9670 ppnum_t ppnum,
9671 vm_prot_t allow_mode,
9672 int options,
9673 __unused void *arg)
5ba3f43e
A
9674{
9675 pmap_paddr_t phys = ptoa(ppnum);
9676
9677 assert(ppnum != vm_page_fictitious_addr);
9678
9679 if (!pa_valid(phys)) {
0a7de745 9680 return FALSE; /* Not a managed page. */
5ba3f43e
A
9681 }
9682
c6bf4f31
A
9683#if XNU_MONITOR
9684 return arm_force_fast_fault_ppl(ppnum, allow_mode, options);
9685#else
5ba3f43e 9686 return arm_force_fast_fault_internal(ppnum, allow_mode, options);
c6bf4f31 9687#endif
5ba3f43e
A
9688}
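
/*
 * arm_force_fast_fault() is the re-arming half of the software ref/mod
 * scheme: stripping AF and/or write permission from every mapping makes the
 * next access trap into arm_fast_fault(), where the bits are recorded again.
 * Sketch of a hypothetical caller that re-arms write tracking, illustrative
 * only:
 */
#if 0	/* illustrative only */
static void
example_rearm_write_tracking(ppnum_t pn)
{
	/* Disallow writes so the next store re-faults and is noted as a modification. */
	if (arm_force_fast_fault(pn, VM_PROT_ALL & ~VM_PROT_WRITE, 0, NULL)) {
		/* All mappings were downgraded; the cached modified bit could now be cleared. */
	}
}
#endif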
9689
9690/*
9691 * Routine: arm_clear_fast_fault
9692 *
9693 * Function:
9694 * Clear pending force fault for all mappings for this page based on
9695 * the observed fault type, update ref/modify bits.
9696 */
9697boolean_t
9698arm_clear_fast_fault(
9699 ppnum_t ppnum,
9700 vm_prot_t fault_type)
9701{
9702 pmap_paddr_t pa = ptoa(ppnum);
9703 pv_entry_t *pve_p;
9704 pt_entry_t *pte_p;
9705 int pai;
9706 boolean_t result;
0a7de745 9707 boolean_t tlb_flush_needed = FALSE;
5ba3f43e
A
9708 pv_entry_t **pv_h;
9709
9710 assert(ppnum != vm_page_fictitious_addr);
9711
9712 if (!pa_valid(pa)) {
0a7de745 9713 return FALSE; /* Not a managed page. */
5ba3f43e
A
9714 }
9715
9716 result = FALSE;
9717 pai = (int)pa_index(pa);
9718 ASSERT_PVH_LOCKED(pai);
9719 pv_h = pai_to_pvh(pai);
9720
9721 pte_p = PT_ENTRY_NULL;
9722 pve_p = PV_ENTRY_NULL;
0a7de745 9723 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
5ba3f43e 9724 pte_p = pvh_ptep(pv_h);
0a7de745 9725 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
5ba3f43e
A
9726 pve_p = pvh_list(pv_h);
9727 }
9728
9729 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
9730 vm_map_address_t va;
0a7de745 9731 pt_entry_t spte;
5ba3f43e
A
9732 pt_entry_t tmplate;
9733 pmap_t pmap;
9734
0a7de745 9735 if (pve_p != PV_ENTRY_NULL) {
5ba3f43e 9736 pte_p = pve_get_ptep(pve_p);
0a7de745 9737 }
5ba3f43e
A
9738
9739 if (pte_p == PT_ENTRY_NULL) {
9740 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
9741 }
d9a64523
A
9742#ifdef PVH_FLAG_IOMMU
9743 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
9744 goto cff_skip_pve;
0a7de745 9745 }
d9a64523 9746#endif
5ba3f43e
A
9747 if (*pte_p == ARM_PTE_EMPTY) {
9748 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
9749 }
9750
9751 pmap = ptep_get_pmap(pte_p);
9752 va = ptep_get_va(pte_p);
9753
9754 assert(va >= pmap->min && va < pmap->max);
9755
9756 spte = *pte_p;
9757 tmplate = spte;
9758
0a7de745 9759 if ((fault_type & VM_PROT_WRITE) && (pte_was_writeable(spte))) {
5ba3f43e 9760 {
0a7de745 9761 if (pmap == kernel_pmap) {
5ba3f43e 9762 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
0a7de745 9763 } else {
cb323159 9764 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap)));
0a7de745 9765 }
5ba3f43e
A
9766 }
9767
9768 tmplate |= ARM_PTE_AF;
9769
0a7de745 9770 pte_set_was_writeable(tmplate, false);
5ba3f43e 9771 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
5ba3f43e
A
9772 } else if ((fault_type & VM_PROT_READ) && ((spte & ARM_PTE_AF) != ARM_PTE_AF)) {
9773 tmplate = spte | ARM_PTE_AF;
9774
9775 {
9776 pa_set_bits(pa, PP_ATTR_REFERENCED);
9777 }
9778 }
9779
c6bf4f31
A
9780#if MACH_ASSERT && XNU_MONITOR
9781 if (is_pte_xprr_protected(spte)) {
9782 if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
9783 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
9784 "ppnum=0x%x, fault_type=0x%x",
9785 __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
9786 ppnum, fault_type);
9787 }
9788 }
9789#endif /* MACH_ASSERT && XNU_MONITOR */
5ba3f43e
A
9790
9791 if (spte != tmplate) {
9792 if (spte != ARM_PTE_TYPE_FAULT) {
d9a64523 9793 WRITE_PTE_STRONG(pte_p, tmplate);
cb323159 9794 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
d9a64523 9795 tlb_flush_needed = TRUE;
5ba3f43e
A
9796 } else {
9797 WRITE_PTE(pte_p, tmplate);
d9a64523 9798 __builtin_arm_isb(ISB_SY);
5ba3f43e
A
9799 }
9800 result = TRUE;
9801 }
9802
d9a64523 9803#ifdef PVH_FLAG_IOMMU
0a7de745 9804cff_skip_pve:
d9a64523 9805#endif
5ba3f43e 9806 pte_p = PT_ENTRY_NULL;
0a7de745 9807 if (pve_p != PV_ENTRY_NULL) {
5ba3f43e 9808 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
0a7de745 9809 }
5ba3f43e 9810 }
0a7de745 9811 if (tlb_flush_needed) {
d9a64523 9812 sync_tlb_flush();
0a7de745 9813 }
5ba3f43e
A
9814 return result;
9815}
9816
9817/*
9818 * Determine if the fault was induced by software tracking of
9819 * modify/reference bits. If so, re-enable the mapping (and set
9820 * the appropriate bits).
9821 *
9822 * Returns KERN_SUCCESS if the fault was induced and was
9823 * successfully handled.
9824 *
9825 * Returns KERN_FAILURE if the fault was not induced and
9826 * the function was unable to deal with it.
9827 *
9828 * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
9829 * disallows this type of access.
9830 */
d9a64523 9831MARK_AS_PMAP_TEXT static kern_return_t
5ba3f43e
A
9832arm_fast_fault_internal(
9833 pmap_t pmap,
9834 vm_map_address_t va,
9835 vm_prot_t fault_type,
cb323159
A
9836 __unused bool was_af_fault,
9837 __unused bool from_user)
5ba3f43e
A
9838{
9839 kern_return_t result = KERN_FAILURE;
9840 pt_entry_t *ptep;
9841 pt_entry_t spte = ARM_PTE_TYPE_FAULT;
9842 int pai;
9843 pmap_paddr_t pa;
d9a64523
A
9844 VALIDATE_PMAP(pmap);
9845
5ba3f43e
A
9846 PMAP_LOCK(pmap);
9847
9848 /*
9849 * If the entry doesn't exist, is completely invalid, or is already
9850 * valid, we can't fix it here.
9851 */
9852
9853 ptep = pmap_pte(pmap, va);
9854 if (ptep != PT_ENTRY_NULL) {
cb323159
A
9855 while (true) {
9856 spte = *ptep;
5ba3f43e 9857
cb323159 9858 pa = pte_to_pa(spte);
5ba3f43e 9859
cb323159
A
9860 if ((spte == ARM_PTE_TYPE_FAULT) ||
9861 ARM_PTE_IS_COMPRESSED(spte, ptep)) {
9862 PMAP_UNLOCK(pmap);
9863 return result;
9864 }
5ba3f43e 9865
cb323159
A
9866 if (!pa_valid(pa)) {
9867 PMAP_UNLOCK(pmap);
c6bf4f31
A
9868#if XNU_MONITOR
9869 if (pmap_cache_attributes((ppnum_t)atop(pa)) & PP_ATTR_MONITOR) {
9870 return KERN_PROTECTION_FAILURE;
9871 } else
9872#endif
cb323159
A
9873 return result;
9874 }
9875 pai = (int)pa_index(pa);
9876 LOCK_PVH(pai);
c6bf4f31
A
9877#if __APRR_SUPPORTED__
9878 if (*ptep == spte) {
9879 /*
9880 * Double-check the spte value, as we care
9881 * about the AF bit.
9882 */
9883 break;
9884 }
9885 UNLOCK_PVH(pai);
9886#else /* !(__APRR_SUPPORTED__*/
cb323159 9887 break;
c6bf4f31 9888#endif /* !(__APRR_SUPPORTED__*/
d9a64523 9889 }
5ba3f43e
A
9890 } else {
9891 PMAP_UNLOCK(pmap);
9892 return result;
9893 }
9894
c6bf4f31
A
9895#if __APRR_SUPPORTED__
9896 /* Check to see if this mapping had APRR restrictions. */
9897 if (is_pte_xprr_protected(spte)) {
9898 /*
9899 * We have faulted on an XPRR managed mapping; decide if the access should be
9900 * reattempted or if it should cause an exception. Now that all JIT entitled
9901 * task threads always have MPRR enabled we're only here because of
9902 * an AF fault or an actual permission fault. AF faults will have result
9903 * changed to KERN_SUCCESS below upon arm_clear_fast_fault return.
9904 */
9905 if (was_af_fault && (spte & ARM_PTE_AF)) {
9906 result = KERN_SUCCESS;
9907 goto out;
9908 } else {
9909 result = KERN_PROTECTION_FAILURE;
9910 }
9911 }
9912#endif /* __APRR_SUPPORTED__*/
5ba3f43e
A
9913
9914 if ((IS_REFFAULT_PAGE(pai)) ||
9915 ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
9916 /*
9917 * An attempted access will always clear ref/mod fault state, as
9918 * appropriate for the fault type. arm_clear_fast_fault will
9919 * update the associated PTEs for the page as appropriate; if
9920 * any PTEs are updated, we redrive the access. If the mapping
9921 * does not actually allow for the attempted access, the
9922 * following fault will (hopefully) fail to update any PTEs, and
9923 * thus cause arm_fast_fault to decide that it failed to handle
9924 * the fault.
9925 */
9926 if (IS_REFFAULT_PAGE(pai)) {
9927 CLR_REFFAULT_PAGE(pai);
9928 }
0a7de745 9929 if ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)) {
5ba3f43e
A
9930 CLR_MODFAULT_PAGE(pai);
9931 }
9932
0a7de745 9933 if (arm_clear_fast_fault((ppnum_t)atop(pa), fault_type)) {
5ba3f43e
A
9934 /*
9935 * Should this preserve KERN_PROTECTION_FAILURE? The
9936			 * cost of not doing so is another fault in a case
9937 * that should already result in an exception.
9938 */
9939 result = KERN_SUCCESS;
9940 }
9941 }
9942
c6bf4f31
A
9943#if __APRR_SUPPORTED__
9944out:
9945#endif /* __APRR_SUPPORTED__*/
5ba3f43e
A
9946 UNLOCK_PVH(pai);
9947 PMAP_UNLOCK(pmap);
9948 return result;
9949}
9950
9951kern_return_t
9952arm_fast_fault(
9953 pmap_t pmap,
9954 vm_map_address_t va,
9955 vm_prot_t fault_type,
cb323159
A
9956 bool was_af_fault,
9957 __unused bool from_user)
5ba3f43e
A
9958{
9959 kern_return_t result = KERN_FAILURE;
9960
0a7de745 9961 if (va < pmap->min || va >= pmap->max) {
5ba3f43e 9962 return result;
0a7de745 9963 }
5ba3f43e 9964
d9a64523 9965 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
0a7de745
A
9966 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
9967 from_user);
5ba3f43e 9968
0a7de745 9969#if (__ARM_VMSA__ == 7)
5ba3f43e
A
9970 if (pmap != kernel_pmap) {
9971 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
9972 pmap_t cur_pmap;
9973 pmap_t cur_user_pmap;
9974
9975 cur_pmap = current_pmap();
9976 cur_user_pmap = cpu_data_ptr->cpu_user_pmap;
9977
9978 if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
9979 if (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp) {
9980 pmap_set_pmap(pmap, current_thread());
9981 result = KERN_SUCCESS;
9982 goto done;
9983 }
9984 }
9985 }
9986#endif
9987
c6bf4f31
A
9988#if XNU_MONITOR
9989 result = arm_fast_fault_ppl(pmap, va, fault_type, was_af_fault, from_user);
9990#else
cb323159 9991 result = arm_fast_fault_internal(pmap, va, fault_type, was_af_fault, from_user);
c6bf4f31 9992#endif
5ba3f43e
A
9993
9994#if (__ARM_VMSA__ == 7)
9995done:
9996#endif
9997
d9a64523 9998 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);
5ba3f43e
A
9999
10000 return result;
10001}
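
/*
 * Sketch of how a data-abort path might consult arm_fast_fault() before
 * falling back to the full VM fault path.  Illustrative only; 'map',
 * 'fault_addr' and 'fault_type' are hypothetical, and the real call sites
 * live in the trap handler, not in this file.
 */
#if 0	/* illustrative only */
static void
example_handle_abort(vm_map_t map, vm_map_address_t fault_addr, vm_prot_t fault_type)
{
	kern_return_t kr;

	kr = arm_fast_fault(map->pmap, fault_addr, fault_type,
	    /* was_af_fault */ false, /* from_user */ true);
	if (kr != KERN_SUCCESS) {
		/* Not a ref/mod re-arm fault: hand the fault to vm_fault(). */
	}
}
#endif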
10002
10003void
10004pmap_copy_page(
10005 ppnum_t psrc,
10006 ppnum_t pdst)
10007{
10008 bcopy_phys((addr64_t) (ptoa(psrc)),
0a7de745
A
10009 (addr64_t) (ptoa(pdst)),
10010 PAGE_SIZE);
5ba3f43e
A
10011}
10012
10013
10014/*
10015 * pmap_copy_part_page copies part of the specified (machine independent) pages.
10016 */
10017void
10018pmap_copy_part_page(
10019 ppnum_t psrc,
10020 vm_offset_t src_offset,
10021 ppnum_t pdst,
10022 vm_offset_t dst_offset,
10023 vm_size_t len)
10024{
10025 bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
0a7de745
A
10026 (addr64_t) (ptoa(pdst) + dst_offset),
10027 len);
5ba3f43e
A
10028}
10029
10030
10031/*
10032 * pmap_zero_page zeros the specified (machine independent) page.
10033 */
10034void
10035pmap_zero_page(
10036 ppnum_t pn)
10037{
10038 assert(pn != vm_page_fictitious_addr);
10039 bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
10040}
10041
10042/*
10043 * pmap_zero_part_page
10044 * zeros the specified (machine independent) part of a page.
10045 */
10046void
10047pmap_zero_part_page(
10048 ppnum_t pn,
10049 vm_offset_t offset,
10050 vm_size_t len)
10051{
10052 assert(pn != vm_page_fictitious_addr);
10053 assert(offset + len <= PAGE_SIZE);
10054 bzero_phys((addr64_t) (ptoa(pn) + offset), len);
10055}
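
/*
 * The partial copy/zero helpers above work on physical pages by page number,
 * so a caller never needs its own kernel virtual mapping.  Sketch,
 * illustrative only, with hypothetical page numbers:
 */
#if 0	/* illustrative only */
static void
example_partial_page_ops(ppnum_t src_pn, ppnum_t dst_pn)
{
	pmap_copy_part_page(src_pn, 0, dst_pn, 0, PAGE_SIZE / 2);	/* copy the first half */
	pmap_zero_part_page(dst_pn, PAGE_SIZE / 2, PAGE_SIZE / 2);	/* zero the rest */
}
#endif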
10056
10057
10058/*
10059 * nop in current arm implementation
10060 */
10061void
10062inval_copy_windows(
10063 __unused thread_t t)
10064{
10065}
10066
10067void
10068pmap_map_globals(
10069 void)
10070{
0a7de745 10071 pt_entry_t *ptep, pte;
5ba3f43e
A
10072
10073 ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
10074 assert(ptep != PT_ENTRY_NULL);
10075 assert(*ptep == ARM_PTE_EMPTY);
10076
10077 pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
5c9f4661
A
10078#if __ARM_KERNEL_PROTECT__
10079 pte |= ARM_PTE_NG;
10080#endif /* __ARM_KERNEL_PROTECT__ */
5ba3f43e 10081 pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
0a7de745 10082#if (__ARM_VMSA__ > 7)
5ba3f43e
A
10083 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
10084#else
10085 pte |= ARM_PTE_SH;
10086#endif
10087 *ptep = pte;
0a7de745 10088 FLUSH_PTE_RANGE(ptep, (ptep + 1));
cb323159 10089 PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE, false);
5ba3f43e
A
10090}
10091
10092vm_offset_t
10093pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
10094{
0a7de745 10095 if (__improbable(index >= CPUWINDOWS_MAX)) {
d9a64523 10096 panic("%s: invalid index %u", __func__, index);
0a7de745 10097 }
5ba3f43e
A
10098 return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
10099}
10100
d9a64523 10101MARK_AS_PMAP_TEXT static unsigned int
5ba3f43e 10102pmap_map_cpu_windows_copy_internal(
0a7de745 10103 ppnum_t pn,
5ba3f43e
A
10104 vm_prot_t prot,
10105 unsigned int wimg_bits)
10106{
0a7de745 10107 pt_entry_t *ptep = NULL, pte;
cb323159 10108 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
0a7de745
A
10109 unsigned int cpu_num;
10110 unsigned int i;
10111 vm_offset_t cpu_copywindow_vaddr = 0;
cb323159 10112 bool need_strong_sync = false;
5ba3f43e 10113
c6bf4f31
A
10114#if XNU_MONITOR || HAS_MILD_DSB
10115 unsigned int cacheattr = (!pa_valid(ptoa(pn)) ? pmap_cache_attributes(pn) : 0);
10116 need_strong_sync = ((cacheattr & PMAP_IO_RANGE_STRONG_SYNC) != 0);
10117#endif
10118
10119#if XNU_MONITOR
10120#ifdef __ARM_COHERENT_IO__
10121 if (pa_valid(ptoa(pn)) && !pmap_ppl_disable) {
10122 panic("%s: attempted to map a managed page, "
10123 "pn=%u, prot=0x%x, wimg_bits=0x%x",
10124 __FUNCTION__,
10125 pn, prot, wimg_bits);
10126 }
10127 if (!pmap_ppl_disable && (cacheattr & PP_ATTR_MONITOR)) {
10128 panic("%s: attempt to map PPL-protected I/O address 0x%llx", __func__, (uint64_t)ptoa(pn));
10129 }
cb323159 10130
c6bf4f31
A
10131#else /* __ARM_COHERENT_IO__ */
10132#error CPU copy windows are not properly supported with both the PPL and incoherent IO
10133#endif /* __ARM_COHERENT_IO__ */
10134#endif /* XNU_MONITOR */
cb323159 10135 cpu_num = pmap_cpu_data->cpu_number;
5ba3f43e 10136
0a7de745 10137 for (i = 0; i < CPUWINDOWS_MAX; i++) {
5ba3f43e
A
10138 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
10139 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
cb323159 10140 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
0a7de745 10141 if (*ptep == ARM_PTE_TYPE_FAULT) {
5ba3f43e 10142 break;
0a7de745 10143 }
5ba3f43e
A
10144 }
10145 if (i == CPUWINDOWS_MAX) {
10146 panic("pmap_map_cpu_windows_copy: out of window\n");
10147 }
10148
10149 pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
5c9f4661
A
10150#if __ARM_KERNEL_PROTECT__
10151 pte |= ARM_PTE_NG;
10152#endif /* __ARM_KERNEL_PROTECT__ */
5ba3f43e
A
10153
10154 pte |= wimg_to_pte(wimg_bits);
10155
10156 if (prot & VM_PROT_WRITE) {
10157 pte |= ARM_PTE_AP(AP_RWNA);
10158 } else {
10159 pte |= ARM_PTE_AP(AP_RONA);
10160 }
10161
d9a64523 10162 WRITE_PTE_FAST(ptep, pte);
5ba3f43e
A
10163 /*
10164 * Invalidate tlb. Cover nested cpu_copywindow_vaddr usage with the interrupted context
10165 * in pmap_unmap_cpu_windows_copy() after clearing the pte and before tlb invalidate.
10166 */
d9a64523 10167 FLUSH_PTE_STRONG(ptep);
cb323159
A
10168 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[i]);
10169 pmap_cpu_data->copywindow_strong_sync[i] = need_strong_sync;
5ba3f43e 10170
0a7de745 10171 return i;
5ba3f43e
A
10172}
10173
10174unsigned int
10175pmap_map_cpu_windows_copy(
0a7de745 10176 ppnum_t pn,
5ba3f43e
A
10177 vm_prot_t prot,
10178 unsigned int wimg_bits)
10179{
c6bf4f31
A
10180#if XNU_MONITOR
10181 return pmap_map_cpu_windows_copy_ppl(pn, prot, wimg_bits);
10182#else
5ba3f43e 10183 return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
c6bf4f31 10184#endif
5ba3f43e
A
10185}
10186
d9a64523 10187MARK_AS_PMAP_TEXT static void
5ba3f43e
A
10188pmap_unmap_cpu_windows_copy_internal(
10189 unsigned int index)
10190{
0a7de745
A
10191 pt_entry_t *ptep;
10192 unsigned int cpu_num;
10193 vm_offset_t cpu_copywindow_vaddr = 0;
cb323159 10194 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
5ba3f43e 10195
cb323159 10196 cpu_num = pmap_cpu_data->cpu_number;
5ba3f43e
A
10197
10198 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
d9a64523
A
10199 /* Issue full-system DSB to ensure prior operations on the per-CPU window
10200 * (which are likely to have been on I/O memory) are complete before
10201 * tearing down the mapping. */
10202 __builtin_arm_dsb(DSB_SY);
5ba3f43e 10203 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
d9a64523 10204 WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
cb323159 10205 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index]);
5ba3f43e
A
10206}
10207
10208void
10209pmap_unmap_cpu_windows_copy(
10210 unsigned int index)
10211{
c6bf4f31
A
10212#if XNU_MONITOR
10213 return pmap_unmap_cpu_windows_copy_ppl(index);
10214#else
5ba3f43e 10215 return pmap_unmap_cpu_windows_copy_internal(index);
c6bf4f31 10216#endif
5ba3f43e
A
10217}
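
/*
 * The copy windows give each CPU a small set of transient kernel mappings for
 * arbitrary physical pages (including non-managed I/O pages).  Sketch,
 * illustrative only; the caller must stay on one CPU (e.g. with preemption
 * disabled) between map and unmap, and the wimg_bits value used here
 * (VM_WIMG_DEFAULT) is an assumption about the appropriate caching policy.
 */
#if 0	/* illustrative only */
static void
example_zero_through_copy_window(ppnum_t pn)
{
	unsigned int index;
	vm_offset_t va;

	index = pmap_map_cpu_windows_copy(pn, VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_DEFAULT);
	va = pmap_cpu_windows_copy_addr(cpu_number(), index);

	bzero((void *)va, PAGE_SIZE);		/* operate on the page through the window */

	pmap_unmap_cpu_windows_copy(index);	/* DSB + TLB invalidate happen in here */
}
#endif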
10218
10219/*
d9a64523
A
10220 * Indicate that a pmap is intended to be used as a nested pmap
10221 * within one or more larger address spaces. This must be set
10222 * before pmap_nest() is called with this pmap as the 'subordinate'.
5ba3f43e 10223 */
d9a64523 10224MARK_AS_PMAP_TEXT static void
5ba3f43e
A
10225pmap_set_nested_internal(
10226 pmap_t pmap)
10227{
d9a64523 10228 VALIDATE_PMAP(pmap);
5ba3f43e
A
10229 pmap->nested = TRUE;
10230}
10231
10232void
10233pmap_set_nested(
10234 pmap_t pmap)
10235{
c6bf4f31
A
10236#if XNU_MONITOR
10237 pmap_set_nested_ppl(pmap);
10238#else
5ba3f43e 10239 pmap_set_nested_internal(pmap);
c6bf4f31 10240#endif
5ba3f43e
A
10241}
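
/*
 * Sketch of the required ordering when building a shared-region pmap: the
 * subordinate must be marked nested before it is nested into any parent.
 * Illustrative only; the pmaps and addresses are hypothetical.
 */
#if 0	/* illustrative only */
static void
example_nest_shared_region(pmap_t grand, pmap_t subord,
    addr64_t vstart, addr64_t nstart, uint64_t size)
{
	kern_return_t kr;

	pmap_set_nested(subord);		/* must precede pmap_nest() */
	kr = pmap_nest(grand, subord, vstart, nstart, size);
	assert(kr == KERN_SUCCESS);
}
#endif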
10242
d9a64523
A
10243/*
10244 * pmap_trim_range(pmap, start, end)
10245 *
10246 * pmap = pmap to operate on
10247 * start = start of the range
10248 * end = end of the range
10249 *
10250 * Attempts to deallocate TTEs for the given range within the nested region.
10251 */
10252MARK_AS_PMAP_TEXT static void
10253pmap_trim_range(
10254 pmap_t pmap,
10255 addr64_t start,
10256 addr64_t end)
10257{
10258 addr64_t cur;
10259 addr64_t nested_region_start;
10260 addr64_t nested_region_end;
10261 addr64_t adjusted_start;
10262 addr64_t adjusted_end;
10263 addr64_t adjust_offmask;
10264 tt_entry_t * tte_p;
10265 pt_entry_t * pte_p;
cb323159 10266 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
d9a64523
A
10267
10268 if (__improbable(end < start)) {
10269 panic("%s: invalid address range, "
0a7de745
A
10270 "pmap=%p, start=%p, end=%p",
10271 __func__,
10272 pmap, (void*)start, (void*)end);
d9a64523
A
10273 }
10274
10275	nested_region_start = pmap->nested_region_subord_addr;
10276 nested_region_end = nested_region_start + pmap->nested_region_size;
10277
10278 if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
10279 panic("%s: range outside nested region %p-%p, "
0a7de745
A
10280 "pmap=%p, start=%p, end=%p",
10281 __func__, (void *)nested_region_start, (void *)nested_region_end,
10282 pmap, (void*)start, (void*)end);
d9a64523
A
10283 }
10284
10285 /* Contract the range to TT page boundaries. */
cb323159 10286 adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
d9a64523
A
10287 adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
10288 adjusted_end = end & ~adjust_offmask;
cb323159 10289 bool modified = false;
d9a64523
A
10290
10291 /* Iterate over the range, trying to remove TTEs. */
cb323159 10292 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
d9a64523
A
10293 PMAP_LOCK(pmap);
10294
10295 tte_p = pmap_tte(pmap, cur);
10296
10297 if (tte_p == (tt_entry_t *) NULL) {
10298 goto done;
10299 }
10300
10301 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
10302 pte_p = (pt_entry_t *) ttetokv(*tte_p);
10303
cb323159 10304 if ((ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
0a7de745 10305 (pmap != kernel_pmap)) {
d9a64523
A
10306 if (pmap->nested == TRUE) {
10307 /* Deallocate for the nested map. */
cb323159 10308 pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
d9a64523
A
10309 } else {
10310 /* Just remove for the parent map. */
cb323159 10311 pmap_tte_remove(pmap, tte_p, pt_attr_twig_level(pt_attr));
d9a64523
A
10312 }
10313
cb323159 10314 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(cur, pmap);
d9a64523
A
10315 modified = true;
10316 }
d9a64523
A
10317 }
10318
10319done:
10320 PMAP_UNLOCK(pmap);
cb323159 10321 }
d9a64523 10322
cb323159
A
10323 if (modified) {
10324 sync_tlb_flush();
d9a64523
A
10325 }
10326
10327#if (__ARM_VMSA__ > 7)
10328 /* Remove empty L2 TTs. */
10329 adjusted_start = ((start + ARM_TT_L1_OFFMASK) & ~ARM_TT_L1_OFFMASK);
10330 adjusted_end = end & ~ARM_TT_L1_OFFMASK;
10331
10332 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += ARM_TT_L1_SIZE) {
10333 /* For each L1 entry in our range... */
10334 PMAP_LOCK(pmap);
10335
10336 bool remove_tt1e = true;
10337 tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
10338 tt_entry_t * tt2e_start;
10339 tt_entry_t * tt2e_end;
10340 tt_entry_t * tt2e_p;
10341 tt_entry_t tt1e;
10342
10343 if (tt1e_p == NULL) {
10344 PMAP_UNLOCK(pmap);
10345 continue;
10346 }
10347
10348 tt1e = *tt1e_p;
10349
10350 if (tt1e == ARM_TTE_TYPE_FAULT) {
10351 PMAP_UNLOCK(pmap);
10352 continue;
10353 }
10354
10355 tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
10356 tt2e_end = &tt2e_start[TTE_PGENTRIES];
10357
10358 for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
10359 if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
10360 /*
10361 * If any TTEs are populated, don't remove the
10362 * L1 TT.
10363 */
10364 remove_tt1e = false;
10365 }
10366 }
10367
10368 if (remove_tt1e) {
10369 pmap_tte_deallocate(pmap, tt1e_p, PMAP_TT_L1_LEVEL);
cb323159 10370 PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE, false);
d9a64523
A
10371 }
10372
10373 PMAP_UNLOCK(pmap);
10374 }
10375#endif /* (__ARM_VMSA__ > 7) */
10376}
10377
10378/*
10379 * pmap_trim_internal(grand, subord, vstart, nstart, size)
10380 *
10381 * grand = pmap subord is nested in
10382 * subord = nested pmap
10383 * vstart = start of the used range in grand
10385 * nstart = start of the used range in subord
10385 * size = size of the used range
10386 *
10387 * Attempts to trim the shared region page tables down to only cover the given
10388 * range in subord and grand.
10389 */
10390MARK_AS_PMAP_TEXT static void
10391pmap_trim_internal(
10392 pmap_t grand,
10393 pmap_t subord,
10394 addr64_t vstart,
10395 addr64_t nstart,
10396 uint64_t size)
10397{
10398 addr64_t vend, nend;
10399 addr64_t adjust_offmask;
10400
10401 if (__improbable(os_add_overflow(vstart, size, &vend))) {
10402 panic("%s: grand addr wraps around, "
0a7de745
A
10403 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10404 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
d9a64523
A
10405 }
10406
10407 if (__improbable(os_add_overflow(nstart, size, &nend))) {
10408 panic("%s: nested addr wraps around, "
0a7de745
A
10409 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10410 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
d9a64523
A
10411 }
10412
10413 VALIDATE_PMAP(grand);
10414 VALIDATE_PMAP(subord);
10415
cb323159
A
10416 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
10417
d9a64523
A
10418 PMAP_LOCK(subord);
10419
10420 if (!subord->nested) {
10421 panic("%s: subord is not nestable, "
0a7de745
A
10422 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10423 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
d9a64523
A
10424 }
10425
10426 if (grand->nested) {
10427 panic("%s: grand is nestable, "
0a7de745
A
10428 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10429 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
d9a64523
A
10430 }
10431
10432 if (grand->nested_pmap != subord) {
10433 panic("%s: grand->nested != subord, "
0a7de745
A
10434 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10435 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
d9a64523
A
10436 }
10437
10438 if (size != 0) {
10439 if ((vstart < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
10440 panic("%s: grand range not in nested region, "
0a7de745
A
10441 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10442 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
d9a64523
A
10443 }
10444
10445 if ((nstart < grand->nested_region_grand_addr) || (nend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
10446 panic("%s: subord range not in nested region, "
0a7de745
A
10447 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10448 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
d9a64523
A
10449 }
10450 }
10451
10452
10453 if (!grand->nested_has_no_bounds_ref) {
10454 assert(subord->nested_bounds_set);
10455
10456 if (!grand->nested_bounds_set) {
10457 /* Inherit the bounds from subord. */
10458 grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
10459 grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
10460 grand->nested_bounds_set = true;
10461 }
10462
10463 PMAP_UNLOCK(subord);
10464 return;
10465 }
10466
10467 if ((!subord->nested_bounds_set) && size) {
cb323159 10468 adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
d9a64523
A
10469
10470 subord->nested_region_true_start = nstart;
10471 subord->nested_region_true_end = nend;
10472 subord->nested_region_true_start &= ~adjust_offmask;
10473
10474 if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
10475 panic("%s: padded true end wraps around, "
0a7de745
A
10476 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10477 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
d9a64523
A
10478 }
10479
10480 subord->nested_region_true_end &= ~adjust_offmask;
10481 subord->nested_bounds_set = true;
10482 }
10483
10484 if (subord->nested_bounds_set) {
10485 /* Inherit the bounds from subord. */
10486 grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
10487 grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
10488 grand->nested_bounds_set = true;
10489
10490 /* If we know the bounds, we can trim the pmap. */
10491 grand->nested_has_no_bounds_ref = false;
10492 PMAP_UNLOCK(subord);
10493 } else {
10494 /* Don't trim if we don't know the bounds. */
10495 PMAP_UNLOCK(subord);
10496 return;
10497 }
10498
10499 /* Trim grand to only cover the given range. */
10500 pmap_trim_range(grand, grand->nested_region_grand_addr, grand->nested_region_true_start);
10501 pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_grand_addr + grand->nested_region_size));
10502
10503 /* Try to trim subord. */
10504 pmap_trim_subord(subord);
10505}
10506
0a7de745
A
10507MARK_AS_PMAP_TEXT static void
10508pmap_trim_self(pmap_t pmap)
d9a64523
A
10509{
10510 if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
10511 /* If we have a no bounds ref, we need to drop it. */
10512 PMAP_LOCK(pmap->nested_pmap);
10513 pmap->nested_has_no_bounds_ref = false;
10514 boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
10515 vm_map_offset_t nested_region_true_start = (pmap->nested_pmap->nested_region_true_start - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
10516 vm_map_offset_t nested_region_true_end = (pmap->nested_pmap->nested_region_true_end - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
10517 PMAP_UNLOCK(pmap->nested_pmap);
10518
10519 if (nested_bounds_set) {
10520 pmap_trim_range(pmap, pmap->nested_region_grand_addr, nested_region_true_start);
10521 pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_grand_addr + pmap->nested_region_size));
10522 }
10523 /*
10524 * Try trimming the nested pmap, in case we had the
10525 * last reference.
10526 */
10527 pmap_trim_subord(pmap->nested_pmap);
10528 }
10529}
10530
10531/*
10532 * pmap_trim_subord(grand, subord)
10533 *
10534 * grand = pmap that we have nested subord in
10535 * subord = nested pmap we are attempting to trim
10536 *
10537 * Trims subord if possible
10538 */
10539MARK_AS_PMAP_TEXT static void
10540pmap_trim_subord(pmap_t subord)
10541{
10542 bool contract_subord = false;
10543
10544 PMAP_LOCK(subord);
10545
10546 subord->nested_no_bounds_refcnt--;
10547
10548 if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
10549 /* If this was the last no bounds reference, trim subord. */
10550 contract_subord = true;
10551 }
10552
10553 PMAP_UNLOCK(subord);
10554
10555 if (contract_subord) {
10556 pmap_trim_range(subord, subord->nested_region_subord_addr, subord->nested_region_true_start);
10557 pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_subord_addr + subord->nested_region_size);
10558 }
10559}
10560
10561void
10562pmap_trim(
10563 pmap_t grand,
10564 pmap_t subord,
10565 addr64_t vstart,
10566 addr64_t nstart,
10567 uint64_t size)
10568{
c6bf4f31
A
10569#if XNU_MONITOR
10570 pmap_trim_ppl(grand, subord, vstart, nstart, size);
10571
10572 pmap_ledger_check_balance(grand);
10573 pmap_ledger_check_balance(subord);
10574#else
d9a64523 10575 pmap_trim_internal(grand, subord, vstart, nstart, size);
c6bf4f31 10576#endif
d9a64523
A
10577}
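
/*
 * Sketch, illustrative only: once the bounds actually used within the nested
 * region are known, the shared-region layer can call pmap_trim() with the
 * same addresses it passed to pmap_nest() to release page tables outside
 * those bounds in both pmaps.
 */
#if 0	/* illustrative only */
static void
example_trim_shared_region(pmap_t grand, pmap_t subord,
    addr64_t vstart, addr64_t nstart, uint64_t size)
{
	/* Same addresses as the earlier pmap_nest() call. */
	pmap_trim(grand, subord, vstart, nstart, size);
}
#endif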
10578
c6bf4f31
A
10579#if HAS_APPLE_PAC && XNU_MONITOR
10580static void *
10581pmap_sign_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator)
10582{
10583 void *res = NULL;
10584 boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);
10585
10586 ml_set_kernelkey_enabled(FALSE);
10587 switch (key) {
10588 case ptrauth_key_asia:
10589 res = ptrauth_sign_unauthenticated(value, ptrauth_key_asia, discriminator);
10590 break;
10591 case ptrauth_key_asda:
10592 res = ptrauth_sign_unauthenticated(value, ptrauth_key_asda, discriminator);
10593 break;
10594 default:
10595 panic("attempt to sign user pointer without process independent key");
10596 }
10597 ml_set_kernelkey_enabled(TRUE);
10598
10599 ml_set_interrupts_enabled(current_intr_state);
10600
10601 return res;
10602}
10603
10604void *
10605pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t discriminator)
10606{
10607 return pmap_sign_user_ptr_internal(value, key, discriminator);
10608}
10609
10610static void *
10611pmap_auth_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator)
10612{
10613 if ((key != ptrauth_key_asia) && (key != ptrauth_key_asda)) {
10614 panic("attempt to auth user pointer without process independent key");
10615 }
10616
10617 void *res = NULL;
10618 boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);
10619
10620 ml_set_kernelkey_enabled(FALSE);
10621 res = ml_auth_ptr_unchecked(value, key, discriminator);
10622 ml_set_kernelkey_enabled(TRUE);
10623
10624 ml_set_interrupts_enabled(current_intr_state);
10625
10626 return res;
10627}
10628
10629void *
10630pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t discriminator)
10631{
10632 return pmap_auth_user_ptr_internal(value, key, discriminator);
10633}
10634#endif /* HAS_APPLE_PAC && XNU_MONITOR */
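/*
 * Illustrative sketch (not from the original source): on HAS_APPLE_PAC &&
 * XNU_MONITOR configurations, pmap_sign_user_ptr()/pmap_auth_user_ptr() let
 * the kernel create and validate user-keyed PAC signatures with the kernel
 * key disabled. The discriminator value and helper name below are
 * hypothetical.
 */
#if 0   /* example only, not compiled */
static void *
example_resign_user_code_ptr(void *user_fn)
{
	/* Authenticate the existing IA-key signature, yielding a raw pointer
	 * (panics if a non-process-independent key were requested)... */
	void *raw = pmap_auth_user_ptr(user_fn, ptrauth_key_asia, 0x1234);
	/* ...then re-sign it with the same process-independent key. */
	return pmap_sign_user_ptr(raw, ptrauth_key_asia, 0x1234);
}
#endif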
cb323159 10635
10636/*
10637 * kern_return_t pmap_nest(grand, subord, vstart, nstart, size)
10638 *
10639 * grand = the pmap that we will nest subord into
10640 * subord = the pmap that goes into the grand
10641 * vstart = start of the range in grand to be inserted
10642 * nstart = start of the corresponding range in the nested (subord) pmap
10643 * size = size of the nested area (up to 16TB); must be aligned to the twig-level table size
10644 *
10645 * Inserts a pmap into another. This is used to implement shared segments.
10646 *
10647 */
10648
d9a64523 10649MARK_AS_PMAP_TEXT static kern_return_t
10650pmap_nest_internal(
10651 pmap_t grand,
10652 pmap_t subord,
10653 addr64_t vstart,
10654 addr64_t nstart,
10655 uint64_t size)
10656{
10657 kern_return_t kr = KERN_FAILURE;
10658 vm_map_offset_t vaddr, nvaddr;
10659 tt_entry_t *stte_p;
10660 tt_entry_t *gtte_p;
10661 unsigned int i;
10662 unsigned int num_tte;
10663 unsigned int nested_region_asid_bitmap_size;
10664 unsigned int* nested_region_asid_bitmap;
10665 int expand_options = 0;
10666
d9a64523 10667 addr64_t vend, nend;
0a7de745 10668 if (__improbable(os_add_overflow(vstart, size, &vend))) {
d9a64523 10669 panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
10670 }
10671 if (__improbable(os_add_overflow(nstart, size, &nend))) {
d9a64523 10672 panic("%s: %p nested addr wraps around: 0x%llx + 0x%llx", __func__, subord, nstart, size);
0a7de745 10673 }
cb323159 10674
10675 VALIDATE_PMAP(grand);
10676 VALIDATE_PMAP(subord);
10677
10678 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
10679 assert(pmap_get_pt_attr(subord) == pt_attr);
5ba3f43e 10680
10681#if XNU_MONITOR
10682 expand_options |= PMAP_TT_ALLOCATE_NOWAIT;
10683#endif
10684
10685 if (((size | vstart | nstart) & (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL) {
d9a64523 10686 panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx\n", grand, vstart, nstart, size);
5ba3f43e 10687 }
5ba3f43e 10688
0a7de745 10689 if (!subord->nested) {
d9a64523 10690 panic("%s: subordinate pmap %p is not nestable", __func__, subord);
0a7de745 10691 }
d9a64523 10692
10693 if ((grand->nested_pmap != PMAP_NULL) && (grand->nested_pmap != subord)) {
10694 panic("pmap_nest() pmap %p has a nested pmap\n", grand);
10695 }
10696
10697 if (subord->nested_region_asid_bitmap == NULL) {
cb323159 10698 nested_region_asid_bitmap_size = (unsigned int)(size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY);
5ba3f43e 10699
10700#if XNU_MONITOR
10701 pmap_paddr_t pa = 0;
10702
10703 if ((nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE) {
10704 panic("%s: nested_region_asid_bitmap_size=%u will not fit in a page, "
10705 "grand=%p, subord=%p, vstart=0x%llx, nstart=0x%llx, size=%llx",
10706 __FUNCTION__,
10707 nested_region_asid_bitmap_size,
10708 grand, subord, vstart, nstart, size);
10709 }
10710
10711 kr = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
10712
10713 if (kr != KERN_SUCCESS) {
10714 return kr;
10715 }
10716
10717 assert(pa);
10718
10719 nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
10720#else
0a7de745 10721 nested_region_asid_bitmap = kalloc(nested_region_asid_bitmap_size * sizeof(unsigned int));
c6bf4f31 10722#endif
0a7de745 10723 bzero(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
10724
10725 PMAP_LOCK(subord);
10726 if (subord->nested_region_asid_bitmap == NULL) {
10727 subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
10728 subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
10729 subord->nested_region_subord_addr = nstart;
10730 subord->nested_region_size = (mach_vm_offset_t) size;
10731 nested_region_asid_bitmap = NULL;
10732 }
10733 PMAP_UNLOCK(subord);
10734 if (nested_region_asid_bitmap != NULL) {
10735#if XNU_MONITOR
10736 pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
10737#else
0a7de745 10738 kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
c6bf4f31 10739#endif
10740 }
10741 }
d9a64523 10742 if ((subord->nested_region_subord_addr + subord->nested_region_size) < nend) {
10743 uint64_t new_size;
10744 unsigned int new_nested_region_asid_bitmap_size;
10745 unsigned int* new_nested_region_asid_bitmap;
10746
10747 nested_region_asid_bitmap = NULL;
10748 nested_region_asid_bitmap_size = 0;
d9a64523 10749 new_size = nend - subord->nested_region_subord_addr;
10750
10751 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
cb323159 10752 new_nested_region_asid_bitmap_size = (unsigned int)((new_size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY)) + 1;
5ba3f43e 10753
10754#if XNU_MONITOR
10755 pmap_paddr_t pa = 0;
10756
10757 if ((new_nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE) {
10758 panic("%s: new_nested_region_asid_bitmap_size=%u will not fit in a page, "
10759 "grand=%p, subord=%p, vstart=0x%llx, nstart=0x%llx, size=%llx",
10760 __FUNCTION__,
10761 new_nested_region_asid_bitmap_size,
10762 grand, subord, vstart, nstart, size);
10763 }
10764
10765 kr = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
10766
10767 if (kr != KERN_SUCCESS) {
10768 return kr;
10769 }
10770
10771 assert(pa);
10772
10773 new_nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
10774#else
0a7de745 10775 new_nested_region_asid_bitmap = kalloc(new_nested_region_asid_bitmap_size * sizeof(unsigned int));
c6bf4f31 10776#endif
10777 PMAP_LOCK(subord);
10778 if (subord->nested_region_size < new_size) {
0a7de745 10779 bzero(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));
10780 bcopy(subord->nested_region_asid_bitmap, new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
10781 nested_region_asid_bitmap_size = subord->nested_region_asid_bitmap_size;
10782 nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
10783 subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
10784 subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
10785 subord->nested_region_size = new_size;
10786 new_nested_region_asid_bitmap = NULL;
10787 }
10788 PMAP_UNLOCK(subord);
10789 if (nested_region_asid_bitmap != NULL)
10790#if XNU_MONITOR
10791 {pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);}
10792#else
0a7de745 10793 { kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));}
c6bf4f31 10794#endif
5ba3f43e 10795 if (new_nested_region_asid_bitmap != NULL)
10796#if XNU_MONITOR
10797 {pmap_pages_free(kvtophys((vm_offset_t)new_nested_region_asid_bitmap), PAGE_SIZE);}
10798#else
0a7de745 10799 { kfree(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));}
c6bf4f31 10800#endif
10801 }
10802
10803 PMAP_LOCK(subord);
10804 if (grand->nested_pmap == PMAP_NULL) {
10805 grand->nested_pmap = subord;
10806
10807 if (!subord->nested_bounds_set) {
10808 /*
10809 * We are nesting without the shared region's bounds
10810 * being known. We'll have to trim the pmap later.
10811 */
10812 grand->nested_has_no_bounds_ref = true;
10813 subord->nested_no_bounds_refcnt++;
10814 }
10815
10816 grand->nested_region_grand_addr = vstart;
10817 grand->nested_region_subord_addr = nstart;
10818 grand->nested_region_size = (mach_vm_offset_t) size;
10819 } else {
10820 if ((grand->nested_region_grand_addr > vstart)) {
10821 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand);
0a7de745 10822 } else if ((grand->nested_region_grand_addr + grand->nested_region_size) < vend) {
10823 grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_grand_addr + size);
10824 }
10825 }
10826
0a7de745 10827#if (__ARM_VMSA__ == 7)
10828 nvaddr = (vm_map_offset_t) nstart;
10829 vaddr = (vm_map_offset_t) vstart;
10830 num_tte = size >> ARM_TT_L1_SHIFT;
10831
10832 for (i = 0; i < num_tte; i++) {
10833 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
10834 goto expand_next;
10835 }
10836
10837 stte_p = pmap_tte(subord, nvaddr);
10838 if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
10839 PMAP_UNLOCK(subord);
10840 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_L2_LEVEL);
10841
10842 if (kr != KERN_SUCCESS) {
10843 PMAP_LOCK(grand);
10844 goto done;
10845 }
10846
10847 PMAP_LOCK(subord);
10848 }
10849 PMAP_UNLOCK(subord);
10850 PMAP_LOCK(grand);
10851 stte_p = pmap_tte(grand, vaddr);
10852 if (stte_p == (tt_entry_t *)NULL) {
10853 PMAP_UNLOCK(grand);
10854 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);
10855
10856 if (kr != KERN_SUCCESS) {
10857 PMAP_LOCK(grand);
10858 goto done;
10859 }
10860 } else {
10861 PMAP_UNLOCK(grand);
10862 kr = KERN_SUCCESS;
10863 }
10864 PMAP_LOCK(subord);
10865
d9a64523 10866expand_next:
10867 nvaddr += ARM_TT_L1_SIZE;
10868 vaddr += ARM_TT_L1_SIZE;
10869 }
10870
10871#else
10872 nvaddr = (vm_map_offset_t) nstart;
cb323159 10873 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
10874
10875 for (i = 0; i < num_tte; i++) {
10876 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
10877 goto expand_next;
10878 }
10879
cb323159 10880 stte_p = pmap_tte(subord, nvaddr);
10881 if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
10882 PMAP_UNLOCK(subord);
cb323159 10883 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_LEAF_LEVEL);
10884
10885 if (kr != KERN_SUCCESS) {
10886 PMAP_LOCK(grand);
10887 goto done;
10888 }
10889
10890 PMAP_LOCK(subord);
10891 }
d9a64523 10892expand_next:
cb323159 10893 nvaddr += pt_attr_twig_size(pt_attr);
10894 }
10895#endif
10896 PMAP_UNLOCK(subord);
10897
10898 /*
10899 * copy TTEs from the subord pmap into the grand pmap
10900 */
10901
10902 PMAP_LOCK(grand);
10903 nvaddr = (vm_map_offset_t) nstart;
10904 vaddr = (vm_map_offset_t) vstart;
10905
10906
0a7de745 10907#if (__ARM_VMSA__ == 7)
5ba3f43e 10908 for (i = 0; i < num_tte; i++) {
10909 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
10910 goto nest_next;
10911 }
10912
10913 stte_p = pmap_tte(subord, nvaddr);
10914 gtte_p = pmap_tte(grand, vaddr);
10915 *gtte_p = *stte_p;
10916
d9a64523 10917nest_next:
10918 nvaddr += ARM_TT_L1_SIZE;
10919 vaddr += ARM_TT_L1_SIZE;
10920 }
10921#else
10922 for (i = 0; i < num_tte; i++) {
10923 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
10924 goto nest_next;
10925 }
5ba3f43e 10926
10927 stte_p = pmap_tte(subord, nvaddr);
10928 gtte_p = pmap_tte(grand, vaddr);
10929 if (gtte_p == PT_ENTRY_NULL) {
10930 PMAP_UNLOCK(grand);
cb323159 10931 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_TWIG_LEVEL);
10932 PMAP_LOCK(grand);
10933
10934 if (kr != KERN_SUCCESS) {
10935 goto done;
10936 }
10937
10938 gtte_p = pmap_tt2e(grand, vaddr);
10939 }
10940 *gtte_p = *stte_p;
10941
10942nest_next:
10943 vaddr += pt_attr_twig_size(pt_attr);
10944 nvaddr += pt_attr_twig_size(pt_attr);
10945 }
10946#endif
10947
10948 kr = KERN_SUCCESS;
10949done:
10950
10951 stte_p = pmap_tte(grand, vstart);
10952 FLUSH_PTE_RANGE_STRONG(stte_p, stte_p + num_tte);
5ba3f43e 10953
0a7de745 10954#if (__ARM_VMSA__ > 7)
10955 /*
10956 * check for overflow on LP64 arch
10957 */
10958 assert((size & 0xFFFFFFFF00000000ULL) == 0);
10959#endif
cb323159 10960 PMAP_UPDATE_TLBS(grand, vstart, vend, false);
10961
10962 PMAP_UNLOCK(grand);
10963 return kr;
10964}
10965
10966kern_return_t
10967pmap_nest(
10968 pmap_t grand,
10969 pmap_t subord,
10970 addr64_t vstart,
10971 addr64_t nstart,
10972 uint64_t size)
10973{
10974 kern_return_t kr = KERN_FAILURE;
10975
d9a64523 10976 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
10977 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
10978 VM_KERNEL_ADDRHIDE(vstart));
5ba3f43e 10979
10980#if XNU_MONITOR
10981 while ((kr = pmap_nest_ppl(grand, subord, vstart, nstart, size)) == KERN_RESOURCE_SHORTAGE) {
10982 pmap_alloc_page_for_ppl();
10983 }
10984
10985 pmap_ledger_check_balance(grand);
10986 pmap_ledger_check_balance(subord);
10987#else
5ba3f43e 10988 kr = pmap_nest_internal(grand, subord, vstart, nstart, size);
c6bf4f31 10989#endif
5ba3f43e 10990
d9a64523 10991 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);
10992
10993 return kr;
10994}
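/*
 * Illustrative sketch (not from the original source): nesting the shared
 * region pmap into a task pmap. The helper name and the choice of passing
 * the same base for both ranges are hypothetical; only the pmap_nest() call
 * and the alignment requirement come from the code above.
 */
#if 0   /* example only, not compiled */
static kern_return_t
example_nest_shared_region(pmap_t task_pmap, pmap_t shared_region_pmap)
{
	addr64_t grand_start  = SHARED_REGION_BASE_ARM64;  /* where it appears in the task */
	addr64_t nested_start = SHARED_REGION_BASE_ARM64;  /* offset within the nested pmap */
	uint64_t nest_size    = SHARED_REGION_SIZE_ARM64;  /* must be twig-table aligned */

	/* Copies twig-level TTEs from shared_region_pmap into task_pmap. */
	return pmap_nest(task_pmap, shared_region_pmap, grand_start, nested_start, nest_size);
}
#endif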
10995
10996/*
10997 * kern_return_t pmap_unnest(grand, vaddr, size)
10998 *
d9a64523 10999 * grand = the pmap that will have the virtual range unnested
11000 * vaddr = start of range in pmap to be unnested
11001 * size = size of range in pmap to be unnested
11002 *
11003 */
11004
11005kern_return_t
11006pmap_unnest(
11007 pmap_t grand,
11008 addr64_t vaddr,
11009 uint64_t size)
11010{
0a7de745 11011 return pmap_unnest_options(grand, vaddr, size, 0);
11012}
11013
d9a64523 11014MARK_AS_PMAP_TEXT static kern_return_t
11015pmap_unnest_options_internal(
11016 pmap_t grand,
11017 addr64_t vaddr,
11018 uint64_t size,
11019 unsigned int option)
11020{
11021 vm_map_offset_t start;
11022 vm_map_offset_t addr;
11023 tt_entry_t *tte_p;
11024 unsigned int current_index;
11025 unsigned int start_index;
11026 unsigned int max_index;
11027 unsigned int num_tte;
11028 unsigned int i;
11029
d9a64523 11030 addr64_t vend;
0a7de745 11031 if (__improbable(os_add_overflow(vaddr, size, &vend))) {
d9a64523 11032 panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__, grand, vaddr, size);
0a7de745 11033 }
d9a64523
A
11034
11035 VALIDATE_PMAP(grand);
11036
cb323159
A
11037 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
11038
11039 if (((size | vaddr) & pt_attr_twig_offmask(pt_attr)) != 0x0ULL) {
11040 panic("pmap_unnest(): unaligned request");
5ba3f43e 11041 }
5ba3f43e 11042
0a7de745
A
11043 if ((option & PMAP_UNNEST_CLEAN) == 0) {
11044 if (grand->nested_pmap == NULL) {
d9a64523 11045 panic("%s: %p has no nested pmap", __func__, grand);
0a7de745 11046 }
d9a64523 11047
0a7de745 11048 if ((vaddr < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
d9a64523 11049			panic("%s: %p: unnest request for a not-fully-nested region [%p, %p)", __func__, grand, (void*)vaddr, (void*)vend);
0a7de745 11050 }
d9a64523 11051
5ba3f43e
A
11052 PMAP_LOCK(grand->nested_pmap);
11053
0a7de745 11054 start = vaddr - grand->nested_region_grand_addr + grand->nested_region_subord_addr;
cb323159
A
11055 start_index = (unsigned int)((vaddr - grand->nested_region_grand_addr) >> pt_attr_twig_shift(pt_attr));
11056 max_index = (unsigned int)(start_index + (size >> pt_attr_twig_shift(pt_attr)));
11057 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
5ba3f43e 11058
cb323159 11059 for (current_index = start_index, addr = start; current_index < max_index; current_index++, addr += pt_attr_twig_size(pt_attr)) {
5ba3f43e
A
11060 pt_entry_t *bpte, *epte, *cpte;
11061
d9a64523
A
11062 if (addr < grand->nested_pmap->nested_region_true_start) {
11063 /* We haven't reached the interesting range. */
11064 continue;
11065 }
5ba3f43e 11066
d9a64523
A
11067 if (addr >= grand->nested_pmap->nested_region_true_end) {
11068 /* We're done with the interesting range. */
11069 break;
11070 }
11071
11072 bpte = pmap_pte(grand->nested_pmap, addr);
cb323159 11073 epte = bpte + (pt_attr_leaf_index_mask(pt_attr) >> pt_attr_leaf_shift(pt_attr));
5ba3f43e 11074
0a7de745 11075 if (!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap)) {
5ba3f43e 11076 setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);
5ba3f43e
A
11077
11078 for (cpte = bpte; cpte <= epte; cpte++) {
0a7de745
A
11079 pmap_paddr_t pa;
11080 int pai = 0;
11081 boolean_t managed = FALSE;
5ba3f43e
A
11082 pt_entry_t spte;
11083
11084 if ((*cpte != ARM_PTE_TYPE_FAULT)
cb323159 11085 && (!ARM_PTE_IS_COMPRESSED(*cpte, cpte))) {
5ba3f43e
A
11086 spte = *cpte;
11087 while (!managed) {
11088 pa = pte_to_pa(spte);
0a7de745 11089 if (!pa_valid(pa)) {
5ba3f43e 11090 break;
0a7de745 11091 }
5ba3f43e
A
11092 pai = (int)pa_index(pa);
11093 LOCK_PVH(pai);
11094 spte = *cpte;
11095 pa = pte_to_pa(spte);
11096 if (pai == (int)pa_index(pa)) {
0a7de745 11097 managed = TRUE;
5ba3f43e
A
11098 break; // Leave the PVH locked as we'll unlock it after we update the PTE
11099 }
11100 UNLOCK_PVH(pai);
11101 }
11102
11103 if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
d9a64523 11104 WRITE_PTE_FAST(cpte, (spte | ARM_PTE_NG));
5ba3f43e
A
11105 }
11106
0a7de745 11107 if (managed) {
5ba3f43e
A
11108 ASSERT_PVH_LOCKED(pai);
11109 UNLOCK_PVH(pai);
11110 }
11111 }
11112 }
11113 }
11114
0a7de745 11115 FLUSH_PTE_RANGE_STRONG(bpte, epte);
d9a64523 11116 flush_mmu_tlb_region_asid_async(start, (unsigned)size, grand->nested_pmap);
5ba3f43e
A
11117 }
11118
d9a64523
A
11119 sync_tlb_flush();
11120
5ba3f43e
A
11121 PMAP_UNLOCK(grand->nested_pmap);
11122 }
11123
11124 PMAP_LOCK(grand);
11125
11126 /*
11127 * invalidate all pdes for segment at vaddr in pmap grand
11128 */
11129 start = vaddr;
11130 addr = vaddr;
11131
cb323159 11132 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
5ba3f43e 11133
cb323159 11134 for (i = 0; i < num_tte; i++, addr += pt_attr_twig_size(pt_attr)) {
d9a64523
A
11135 if (addr < grand->nested_pmap->nested_region_true_start) {
11136 /* We haven't reached the interesting range. */
11137 continue;
11138 }
11139
11140 if (addr >= grand->nested_pmap->nested_region_true_end) {
11141 /* We're done with the interesting range. */
11142 break;
11143 }
11144
5ba3f43e
A
11145 tte_p = pmap_tte(grand, addr);
11146 *tte_p = ARM_TTE_TYPE_FAULT;
5ba3f43e
A
11147 }
11148
d9a64523
A
11149 tte_p = pmap_tte(grand, start);
11150 FLUSH_PTE_RANGE_STRONG(tte_p, tte_p + num_tte);
cb323159 11151 PMAP_UPDATE_TLBS(grand, start, vend, false);
5ba3f43e
A
11152
11153 PMAP_UNLOCK(grand);
11154
11155 return KERN_SUCCESS;
11156}
11157
11158kern_return_t
11159pmap_unnest_options(
11160 pmap_t grand,
11161 addr64_t vaddr,
11162 uint64_t size,
11163 unsigned int option)
11164{
11165 kern_return_t kr = KERN_FAILURE;
11166
d9a64523 11167 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
0a7de745 11168 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
5ba3f43e 11169
c6bf4f31
A
11170#if XNU_MONITOR
11171 kr = pmap_unnest_options_ppl(grand, vaddr, size, option);
11172#else
5ba3f43e 11173 kr = pmap_unnest_options_internal(grand, vaddr, size, option);
c6bf4f31 11174#endif
5ba3f43e 11175
d9a64523 11176 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);
5ba3f43e
A
11177
11178 return kr;
11179}
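/*
 * Illustrative sketch (not from the original source): undoing a nested
 * mapping. The range must be twig-table aligned, exactly as pmap_nest()
 * required; the helper name is a hypothetical placeholder.
 */
#if 0   /* example only, not compiled */
static void
example_unnest_shared_region(pmap_t task_pmap, addr64_t grand_start, uint64_t nest_size)
{
	/* Marks the covered leaf PTEs non-global in the nested pmap, then
	 * clears the corresponding twig TTEs in task_pmap and flushes TLBs. */
	kern_return_t kr = pmap_unnest(task_pmap, grand_start, nest_size);
	assert(kr == KERN_SUCCESS);
}
#endif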
11180
11181boolean_t
11182pmap_adjust_unnest_parameters(
11183 __unused pmap_t p,
11184 __unused vm_map_offset_t *s,
11185 __unused vm_map_offset_t *e)
11186{
11187 return TRUE; /* to get to log_unnest_badness()... */
11188}
11189
11190/*
11191 * disable no-execute capability on
11192 * the specified pmap
11193 */
11194#if DEVELOPMENT || DEBUG
11195void
11196pmap_disable_NX(
11197 pmap_t pmap)
11198{
11199 pmap->nx_enabled = FALSE;
11200}
11201#else
11202void
11203pmap_disable_NX(
11204 __unused pmap_t pmap)
11205{
11206}
11207#endif
11208
11209void
11210pt_fake_zone_init(
11211 int zone_index)
11212{
11213 pt_fake_zone_index = zone_index;
11214}
11215
11216void
11217pt_fake_zone_info(
11218 int *count,
11219 vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
11220 uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
11221{
11222 *count = inuse_pmap_pages_count;
11223 *cur_size = PAGE_SIZE * (inuse_pmap_pages_count);
11224 *max_size = PAGE_SIZE * (inuse_pmap_pages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
11225 *elem_size = PAGE_SIZE;
11226 *alloc_size = PAGE_SIZE;
11227 *sum_size = (alloc_pmap_pages_count) * PAGE_SIZE;
11228
11229 *collectable = 1;
11230 *exhaustable = 0;
11231 *caller_acct = 1;
11232}
11233
11234/*
11235 * flush a range of hardware TLB entries.
11236 * NOTE: assumes the smallest TLB entry in use will be for
11237 * an ARM small page (4K).
11238 */
11239
cb323159
A
11240#define ARM_FULL_TLB_FLUSH_THRESHOLD 64
11241
11242#if __ARM_RANGE_TLBI__
11243#define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
11244#define ARM64_FULL_TLB_FLUSH_THRESHOLD ARM64_16K_TLB_RANGE_PAGES
11245#else
0a7de745 11246#define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
cb323159 11247#endif // __ARM_RANGE_TLBI__
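/*
 * Illustrative sketch (not from the original source): a condensed restatement
 * of how the thresholds above steer the arm64 flush strategy in
 * flush_mmu_tlb_region_asid_async() below. The has_range_tlbi parameter is a
 * hypothetical stand-in for the __ARM_RANGE_TLBI__ build option.
 */
#if 0   /* example only, not compiled */
static void
example_select_tlb_flush_strategy(unsigned npages, bool has_range_tlbi)
{
	if (npages > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
		/* Too many pages: flush the whole TLB, or the whole ASID. */
	} else if (has_range_tlbi && npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
		/* Use a single range-based TLBI covering the whole region. */
	} else {
		/* Otherwise issue one invalidate per page in the range. */
	}
}
#endif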
5ba3f43e
A
11248
11249static void
d9a64523 11250flush_mmu_tlb_region_asid_async(
5ba3f43e
A
11251 vm_offset_t va,
11252 unsigned length,
11253 pmap_t pmap)
11254{
0a7de745 11255#if (__ARM_VMSA__ == 7)
5ba3f43e 11256 vm_offset_t end = va + length;
0a7de745 11257 uint32_t asid;
5ba3f43e 11258
cb323159 11259 asid = pmap->hw_asid;
5ba3f43e
A
11260
11261 if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
0a7de745 11262 boolean_t flush_all = FALSE;
5ba3f43e 11263
0a7de745 11264 if ((asid == 0) || (pmap->nested == TRUE)) {
5ba3f43e 11265 flush_all = TRUE;
0a7de745
A
11266 }
11267 if (flush_all) {
d9a64523 11268 flush_mmu_tlb_async();
0a7de745 11269 } else {
d9a64523 11270 flush_mmu_tlb_asid_async(asid);
0a7de745 11271 }
5ba3f43e
A
11272
11273 return;
11274 }
11275 if (pmap->nested == TRUE) {
0a7de745 11276#if !__ARM_MP_EXT__
5ba3f43e
A
11277 flush_mmu_tlb();
11278#else
11279 va = arm_trunc_page(va);
11280 while (va < end) {
d9a64523 11281 flush_mmu_tlb_mva_entries_async(va);
5ba3f43e
A
11282 va += ARM_SMALL_PAGE_SIZE;
11283 }
11284#endif
11285 return;
11286 }
11287 va = arm_trunc_page(va) | (asid & 0xff);
d9a64523 11288 flush_mmu_tlb_entries_async(va, end);
5ba3f43e
A
11289
11290#else
cb323159
A
11291 unsigned npages = length >> pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
11292 uint32_t asid;
5ba3f43e 11293
cb323159 11294 asid = pmap->hw_asid;
5ba3f43e 11295
cb323159 11296 if (npages > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
5ba3f43e
A
11297 boolean_t flush_all = FALSE;
11298
0a7de745 11299 if ((asid == 0) || (pmap->nested == TRUE)) {
5ba3f43e 11300 flush_all = TRUE;
0a7de745
A
11301 }
11302 if (flush_all) {
d9a64523 11303 flush_mmu_tlb_async();
0a7de745 11304 } else {
d9a64523 11305 flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
0a7de745 11306 }
5ba3f43e
A
11307 return;
11308 }
cb323159
A
11309#if __ARM_RANGE_TLBI__
11310 if (npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
11311 va = generate_rtlbi_param(npages, asid, va);
11312 if (pmap->nested == TRUE) {
11313 flush_mmu_tlb_allrange_async(va);
11314 } else {
11315 flush_mmu_tlb_range_async(va);
11316 }
11317 return;
11318 }
11319#endif
11320 vm_offset_t end = tlbi_asid(asid) | tlbi_addr(va + length);
5ba3f43e 11321 va = tlbi_asid(asid) | tlbi_addr(va);
5ba3f43e 11322 if (pmap->nested == TRUE) {
d9a64523 11323 flush_mmu_tlb_allentries_async(va, end);
5ba3f43e 11324 } else {
d9a64523 11325 flush_mmu_tlb_entries_async(va, end);
5ba3f43e
A
11326 }
11327
11328#endif
11329}
11330
cb323159
A
11331MARK_AS_PMAP_TEXT static void
11332flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap)
11333{
11334#if (__ARM_VMSA__ == 7)
11335 flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
11336 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
11337 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
11338 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
11339#else
11340 flush_mmu_tlb_entry_async(tlbi_addr(va & ~pt_attr_twig_offmask(pmap_get_pt_attr(pmap))) | tlbi_asid(pmap->hw_asid));
11341#endif
11342}
11343
11344MARK_AS_PMAP_TEXT static void
11345flush_mmu_tlb_full_asid_async(pmap_t pmap)
11346{
11347#if (__ARM_VMSA__ == 7)
11348 flush_mmu_tlb_asid_async(pmap->hw_asid);
11349#else /* (__ARM_VMSA__ == 7) */
11350 flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
11351#endif /* (__ARM_VMSA__ == 7) */
11352}
11353
5ba3f43e
A
11354void
11355flush_mmu_tlb_region(
11356 vm_offset_t va,
11357 unsigned length)
11358{
d9a64523
A
11359 flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
11360 sync_tlb_flush();
11361}
11362
cb323159 11363static pmap_io_range_t*
d9a64523
A
11364pmap_find_io_attr(pmap_paddr_t paddr)
11365{
cb323159 11366 pmap_io_range_t find_range = {.addr = paddr & ~PAGE_MASK, .len = PAGE_SIZE};
d9a64523 11367 unsigned int begin = 0, end = num_io_rgns - 1;
cb323159
A
11368 if ((num_io_rgns == 0) || (paddr < io_attr_table[begin].addr) ||
11369 (paddr >= (io_attr_table[end].addr + io_attr_table[end].len))) {
11370 return NULL;
11371 }
d9a64523
A
11372
11373 for (;;) {
11374 unsigned int middle = (begin + end) / 2;
11375 int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
0a7de745 11376 if (cmp == 0) {
cb323159 11377 return &io_attr_table[middle];
0a7de745 11378 } else if (begin == end) {
d9a64523 11379 break;
0a7de745 11380 } else if (cmp > 0) {
d9a64523 11381 begin = middle + 1;
0a7de745 11382 } else {
d9a64523 11383 end = middle;
0a7de745
A
11384 }
11385 }
d9a64523 11386
cb323159 11387 return NULL;
5ba3f43e
A
11388}
11389
11390unsigned int
11391pmap_cache_attributes(
11392 ppnum_t pn)
11393{
11394 pmap_paddr_t paddr;
0a7de745
A
11395 int pai;
11396 unsigned int result;
11397 pp_attr_t pp_attr_current;
5ba3f43e
A
11398
11399 paddr = ptoa(pn);
11400
cb323159 11401 assert(vm_last_phys > vm_first_phys); // Check that pmap has been bootstrapped
5ba3f43e 11402
0a7de745 11403 if (!pa_valid(paddr)) {
cb323159
A
11404 pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
11405 return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
0a7de745 11406 }
5ba3f43e
A
11407
11408 result = VM_WIMG_DEFAULT;
11409
11410 pai = (int)pa_index(paddr);
11411
11412 pp_attr_current = pp_attr_table[pai];
0a7de745 11413 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
5ba3f43e 11414 result = pp_attr_current & PP_ATTR_WIMG_MASK;
0a7de745 11415 }
5ba3f43e
A
11416 return result;
11417}
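/*
 * Illustrative sketch (not from the original source): querying the WIMG
 * cache attribute for a physical page. For unmanaged pages the result comes
 * from the I/O attribute table, defaulting to VM_WIMG_IO when the page is
 * not described there. The helper name is hypothetical.
 */
#if 0   /* example only, not compiled */
static bool
example_page_is_default_cacheable(ppnum_t pn)
{
	return pmap_cache_attributes(pn) == VM_WIMG_DEFAULT;
}
#endif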
11418
0a7de745
A
11419MARK_AS_PMAP_TEXT static void
11420pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
11421{
11422 if ((wimg_bits_prev != wimg_bits_new)
11423 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
11424 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
11425 && (wimg_bits_new != VM_WIMG_COPYBACK))
11426 || ((wimg_bits_prev == VM_WIMG_WTHRU)
11427 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
11428 pmap_sync_page_attributes_phys(pn);
11429 }
11430
11431 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
11432 pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
11433 }
11434}
11435
11436MARK_AS_PMAP_TEXT static __unused void
11437pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
11438{
11439 pmap_paddr_t paddr = ptoa(pn);
11440 int pai = (int)pa_index(paddr);
11441
11442 if (__improbable(!pa_valid(paddr))) {
11443 panic("%s called on non-managed page 0x%08x", __func__, pn);
11444 }
11445
11446 LOCK_PVH(pai);
11447
c6bf4f31
A
11448#if XNU_MONITOR
11449 if (__improbable(pa_test_monitor(paddr))) {
11450 panic("%s invoked on PPL page 0x%08x", __func__, pn);
11451 }
11452#endif
0a7de745
A
11453
11454 pmap_update_cache_attributes_locked(pn, new_cacheattr);
11455
11456 UNLOCK_PVH(pai);
11457
11458 pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
11459}
11460
11461void *
11462pmap_map_compressor_page(ppnum_t pn)
11463{
11464#if __ARM_PTE_PHYSMAP__
11465 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
11466 if (cacheattr != VM_WIMG_DEFAULT) {
c6bf4f31
A
11467#if XNU_MONITOR
11468 pmap_update_compressor_page_ppl(pn, cacheattr, VM_WIMG_DEFAULT);
11469#else
0a7de745 11470 pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
c6bf4f31 11471#endif
0a7de745
A
11472 }
11473#endif
11474 return (void*)phystokv(ptoa(pn));
11475}
11476
11477void
11478pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
11479{
11480#if __ARM_PTE_PHYSMAP__
11481 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
11482 if (cacheattr != VM_WIMG_DEFAULT) {
c6bf4f31
A
11483#if XNU_MONITOR
11484 pmap_update_compressor_page_ppl(pn, VM_WIMG_DEFAULT, cacheattr);
11485#else
0a7de745 11486 pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
c6bf4f31 11487#endif
0a7de745
A
11488 }
11489#endif
11490}
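/*
 * Illustrative sketch (not from the original source): the compressor maps a
 * page through the physical aperture, temporarily forcing it to the default
 * (cacheable) WIMG attribute, and restores the original attribute on unmap.
 * The helper name is hypothetical.
 */
#if 0   /* example only, not compiled */
static void
example_compress_page(ppnum_t pn)
{
	void *kva = pmap_map_compressor_page(pn);   /* may switch the page's WIMG to default */
	/* ... read or compress the page contents through kva ... */
	pmap_unmap_compressor_page(pn, kva);        /* restores the original WIMG attribute */
}
#endif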
11491
d9a64523 11492MARK_AS_PMAP_TEXT static boolean_t
5ba3f43e 11493pmap_batch_set_cache_attributes_internal(
0a7de745 11494 ppnum_t pn,
5ba3f43e
A
11495 unsigned int cacheattr,
11496 unsigned int page_cnt,
11497 unsigned int page_index,
11498 boolean_t doit,
11499 unsigned int *res)
11500{
11501 pmap_paddr_t paddr;
0a7de745
A
11502 int pai;
11503 pp_attr_t pp_attr_current;
11504 pp_attr_t pp_attr_template;
11505 unsigned int wimg_bits_prev, wimg_bits_new;
5ba3f43e 11506
0a7de745 11507 if (cacheattr & VM_WIMG_USE_DEFAULT) {
5ba3f43e 11508 cacheattr = VM_WIMG_DEFAULT;
0a7de745 11509 }
5ba3f43e 11510
0a7de745 11511 if ((doit == FALSE) && (*res == 0)) {
d9a64523 11512 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
5ba3f43e 11513 *res = page_cnt;
d9a64523 11514 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
0a7de745 11515 if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt << PAGE_SHIFT) == FALSE) {
5ba3f43e
A
11516 return FALSE;
11517 }
11518 }
11519
11520 paddr = ptoa(pn);
11521
11522 if (!pa_valid(paddr)) {
d9a64523 11523 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn);
5ba3f43e
A
11524 }
11525
11526 pai = (int)pa_index(paddr);
11527
d9a64523 11528 if (doit) {
5ba3f43e 11529 LOCK_PVH(pai);
c6bf4f31
A
11530#if XNU_MONITOR
11531 if (pa_test_monitor(paddr)) {
11532 panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
11533 }
11534#endif
d9a64523 11535 }
5ba3f43e 11536
d9a64523
A
11537 do {
11538 pp_attr_current = pp_attr_table[pai];
11539 wimg_bits_prev = VM_WIMG_DEFAULT;
0a7de745 11540 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
d9a64523 11541 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
0a7de745 11542 }
5ba3f43e 11543
d9a64523
A
11544 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
11545
0a7de745 11546 if (!doit) {
d9a64523 11547 break;
0a7de745 11548 }
5ba3f43e 11549
d9a64523
A
11550 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
11551 * to avoid losing simultaneous updates to other bits like refmod. */
11552 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
5ba3f43e
A
11553
11554 wimg_bits_new = VM_WIMG_DEFAULT;
0a7de745 11555 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
5ba3f43e 11556 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
0a7de745 11557 }
5ba3f43e
A
11558
11559 if (doit) {
0a7de745 11560 if (wimg_bits_new != wimg_bits_prev) {
5ba3f43e 11561 pmap_update_cache_attributes_locked(pn, cacheattr);
0a7de745 11562 }
5ba3f43e 11563 UNLOCK_PVH(pai);
0a7de745 11564 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
d9a64523 11565 pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
0a7de745 11566 }
5ba3f43e
A
11567 } else {
11568 if (wimg_bits_new == VM_WIMG_COPYBACK) {
11569 return FALSE;
11570 }
11571 if (wimg_bits_prev == wimg_bits_new) {
d9a64523 11572 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
0a7de745 11573 *res = *res - 1;
d9a64523 11574 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
0a7de745 11575 if (!platform_cache_batch_wimg(wimg_bits_new, (*res) << PAGE_SHIFT)) {
5ba3f43e
A
11576 return FALSE;
11577 }
11578 }
11579 return TRUE;
11580 }
11581
0a7de745 11582 if (page_cnt == (page_index + 1)) {
5ba3f43e 11583 wimg_bits_prev = VM_WIMG_COPYBACK;
d9a64523 11584 if (((wimg_bits_prev != wimg_bits_new))
5ba3f43e 11585 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
0a7de745
A
11586 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
11587 && (wimg_bits_new != VM_WIMG_COPYBACK))
11588 || ((wimg_bits_prev == VM_WIMG_WTHRU)
11589 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
5ba3f43e
A
11590 platform_cache_flush_wimg(wimg_bits_new);
11591 }
11592 }
11593
11594 return TRUE;
cb323159 11595}
5ba3f43e
A
11596
11597boolean_t
11598pmap_batch_set_cache_attributes(
0a7de745 11599 ppnum_t pn,
5ba3f43e
A
11600 unsigned int cacheattr,
11601 unsigned int page_cnt,
11602 unsigned int page_index,
11603 boolean_t doit,
11604 unsigned int *res)
11605{
c6bf4f31
A
11606#if XNU_MONITOR
11607 return pmap_batch_set_cache_attributes_ppl(pn, cacheattr, page_cnt, page_index, doit, res);
11608#else
5ba3f43e 11609 return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
c6bf4f31 11610#endif
5ba3f43e
A
11611}
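/*
 * Illustrative sketch (not from the original source): the batch interface is
 * driven in two passes. A first pass with doit == FALSE sizes the batch and
 * checks whether the platform can flush it as one unit; a second pass with
 * doit == TRUE applies the new WIMG attribute to each page. The page array
 * and helper name below are hypothetical.
 */
#if 0   /* example only, not compiled */
static void
example_batch_set_wimg(ppnum_t *pages, unsigned int page_cnt, unsigned int cacheattr)
{
	unsigned int res = 0;
	unsigned int i;

	/* Pass 1: probe only; res is seeded with the batch size on the first call. */
	for (i = 0; i < page_cnt; i++) {
		if (!pmap_batch_set_cache_attributes(pages[i], cacheattr, page_cnt, i, FALSE, &res)) {
			break;  /* fall back to per-page handling if batching is not possible */
		}
	}

	/* Pass 2: actually update pp_attr_table and the existing mappings. */
	for (i = 0; i < page_cnt; i++) {
		pmap_batch_set_cache_attributes(pages[i], cacheattr, page_cnt, i, TRUE, &res);
	}
}
#endif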
11612
d9a64523
A
11613MARK_AS_PMAP_TEXT static void
11614pmap_set_cache_attributes_priv(
5ba3f43e 11615 ppnum_t pn,
d9a64523
A
11616 unsigned int cacheattr,
11617 boolean_t external __unused)
5ba3f43e
A
11618{
11619 pmap_paddr_t paddr;
0a7de745
A
11620 int pai;
11621 pp_attr_t pp_attr_current;
11622 pp_attr_t pp_attr_template;
11623 unsigned int wimg_bits_prev, wimg_bits_new;
5ba3f43e
A
11624
11625 paddr = ptoa(pn);
11626
11627 if (!pa_valid(paddr)) {
0a7de745 11628 return; /* Not a managed page. */
5ba3f43e
A
11629 }
11630
0a7de745 11631 if (cacheattr & VM_WIMG_USE_DEFAULT) {
5ba3f43e 11632 cacheattr = VM_WIMG_DEFAULT;
0a7de745 11633 }
5ba3f43e
A
11634
11635 pai = (int)pa_index(paddr);
11636
11637 LOCK_PVH(pai);
11638
c6bf4f31
A
11639#if XNU_MONITOR
11640 if (external && pa_test_monitor(paddr)) {
11641 panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
11642 } else if (!external && !pa_test_monitor(paddr)) {
11643 panic("%s invoked on non-PPL page 0x%llx", __func__, (uint64_t)paddr);
11644 }
11645#endif
5ba3f43e 11646
d9a64523
A
11647 do {
11648 pp_attr_current = pp_attr_table[pai];
11649 wimg_bits_prev = VM_WIMG_DEFAULT;
0a7de745 11650 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
d9a64523 11651 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
0a7de745 11652 }
d9a64523 11653
0a7de745 11654 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
d9a64523
A
11655
11656 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
11657 * to avoid losing simultaneous updates to other bits like refmod. */
11658 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
5ba3f43e 11659
5ba3f43e 11660 wimg_bits_new = VM_WIMG_DEFAULT;
0a7de745 11661 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
5ba3f43e 11662 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
0a7de745 11663 }
5ba3f43e 11664
0a7de745 11665 if (wimg_bits_new != wimg_bits_prev) {
5ba3f43e 11666 pmap_update_cache_attributes_locked(pn, cacheattr);
0a7de745 11667 }
5ba3f43e
A
11668
11669 UNLOCK_PVH(pai);
11670
0a7de745 11671 pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
d9a64523
A
11672}
11673
11674MARK_AS_PMAP_TEXT static void
11675pmap_set_cache_attributes_internal(
11676 ppnum_t pn,
11677 unsigned int cacheattr)
11678{
11679 pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
5ba3f43e
A
11680}
11681
11682void
11683pmap_set_cache_attributes(
11684 ppnum_t pn,
11685 unsigned int cacheattr)
11686{
c6bf4f31
A
11687#if XNU_MONITOR
11688 pmap_set_cache_attributes_ppl(pn, cacheattr);
11689#else
5ba3f43e 11690 pmap_set_cache_attributes_internal(pn, cacheattr);
c6bf4f31 11691#endif
5ba3f43e
A
11692}
11693
cb323159 11694MARK_AS_PMAP_TEXT void
5ba3f43e
A
11695pmap_update_cache_attributes_locked(
11696 ppnum_t ppnum,
11697 unsigned attributes)
11698{
d9a64523
A
11699 pmap_paddr_t phys = ptoa(ppnum);
11700 pv_entry_t *pve_p;
11701 pt_entry_t *pte_p;
11702 pv_entry_t **pv_h;
5ba3f43e 11703 pt_entry_t tmplate;
d9a64523
A
11704 unsigned int pai;
11705 boolean_t tlb_flush_needed = FALSE;
5ba3f43e 11706
cb323159
A
11707 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);
11708
5ba3f43e
A
11709#if __ARM_PTE_PHYSMAP__
11710 vm_offset_t kva = phystokv(phys);
11711 pte_p = pmap_pte(kernel_pmap, kva);
11712
11713 tmplate = *pte_p;
11714 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
c6bf4f31
A
11715#if XNU_MONITOR
11716 tmplate |= (wimg_to_pte(attributes) & ~ARM_PTE_XPRR_MASK);
11717#else
5ba3f43e 11718 tmplate |= wimg_to_pte(attributes);
c6bf4f31 11719#endif
d9a64523
A
11720#if (__ARM_VMSA__ > 7)
11721 if (tmplate & ARM_PTE_HINT_MASK) {
11722 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
0a7de745 11723 __FUNCTION__, pte_p, (void *)kva, tmplate);
d9a64523
A
11724 }
11725#endif
11726 WRITE_PTE_STRONG(pte_p, tmplate);
11727 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
11728 tlb_flush_needed = TRUE;
5ba3f43e
A
11729#endif
11730
11731 pai = (unsigned int)pa_index(phys);
11732
11733 pv_h = pai_to_pvh(pai);
11734
11735 pte_p = PT_ENTRY_NULL;
11736 pve_p = PV_ENTRY_NULL;
11737 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
11738 pte_p = pvh_ptep(pv_h);
0a7de745 11739 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
5ba3f43e
A
11740 pve_p = pvh_list(pv_h);
11741 pte_p = PT_ENTRY_NULL;
11742 }
11743
11744 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
11745 vm_map_address_t va;
11746 pmap_t pmap;
11747
0a7de745 11748 if (pve_p != PV_ENTRY_NULL) {
5ba3f43e 11749 pte_p = pve_get_ptep(pve_p);
0a7de745 11750 }
d9a64523 11751#ifdef PVH_FLAG_IOMMU
0a7de745 11752 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
d9a64523 11753 goto cache_skip_pve;
0a7de745 11754 }
d9a64523 11755#endif
5ba3f43e
A
11756 pmap = ptep_get_pmap(pte_p);
11757 va = ptep_get_va(pte_p);
11758
11759 tmplate = *pte_p;
5c9f4661 11760 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
cb323159 11761 tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes);
5ba3f43e 11762
d9a64523 11763 WRITE_PTE_STRONG(pte_p, tmplate);
cb323159 11764 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
d9a64523 11765 tlb_flush_needed = TRUE;
5ba3f43e 11766
d9a64523 11767#ifdef PVH_FLAG_IOMMU
0a7de745 11768cache_skip_pve:
d9a64523 11769#endif
5ba3f43e 11770 pte_p = PT_ENTRY_NULL;
0a7de745 11771 if (pve_p != PV_ENTRY_NULL) {
5ba3f43e 11772 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
0a7de745 11773 }
5ba3f43e 11774 }
0a7de745 11775 if (tlb_flush_needed) {
d9a64523 11776 sync_tlb_flush();
0a7de745 11777 }
cb323159
A
11778
11779 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);
5ba3f43e
A
11780}
11781
0a7de745 11782#if (__ARM_VMSA__ == 7)
5ba3f43e
A
11783vm_map_address_t
11784pmap_create_sharedpage(
11785 void)
11786{
11787 pmap_paddr_t pa;
11788 kern_return_t kr;
11789
11790 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
11791 memset((char *) phystokv(pa), 0, PAGE_SIZE);
11792
11793 kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
11794 assert(kr == KERN_SUCCESS);
11795
0a7de745 11796 return (vm_map_address_t)phystokv(pa);
5ba3f43e
A
11797}
11798#else
11799static void
11800pmap_update_tt3e(
11801 pmap_t pmap,
11802 vm_address_t address,
11803 tt_entry_t template)
11804{
11805 tt_entry_t *ptep, pte;
11806
11807 ptep = pmap_tt3e(pmap, address);
11808 if (ptep == NULL) {
11809 panic("%s: no ptep?\n", __FUNCTION__);
11810 }
11811
11812 pte = *ptep;
11813 pte = tte_to_pa(pte) | template;
d9a64523 11814 WRITE_PTE_STRONG(ptep, pte);
5ba3f43e
A
11815}
11816
11817/* Note absence of non-global bit */
11818#define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
0a7de745
A
11819 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
11820 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
11821 | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
5ba3f43e
A
11822
11823vm_map_address_t
11824pmap_create_sharedpage(
0a7de745
A
11825 void
11826 )
5ba3f43e
A
11827{
11828 kern_return_t kr;
11829 pmap_paddr_t pa = 0;
11830
c6bf4f31
A
11831#if XNU_MONITOR
11832 pa = pmap_alloc_page_for_kern();
11833 assert(pa);
11834#else
5ba3f43e 11835
5ba3f43e 11836 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
c6bf4f31 11837#endif
5ba3f43e
A
11838
11839 memset((char *) phystokv(pa), 0, PAGE_SIZE);
11840
d9a64523 11841#ifdef CONFIG_XNUPOST
5ba3f43e 11842 /*
5c9f4661
A
11843 * The kernel pmap maintains a user accessible mapping of the commpage
11844 * to test PAN.
5ba3f43e 11845 */
5c9f4661 11846 kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
5ba3f43e 11847 assert(kr == KERN_SUCCESS);
5c9f4661
A
11848
11849 /*
11850 * This mapping should not be global (as we only expect to reference it
11851 * during testing).
11852 */
11853 pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);
5ba3f43e 11854
d9a64523
A
11855#if KASAN
11856 kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
11857#endif
11858#endif /* CONFIG_XNUPOST */
5ba3f43e
A
11859
11860 /*
5c9f4661
A
11861 * In order to avoid burning extra pages on mapping the shared page, we
11862 * create a dedicated pmap for the shared page. We forcibly nest the
11863 * translation tables from this pmap into other pmaps. The level we
11864 * will nest at depends on the MMU configuration (page size, TTBR range,
11865 * etc).
11866 *
11867 * Note that this is NOT "the nested pmap" (which is used to nest the
11868 * shared cache).
5ba3f43e 11869 *
5c9f4661
A
11870 * Note that we update parameters of the entry for our unique needs (NG
11871 * entry, etc.).
5ba3f43e 11872 */
cb323159 11873 sharedpage_pmap = pmap_create_options(NULL, 0x0, 0);
5c9f4661
A
11874 assert(sharedpage_pmap != NULL);
11875
11876 /* The user 64-bit mapping... */
11877 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
11878 assert(kr == KERN_SUCCESS);
11879 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
11880
11881 /* ...and the user 32-bit mapping. */
11882 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
5ba3f43e 11883 assert(kr == KERN_SUCCESS);
5c9f4661 11884 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
5ba3f43e
A
11885
11886 /* For manipulation in kernel, go straight to physical page */
0a7de745 11887 return (vm_map_address_t)phystokv(pa);
5ba3f43e
A
11888}
11889
5c9f4661
A
11890/*
11891 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
11892 * with user controlled TTEs.
11893 */
cb323159 11894#if (ARM_PGSHIFT == 14)
5c9f4661
A
11895static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= MACH_VM_MAX_ADDRESS);
11896static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
11897#elif (ARM_PGSHIFT == 12)
11898static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
11899static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
11900#else
11901#error Nested shared page mapping is unsupported on this config
11902#endif
11903
d9a64523 11904MARK_AS_PMAP_TEXT static kern_return_t
5ba3f43e
A
11905pmap_insert_sharedpage_internal(
11906 pmap_t pmap)
11907{
d9a64523 11908 kern_return_t kr = KERN_SUCCESS;
5c9f4661 11909 vm_offset_t sharedpage_vaddr;
5ba3f43e 11910 pt_entry_t *ttep, *src_ttep;
d9a64523
A
11911 int options = 0;
11912
11913 VALIDATE_PMAP(pmap);
c6bf4f31
A
11914#if XNU_MONITOR
11915 options |= PMAP_OPTIONS_NOWAIT;
11916#endif /* XNU_MONITOR */
d9a64523 11917
5ba3f43e
A
11918#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
11919#error We assume a single page.
11920#endif
11921
11922 if (pmap_is_64bit(pmap)) {
5c9f4661
A
11923 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
11924 } else {
11925 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
5ba3f43e
A
11926 }
11927
11928 PMAP_LOCK(pmap);
11929
11930 /*
11931 * For 4KB pages, we can force the commpage to nest at the level one
11932	 * page table, as each entry is 1GB (i.e., there will be no overlap
11933 * with regular userspace mappings). For 16KB pages, each level one
11934 * entry is 64GB, so we must go to the second level entry (32MB) in
11935 * order to nest.
11936 */
11937#if (ARM_PGSHIFT == 12)
d9a64523
A
11938 (void)options;
11939
5ba3f43e 11940 /* Just slam in the L1 entry. */
5c9f4661
A
11941 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
11942
5ba3f43e
A
11943 if (*ttep != ARM_PTE_EMPTY) {
11944 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
11945 }
11946
5c9f4661 11947 src_ttep = pmap_tt1e(sharedpage_pmap, sharedpage_vaddr);
5ba3f43e 11948#elif (ARM_PGSHIFT == 14)
5ba3f43e
A
11949 /* Allocate for the L2 entry if necessary, and slam it into place. */
11950 /*
11951	 * As long as we are using a three-level page table, the first level
11952 * should always exist, so we don't need to check for it.
11953 */
5c9f4661 11954 while (*pmap_tt1e(pmap, sharedpage_vaddr) == ARM_PTE_EMPTY) {
5ba3f43e
A
11955 PMAP_UNLOCK(pmap);
11956
d9a64523 11957 kr = pmap_expand(pmap, sharedpage_vaddr, options, PMAP_TT_L2_LEVEL);
5ba3f43e
A
11958
11959 if (kr != KERN_SUCCESS) {
c6bf4f31
A
11960#if XNU_MONITOR
11961 if (kr == KERN_RESOURCE_SHORTAGE) {
11962 return kr;
11963 } else
11964#endif
d9a64523
A
11965 {
11966 panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
11967 }
5ba3f43e
A
11968 }
11969
11970 PMAP_LOCK(pmap);
11971 }
5ba3f43e 11972
5c9f4661
A
11973 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
11974
5ba3f43e
A
11975 if (*ttep != ARM_PTE_EMPTY) {
11976 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
11977 }
11978
5c9f4661 11979 src_ttep = pmap_tt2e(sharedpage_pmap, sharedpage_vaddr);
5ba3f43e
A
11980#endif
11981
11982 *ttep = *src_ttep;
d9a64523
A
11983 FLUSH_PTE_STRONG(ttep);
11984
5c9f4661 11985 /* TODO: Should we flush in the 64-bit case? */
d9a64523 11986 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
5c9f4661 11987
cb323159
A
11988#if (ARM_PGSHIFT == 12)
11989 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
5ba3f43e 11990#elif (ARM_PGSHIFT == 14)
cb323159 11991 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
5ba3f43e 11992#endif
d9a64523 11993 sync_tlb_flush();
5ba3f43e
A
11994
11995 PMAP_UNLOCK(pmap);
5ba3f43e 11996
d9a64523 11997 return kr;
5ba3f43e
A
11998}
11999
12000static void
5c9f4661 12001pmap_unmap_sharedpage(
5ba3f43e
A
12002 pmap_t pmap)
12003{
12004 pt_entry_t *ttep;
5c9f4661 12005 vm_offset_t sharedpage_vaddr;
5ba3f43e
A
12006
12007#if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
12008#error We assume a single page.
12009#endif
12010
5c9f4661
A
12011 if (pmap_is_64bit(pmap)) {
12012 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
12013 } else {
12014 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
12015 }
12016
5ba3f43e 12017#if (ARM_PGSHIFT == 12)
5c9f4661
A
12018 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
12019
5ba3f43e
A
12020 if (ttep == NULL) {
12021 return;
12022 }
12023
12024 /* It had better be mapped to the shared page */
5c9f4661 12025 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt1e(sharedpage_pmap, sharedpage_vaddr)) {
5ba3f43e
A
12026 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
12027 }
12028#elif (ARM_PGSHIFT == 14)
5c9f4661
A
12029 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
12030
5ba3f43e
A
12031 if (ttep == NULL) {
12032 return;
12033 }
12034
12035 /* It had better be mapped to the shared page */
5c9f4661 12036 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt2e(sharedpage_pmap, sharedpage_vaddr)) {
5ba3f43e
A
12037 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
12038 }
12039#endif
12040
12041 *ttep = ARM_TTE_EMPTY;
d9a64523 12042 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
5ba3f43e
A
12043
12044#if (ARM_PGSHIFT == 12)
cb323159 12045 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
5ba3f43e 12046#elif (ARM_PGSHIFT == 14)
cb323159 12047 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
5ba3f43e 12048#endif
d9a64523 12049 sync_tlb_flush();
5ba3f43e
A
12050}
12051
12052void
12053pmap_insert_sharedpage(
12054 pmap_t pmap)
12055{
c6bf4f31
A
12056#if XNU_MONITOR
12057 kern_return_t kr = KERN_FAILURE;
12058
12059 while ((kr = pmap_insert_sharedpage_ppl(pmap)) == KERN_RESOURCE_SHORTAGE) {
12060 pmap_alloc_page_for_ppl();
12061 }
12062
12063 pmap_ledger_check_balance(pmap);
12064
12065 if (kr != KERN_SUCCESS) {
12066 panic("%s: failed to insert the shared page, kr=%d, "
12067 "pmap=%p",
12068 __FUNCTION__, kr,
12069 pmap);
12070 }
12071#else
5ba3f43e 12072 pmap_insert_sharedpage_internal(pmap);
c6bf4f31 12073#endif
5ba3f43e
A
12074}
12075
12076static boolean_t
12077pmap_is_64bit(
12078 pmap_t pmap)
12079{
0a7de745 12080 return pmap->is_64bit;
5ba3f43e
A
12081}
12082
12083#endif
12084
12085/* ARMTODO -- an implementation that accounts for
12086 * holes in the physical map, if any.
12087 */
12088boolean_t
12089pmap_valid_page(
0a7de745
A
12090 ppnum_t pn)
12091{
5ba3f43e
A
12092 return pa_valid(ptoa(pn));
12093}
12094
4ba76501
A
12095boolean_t
12096pmap_bootloader_page(
12097 ppnum_t pn)
12098{
12099 pmap_paddr_t paddr = ptoa(pn);
12100
12101 if (pa_valid(paddr)) {
12102 return FALSE;
12103 }
12104 pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
12105 return (io_rgn != NULL) && (io_rgn->wimg & PMAP_IO_RANGE_CARVEOUT);
12106}
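/*
 * Illustrative sketch (not from the original source): the three classes of
 * physical pages these predicates expose: kernel-managed pages, bootloader
 * carve-out I/O ranges, and everything else. The helper name is hypothetical.
 */
#if 0   /* example only, not compiled */
static const char *
example_classify_page(ppnum_t pn)
{
	if (pmap_valid_page(pn)) {
		return "managed";              /* covered by pp_attr_table / PV heads */
	} else if (pmap_bootloader_page(pn)) {
		return "bootloader carveout";  /* PMAP_IO_RANGE_CARVEOUT I/O region */
	}
	return "unmanaged";
}
#endif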
12107
d9a64523 12108MARK_AS_PMAP_TEXT static boolean_t
5ba3f43e
A
12109pmap_is_empty_internal(
12110 pmap_t pmap,
12111 vm_map_offset_t va_start,
12112 vm_map_offset_t va_end)
12113{
12114 vm_map_offset_t block_start, block_end;
12115 tt_entry_t *tte_p;
12116
12117 if (pmap == NULL) {
12118 return TRUE;
12119 }
12120
d9a64523
A
12121 VALIDATE_PMAP(pmap);
12122
cb323159
A
12123 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
12124 unsigned int initial_not_in_kdp = not_in_kdp;
12125
12126 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
5ba3f43e
A
12127 PMAP_LOCK(pmap);
12128 }
12129
0a7de745 12130#if (__ARM_VMSA__ == 7)
cb323159
A
12131 if (tte_index(pmap, pt_attr, va_end) >= pmap->tte_index_max) {
12132 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
5ba3f43e
A
12133 PMAP_UNLOCK(pmap);
12134 }
12135 return TRUE;
12136 }
cb323159 12137#endif
5ba3f43e 12138
cb323159 12139 /* TODO: This will be faster if we increment ttep at each level. */
5ba3f43e
A
12140 block_start = va_start;
12141
12142 while (block_start < va_end) {
12143 pt_entry_t *bpte_p, *epte_p;
12144 pt_entry_t *pte_p;
12145
cb323159 12146 block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
0a7de745 12147 if (block_end > va_end) {
5ba3f43e 12148 block_end = va_end;
0a7de745 12149 }
5ba3f43e 12150
cb323159 12151 tte_p = pmap_tte(pmap, block_start);
5ba3f43e 12152 if ((tte_p != PT_ENTRY_NULL)
0a7de745 12153 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
5ba3f43e 12154 pte_p = (pt_entry_t *) ttetokv(*tte_p);
cb323159
A
12155 bpte_p = &pte_p[pte_index(pmap, pt_attr, block_start)];
12156 epte_p = &pte_p[pte_index(pmap, pt_attr, block_end)];
5ba3f43e
A
12157
12158 for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
12159 if (*pte_p != ARM_PTE_EMPTY) {
cb323159 12160 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
5ba3f43e
A
12161 PMAP_UNLOCK(pmap);
12162 }
12163 return FALSE;
12164 }
12165 }
0a7de745 12166 }
5ba3f43e
A
12167 block_start = block_end;
12168 }
5ba3f43e 12169
cb323159 12170 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
5ba3f43e
A
12171 PMAP_UNLOCK(pmap);
12172 }
12173
12174 return TRUE;
12175}
12176
12177boolean_t
12178pmap_is_empty(
12179 pmap_t pmap,
12180 vm_map_offset_t va_start,
12181 vm_map_offset_t va_end)
12182{
c6bf4f31
A
12183#if XNU_MONITOR
12184 return pmap_is_empty_ppl(pmap, va_start, va_end);
12185#else
5ba3f43e 12186 return pmap_is_empty_internal(pmap, va_start, va_end);
c6bf4f31 12187#endif
5ba3f43e
A
12188}
12189
0a7de745
A
12190vm_map_offset_t
12191pmap_max_offset(
12192 boolean_t is64,
12193 unsigned int option)
d9a64523
A
12194{
12195 return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
12196}
12197
0a7de745
A
12198vm_map_offset_t
12199pmap_max_64bit_offset(
d9a64523 12200 __unused unsigned int option)
5ba3f43e 12201{
0a7de745 12202 vm_map_offset_t max_offset_ret = 0;
5ba3f43e
A
12203
12204#if defined(__arm64__)
d9a64523 12205 const vm_map_offset_t min_max_offset = SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000; // end of shared region + 512MB for various purposes
5ba3f43e
A
12206 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
12207 max_offset_ret = arm64_pmap_max_offset_default;
12208 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
12209 max_offset_ret = min_max_offset;
12210 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
12211 max_offset_ret = MACH_VM_MAX_ADDRESS;
12212 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
12213 if (arm64_pmap_max_offset_default) {
12214 max_offset_ret = arm64_pmap_max_offset_default;
12215 } else if (max_mem > 0xC0000000) {
d9a64523 12216 max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
5ba3f43e 12217 } else if (max_mem > 0x40000000) {
d9a64523 12218 max_offset_ret = min_max_offset + 0x38000000; // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
5ba3f43e
A
12219 } else {
12220 max_offset_ret = min_max_offset;
12221 }
12222 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
a39ff7e2
A
12223 if (arm64_pmap_max_offset_default) {
12224 // Allow the boot-arg to override jumbo size
12225 max_offset_ret = arm64_pmap_max_offset_default;
12226 } else {
d9a64523 12227 max_offset_ret = MACH_VM_MAX_ADDRESS; // Max offset is 64GB for pmaps with special "jumbo" blessing
a39ff7e2 12228 }
5ba3f43e 12229 } else {
d9a64523 12230 panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
5ba3f43e
A
12231 }
12232
a39ff7e2 12233 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
d9a64523 12234 assert(max_offset_ret >= min_max_offset);
5ba3f43e 12235#else
d9a64523
A
12236 panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
12237#endif
12238
12239 return max_offset_ret;
12240}
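/*
 * Illustrative sketch (not from the original source): how a caller might pick
 * a VM map ceiling from these helpers. The jumbo option only enlarges the
 * ceiling for 64-bit tasks; for 32-bit tasks it maps to 0x80000000. The
 * helper name is hypothetical.
 */
#if 0   /* example only, not compiled */
static vm_map_offset_t
example_task_max_offset(boolean_t is64, boolean_t jumbo)
{
	unsigned int option = jumbo ? ARM_PMAP_MAX_OFFSET_JUMBO : ARM_PMAP_MAX_OFFSET_DEVICE;
	return pmap_max_offset(is64, option);
}
#endif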
12241
0a7de745
A
12242vm_map_offset_t
12243pmap_max_32bit_offset(
d9a64523
A
12244 unsigned int option)
12245{
0a7de745 12246 vm_map_offset_t max_offset_ret = 0;
d9a64523 12247
5ba3f43e
A
12248 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
12249 max_offset_ret = arm_pmap_max_offset_default;
12250 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
94ff46dc 12251 max_offset_ret = 0x80000000;
5ba3f43e
A
12252 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
12253 max_offset_ret = VM_MAX_ADDRESS;
12254 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
12255 if (arm_pmap_max_offset_default) {
12256 max_offset_ret = arm_pmap_max_offset_default;
12257 } else if (max_mem > 0x20000000) {
12258 max_offset_ret = 0x80000000;
12259 } else {
94ff46dc 12260 max_offset_ret = 0x80000000;
5ba3f43e 12261 }
d9a64523
A
12262 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
12263 max_offset_ret = 0x80000000;
5ba3f43e 12264 } else {
d9a64523 12265 panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
5ba3f43e
A
12266 }
12267
d9a64523 12268 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
5ba3f43e 12269 return max_offset_ret;
5ba3f43e
A
12270}
12271
12272#if CONFIG_DTRACE
12273/*
12274 * Constrain DTrace copyin/copyout actions
12275 */
12276extern kern_return_t dtrace_copyio_preflight(addr64_t);
12277extern kern_return_t dtrace_copyio_postflight(addr64_t);
12278
0a7de745
A
12279kern_return_t
12280dtrace_copyio_preflight(
5ba3f43e
A
12281 __unused addr64_t va)
12282{
0a7de745 12283 if (current_map() == kernel_map) {
5ba3f43e 12284 return KERN_FAILURE;
0a7de745 12285 } else {
5ba3f43e 12286 return KERN_SUCCESS;
0a7de745 12287 }
5ba3f43e
A
12288}
12289
0a7de745
A
12290kern_return_t
12291dtrace_copyio_postflight(
5ba3f43e
A
12292 __unused addr64_t va)
12293{
12294 return KERN_SUCCESS;
12295}
12296#endif /* CONFIG_DTRACE */
12297
12298
12299void
12300pmap_flush_context_init(__unused pmap_flush_context *pfc)
12301{
12302}
12303
12304
12305void
12306pmap_flush(
12307 __unused pmap_flush_context *cpus_to_flush)
12308{
12309 /* not implemented yet */
12310 return;
12311}
12312
c6bf4f31
A
12313#if XNU_MONITOR
12314
12315/*
12316 * Enforce that the address range described by kva and nbytes is not currently
12317 * PPL-owned, and won't become PPL-owned while pinned. This is to prevent
12318 * unintentionally writing to PPL-owned memory.
12319 */
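/*
 * Typical usage (see pmap_query_resident_internal below): pin the kernel
 * buffer, write the result through the pointer, then unpin it.
 */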
12320static void
12321pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes)
12322{
12323 vm_offset_t end;
12324 if (os_add_overflow(kva, nbytes, &end)) {
12325 panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
12326 }
12327 for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
12328 pmap_paddr_t pa = kvtophys(ckva);
12329 if (!pa_valid(pa)) {
12330 panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
12331 }
12332 pp_attr_t attr;
12333 unsigned int pai = (unsigned int)pa_index(pa);
12334 if (ckva == phystokv(pa)) {
12335 panic("%s(%p): attempt to pin static mapping for page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
12336 }
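		/*
		 * Atomically set PP_ATTR_NO_MONITOR, retrying if the attribute word
		 * changes underneath us; each attempt re-checks that the page has
		 * not become PPL-owned in the meantime.
		 */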
12337 do {
12338 attr = pp_attr_table[pai] & ~PP_ATTR_NO_MONITOR;
12339 if (attr & PP_ATTR_MONITOR) {
12340 panic("%s(%p): physical page 0x%llx belongs to PPL", __func__, (void*)kva, (uint64_t)pa);
12341 }
12342 } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_NO_MONITOR, &pp_attr_table[pai]));
12343 }
12344}
12345
12346static void
12347pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes)
12348{
12349 vm_offset_t end;
12350 if (os_add_overflow(kva, nbytes, &end)) {
12351 panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
12352 }
12353 for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
12354 pmap_paddr_t pa = kvtophys(ckva);
12355 if (!pa_valid(pa)) {
12356 panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
12357 }
12358 if (!(pp_attr_table[pa_index(pa)] & PP_ATTR_NO_MONITOR)) {
12359 panic("%s(%p): physical page 0x%llx not pinned", __func__, (void*)kva, (uint64_t)pa);
12360 }
12361 assert(!(pp_attr_table[pa_index(pa)] & PP_ATTR_MONITOR));
12362 pa_clear_no_monitor(pa);
12363 }
12364}
12365
12366/*
12367 * Lock down a page, making all mappings read-only, and preventing
12368 * further mappings or removal of this particular kva's mapping.
12369 * Effectively, it makes the page at kva immutable.
12370 */
12371MARK_AS_PMAP_TEXT static void
12372pmap_ppl_lockdown_page(vm_address_t kva)
12373{
12374 pmap_paddr_t pa = kvtophys(kva);
12375 unsigned int pai = (unsigned int)pa_index(pa);
12376 LOCK_PVH(pai);
12377 pv_entry_t **pv_h = pai_to_pvh(pai);
12378
12379 if (pa_test_monitor(pa)) {
12380 panic("%#lx: page %llx belongs to PPL", kva, pa);
12381 }
12382
12383 if (pvh_get_flags(pv_h) & (PVH_FLAG_LOCKDOWN | PVH_FLAG_EXEC)) {
12384 panic("%#lx: already locked down/executable (%#llx)", kva, pvh_get_flags(pv_h));
12385 }
12386
12387 pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
12388
12389 if (pte_p == PT_ENTRY_NULL) {
12390 panic("%#lx: NULL pte", kva);
12391 }
12392
12393 pt_entry_t tmplate = *pte_p;
12394 if ((tmplate & ARM_PTE_APMASK) != ARM_PTE_AP(AP_RWNA)) {
12395 panic("%#lx: not a kernel r/w page (%#llx)", kva, tmplate & ARM_PTE_APMASK);
12396 }
12397
12398 pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_LOCKDOWN);
12399
12400 pmap_set_ptov_ap(pai, AP_RONA, FALSE);
12401
12402 UNLOCK_PVH(pai);
12403
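	/* Downgrade any remaining mappings of this physical page to read-only. */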
12404 pmap_page_protect_options_internal((ppnum_t)atop(pa), VM_PROT_READ, 0);
12405}
12406
12407/*
12408 * Release a page from being locked down to the PPL, making it writable
12409 * to the kernel once again.
12410 */
12411MARK_AS_PMAP_TEXT static void
12412pmap_ppl_unlockdown_page(vm_address_t kva)
12413{
12414 pmap_paddr_t pa = kvtophys(kva);
12415 unsigned int pai = (unsigned int)pa_index(pa);
12416 LOCK_PVH(pai);
12417 pv_entry_t **pv_h = pai_to_pvh(pai);
12418
12419 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
12420
12421 if (!(pvh_flags & PVH_FLAG_LOCKDOWN)) {
12422 panic("unlockdown attempt on not locked down virtual %#lx/pai %d", kva, pai);
12423 }
12424
12425 pvh_set_flags(pv_h, pvh_flags & ~PVH_FLAG_LOCKDOWN);
12426 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
12427 UNLOCK_PVH(pai);
12428}
12429
12430#else /* XNU_MONITOR */
d9a64523
A
12431
12432static void __unused
12433pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
12434{
12435}
12436
12437static void __unused
12438pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
12439{
12440}
12441
c6bf4f31 12442#endif /* !XNU_MONITOR */
d9a64523
A
12443
12444
0a7de745 12445#define PMAP_RESIDENT_INVALID ((mach_vm_size_t)-1)
d9a64523
A
12446
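/*
 * Count resident and compressed bytes for a range that spans at most one
 * twig-level TTE; returns PMAP_RESIDENT_INVALID if the pmap is NULL or the
 * TTE is absent.
 */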
12447MARK_AS_PMAP_TEXT static mach_vm_size_t
5ba3f43e 12448pmap_query_resident_internal(
0a7de745
A
12449 pmap_t pmap,
12450 vm_map_address_t start,
12451 vm_map_address_t end,
12452 mach_vm_size_t *compressed_bytes_p)
5ba3f43e 12453{
0a7de745
A
12454 mach_vm_size_t resident_bytes = 0;
12455 mach_vm_size_t compressed_bytes = 0;
5ba3f43e
A
12456
12457 pt_entry_t *bpte, *epte;
12458 pt_entry_t *pte_p;
12459 tt_entry_t *tte_p;
12460
12461 if (pmap == NULL) {
d9a64523 12462 return PMAP_RESIDENT_INVALID;
5ba3f43e
A
12463 }
12464
d9a64523
A
12465 VALIDATE_PMAP(pmap);
12466
5ba3f43e 12467 /* Ensure that this request is valid, and addresses exactly one TTE. */
0a7de745 12468 if (__improbable((start % ARM_PGBYTES) || (end % ARM_PGBYTES))) {
d9a64523 12469 panic("%s: address range %p, %p not page-aligned", __func__, (void*)start, (void*)end);
0a7de745 12470 }
d9a64523 12471
0a7de745 12472 if (__improbable((end < start) || ((end - start) > (PTE_PGENTRIES * ARM_PGBYTES)))) {
d9a64523 12473 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
0a7de745 12474 }
5ba3f43e
A
12475
12476 PMAP_LOCK(pmap);
12477 tte_p = pmap_tte(pmap, start);
12478 if (tte_p == (tt_entry_t *) NULL) {
12479 PMAP_UNLOCK(pmap);
d9a64523 12480 return PMAP_RESIDENT_INVALID;
5ba3f43e
A
12481 }
12482 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
cb323159 12483 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5ba3f43e 12484 pte_p = (pt_entry_t *) ttetokv(*tte_p);
cb323159
A
12485 bpte = &pte_p[pte_index(pmap, pt_attr, start)];
12486 epte = &pte_p[pte_index(pmap, pt_attr, end)];
5ba3f43e
A
12487
12488 for (; bpte < epte; bpte++) {
cb323159 12489 if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
5ba3f43e
A
12490 compressed_bytes += ARM_PGBYTES;
12491 } else if (pa_valid(pte_to_pa(*bpte))) {
12492 resident_bytes += ARM_PGBYTES;
12493 }
12494 }
12495 }
12496 PMAP_UNLOCK(pmap);
12497
12498 if (compressed_bytes_p) {
d9a64523 12499 pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
5ba3f43e 12500 *compressed_bytes_p += compressed_bytes;
d9a64523 12501 pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
5ba3f43e
A
12502 }
12503
d9a64523 12504 return resident_bytes;
5ba3f43e
A
12505}
12506
12507mach_vm_size_t
12508pmap_query_resident(
0a7de745
A
12509 pmap_t pmap,
12510 vm_map_address_t start,
12511 vm_map_address_t end,
12512 mach_vm_size_t *compressed_bytes_p)
5ba3f43e 12513{
0a7de745
A
12514 mach_vm_size_t total_resident_bytes;
12515 mach_vm_size_t compressed_bytes;
12516 vm_map_address_t va;
5ba3f43e
A
12517
12518
12519 if (pmap == PMAP_NULL) {
12520 if (compressed_bytes_p) {
12521 *compressed_bytes_p = 0;
12522 }
12523 return 0;
12524 }
12525
cb323159
A
12526 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
12527
d9a64523 12528 total_resident_bytes = 0;
5ba3f43e
A
12529 compressed_bytes = 0;
12530
d9a64523 12531 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
0a7de745
A
12532 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
12533 VM_KERNEL_ADDRHIDE(end));
5ba3f43e
A
12534
12535 va = start;
12536 while (va < end) {
12537 vm_map_address_t l;
d9a64523 12538 mach_vm_size_t resident_bytes;
5ba3f43e 12539
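		/*
		 * Clamp this pass to the end of the current twig-level region so
		 * each call into the internal routine inspects exactly one TTE.
		 */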
cb323159 12540 l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
5ba3f43e 12541
0a7de745 12542 if (l > end) {
5ba3f43e 12543 l = end;
0a7de745 12544 }
c6bf4f31
A
12545#if XNU_MONITOR
12546 resident_bytes = pmap_query_resident_ppl(pmap, va, l, compressed_bytes_p);
12547#else
d9a64523 12548 resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
c6bf4f31 12549#endif
0a7de745 12550 if (resident_bytes == PMAP_RESIDENT_INVALID) {
5ba3f43e 12551 break;
0a7de745 12552 }
d9a64523
A
12553
12554 total_resident_bytes += resident_bytes;
5ba3f43e
A
12555
12556 va = l;
12557 }
12558
12559 if (compressed_bytes_p) {
12560 *compressed_bytes_p = compressed_bytes;
12561 }
12562
d9a64523 12563 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
0a7de745 12564 total_resident_bytes);
5ba3f43e 12565
d9a64523 12566 return total_resident_bytes;
5ba3f43e
A
12567}
12568
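/*
 * Illustrative usage sketch (hypothetical caller and variable names, not part
 * of this file):
 *
 *	mach_vm_size_t compressed = 0;
 *	mach_vm_size_t resident =
 *	    pmap_query_resident(map->pmap, vme_start, vme_end, &compressed);
 *
 * Callers such as the VM map layer can use the returned byte counts for
 * footprint accounting.
 */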
12569#if MACH_ASSERT
5ba3f43e
A
12570static void
12571pmap_check_ledgers(
12572 pmap_t pmap)
12573{
cb323159
A
12574 int pid;
12575 char *procname;
5ba3f43e
A
12576
12577 if (pmap->pmap_pid == 0) {
12578 /*
12579 * This pmap was not or is no longer fully associated
12580 * with a task (e.g. the old pmap after a fork()/exec() or
12581 * spawn()). Its "ledger" still points at a task that is
12582 * now using a different (and active) address space, so
12583 * we can't check that all the pmap ledgers are balanced here.
12584 *
12585 * If the "pid" is set, that means that we went through
12586 * pmap_set_process() in task_terminate_internal(), so
12587 * this task's ledger should not have been re-used and
12588 * all the pmap ledgers should be back to 0.
12589 */
12590 return;
12591 }
12592
5ba3f43e
A
12593 pid = pmap->pmap_pid;
12594 procname = pmap->pmap_procname;
12595
cb323159 12596 vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);
5ba3f43e 12597
5c9f4661 12598 PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
5ba3f43e 12599#if 00
5c9f4661 12600 PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
5ba3f43e 12601#endif
5c9f4661
A
12602 PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
12603 PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
12604 PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
12605 PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
12606 PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
5ba3f43e
A
12607}
12608#endif /* MACH_ASSERT */
12609
0a7de745
A
12610void
12611pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
12612{
5ba3f43e
A
12613}
12614
12615
12616#if CONFIG_PGTRACE
12617#define PROF_START uint64_t t, nanot;\
0a7de745 12618 t = mach_absolute_time();
5ba3f43e
A
12619
12620#define PROF_END absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
0a7de745 12621 kprintf("%s: took %llu ns\n", __func__, nanot);
5ba3f43e
A
12622
12623#define PMAP_PGTRACE_LOCK(p) \
12624 do { \
0a7de745
A
12625 *(p) = ml_set_interrupts_enabled(false); \
12626 if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
12627 ml_set_interrupts_enabled(*(p)); \
5ba3f43e
A
12628 } while (true)
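/*
 * PMAP_PGTRACE_LOCK disables interrupts and tries the lock; on failure,
 * interrupts are briefly re-enabled before retrying so the CPU does not spin
 * with them masked. On success the lock is held with interrupts disabled and
 * the prior interrupt state saved in *(p).
 */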
12629
12630#define PMAP_PGTRACE_UNLOCK(p) \
12631 do { \
0a7de745
A
12632 simple_unlock(&(pmap_pgtrace.lock)); \
12633 ml_set_interrupts_enabled(*(p)); \
5ba3f43e
A
12634 } while (0)
12635
12636#define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
12637 do { \
0a7de745
A
12638 *(pte_p) = (pte_entry); \
12639 FLUSH_PTE(pte_p); \
5ba3f43e
A
12640 } while (0)
12641
12642#define PGTRACE_MAX_MAP 16 // maximum supported va to same pa
12643
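/*
 * Tracking state for a traced page:
 *   UNDEFINED    - page info allocated but not yet initialized
 *   PA_UNDEFINED - a VA map is registered but the physical page is not yet known
 *   VA_UNDEFINED - the physical page is known but no VA mapping has been seen yet
 *   DEFINED      - both the physical page and at least one VA mapping are known
 */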
12644typedef enum {
0a7de745
A
12645 UNDEFINED,
12646 PA_UNDEFINED,
12647 VA_UNDEFINED,
12648 DEFINED
5ba3f43e
A
12649} pmap_pgtrace_page_state_t;
12650
12651typedef struct {
0a7de745
A
12652 queue_chain_t chain;
12653
12654 /*
 12655	 * pa        - physical page being traced
 12656	 * maps      - list of active VA maps onto this pa
 12657	 * map_pool  - pool of unused map entries
 12658	 * map_waste - discarded maps awaiting recycling
 12659	 * state     - tracking state (see pmap_pgtrace_page_state_t)
12660 */
12661 pmap_paddr_t pa;
12662 queue_head_t maps;
12663 queue_head_t map_pool;
12664 queue_head_t map_waste;
12665 pmap_pgtrace_page_state_t state;
5ba3f43e
A
12666} pmap_pgtrace_page_t;
12667
12668static struct {
0a7de745
A
12669 /*
12670 * pages - list of tracing page info
12671 */
12672 queue_head_t pages;
12673 decl_simple_lock_data(, lock);
5ba3f43e
A
12674} pmap_pgtrace = {};
12675
0a7de745
A
12676static void
12677pmap_pgtrace_init(void)
5ba3f43e 12678{
0a7de745
A
12679 queue_init(&(pmap_pgtrace.pages));
12680 simple_lock_init(&(pmap_pgtrace.lock), 0);
5ba3f43e 12681
0a7de745 12682 boolean_t enabled;
5ba3f43e 12683
0a7de745
A
12684 if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
12685 pgtrace_enabled = enabled;
12686 }
5ba3f43e
A
12687}
12688
 12689// find a page with the given pa - pmap_pgtrace should be locked
0a7de745
A
12690inline static pmap_pgtrace_page_t *
12691pmap_pgtrace_find_page(pmap_paddr_t pa)
5ba3f43e 12692{
0a7de745
A
12693 queue_head_t *q = &(pmap_pgtrace.pages);
12694 pmap_pgtrace_page_t *p;
5ba3f43e 12695
0a7de745
A
12696 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
12697 if (p->state == UNDEFINED) {
12698 continue;
12699 }
12700 if (p->state == PA_UNDEFINED) {
12701 continue;
12702 }
12703 if (p->pa == pa) {
12704 return p;
12705 }
12706 }
5ba3f43e 12707
0a7de745 12708 return NULL;
5ba3f43e
A
12709}
12710
 12711// enter a clone for the given pmap, va page and range - pmap should be locked
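// The original PTE is invalidated and tagged ARM_PTE_PGTRACE so accesses fault
// into pmap_pgtrace_fault(); three kernel VAs (cva[0..2]) alias the target page
// and its neighbors so the faulting instruction can be emulated against the clone.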
0a7de745
A
12712static bool
12713pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
12714{
12715 bool ints;
12716 queue_head_t *q = &(pmap_pgtrace.pages);
12717 pmap_paddr_t pa_page;
12718 pt_entry_t *ptep, *cptep;
12719 pmap_pgtrace_page_t *p;
12720 bool found = false;
12721
12722 PMAP_ASSERT_LOCKED(pmap);
12723 assert(va_page == arm_trunc_page(va_page));
12724
12725 PMAP_PGTRACE_LOCK(&ints);
12726
12727 ptep = pmap_pte(pmap, va_page);
12728
12729 // target pte should exist
12730 if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
12731 PMAP_PGTRACE_UNLOCK(&ints);
12732 return false;
12733 }
12734
12735 queue_head_t *mapq;
12736 queue_head_t *mappool;
12737 pmap_pgtrace_map_t *map = NULL;
12738
12739 pa_page = pte_to_pa(*ptep);
12740
12741 // find if we have a page info defined for this
12742 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
12743 mapq = &(p->maps);
12744 mappool = &(p->map_pool);
12745
12746 switch (p->state) {
12747 case PA_UNDEFINED:
12748 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
12749 if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
12750 p->pa = pa_page;
12751 map->range.start = start;
12752 map->range.end = end;
12753 found = true;
12754 break;
12755 }
12756 }
12757 break;
12758
12759 case VA_UNDEFINED:
12760 if (p->pa != pa_page) {
12761 break;
12762 }
12763 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
12764 if (map->cloned == false) {
12765 map->pmap = pmap;
12766 map->ova = va_page;
12767 map->range.start = start;
12768 map->range.end = end;
12769 found = true;
12770 break;
12771 }
12772 }
12773 break;
12774
12775 case DEFINED:
12776 if (p->pa != pa_page) {
12777 break;
12778 }
12779 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
12780 if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
12781 kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
12782 break;
12783 } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
12784 kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
12785 break;
12786 } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
12787 // range should be already defined as well
12788 found = true;
12789 break;
12790 }
12791 }
12792 break;
12793
12794 default:
12795 panic("invalid state p->state=%x\n", p->state);
12796 }
12797
12798 if (found == true) {
12799 break;
12800 }
12801 }
12802
12803 // do not clone if no page info found
12804 if (found == false) {
12805 PMAP_PGTRACE_UNLOCK(&ints);
12806 return false;
12807 }
12808
12809 // copy pre, target and post ptes to clone ptes
12810 for (int i = 0; i < 3; i++) {
12811 ptep = pmap_pte(pmap, va_page + (i - 1) * ARM_PGBYTES);
12812 cptep = pmap_pte(kernel_pmap, map->cva[i]);
12813 assert(cptep != NULL);
12814 if (ptep == NULL) {
12815 PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
12816 } else {
12817 PGTRACE_WRITE_PTE(cptep, *ptep);
12818 }
cb323159 12819 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
0a7de745
A
12820 }
12821
12822 // get ptes for original and clone
12823 ptep = pmap_pte(pmap, va_page);
12824 cptep = pmap_pte(kernel_pmap, map->cva[1]);
12825
12826 // invalidate original pte and mark it as a pgtrace page
12827 PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
cb323159 12828 PMAP_UPDATE_TLBS(pmap, map->ova, map->ova + ARM_PGBYTES, false);
0a7de745
A
12829
12830 map->cloned = true;
12831 p->state = DEFINED;
12832
12833 kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);
12834
12835 PMAP_PGTRACE_UNLOCK(&ints);
12836
12837 return true;
5ba3f43e
A
12838}
12839
 12840// This function removes the trace bit and revalidates the pte if applicable. The pmap must be locked.
0a7de745
A
12841static void
12842pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
12843{
12844 bool ints, found = false;
12845 pmap_pgtrace_page_t *p;
12846 pt_entry_t *ptep;
12847
12848 PMAP_PGTRACE_LOCK(&ints);
12849
12850 // we must have this page info
12851 p = pmap_pgtrace_find_page(pa);
12852 if (p == NULL) {
12853 goto unlock_exit;
12854 }
12855
12856 // find matching map
12857 queue_head_t *mapq = &(p->maps);
12858 queue_head_t *mappool = &(p->map_pool);
12859 pmap_pgtrace_map_t *map;
12860
12861 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
12862 if (map->pmap == pmap && map->ova == va) {
12863 found = true;
12864 break;
12865 }
12866 }
12867
12868 if (!found) {
12869 goto unlock_exit;
12870 }
12871
12872 if (map->cloned == true) {
12873 // Restore back the pte to original state
12874 ptep = pmap_pte(pmap, map->ova);
12875 assert(ptep);
12876 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
cb323159 12877 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);
0a7de745
A
12878
12879 // revert clone pages
12880 for (int i = 0; i < 3; i++) {
12881 ptep = pmap_pte(kernel_pmap, map->cva[i]);
12882 assert(ptep != NULL);
12883 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
cb323159 12884 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
0a7de745
A
12885 }
12886 }
12887
12888 queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
12889 map->pmap = NULL;
12890 map->ova = (vm_map_offset_t)NULL;
12891 map->cloned = false;
12892 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
12893
12894 kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);
5ba3f43e
A
12895
12896unlock_exit:
0a7de745 12897 PMAP_PGTRACE_UNLOCK(&ints);
5ba3f43e
A
12898}
12899
12900// remove all clones of given pa - pmap must be locked
0a7de745
A
12901static void
12902pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
5ba3f43e 12903{
0a7de745
A
12904 bool ints;
12905 pmap_pgtrace_page_t *p;
12906 pt_entry_t *ptep;
5ba3f43e 12907
0a7de745 12908 PMAP_PGTRACE_LOCK(&ints);
5ba3f43e 12909
0a7de745
A
12910 // we must have this page info
12911 p = pmap_pgtrace_find_page(pa);
12912 if (p == NULL) {
12913 PMAP_PGTRACE_UNLOCK(&ints);
12914 return;
12915 }
5ba3f43e 12916
0a7de745
A
12917 queue_head_t *mapq = &(p->maps);
12918 queue_head_t *mappool = &(p->map_pool);
12919 queue_head_t *mapwaste = &(p->map_waste);
12920 pmap_pgtrace_map_t *map;
5ba3f43e 12921
0a7de745
A
12922 // move maps to waste
12923 while (!queue_empty(mapq)) {
12924 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
12925 queue_enter_first(mapwaste, map, pmap_pgtrace_map_t*, chain);
12926 }
5ba3f43e 12927
0a7de745 12928 PMAP_PGTRACE_UNLOCK(&ints);
5ba3f43e 12929
0a7de745
A
12930 // sanitize maps in waste
12931 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
12932 if (map->cloned == true) {
12933 PMAP_LOCK(map->pmap);
5ba3f43e 12934
0a7de745
A
12935 // restore back original pte
12936 ptep = pmap_pte(map->pmap, map->ova);
12937 assert(ptep);
12938 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
cb323159 12939 PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova + ARM_PGBYTES, false);
5ba3f43e 12940
0a7de745
A
12941 // revert clone ptes
12942 for (int i = 0; i < 3; i++) {
12943 ptep = pmap_pte(kernel_pmap, map->cva[i]);
12944 assert(ptep != NULL);
12945 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
cb323159 12946 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
0a7de745 12947 }
5ba3f43e 12948
0a7de745
A
12949 PMAP_UNLOCK(map->pmap);
12950 }
5ba3f43e 12951
0a7de745
A
12952 map->pmap = NULL;
12953 map->ova = (vm_map_offset_t)NULL;
12954 map->cloned = false;
12955 }
5ba3f43e 12956
0a7de745 12957 PMAP_PGTRACE_LOCK(&ints);
5ba3f43e 12958
0a7de745
A
12959 // recycle maps back to map_pool
12960 while (!queue_empty(mapwaste)) {
12961 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
12962 queue_enter_first(mappool, map, pmap_pgtrace_map_t*, chain);
12963 }
5ba3f43e 12964
0a7de745 12965 PMAP_PGTRACE_UNLOCK(&ints);
5ba3f43e
A
12966}
12967
0a7de745
A
12968inline static void
12969pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
5ba3f43e 12970{
0a7de745
A
12971 uint64_t tsz;
12972 vm_map_offset_t end;
5ba3f43e 12973
0a7de745
A
12974 if (pmap == kernel_pmap) {
12975 tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
12976 *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64 - tsz)) << (64 - tsz));
12977 *endp = VM_MAX_KERNEL_ADDRESS;
12978 } else {
12979 tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
12980 if (tsz == 64) {
12981 end = 0;
12982 } else {
12983 end = ((uint64_t)1 << (64 - tsz)) - 1;
12984 }
5ba3f43e 12985
0a7de745
A
12986 *startp = 0;
12987 *endp = end;
12988 }
5ba3f43e 12989
0a7de745 12990 assert(*endp > *startp);
5ba3f43e 12991
0a7de745 12992 return;
5ba3f43e
A
12993}
12994
 12995// if the given pa is mapped in this pmap, clone each such mapping
0a7de745
A
12996static uint64_t
12997pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
12998{
12999 uint64_t ret = 0;
13000 vm_map_offset_t min, max;
13001 vm_map_offset_t cur_page, end_page;
13002 pt_entry_t *ptep;
13003 tt_entry_t *ttep;
13004 tt_entry_t tte;
cb323159 13005 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
0a7de745
A
13006
13007 pmap_pgtrace_get_search_space(pmap, &min, &max);
13008
13009 cur_page = arm_trunc_page(min);
13010 end_page = arm_trunc_page(max);
13011 while (cur_page <= end_page) {
13012 vm_map_offset_t add = 0;
13013
13014 PMAP_LOCK(pmap);
13015
 13016		// skip address space we are not interested in tracing (kernel text and kexts)
13017 if (pmap == kernel_pmap &&
13018 ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
13019 (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
13020 add = ARM_PGBYTES;
13021 goto unlock_continue;
13022 }
5ba3f43e 13023
0a7de745
A
13024 // check whether we can skip l1
13025 ttep = pmap_tt1e(pmap, cur_page);
13026 assert(ttep);
13027 tte = *ttep;
13028 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
13029 add = ARM_TT_L1_SIZE;
13030 goto unlock_continue;
13031 }
13032
13033 // how about l2
cb323159
A
13034 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, cur_page)];
13035
0a7de745
A
13036 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
13037 add = ARM_TT_L2_SIZE;
13038 goto unlock_continue;
13039 }
13040
13041 // ptep finally
cb323159 13042 ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, cur_page)]);
0a7de745
A
13043 if (ptep == PT_ENTRY_NULL) {
13044 add = ARM_TT_L3_SIZE;
13045 goto unlock_continue;
13046 }
13047
13048 if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
13049 if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
13050 ret++;
13051 }
13052 }
13053
13054 add = ARM_PGBYTES;
5ba3f43e
A
13055
13056unlock_continue:
0a7de745 13057 PMAP_UNLOCK(pmap);
5ba3f43e 13058
0a7de745
A
 13059		// guard against VA overflow
13060 if (cur_page + add < cur_page) {
13061 break;
13062 }
5ba3f43e 13063
0a7de745
A
13064 cur_page += add;
13065 }
5ba3f43e
A
13066
13067
0a7de745 13068 return ret;
5ba3f43e
A
13069}
13070
13071// search pv table and clone vas of given pa
0a7de745
A
13072static uint64_t
13073pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
5ba3f43e 13074{
0a7de745
A
13075 uint64_t ret = 0;
13076 unsigned long pai;
13077 pv_entry_t **pvh;
13078 pt_entry_t *ptep;
13079 pmap_t pmap;
5ba3f43e 13080
0a7de745
A
13081 typedef struct {
13082 queue_chain_t chain;
13083 pmap_t pmap;
13084 vm_map_offset_t va;
13085 } pmap_va_t;
5ba3f43e 13086
0a7de745
A
13087 queue_head_t pmapvaq;
13088 pmap_va_t *pmapva;
5ba3f43e 13089
0a7de745 13090 queue_init(&pmapvaq);
5ba3f43e 13091
0a7de745
A
13092 pai = pa_index(pa);
13093 LOCK_PVH(pai);
13094 pvh = pai_to_pvh(pai);
5ba3f43e 13095
0a7de745
A
13096 // collect pmap/va pair from pvh
13097 if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
13098 ptep = pvh_ptep(pvh);
13099 pmap = ptep_get_pmap(ptep);
5ba3f43e 13100
0a7de745
A
13101 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
13102 pmapva->pmap = pmap;
13103 pmapva->va = ptep_get_va(ptep);
5ba3f43e 13104
0a7de745
A
13105 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
13106 } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
13107 pv_entry_t *pvep;
5ba3f43e 13108
0a7de745
A
13109 pvep = pvh_list(pvh);
13110 while (pvep) {
13111 ptep = pve_get_ptep(pvep);
13112 pmap = ptep_get_pmap(ptep);
5ba3f43e 13113
0a7de745
A
13114 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
13115 pmapva->pmap = pmap;
13116 pmapva->va = ptep_get_va(ptep);
5ba3f43e 13117
0a7de745 13118 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
5ba3f43e 13119
0a7de745
A
13120 pvep = PVE_NEXT_PTR(pve_next(pvep));
13121 }
13122 }
5ba3f43e 13123
0a7de745 13124 UNLOCK_PVH(pai);
5ba3f43e 13125
0a7de745
A
13126 // clone them while making sure mapping still exists
13127 queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
13128 PMAP_LOCK(pmapva->pmap);
13129 ptep = pmap_pte(pmapva->pmap, pmapva->va);
13130 if (pte_to_pa(*ptep) == pa) {
13131 if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
13132 ret++;
13133 }
13134 }
13135 PMAP_UNLOCK(pmapva->pmap);
5ba3f43e 13136
0a7de745
A
13137 kfree(pmapva, sizeof(pmap_va_t));
13138 }
5ba3f43e 13139
0a7de745 13140 return ret;
5ba3f43e
A
13141}
13142
13143// allocate a page info
0a7de745
A
13144static pmap_pgtrace_page_t *
13145pmap_pgtrace_alloc_page(void)
13146{
13147 pmap_pgtrace_page_t *p;
13148 queue_head_t *mapq;
13149 queue_head_t *mappool;
13150 queue_head_t *mapwaste;
13151 pmap_pgtrace_map_t *map;
13152
13153 p = kalloc(sizeof(pmap_pgtrace_page_t));
13154 assert(p);
13155
13156 p->state = UNDEFINED;
13157
13158 mapq = &(p->maps);
13159 mappool = &(p->map_pool);
13160 mapwaste = &(p->map_waste);
13161 queue_init(mapq);
13162 queue_init(mappool);
13163 queue_init(mapwaste);
13164
13165 for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
13166 vm_map_offset_t newcva;
13167 pt_entry_t *cptep;
13168 kern_return_t kr;
13169 vm_map_entry_t entry;
13170
13171 // get a clone va
13172 vm_object_reference(kernel_object);
13173 kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3 * ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
13174 if (kr != KERN_SUCCESS) {
13175 panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
13176 }
13177 VME_OBJECT_SET(entry, kernel_object);
13178 VME_OFFSET_SET(entry, newcva);
13179 vm_map_unlock(kernel_map);
13180
13181 // fill default clone page info and add to pool
13182 map = kalloc(sizeof(pmap_pgtrace_map_t));
13183 for (int j = 0; j < 3; j++) {
13184 vm_map_offset_t addr = newcva + j * ARM_PGBYTES;
13185
13186 // pre-expand pmap while preemption enabled
13187 kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
13188 if (kr != KERN_SUCCESS) {
13189 panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
13190 }
13191
13192 cptep = pmap_pte(kernel_pmap, addr);
13193 assert(cptep != NULL);
13194
13195 map->cva[j] = addr;
13196 map->cva_spte[j] = *cptep;
13197 }
13198 map->range.start = map->range.end = 0;
13199 map->cloned = false;
13200 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
13201 }
13202
13203 return p;
5ba3f43e
A
13204}
13205
13206// free a page info
0a7de745
A
13207static void
13208pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
5ba3f43e 13209{
0a7de745
A
13210 queue_head_t *mapq;
13211 queue_head_t *mappool;
13212 queue_head_t *mapwaste;
13213 pmap_pgtrace_map_t *map;
5ba3f43e 13214
0a7de745 13215 assert(p);
5ba3f43e 13216
0a7de745
A
13217 mapq = &(p->maps);
13218 mappool = &(p->map_pool);
13219 mapwaste = &(p->map_waste);
5ba3f43e 13220
0a7de745
A
13221 while (!queue_empty(mapq)) {
13222 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
13223 kfree(map, sizeof(pmap_pgtrace_map_t));
13224 }
5ba3f43e 13225
0a7de745
A
13226 while (!queue_empty(mappool)) {
13227 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
13228 kfree(map, sizeof(pmap_pgtrace_map_t));
13229 }
5ba3f43e 13230
0a7de745
A
13231 while (!queue_empty(mapwaste)) {
13232 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
13233 kfree(map, sizeof(pmap_pgtrace_map_t));
13234 }
5ba3f43e 13235
0a7de745 13236 kfree(p, sizeof(pmap_pgtrace_page_t));
5ba3f43e
A
13237}
13238
13239// construct page infos with the given address range
0a7de745
A
13240int
13241pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
13242{
13243 int ret = 0;
13244 pt_entry_t *ptep;
13245 queue_head_t *q = &(pmap_pgtrace.pages);
13246 bool ints;
13247 vm_map_offset_t cur_page, end_page;
13248
13249 if (start > end) {
13250 kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
13251 return -1;
13252 }
13253
13254 PROF_START
13255
13256 // add each page in given range
13257 cur_page = arm_trunc_page(start);
13258 end_page = arm_trunc_page(end);
13259 while (cur_page <= end_page) {
13260 pmap_paddr_t pa_page = 0;
13261 uint64_t num_cloned = 0;
13262 pmap_pgtrace_page_t *p = NULL, *newp;
13263 bool free_newp = true;
13264 pmap_pgtrace_page_state_t state;
13265
13266 // do all allocations outside of spinlocks
13267 newp = pmap_pgtrace_alloc_page();
13268
 13269		// maintain lock ordering: pmap, then kernel_pmap, then the pgtrace lock
13270 if (pmap != NULL) {
13271 PMAP_LOCK(pmap);
13272 }
13273 if (pmap != kernel_pmap) {
13274 PMAP_LOCK(kernel_pmap);
13275 }
13276
13277 // addresses are physical if pmap is null
13278 if (pmap == NULL) {
13279 ptep = NULL;
13280 pa_page = cur_page;
13281 state = VA_UNDEFINED;
13282 } else {
13283 ptep = pmap_pte(pmap, cur_page);
13284 if (ptep != NULL) {
13285 pa_page = pte_to_pa(*ptep);
13286 state = DEFINED;
13287 } else {
13288 state = PA_UNDEFINED;
13289 }
13290 }
13291
13292 // search if we have a page info already
13293 PMAP_PGTRACE_LOCK(&ints);
13294 if (state != PA_UNDEFINED) {
13295 p = pmap_pgtrace_find_page(pa_page);
13296 }
13297
13298 // add pre-allocated page info if nothing found
13299 if (p == NULL) {
13300 queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
13301 p = newp;
13302 free_newp = false;
13303 }
13304
 13305		// now p points to the page info we want
13306 p->state = state;
13307
13308 queue_head_t *mapq = &(p->maps);
13309 queue_head_t *mappool = &(p->map_pool);
13310 pmap_pgtrace_map_t *map;
13311 vm_map_offset_t start_offset, end_offset;
13312
13313 // calculate trace offsets in the page
13314 if (cur_page > start) {
13315 start_offset = 0;
13316 } else {
13317 start_offset = start - cur_page;
13318 }
13319 if (cur_page == end_page) {
13320 end_offset = end - end_page;
13321 } else {
13322 end_offset = ARM_PGBYTES - 1;
13323 }
13324
13325 kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);
13326
13327 // fill map info
13328 assert(!queue_empty(mappool));
13329 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
13330 if (p->state == PA_UNDEFINED) {
13331 map->pmap = pmap;
13332 map->ova = cur_page;
13333 map->range.start = start_offset;
13334 map->range.end = end_offset;
13335 } else if (p->state == VA_UNDEFINED) {
13336 p->pa = pa_page;
13337 map->range.start = start_offset;
13338 map->range.end = end_offset;
13339 } else if (p->state == DEFINED) {
13340 p->pa = pa_page;
13341 map->pmap = pmap;
13342 map->ova = cur_page;
13343 map->range.start = start_offset;
13344 map->range.end = end_offset;
13345 } else {
13346 panic("invalid p->state=%d\n", p->state);
13347 }
13348
13349 // not cloned yet
13350 map->cloned = false;
13351 queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);
13352
13353 // unlock locks
13354 PMAP_PGTRACE_UNLOCK(&ints);
13355 if (pmap != kernel_pmap) {
13356 PMAP_UNLOCK(kernel_pmap);
13357 }
13358 if (pmap != NULL) {
13359 PMAP_UNLOCK(pmap);
13360 }
13361
13362 // now clone it
13363 if (pa_valid(pa_page)) {
13364 num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
13365 }
13366 if (pmap == NULL) {
13367 num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
13368 } else {
13369 num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
13370 }
13371
 13372		// free the pre-allocated page info if we didn't add it to the queue
13373 if (free_newp) {
13374 pmap_pgtrace_free_page(newp);
13375 }
13376
13377 if (num_cloned == 0) {
 13378			kprintf("%s: no mapping found for pa_page=%llx; it will be added when a mapping is entered\n", __func__, pa_page);
13379 }
13380
13381 ret += num_cloned;
13382
13383 // overflow
13384 if (cur_page + ARM_PGBYTES < cur_page) {
13385 break;
13386 } else {
13387 cur_page += ARM_PGBYTES;
13388 }
13389 }
13390
13391 PROF_END
13392
13393 return ret;
5ba3f43e
A
13394}
13395
13396// delete page infos for given address range
0a7de745
A
13397int
13398pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
13399{
13400 int ret = 0;
13401 bool ints;
13402 queue_head_t *q = &(pmap_pgtrace.pages);
13403 pmap_pgtrace_page_t *p;
13404 vm_map_offset_t cur_page, end_page;
13405
13406 kprintf("%s start=%llx end=%llx\n", __func__, start, end);
13407
13408 PROF_START
13409
13410 pt_entry_t *ptep;
13411 pmap_paddr_t pa_page;
13412
13413 // remove page info from start to end
13414 cur_page = arm_trunc_page(start);
13415 end_page = arm_trunc_page(end);
13416 while (cur_page <= end_page) {
13417 p = NULL;
13418
13419 if (pmap == NULL) {
13420 pa_page = cur_page;
13421 } else {
13422 PMAP_LOCK(pmap);
13423 ptep = pmap_pte(pmap, cur_page);
13424 if (ptep == NULL) {
13425 PMAP_UNLOCK(pmap);
13426 goto cont;
13427 }
13428 pa_page = pte_to_pa(*ptep);
13429 PMAP_UNLOCK(pmap);
13430 }
13431
13432 // remove all clones and validate
13433 pmap_pgtrace_remove_all_clone(pa_page);
13434
13435 // find page info and delete
13436 PMAP_PGTRACE_LOCK(&ints);
13437 p = pmap_pgtrace_find_page(pa_page);
13438 if (p != NULL) {
13439 queue_remove(q, p, pmap_pgtrace_page_t *, chain);
13440 ret++;
13441 }
13442 PMAP_PGTRACE_UNLOCK(&ints);
13443
13444 // free outside of locks
13445 if (p != NULL) {
13446 pmap_pgtrace_free_page(p);
13447 }
5ba3f43e
A
13448
13449cont:
0a7de745
A
13450 // overflow
13451 if (cur_page + ARM_PGBYTES < cur_page) {
13452 break;
13453 } else {
13454 cur_page += ARM_PGBYTES;
13455 }
13456 }
5ba3f43e 13457
0a7de745 13458 PROF_END
5ba3f43e 13459
0a7de745 13460 return ret;
5ba3f43e
A
13461}
13462
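/*
 * Fault handler for traced pages: verify the faulting PTE is a pgtrace page,
 * locate the matching clone map, emulate the faulting instruction against the
 * clone VAs, log the access if it falls within the traced range, and advance
 * the saved PC past the instruction.
 */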
0a7de745
A
13463kern_return_t
13464pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
5ba3f43e 13465{
0a7de745
A
13466 pt_entry_t *ptep;
13467 pgtrace_run_result_t res;
13468 pmap_pgtrace_page_t *p;
13469 bool ints, found = false;
13470 pmap_paddr_t pa;
5ba3f43e 13471
0a7de745
A
13472 // Quick check if we are interested
13473 ptep = pmap_pte(pmap, va);
13474 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
13475 return KERN_FAILURE;
13476 }
5ba3f43e 13477
0a7de745 13478 PMAP_PGTRACE_LOCK(&ints);
5ba3f43e 13479
0a7de745
A
13480 // Check again since access is serialized
13481 ptep = pmap_pte(pmap, va);
13482 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
13483 PMAP_PGTRACE_UNLOCK(&ints);
13484 return KERN_FAILURE;
13485 } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
 13486		// This CPU's TLB has not yet picked up the now-valid PTE; flush it and retry
13487 kprintf("%s Somehow this cpu's tlb has not updated?\n", __func__);
cb323159 13488 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);
5ba3f43e 13489
0a7de745
A
13490 PMAP_PGTRACE_UNLOCK(&ints);
13491 return KERN_SUCCESS;
13492 }
5ba3f43e 13493
0a7de745
A
13494 // Find if this pa is what we are tracing
13495 pa = pte_to_pa(*ptep);
5ba3f43e 13496
0a7de745
A
13497 p = pmap_pgtrace_find_page(arm_trunc_page(pa));
13498 if (p == NULL) {
13499 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
13500 }
5ba3f43e 13501
0a7de745
A
13502 // find if pmap and va are also matching
13503 queue_head_t *mapq = &(p->maps);
13504 queue_head_t *mapwaste = &(p->map_waste);
13505 pmap_pgtrace_map_t *map;
5ba3f43e 13506
0a7de745
A
13507 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
13508 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
13509 found = true;
13510 break;
13511 }
13512 }
5ba3f43e 13513
0a7de745
A
 13514	// if not found, search the map waste list, since those maps are still valid
13515 if (!found) {
13516 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
13517 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
13518 found = true;
13519 break;
13520 }
13521 }
13522 }
5ba3f43e 13523
0a7de745
A
13524 if (!found) {
13525 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
13526 }
5ba3f43e 13527
0a7de745
A
13528 // Decode and run it on the clone map
13529 bzero(&res, sizeof(res));
13530 pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss), // instruction
13531 va, map->cva, // fault va and clone page vas
13532 ss, &res);
5ba3f43e 13533
0a7de745
A
13534 // write a log if in range
13535 vm_map_offset_t offset = va - map->ova;
13536 if (map->range.start <= offset && offset <= map->range.end) {
13537 pgtrace_write_log(res);
13538 }
5ba3f43e 13539
0a7de745 13540 PMAP_PGTRACE_UNLOCK(&ints);
5ba3f43e 13541
0a7de745 13542 // Return to next instruction
cb323159 13543 add_saved_state_pc(ss, sizeof(uint32_t));
5ba3f43e 13544
0a7de745 13545 return KERN_SUCCESS;
5ba3f43e
A
13546}
13547#endif
13548
13549boolean_t
13550pmap_enforces_execute_only(
13551#if (__ARM_VMSA__ == 7)
13552 __unused
13553#endif
13554 pmap_t pmap)
13555{
13556#if (__ARM_VMSA__ > 7)
0a7de745 13557 return pmap != kernel_pmap;
5ba3f43e
A
13558#else
13559 return FALSE;
13560#endif
13561}
13562
d9a64523
A
13563MARK_AS_PMAP_TEXT void
13564pmap_set_jit_entitled_internal(
5ba3f43e
A
13565 __unused pmap_t pmap)
13566{
13567 return;
13568}
13569
d9a64523
A
13570void
13571pmap_set_jit_entitled(
13572 pmap_t pmap)
13573{
c6bf4f31
A
13574#if XNU_MONITOR
13575 pmap_set_jit_entitled_ppl(pmap);
13576#else
d9a64523 13577 pmap_set_jit_entitled_internal(pmap);
c6bf4f31 13578#endif
d9a64523
A
13579}
13580
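/*
 * Report the disposition of the page mapped at va as PMAP_QUERY_PAGE_* bits:
 * present, compressed (optionally with alternate accounting), and, for managed
 * pages, whether the mapping is alt-accounted, reusable, or internal.
 */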
13581MARK_AS_PMAP_TEXT static kern_return_t
5ba3f43e 13582pmap_query_page_info_internal(
0a7de745
A
13583 pmap_t pmap,
13584 vm_map_offset_t va,
13585 int *disp_p)
5ba3f43e 13586{
d9a64523
A
13587 pmap_paddr_t pa;
13588 int disp;
13589 int pai;
13590 pt_entry_t *pte;
13591 pv_entry_t **pv_h, *pve_p;
5ba3f43e
A
13592
13593 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
d9a64523 13594 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
5ba3f43e 13595 *disp_p = 0;
d9a64523 13596 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
5ba3f43e
A
13597 return KERN_INVALID_ARGUMENT;
13598 }
13599
13600 disp = 0;
13601
d9a64523 13602 VALIDATE_PMAP(pmap);
5ba3f43e
A
13603 PMAP_LOCK(pmap);
13604
13605 pte = pmap_pte(pmap, va);
13606 if (pte == PT_ENTRY_NULL) {
13607 goto done;
13608 }
13609
13610 pa = pte_to_pa(*pte);
13611 if (pa == 0) {
cb323159 13612 if (ARM_PTE_IS_COMPRESSED(*pte, pte)) {
5ba3f43e
A
13613 disp |= PMAP_QUERY_PAGE_COMPRESSED;
13614 if (*pte & ARM_PTE_COMPRESSED_ALT) {
13615 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
13616 }
13617 }
13618 } else {
13619 disp |= PMAP_QUERY_PAGE_PRESENT;
13620 pai = (int) pa_index(pa);
13621 if (!pa_valid(pa)) {
13622 goto done;
13623 }
13624 LOCK_PVH(pai);
13625 pv_h = pai_to_pvh(pai);
13626 pve_p = PV_ENTRY_NULL;
13627 if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
13628 pve_p = pvh_list(pv_h);
13629 while (pve_p != PV_ENTRY_NULL &&
0a7de745 13630 pve_get_ptep(pve_p) != pte) {
a39ff7e2 13631 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
5ba3f43e
A
13632 }
13633 }
13634 if (IS_ALTACCT_PAGE(pai, pve_p)) {
13635 disp |= PMAP_QUERY_PAGE_ALTACCT;
13636 } else if (IS_REUSABLE_PAGE(pai)) {
13637 disp |= PMAP_QUERY_PAGE_REUSABLE;
13638 } else if (IS_INTERNAL_PAGE(pai)) {
13639 disp |= PMAP_QUERY_PAGE_INTERNAL;
13640 }
13641 UNLOCK_PVH(pai);
13642 }
13643
13644done:
13645 PMAP_UNLOCK(pmap);
d9a64523 13646 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
5ba3f43e 13647 *disp_p = disp;
d9a64523 13648 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
5ba3f43e
A
13649 return KERN_SUCCESS;
13650}
13651
13652kern_return_t
13653pmap_query_page_info(
0a7de745
A
13654 pmap_t pmap,
13655 vm_map_offset_t va,
13656 int *disp_p)
5ba3f43e 13657{
c6bf4f31
A
13658#if XNU_MONITOR
13659 return pmap_query_page_info_ppl(pmap, va, disp_p);
13660#else
5ba3f43e 13661 return pmap_query_page_info_internal(pmap, va, disp_p);
c6bf4f31 13662#endif
5ba3f43e
A
13663}
13664
d9a64523 13665MARK_AS_PMAP_TEXT kern_return_t
5ba3f43e
A
13666pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
13667{
13668
13669 return KERN_SUCCESS;
13670}
13671
13672kern_return_t
13673pmap_return(boolean_t do_panic, boolean_t do_recurse)
13674{
c6bf4f31
A
13675#if XNU_MONITOR
13676 return pmap_return_ppl(do_panic, do_recurse);
13677#else
5ba3f43e 13678 return pmap_return_internal(do_panic, do_recurse);
c6bf4f31 13679#endif
5ba3f43e
A
13680}
13681
d9a64523
A
13682
13683
cb323159 13684
d9a64523 13685MARK_AS_PMAP_TEXT static void
5ba3f43e 13686pmap_footprint_suspend_internal(
0a7de745
A
13687 vm_map_t map,
13688 boolean_t suspend)
5ba3f43e
A
13689{
13690#if DEVELOPMENT || DEBUG
13691 if (suspend) {
d9a64523 13692 current_thread()->pmap_footprint_suspended = TRUE;
5ba3f43e
A
13693 map->pmap->footprint_was_suspended = TRUE;
13694 } else {
d9a64523 13695 current_thread()->pmap_footprint_suspended = FALSE;
5ba3f43e
A
13696 }
13697#else /* DEVELOPMENT || DEBUG */
13698 (void) map;
13699 (void) suspend;
13700#endif /* DEVELOPMENT || DEBUG */
13701}
d9a64523 13702
5ba3f43e
A
13703void
13704pmap_footprint_suspend(
13705 vm_map_t map,
13706 boolean_t suspend)
13707{
c6bf4f31
A
13708#if XNU_MONITOR
13709 pmap_footprint_suspend_ppl(map, suspend);
13710#else
5ba3f43e 13711 pmap_footprint_suspend_internal(map, suspend);
c6bf4f31 13712#endif
5ba3f43e 13713}
d9a64523
A
13714
13715#if defined(__arm64__) && (DEVELOPMENT || DEBUG)
13716
d9a64523
A
13717struct page_table_dump_header {
13718 uint64_t pa;
13719 uint64_t num_entries;
13720 uint64_t start_va;
13721 uint64_t end_va;
13722};
13723
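/*
 * Copy each translation table into the buffer, preceded by a
 * page_table_dump_header describing its physical address, entry count and VA
 * range, then recurse into nested tables; returns bytes used, or 0 if the
 * buffer is too small.
 */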
d9a64523 13724static size_t
cb323159
A
13725pmap_dump_page_tables_recurse(pmap_t pmap,
13726 const tt_entry_t *ttp,
0a7de745
A
13727 unsigned int cur_level,
13728 uint64_t start_va,
13729 void *bufp,
13730 void *buf_end)
d9a64523
A
13731{
13732 size_t bytes_used = 0;
13733 uint64_t num_entries = ARM_PGBYTES / sizeof(*ttp);
cb323159
A
13734 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
13735
13736 uint64_t size = pt_attr->pta_level_info[cur_level].size;
13737 uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
13738 uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
13739 uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
d9a64523 13740
0a7de745 13741 if (cur_level == arm64_root_pgtable_level) {
d9a64523 13742 num_entries = arm64_root_pgtable_num_ttes;
0a7de745 13743 }
d9a64523
A
13744
13745 uint64_t tt_size = num_entries * sizeof(tt_entry_t);
13746 const tt_entry_t *tt_end = &ttp[num_entries];
13747
13748 if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
13749 return 0;
13750 }
13751
13752 struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
13753 header->pa = ml_static_vtop((vm_offset_t)ttp);
13754 header->num_entries = num_entries;
13755 header->start_va = start_va;
13756 header->end_va = start_va + (num_entries * size);
13757
13758 bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
13759 bytes_used += (sizeof(*header) + tt_size);
13760 uint64_t current_va = start_va;
13761
13762 for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
13763 tt_entry_t tte = *ttep;
13764
13765 if (!(tte & valid_mask)) {
13766 continue;
13767 }
13768
13769 if ((tte & type_mask) == type_block) {
13770 continue;
13771 } else {
13772 if (cur_level >= PMAP_TT_MAX_LEVEL) {
13773 panic("%s: corrupt entry %#llx at %p, "
0a7de745
A
13774 "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
13775 __FUNCTION__, tte, ttep,
13776 ttp, cur_level, bufp, buf_end);
d9a64523
A
13777 }
13778
13779 const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
13780
cb323159 13781 size_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1, current_va, (uint8_t*)bufp + bytes_used, buf_end);
d9a64523
A
13782
13783 if (recurse_result == 0) {
13784 return 0;
13785 }
13786
13787 bytes_used += recurse_result;
13788 }
13789 }
13790
13791 return bytes_used;
13792}
13793
13794size_t
13795pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end)
13796{
0a7de745 13797 if (not_in_kdp) {
d9a64523 13798 panic("pmap_dump_page_tables must only be called from kernel debugger context");
0a7de745 13799 }
cb323159 13800 return pmap_dump_page_tables_recurse(pmap, pmap->tte, arm64_root_pgtable_level, pmap->min, bufp, buf_end);
d9a64523
A
13801}
13802
13803#else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
13804
13805size_t
13806pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)
13807{
13808 return (size_t)-1;
13809}
13810
13811#endif /* !defined(__arm64__) */