apple/xnu (xnu-6153.101.6) / osfmk / arm / pmap.c
1 /*
2 * Copyright (c) 2011-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <string.h>
29 #include <mach_assert.h>
30 #include <mach_ldebug.h>
31
32 #include <mach/shared_region.h>
33 #include <mach/vm_param.h>
34 #include <mach/vm_prot.h>
35 #include <mach/vm_map.h>
36 #include <mach/machine/vm_param.h>
37 #include <mach/machine/vm_types.h>
38
39 #include <mach/boolean.h>
40 #include <kern/bits.h>
41 #include <kern/thread.h>
42 #include <kern/sched.h>
43 #include <kern/zalloc.h>
44 #include <kern/kalloc.h>
45 #include <kern/ledger.h>
46 #include <kern/spl.h>
47 #include <kern/trustcache.h>
48
49 #include <os/overflow.h>
50
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_protos.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_pageout.h>
58 #include <vm/cpm.h>
59
60 #include <libkern/img4/interface.h>
61 #include <libkern/section_keywords.h>
62
63 #include <machine/atomic.h>
64 #include <machine/thread.h>
65 #include <machine/lowglobals.h>
66
67 #include <arm/caches_internal.h>
68 #include <arm/cpu_data.h>
69 #include <arm/cpu_data_internal.h>
70 #include <arm/cpu_capabilities.h>
71 #include <arm/cpu_number.h>
72 #include <arm/machine_cpu.h>
73 #include <arm/misc_protos.h>
74 #include <arm/trap.h>
75
76 #if (__ARM_VMSA__ > 7)
77 #include <arm64/proc_reg.h>
78 #include <pexpert/arm64/boot.h>
79 #if CONFIG_PGTRACE
80 #include <stdint.h>
81 #include <arm64/pgtrace.h>
82 #if CONFIG_PGTRACE_NONKEXT
83 #include <arm64/pgtrace_decoder.h>
84 #endif // CONFIG_PGTRACE_NONKEXT
85 #endif
86 #endif
87
88 #include <pexpert/device_tree.h>
89
90 #include <san/kasan.h>
91 #include <sys/cdefs.h>
92
93 #if defined(HAS_APPLE_PAC)
94 #include <ptrauth.h>
95 #endif
96
97 #define PMAP_TT_L0_LEVEL 0x0
98 #define PMAP_TT_L1_LEVEL 0x1
99 #define PMAP_TT_L2_LEVEL 0x2
100 #define PMAP_TT_L3_LEVEL 0x3
101 #if (__ARM_VMSA__ == 7)
102 #define PMAP_TT_MAX_LEVEL PMAP_TT_L2_LEVEL
103 #else
104 #define PMAP_TT_MAX_LEVEL PMAP_TT_L3_LEVEL
105 #endif
106 #define PMAP_TT_LEAF_LEVEL PMAP_TT_MAX_LEVEL
107 #define PMAP_TT_TWIG_LEVEL (PMAP_TT_MAX_LEVEL - 1)
108
109 static bool alloc_asid(pmap_t pmap);
110 static void free_asid(pmap_t pmap);
111 static void flush_mmu_tlb_region_asid_async(vm_offset_t va, unsigned length, pmap_t pmap);
112 static void flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap);
113 static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
114 static pt_entry_t wimg_to_pte(unsigned int wimg);
115
116 struct page_table_ops {
117 bool (*alloc_id)(pmap_t pmap);
118 void (*free_id)(pmap_t pmap);
119 void (*flush_tlb_region_async)(vm_offset_t va, unsigned length, pmap_t pmap);
120 void (*flush_tlb_tte_async)(vm_offset_t va, pmap_t pmap);
121 void (*flush_tlb_async)(pmap_t pmap);
122 pt_entry_t (*wimg_to_pte)(unsigned int wimg);
123 };
124
125 static const struct page_table_ops native_pt_ops =
126 {
127 .alloc_id = alloc_asid,
128 .free_id = free_asid,
129 .flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
130 .flush_tlb_tte_async = flush_mmu_tlb_tte_asid_async,
131 .flush_tlb_async = flush_mmu_tlb_full_asid_async,
132 .wimg_to_pte = wimg_to_pte,
133 };
134
135 #if (__ARM_VMSA__ > 7)
136 const struct page_table_level_info pmap_table_level_info_16k[] =
137 {
138 [0] = {
139 .size = ARM_16K_TT_L0_SIZE,
140 .offmask = ARM_16K_TT_L0_OFFMASK,
141 .shift = ARM_16K_TT_L0_SHIFT,
142 .index_mask = ARM_16K_TT_L0_INDEX_MASK,
143 .valid_mask = ARM_TTE_VALID,
144 .type_mask = ARM_TTE_TYPE_MASK,
145 .type_block = ARM_TTE_TYPE_BLOCK
146 },
147 [1] = {
148 .size = ARM_16K_TT_L1_SIZE,
149 .offmask = ARM_16K_TT_L1_OFFMASK,
150 .shift = ARM_16K_TT_L1_SHIFT,
151 .index_mask = ARM_16K_TT_L1_INDEX_MASK,
152 .valid_mask = ARM_TTE_VALID,
153 .type_mask = ARM_TTE_TYPE_MASK,
154 .type_block = ARM_TTE_TYPE_BLOCK
155 },
156 [2] = {
157 .size = ARM_16K_TT_L2_SIZE,
158 .offmask = ARM_16K_TT_L2_OFFMASK,
159 .shift = ARM_16K_TT_L2_SHIFT,
160 .index_mask = ARM_16K_TT_L2_INDEX_MASK,
161 .valid_mask = ARM_TTE_VALID,
162 .type_mask = ARM_TTE_TYPE_MASK,
163 .type_block = ARM_TTE_TYPE_BLOCK
164 },
165 [3] = {
166 .size = ARM_16K_TT_L3_SIZE,
167 .offmask = ARM_16K_TT_L3_OFFMASK,
168 .shift = ARM_16K_TT_L3_SHIFT,
169 .index_mask = ARM_16K_TT_L3_INDEX_MASK,
170 .valid_mask = ARM_PTE_TYPE_VALID,
171 .type_mask = ARM_PTE_TYPE_MASK,
172 .type_block = ARM_TTE_TYPE_L3BLOCK
173 }
174 };
175
176 const struct page_table_level_info pmap_table_level_info_4k[] =
177 {
178 [0] = {
179 .size = ARM_4K_TT_L0_SIZE,
180 .offmask = ARM_4K_TT_L0_OFFMASK,
181 .shift = ARM_4K_TT_L0_SHIFT,
182 .index_mask = ARM_4K_TT_L0_INDEX_MASK,
183 .valid_mask = ARM_TTE_VALID,
184 .type_mask = ARM_TTE_TYPE_MASK,
185 .type_block = ARM_TTE_TYPE_BLOCK
186 },
187 [1] = {
188 .size = ARM_4K_TT_L1_SIZE,
189 .offmask = ARM_4K_TT_L1_OFFMASK,
190 .shift = ARM_4K_TT_L1_SHIFT,
191 .index_mask = ARM_4K_TT_L1_INDEX_MASK,
192 .valid_mask = ARM_TTE_VALID,
193 .type_mask = ARM_TTE_TYPE_MASK,
194 .type_block = ARM_TTE_TYPE_BLOCK
195 },
196 [2] = {
197 .size = ARM_4K_TT_L2_SIZE,
198 .offmask = ARM_4K_TT_L2_OFFMASK,
199 .shift = ARM_4K_TT_L2_SHIFT,
200 .index_mask = ARM_4K_TT_L2_INDEX_MASK,
201 .valid_mask = ARM_TTE_VALID,
202 .type_mask = ARM_TTE_TYPE_MASK,
203 .type_block = ARM_TTE_TYPE_BLOCK
204 },
205 [3] = {
206 .size = ARM_4K_TT_L3_SIZE,
207 .offmask = ARM_4K_TT_L3_OFFMASK,
208 .shift = ARM_4K_TT_L3_SHIFT,
209 .index_mask = ARM_4K_TT_L3_INDEX_MASK,
210 .valid_mask = ARM_PTE_TYPE_VALID,
211 .type_mask = ARM_PTE_TYPE_MASK,
212 .type_block = ARM_TTE_TYPE_L3BLOCK
213 }
214 };
215
216 struct page_table_attr {
217 const struct page_table_level_info * const pta_level_info;
218 const struct page_table_ops * const pta_ops;
219 const uintptr_t ap_ro;
220 const uintptr_t ap_rw;
221 const uintptr_t ap_rona;
222 const uintptr_t ap_rwna;
223 const uintptr_t ap_xn;
224 const uintptr_t ap_x;
225 const unsigned int pta_root_level;
226 const unsigned int pta_max_level;
227 };
228
229 const struct page_table_attr pmap_pt_attr_4k = {
230 .pta_level_info = pmap_table_level_info_4k,
231 .pta_root_level = PMAP_TT_L1_LEVEL,
232 .pta_max_level = PMAP_TT_L3_LEVEL,
233 .pta_ops = &native_pt_ops,
234 .ap_ro = ARM_PTE_AP(AP_RORO),
235 .ap_rw = ARM_PTE_AP(AP_RWRW),
236 .ap_rona = ARM_PTE_AP(AP_RONA),
237 .ap_rwna = ARM_PTE_AP(AP_RWNA),
238 .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
239 .ap_x = ARM_PTE_PNX,
240 };
241
242 const struct page_table_attr pmap_pt_attr_16k = {
243 .pta_level_info = pmap_table_level_info_16k,
244 .pta_root_level = PMAP_TT_L1_LEVEL,
245 .pta_max_level = PMAP_TT_L3_LEVEL,
246 .pta_ops = &native_pt_ops,
247 .ap_ro = ARM_PTE_AP(AP_RORO),
248 .ap_rw = ARM_PTE_AP(AP_RWRW),
249 .ap_rona = ARM_PTE_AP(AP_RONA),
250 .ap_rwna = ARM_PTE_AP(AP_RWNA),
251 .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
252 .ap_x = ARM_PTE_PNX,
253 };
254
255 #if __ARM_16K_PG__
256 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
257 #else /* !__ARM_16K_PG__ */
258 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
259 #endif /* !__ARM_16K_PG__ */
260
261
262 #else /* (__ARM_VMSA__ > 7) */
263 /*
264 * We don't support pmap parameterization for VMSA7, so use an opaque
265 * page_table_attr structure.
266 */
267 const struct page_table_attr * const native_pt_attr = NULL;
268 #endif /* (__ARM_VMSA__ > 7) */
269
270 typedef struct page_table_attr pt_attr_t;
271
272 /* Macro for getting pmap attributes; not a function for const propagation. */
273 #if ARM_PARAMETERIZED_PMAP
274 /* The page table attributes are linked to the pmap */
275 #define pmap_get_pt_attr(pmap) ((pmap)->pmap_pt_attr)
276 #define pmap_get_pt_ops(pmap) ((pmap)->pmap_pt_attr->pta_ops)
277 #else /* !ARM_PARAMETERIZED_PMAP */
278 /* The page table attributes are fixed (to allow for const propagation) */
279 #define pmap_get_pt_attr(pmap) (native_pt_attr)
280 #define pmap_get_pt_ops(pmap) (&native_pt_ops)
281 #endif /* !ARM_PARAMETERIZED_PMAP */
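/*
 * Illustrative usage (hypothetical caller, not from the original file):
 * fetch the attribute block once per operation and derive table geometry
 * from it, so the accessors below can be const-propagated, e.g.
 *
 *	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
 *	vm_map_address_t twig_va = va & ~pt_attr_twig_offmask(pt_attr);
 *	tt_entry_t *ttep = &pmap->tte[tte_index(pmap, pt_attr, va)];
 */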
282
283 #if (__ARM_VMSA__ > 7)
284 static inline uint64_t
285 pt_attr_ln_size(const pt_attr_t * const pt_attr, unsigned int level)
286 {
287 return pt_attr->pta_level_info[level].size;
288 }
289
290 __unused static inline uint64_t
291 pt_attr_ln_shift(const pt_attr_t * const pt_attr, unsigned int level)
292 {
293 return pt_attr->pta_level_info[level].shift;
294 }
295
296 __unused static inline uint64_t
297 pt_attr_ln_offmask(const pt_attr_t * const pt_attr, unsigned int level)
298 {
299 return pt_attr->pta_level_info[level].offmask;
300 }
301
302 static inline unsigned int
303 pt_attr_twig_level(const pt_attr_t * const pt_attr)
304 {
305 return pt_attr->pta_max_level - 1;
306 }
307
308 static inline unsigned int
309 pt_attr_root_level(const pt_attr_t * const pt_attr)
310 {
311 return pt_attr->pta_root_level;
312 }
313
314 static __unused inline uint64_t
315 pt_attr_leaf_size(const pt_attr_t * const pt_attr)
316 {
317 return pt_attr->pta_level_info[pt_attr->pta_max_level].size;
318 }
319
320 static __unused inline uint64_t
321 pt_attr_leaf_offmask(const pt_attr_t * const pt_attr)
322 {
323 return pt_attr->pta_level_info[pt_attr->pta_max_level].offmask;
324 }
325
326 static inline uint64_t
327 pt_attr_leaf_shift(const pt_attr_t * const pt_attr)
328 {
329 return pt_attr->pta_level_info[pt_attr->pta_max_level].shift;
330 }
331
332 static __unused inline uint64_t
333 pt_attr_leaf_index_mask(const pt_attr_t * const pt_attr)
334 {
335 return pt_attr->pta_level_info[pt_attr->pta_max_level].index_mask;
336 }
337
338 static inline uint64_t
339 pt_attr_twig_size(const pt_attr_t * const pt_attr)
340 {
341 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].size;
342 }
343
344 static inline uint64_t
345 pt_attr_twig_offmask(const pt_attr_t * const pt_attr)
346 {
347 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].offmask;
348 }
349
350 static inline uint64_t
351 pt_attr_twig_shift(const pt_attr_t * const pt_attr)
352 {
353 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].shift;
354 }
355
356 static __unused inline uint64_t
357 pt_attr_twig_index_mask(const pt_attr_t * const pt_attr)
358 {
359 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].index_mask;
360 }
361
362 static inline uint64_t
363 pt_attr_leaf_table_size(const pt_attr_t * const pt_attr)
364 {
365 return pt_attr_twig_size(pt_attr);
366 }
367
368 static inline uint64_t
369 pt_attr_leaf_table_offmask(const pt_attr_t * const pt_attr)
370 {
371 return pt_attr_twig_offmask(pt_attr);
372 }
373
374 static inline uintptr_t
375 pt_attr_leaf_rw(const pt_attr_t * const pt_attr)
376 {
377 return pt_attr->ap_rw;
378 }
379
380 static inline uintptr_t
381 pt_attr_leaf_ro(const pt_attr_t * const pt_attr)
382 {
383 return pt_attr->ap_ro;
384 }
385
386 static inline uintptr_t
387 pt_attr_leaf_rona(const pt_attr_t * const pt_attr)
388 {
389 return pt_attr->ap_rona;
390 }
391
392 static inline uintptr_t
393 pt_attr_leaf_rwna(const pt_attr_t * const pt_attr)
394 {
395 return pt_attr->ap_rwna;
396 }
397
398 static inline uintptr_t
399 pt_attr_leaf_xn(const pt_attr_t * const pt_attr)
400 {
401 return pt_attr->ap_xn;
402 }
403
404 static inline uintptr_t
405 pt_attr_leaf_x(const pt_attr_t * const pt_attr)
406 {
407 return pt_attr->ap_x;
408 }
409
410 #else /* (__ARM_VMSA__ > 7) */
411
412 static inline unsigned int
413 pt_attr_twig_level(__unused const pt_attr_t * const pt_attr)
414 {
415 return PMAP_TT_L1_LEVEL;
416 }
417
418 static inline uint64_t
419 pt_attr_twig_size(__unused const pt_attr_t * const pt_attr)
420 {
421 return ARM_TT_TWIG_SIZE;
422 }
423
424 static inline uint64_t
425 pt_attr_twig_offmask(__unused const pt_attr_t * const pt_attr)
426 {
427 return ARM_TT_TWIG_OFFMASK;
428 }
429
430 static inline uint64_t
431 pt_attr_twig_shift(__unused const pt_attr_t * const pt_attr)
432 {
433 return ARM_TT_TWIG_SHIFT;
434 }
435
436 static __unused inline uint64_t
437 pt_attr_twig_index_mask(__unused const pt_attr_t * const pt_attr)
438 {
439 return ARM_TT_TWIG_INDEX_MASK;
440 }
441
442 __unused static inline uint64_t
443 pt_attr_leaf_size(__unused const pt_attr_t * const pt_attr)
444 {
445 return ARM_TT_LEAF_SIZE;
446 }
447
448 __unused static inline uint64_t
449 pt_attr_leaf_offmask(__unused const pt_attr_t * const pt_attr)
450 {
451 return ARM_TT_LEAF_OFFMASK;
452 }
453
454 static inline uint64_t
455 pt_attr_leaf_shift(__unused const pt_attr_t * const pt_attr)
456 {
457 return ARM_TT_LEAF_SHIFT;
458 }
459
460 static __unused inline uint64_t
461 pt_attr_leaf_index_mask(__unused const pt_attr_t * const pt_attr)
462 {
463 return ARM_TT_LEAF_INDEX_MASK;
464 }
465
466 static inline uint64_t
467 pt_attr_leaf_table_size(__unused const pt_attr_t * const pt_attr)
468 {
469 return ARM_TT_L1_PT_SIZE;
470 }
471
472 static inline uint64_t
473 pt_attr_leaf_table_offmask(__unused const pt_attr_t * const pt_attr)
474 {
475 return ARM_TT_L1_PT_OFFMASK;
476 }
477
478 static inline uintptr_t
479 pt_attr_leaf_rw(__unused const pt_attr_t * const pt_attr)
480 {
481 return ARM_PTE_AP(AP_RWRW);
482 }
483
484 static inline uintptr_t
485 pt_attr_leaf_ro(__unused const pt_attr_t * const pt_attr)
486 {
487 return ARM_PTE_AP(AP_RORO);
488 }
489
490 static inline uintptr_t
491 pt_attr_leaf_rona(__unused const pt_attr_t * const pt_attr)
492 {
493 return ARM_PTE_AP(AP_RONA);
494 }
495
496 static inline uintptr_t
497 pt_attr_leaf_rwna(__unused const pt_attr_t * const pt_attr)
498 {
499 return ARM_PTE_AP(AP_RWNA);
500 }
501
502 static inline uintptr_t
503 pt_attr_leaf_xn(__unused const pt_attr_t * const pt_attr)
504 {
505 return ARM_PTE_NX;
506 }
507
508 #endif /* (__ARM_VMSA__ > 7) */
509
510 static inline void
511 pmap_sync_tlb(bool strong __unused)
512 {
513 sync_tlb_flush();
514 }
515
516 #if MACH_ASSERT
517 int vm_footprint_suspend_allowed = 1;
518
519 extern int pmap_ledgers_panic;
520 extern int pmap_ledgers_panic_leeway;
521
522 int pmap_stats_assert = 1;
523 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...) \
524 MACRO_BEGIN \
525 if (pmap_stats_assert && (pmap)->pmap_stats_assert) \
526 assertf(cond, fmt, ##__VA_ARGS__); \
527 MACRO_END
528 #else /* MACH_ASSERT */
529 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
530 #endif /* MACH_ASSERT */
531
532 #if DEVELOPMENT || DEBUG
533 #define PMAP_FOOTPRINT_SUSPENDED(pmap) \
534 (current_thread()->pmap_footprint_suspended)
535 #else /* DEVELOPMENT || DEBUG */
536 #define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
537 #endif /* DEVELOPMENT || DEBUG */
538
539
540 #if XNU_MONITOR
541 /*
542 * PPL External References.
543 */
544 extern vm_offset_t segPPLDATAB;
545 extern unsigned long segSizePPLDATA;
546 extern vm_offset_t segPPLTEXTB;
547 extern unsigned long segSizePPLTEXT;
548 #if __APRR_SUPPORTED__
549 extern vm_offset_t segPPLTRAMPB;
550 extern unsigned long segSizePPLTRAMP;
551 extern void ppl_trampoline_start;
552 extern void ppl_trampoline_end;
553 #endif
554 extern vm_offset_t segPPLDATACONSTB;
555 extern unsigned long segSizePPLDATACONST;
556
557
558 /*
559 * PPL Global Variables
560 */
561
562 #if (DEVELOPMENT || DEBUG)
563 /* Indicates if the PPL will enforce mapping policies; set by -unsafe_kernel_text */
564 SECURITY_READ_ONLY_LATE(boolean_t) pmap_ppl_disable = FALSE;
565 #else
566 const boolean_t pmap_ppl_disable = FALSE;
567 #endif
568
569 /* Indicates if the PPL has started applying APRR. */
570 boolean_t pmap_ppl_locked_down MARK_AS_PMAP_DATA = FALSE;
571
572 /*
573 * The PPL cannot invoke the kernel in order to allocate memory, so we must
574 * maintain a list of free pages that the PPL owns. The kernel can give the PPL
575 * additional pages.
576 */
577 decl_simple_lock_data(, pmap_ppl_free_page_lock MARK_AS_PMAP_DATA);
578 void ** pmap_ppl_free_page_list MARK_AS_PMAP_DATA = NULL;
579 uint64_t pmap_ppl_free_page_count MARK_AS_PMAP_DATA = 0;
580 uint64_t pmap_ppl_pages_returned_to_kernel_count_total = 0;
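/*
 * Illustrative sketch of the free-list shape (an assumption about layout,
 * not a definition from this file): each free page stores a pointer to the
 * next free page in its first word, so donating a page to the PPL reduces
 * to a linked-list push under pmap_ppl_free_page_lock:
 *
 *	*(void **)page_kva = (void *)pmap_ppl_free_page_list;
 *	pmap_ppl_free_page_list = (void **)page_kva;
 *	pmap_ppl_free_page_count++;
 */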
581
582 struct pmap_cpu_data_array_entry pmap_cpu_data_array[MAX_CPUS] MARK_AS_PMAP_DATA;
583
584 #ifdef CPU_CLUSTER_OFFSETS
585 const uint64_t pmap_cluster_offsets[] = CPU_CLUSTER_OFFSETS;
586 _Static_assert((sizeof(pmap_cluster_offsets) / sizeof(pmap_cluster_offsets[0])) == __ARM_CLUSTER_COUNT__,
587 "pmap_cluster_offsets[] count does not match __ARM_CLUSTER_COUNT__");
588 #endif
589
590 extern void *pmap_stacks_start;
591 extern void *pmap_stacks_end;
592 SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_start_pa = 0;
593 SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_end_pa = 0;
594 SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_start = 0;
595 SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_end = 0;
596
597 /* Allocation data/locks for pmap structures. */
598 decl_simple_lock_data(, pmap_free_list_lock MARK_AS_PMAP_DATA);
599 SECURITY_READ_ONLY_LATE(unsigned long) pmap_array_count = 0;
600 SECURITY_READ_ONLY_LATE(void *) pmap_array_begin = NULL;
601 SECURITY_READ_ONLY_LATE(void *) pmap_array_end = NULL;
602 SECURITY_READ_ONLY_LATE(pmap_t) pmap_array = NULL;
603 pmap_t pmap_free_list MARK_AS_PMAP_DATA = NULL;
604
605 /* Allocation data/locks/structs for task ledger structures. */
606 #define PMAP_LEDGER_DATA_BYTES \
607 (((sizeof(task_ledgers) / sizeof(int)) * sizeof(struct ledger_entry)) + sizeof(struct ledger))
608
609 /*
610  * The maximum number of ledgers allowed is the maximum number of tasks
611  * allowed on the system plus some headroom (~10% of total tasks = 200).
612 */
613 #define MAX_PMAP_LEDGERS (MAX_ASID + 200)
614
615 typedef struct pmap_ledger_data {
616 char pld_data[PMAP_LEDGER_DATA_BYTES];
617 } pmap_ledger_data_t;
618
619 typedef struct pmap_ledger {
620 union {
621 struct pmap_ledger_data ple_data;
622 struct pmap_ledger * next;
623 };
624
625 struct pmap_ledger ** back_ptr;
626 } pmap_ledger_t;
627
628 SECURITY_READ_ONLY_LATE(bool) pmap_ledger_alloc_initialized = false;
629 decl_simple_lock_data(, pmap_ledger_lock MARK_AS_PMAP_DATA);
630 SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_begin = NULL;
631 SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_end = NULL;
632 SECURITY_READ_ONLY_LATE(os_refcnt_t *) pmap_ledger_refcnt = NULL;
633 SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_begin = NULL;
634 SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_end = NULL;
635 SECURITY_READ_ONLY_LATE(pmap_ledger_t * *) pmap_ledger_ptr_array = NULL;
636 uint64_t pmap_ledger_ptr_array_free_index MARK_AS_PMAP_DATA = 0;
637 pmap_ledger_t * pmap_ledger_free_list MARK_AS_PMAP_DATA = NULL;
638
639 #define pmap_ledger_debit(p, e, a) ledger_debit_nocheck((p)->ledger, e, a)
640 #define pmap_ledger_credit(p, e, a) ledger_credit_nocheck((p)->ledger, e, a)
641
642 static inline void
643 pmap_check_ledger_fields(ledger_t ledger)
644 {
645 if (ledger == NULL) {
646 return;
647 }
648
649 thread_t cur_thread = current_thread();
650 ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting);
651 ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting_compressed);
652 ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal);
653 ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal_compressed);
654 ledger_check_new_balance(cur_thread, ledger, task_ledgers.page_table);
655 ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_footprint);
656 ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_mem);
657 ledger_check_new_balance(cur_thread, ledger, task_ledgers.tkm_private);
658 ledger_check_new_balance(cur_thread, ledger, task_ledgers.wired_mem);
659 }
660
661 #define pmap_ledger_check_balance(p) pmap_check_ledger_fields((p)->ledger)
662
663 #else /* XNU_MONITOR */
664
665 #define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
666 #define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
667
668 #endif /* !XNU_MONITOR */
669
670 #if DEVELOPMENT || DEBUG
671 int panic_on_unsigned_execute = 0;
672 #endif /* DEVELOPMENT || DEBUG */
673
674
675 /* Virtual memory region for early allocation */
676 #if (__ARM_VMSA__ == 7)
677 #define VREGION1_HIGH_WINDOW (0)
678 #else
679 #define VREGION1_HIGH_WINDOW (PE_EARLY_BOOT_VA)
680 #endif
681 #define VREGION1_START ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
682 #define VREGION1_SIZE (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
683
684 extern unsigned int not_in_kdp;
685
686 extern vm_offset_t first_avail;
687
688 extern pmap_paddr_t avail_start;
689 extern pmap_paddr_t avail_end;
690
691 extern vm_offset_t virtual_space_start; /* Next available kernel VA */
692 extern vm_offset_t virtual_space_end; /* End of kernel address space */
693 extern vm_offset_t static_memory_end;
694
695 extern int maxproc, hard_maxproc;
696
697 #if (__ARM_VMSA__ > 7)
698 /* The number of address bits one TTBR can cover. */
699 #define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
700
701 /*
702 * The bounds on our TTBRs. These are for sanity checking that
703 * an address is accessible by a TTBR before we attempt to map it.
704 */
705 #define ARM64_TTBR0_MIN_ADDR (0ULL)
706 #define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
707 #define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
708 #define ARM64_TTBR1_MAX_ADDR (~0ULL)
709
710 /* The level of the root of a page table. */
711 const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));
712
713 /* The number of entries in the root TT of a page table. */
714 const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
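/*
 * Worked example (illustrative numbers): with 4KB translation granules
 * (ARM_PGSHIFT == 12, TTE_SHIFT == 3) and a hypothetical T0SZ_BOOT of 25,
 * PGTABLE_ADDR_BITS is 39, so (39 - 1 - 12) / (12 - 3) == 2 and the root
 * level is 3 - 2 == 1, with 2 << (26 % 9) == 512 entries in the root table.
 */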
715 #else
716 const uint64_t arm64_root_pgtable_level = 0;
717 const uint64_t arm64_root_pgtable_num_ttes = 0;
718 #endif
719
720 struct pmap kernel_pmap_store MARK_AS_PMAP_DATA;
721 SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;
722
723 struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* store pt pages */
724 vm_object_t pmap_object = &pmap_object_store;
725
726 static struct zone *pmap_zone; /* zone of pmap structures */
727
728 decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA);
729 decl_simple_lock_data(, tt1_lock MARK_AS_PMAP_DATA);
730 unsigned int pmap_stamp MARK_AS_PMAP_DATA;
731 queue_head_t map_pmap_list MARK_AS_PMAP_DATA;
732
733 decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA);
734 queue_head_t pt_page_list MARK_AS_PMAP_DATA; /* pt page ptd entries list */
735
736 decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA);
737
738 typedef struct page_free_entry {
739 struct page_free_entry *next;
740 } page_free_entry_t;
741
742 #define PAGE_FREE_ENTRY_NULL ((page_free_entry_t *) 0)
743
744 page_free_entry_t *pmap_pages_reclaim_list MARK_AS_PMAP_DATA; /* Reclaimed pt page list */
745 unsigned int pmap_pages_request_count MARK_AS_PMAP_DATA; /* Pending requests to reclaim pt page */
746 unsigned long long pmap_pages_request_acum MARK_AS_PMAP_DATA;
747
748
749 typedef struct tt_free_entry {
750 struct tt_free_entry *next;
751 } tt_free_entry_t;
752
753 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
754
755 tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
756 unsigned int free_page_size_tt_count MARK_AS_PMAP_DATA;
757 unsigned int free_page_size_tt_max MARK_AS_PMAP_DATA;
758 #define FREE_PAGE_SIZE_TT_MAX 4
759 tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
760 unsigned int free_two_page_size_tt_count MARK_AS_PMAP_DATA;
761 unsigned int free_two_page_size_tt_max MARK_AS_PMAP_DATA;
762 #define FREE_TWO_PAGE_SIZE_TT_MAX 4
763 tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
764 unsigned int free_tt_count MARK_AS_PMAP_DATA;
765 unsigned int free_tt_max MARK_AS_PMAP_DATA;
766
767 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
768
769 boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
770 boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
771 boolean_t pmap_gc_allowed_by_time_throttle = TRUE;
772
773 unsigned int inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
774 unsigned int inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf user pagetable pages, in units of PAGE_SIZE */
775 unsigned int inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0; /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
776 unsigned int inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
777 unsigned int inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
778 unsigned int inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0; /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
779 unsigned int inuse_pmap_pages_count = 0; /* debugging */
780
781 SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte = 0;
782 SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;
783
784 SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte = 0; /* set by arm_vm_init() - keep out of bss */
785 SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0; /* set by arm_vm_init() - phys tte addr */
786
787 #if DEVELOPMENT || DEBUG
788 int nx_enabled = 1; /* enable no-execute protection */
789 int allow_data_exec = 0; /* No apps may execute data */
790 int allow_stack_exec = 0; /* No apps may execute from the stack */
791 unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
792 #else /* DEVELOPMENT || DEBUG */
793 const int nx_enabled = 1; /* enable no-execute protection */
794 const int allow_data_exec = 0; /* No apps may execute data */
795 const int allow_stack_exec = 0; /* No apps may execute from the stack */
796 #endif /* DEVELOPMENT || DEBUG */
797
798 /*
799 * pv_entry_t - structure to track the active mappings for a given page
800 */
801 typedef struct pv_entry {
802 struct pv_entry *pve_next; /* next alias */
803 pt_entry_t *pve_ptep; /* page table entry */
804 }
805 #if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
806 /* Under the newer ARMv7k ABI, 64-bit types are 64-bit aligned but
807  * pointers are 32-bit. Since pt_desc is 64-bit aligned and we often
808  * cast from pv_entry to pt_desc, force 8-byte alignment here as
809  * well.
810 */
811 __attribute__ ((aligned(8))) pv_entry_t;
812 #else
813 pv_entry_t;
814 #endif
815
816 #define PV_ENTRY_NULL ((pv_entry_t *) 0)
817
818 /*
819 * PMAP LEDGERS:
820 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
821 * as a marker for pages mapped through an "alternate accounting" mapping.
822 * These macros set, clear and test for this marker and extract the actual
823 * value of the "pve_next" pointer.
824 */
825 #define PVE_NEXT_ALTACCT ((uintptr_t) 0x1)
826 #define PVE_NEXT_SET_ALTACCT(pve_next_p) \
827 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
828 PVE_NEXT_ALTACCT)
829 #define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
830 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
831 ~PVE_NEXT_ALTACCT)
832 #define PVE_NEXT_IS_ALTACCT(pve_next) \
833 ((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
834 #define PVE_NEXT_PTR(pve_next) \
835 ((struct pv_entry *)(((uintptr_t) (pve_next)) & \
836 ~PVE_NEXT_ALTACCT))
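/*
 * Worked example (illustrative addresses): if pve_next holds a pointer value
 * of 0x...e0, PVE_NEXT_SET_ALTACCT() stores 0x...e1, PVE_NEXT_IS_ALTACCT()
 * then reports TRUE, and PVE_NEXT_PTR() masks off the low bit to recover the
 * original 0x...e0 pointer. This is safe because pv_entry allocations are at
 * least 4-byte aligned, so bit 0 of a genuine pointer is always zero.
 */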
837 #if MACH_ASSERT
838 static void pmap_check_ledgers(pmap_t pmap);
839 #else
840 static inline void
841 pmap_check_ledgers(__unused pmap_t pmap)
842 {
843 }
844 #endif /* MACH_ASSERT */
845
846 SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table; /* array of pv entry pointers */
847
848 pv_entry_t *pv_free_list MARK_AS_PMAP_DATA;
849 pv_entry_t *pv_kern_free_list MARK_AS_PMAP_DATA;
850 decl_simple_lock_data(, pv_free_list_lock MARK_AS_PMAP_DATA);
851 decl_simple_lock_data(, pv_kern_free_list_lock MARK_AS_PMAP_DATA);
852
853 decl_simple_lock_data(, phys_backup_lock);
854
855 /*
856  * pt_desc - structure to keep info on pages assigned to page tables
857 */
858 #if (__ARM_VMSA__ == 7)
859 #define PT_INDEX_MAX 1
860 #else
861 #if (ARM_PGSHIFT == 14)
862 #define PT_INDEX_MAX 1
863 #else
864 #define PT_INDEX_MAX 4
865 #endif
866 #endif
867
868 #define PT_DESC_REFCOUNT 0x4000U
869 #define PT_DESC_IOMMU_REFCOUNT 0x8000U
870
871 typedef struct pt_desc {
872 queue_chain_t pt_page;
873 union {
874 struct pmap *pmap;
875 };
876 /*
877 * Locate this struct towards the end of the pt_desc; our long term
878 * goal is to make this a VLA to avoid wasting memory if we don't need
879 * multiple entries.
880 */
881 struct {
882 /*
883 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
884 * For leaf pagetables, should reflect the number of non-empty PTEs
885 * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
886 */
887 unsigned short refcnt;
888 /*
889 * For non-leaf pagetables, should be 0
890 * For leaf pagetables, should reflect the number of wired entries
891 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
892 */
893 unsigned short wiredcnt;
894 vm_offset_t va;
895 } ptd_info[PT_INDEX_MAX];
896 } pt_desc_t;
897
898
899 #define PTD_ENTRY_NULL ((pt_desc_t *) 0)
900
901 SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;
902
903 pt_desc_t *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
904 SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
905 unsigned int ptd_free_count MARK_AS_PMAP_DATA = 0;
906 decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA);
907
908 /*
909 * physical page attribute
910 */
911 typedef u_int16_t pp_attr_t;
912
913 #define PP_ATTR_WIMG_MASK 0x003F
914 #define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
915
916 #define PP_ATTR_REFERENCED 0x0040
917 #define PP_ATTR_MODIFIED 0x0080
918
919 #define PP_ATTR_INTERNAL 0x0100
920 #define PP_ATTR_REUSABLE 0x0200
921 #define PP_ATTR_ALTACCT 0x0400
922 #define PP_ATTR_NOENCRYPT 0x0800
923
924 #define PP_ATTR_REFFAULT 0x1000
925 #define PP_ATTR_MODFAULT 0x2000
926
927 #if XNU_MONITOR
928 /*
929 * Denotes that a page is owned by the PPL. This is modified/checked with the
930 * PVH lock held, to avoid ownership related races. This does not need to be a
931 * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
932 * put the bit.
933 */
934 #define PP_ATTR_MONITOR 0x4000
935
936 /*
937 * Denotes that a page *cannot* be owned by the PPL. This is required in order
938 * to temporarily 'pin' kernel pages that are used to store PPL output parameters.
939 * Otherwise a malicious or buggy caller could pass PPL-owned memory for these
940 * parameters and in so doing stage a write gadget against the PPL.
941 */
942 #define PP_ATTR_NO_MONITOR 0x8000
943
944 /*
945 * All of the bits owned by the PPL; kernel requests to set or clear these bits
946 * are illegal.
947 */
948 #define PP_ATTR_PPL_OWNED_BITS (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
949 #endif
950
951 SECURITY_READ_ONLY_LATE(pp_attr_t*) pp_attr_table;
952
953 typedef struct pmap_io_range {
954 uint64_t addr;
955 uint64_t len;
956 #define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31) // Strong DSB required for pages in this range
957 #define PMAP_IO_RANGE_CARVEOUT (1UL << 30) // Corresponds to memory carved out by bootloader
958 uint32_t wimg; // lower 16 bits treated as pp_attr_t, upper 16 bits contain additional mapping flags
959 uint32_t signature; // 4CC
960 } __attribute__((packed)) pmap_io_range_t;
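/*
 * Illustrative decode (hypothetical variable "rgn", not part of this file):
 * the two halves of "wimg" can be separated as
 *
 *	pp_attr_t io_attr = (pp_attr_t)(rgn->wimg & 0x0000FFFF);
 *	bool strong_sync  = (rgn->wimg & PMAP_IO_RANGE_STRONG_SYNC) != 0;
 */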
961
962 SECURITY_READ_ONLY_LATE(pmap_io_range_t*) io_attr_table;
963
964 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_first_phys = (pmap_paddr_t) 0;
965 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_last_phys = (pmap_paddr_t) 0;
966
967 SECURITY_READ_ONLY_LATE(unsigned int) num_io_rgns = 0;
968
969 SECURITY_READ_ONLY_LATE(boolean_t) pmap_initialized = FALSE; /* Has pmap_init completed? */
970
971 SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
972 SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;
973
974 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default = 0x0;
975 #if defined(__arm64__)
976 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
977 #endif
978
979 #define PMAP_MAX_SW_ASID ((MAX_ASID + MAX_HW_ASID - 1) / MAX_HW_ASID)
980 _Static_assert(PMAP_MAX_SW_ASID <= (UINT8_MAX + 1),
981 "VASID bits can't be represented by an 8-bit integer");
982
983 decl_simple_lock_data(, asid_lock MARK_AS_PMAP_DATA);
984 static bitmap_t asid_bitmap[BITMAP_LEN(MAX_ASID)] MARK_AS_PMAP_DATA;
985
986
987 #if (__ARM_VMSA__ > 7)
988 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
989 #endif
990
991 #if XNU_MONITOR
992 /*
993 * We define our target as 8 pages; enough for 2 page table pages, a PTD page,
994 * and a PV page; in essence, twice as many pages as may be necessary to satisfy
995 * a single pmap_enter request.
996 */
997 #define PMAP_MIN_FREE_PPL_PAGES 8
998 #endif
999
1000 #define pa_index(pa) \
1001 (atop((pa) - vm_first_phys))
1002
1003 #define pai_to_pvh(pai) \
1004 (&pv_head_table[pai])
1005
1006 #define pa_valid(x) \
1007 ((x) >= vm_first_phys && (x) < vm_last_phys)
1008
1009 /* PTE Define Macros */
1010
1011 #define pte_is_wired(pte) \
1012 (((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)
1013
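/*
 * pte_set_wired() keeps the ARM_PTE_WIRED bit in the PTE and the owning
 * pt_desc's per-table wired count in sync, updating the count with an
 * atomic 16-bit add.
 */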
1014 #define pte_set_wired(ptep, wired) \
1015 do { \
1016 SInt16 *ptd_wiredcnt_ptr; \
1017 ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->ptd_info[ARM_PT_DESC_INDEX(ptep)].wiredcnt); \
1018 if (wired) { \
1019 *ptep |= ARM_PTE_WIRED; \
1020 OSAddAtomic16(1, ptd_wiredcnt_ptr); \
1021 } else { \
1022 *ptep &= ~ARM_PTE_WIRED; \
1023 OSAddAtomic16(-1, ptd_wiredcnt_ptr); \
1024 } \
1025 } while(0)
1026
1027 #define pte_was_writeable(pte) \
1028 (((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)
1029
1030 #define pte_set_was_writeable(pte, was_writeable) \
1031 do { \
1032 if ((was_writeable)) { \
1033 (pte) |= ARM_PTE_WRITEABLE; \
1034 } else { \
1035 (pte) &= ~ARM_PTE_WRITEABLE; \
1036 } \
1037 } while(0)
1038
1039 /* PVE Define Macros */
1040
1041 #define pve_next(pve) \
1042 ((pve)->pve_next)
1043
1044 #define pve_link_field(pve) \
1045 (&pve_next(pve))
1046
1047 #define pve_link(pp, e) \
1048 ((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))
1049
1050 #define pve_unlink(pp, e) \
1051 (pve_next(pp) = pve_next(e))
1052
1053 /* bits held in the ptep pointer field */
1054
1055 #define pve_get_ptep(pve) \
1056 ((pve)->pve_ptep)
1057
1058 #define pve_set_ptep(pve, ptep_new) \
1059 do { \
1060 (pve)->pve_ptep = (ptep_new); \
1061 } while (0)
1062
1063 /* PTEP Define Macros */
1064
1065 /* mask for page descriptor index */
1066 #define ARM_TT_PT_INDEX_MASK ARM_PGMASK
1067
1068 #if (__ARM_VMSA__ == 7)
1069 #define ARM_PT_DESC_INDEX_MASK 0x00000
1070 #define ARM_PT_DESC_INDEX_SHIFT 0
1071
1072 /*
1073 * Shift value used for reconstructing the virtual address for a PTE.
1074 */
1075 #define ARM_TT_PT_ADDR_SHIFT (10U)
1076
1077 #define ptep_get_va(ptep) \
1078 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
1079
1080 #define ptep_get_pmap(ptep) \
1081 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
1082
1083 #else
1084
1085 #if (ARM_PGSHIFT == 12)
1086 #define ARM_PT_DESC_INDEX_MASK ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
1087 #define ARM_PT_DESC_INDEX_SHIFT ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
1088 /*
1089 * Shift value used for reconstructing the virtual address for a PTE.
1090 */
1091 #define ARM_TT_PT_ADDR_SHIFT (9ULL)
1092 #else
1093
1094 #define ARM_PT_DESC_INDEX_MASK (0x00000)
1095 #define ARM_PT_DESC_INDEX_SHIFT (0)
1096 /*
1097 * Shift value used for reconstructing the virtual address for a PTE.
1098 */
1099 #define ARM_TT_PT_ADDR_SHIFT (11ULL)
1100 #endif
1101
1102
1103 #define ARM_PT_DESC_INDEX(ptep) \
1104 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
1105
1106 #define ptep_get_va(ptep) \
1107 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
1108
1109 #define ptep_get_pmap(ptep) \
1110 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
1111
1112 #endif
1113
1114 #define ARM_PT_DESC_INDEX(ptep) \
1115 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
1116
1117 #define ptep_get_ptd(ptep) \
1118 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
1119
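/*
 * Illustrative note on the reconstruction above: ptep_get_va() looks up the
 * pt_desc for the page containing the PTE, takes that page table's base VA
 * from ptd_info[].va, and adds the PTE's byte offset within the page shifted
 * by ARM_TT_PT_ADDR_SHIFT. For example, with 4K hardware pages and 8-byte
 * PTEs each mapping 4KB, a PTE at byte offset 0x100 (entry 32) contributes
 * 0x100 << 9 == 32 * 4KB to the reconstructed VA.
 */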
1120
1121 /* PVH Define Macros */
1122
1123 /* pvhead type */
1124 #define PVH_TYPE_NULL 0x0UL
1125 #define PVH_TYPE_PVEP 0x1UL
1126 #define PVH_TYPE_PTEP 0x2UL
1127 #define PVH_TYPE_PTDP 0x3UL
1128
1129 #define PVH_TYPE_MASK (0x3UL)
1130
1131 #ifdef __arm64__
1132
1133 /* All flags listed below are stored in the PV head pointer unless otherwise noted */
1134 #define PVH_FLAG_IOMMU 0x4UL /* Stored in each PTE, or in PV head for single-PTE PV heads */
1135 #define PVH_FLAG_IOMMU_TABLE (1ULL << 63) /* Stored in each PTE, or in PV head for single-PTE PV heads */
1136 #define PVH_FLAG_CPU (1ULL << 62)
1137 #define PVH_LOCK_BIT 61
1138 #define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
1139 #define PVH_FLAG_EXEC (1ULL << 60)
1140 #define PVH_FLAG_LOCKDOWN (1ULL << 59)
1141 #define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN)
1142
1143 #else /* !__arm64__ */
1144
1145 #define PVH_LOCK_BIT 31
1146 #define PVH_FLAG_LOCK (1UL << PVH_LOCK_BIT)
1147 #define PVH_HIGH_FLAGS PVH_FLAG_LOCK
1148
1149 #endif
1150
1151 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
1152
1153 #define pvh_test_type(h, b) \
1154 ((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))
1155
1156 #define pvh_ptep(h) \
1157 ((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
1158
1159 #define pvh_list(h) \
1160 ((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
1161
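/*
 * Worked example (illustrative, arm64): a pv_head_table entry for a page
 * with exactly one CPU mapping stores the PTE pointer with PVH_TYPE_PTEP in
 * its two low bits and flags such as PVH_FLAG_CPU (plus PVH_FLAG_LOCK while
 * the entry is held) in its high bits. pvh_ptep() and pvh_list() mask with
 * PVH_LIST_MASK to drop the type bits and OR PVH_HIGH_FLAGS back in so the
 * result is a canonical kernel pointer again.
 */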
1162 #define pvh_get_flags(h) \
1163 (*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)
1164
1165 #define pvh_set_flags(h, f) \
1166 do { \
1167 os_atomic_store((vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
1168 relaxed); \
1169 } while (0)
1170
1171 #define pvh_update_head(h, e, t) \
1172 do { \
1173 assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK); \
1174 os_atomic_store((vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
1175 relaxed); \
1176 } while (0)
1177
1178 #define pvh_update_head_unlocked(h, e, t) \
1179 do { \
1180 assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK)); \
1181 *(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK; \
1182 } while (0)
1183
1184 #define pvh_add(h, e) \
1185 do { \
1186 assert(!pvh_test_type((h), PVH_TYPE_PTEP)); \
1187 pve_next(e) = pvh_list(h); \
1188 pvh_update_head((h), (e), PVH_TYPE_PVEP); \
1189 } while (0)
1190
1191 #define pvh_remove(h, p, e) \
1192 do { \
1193 assert(!PVE_NEXT_IS_ALTACCT(pve_next((e)))); \
1194 if ((p) == (h)) { \
1195 if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
1196 pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
1197 } else { \
1198 pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
1199 } \
1200 } else { \
1201 /* \
1202 * PMAP LEDGERS: \
1203 * preserve the "alternate accounting" bit \
1204 * when updating "p" (the previous entry's \
1205 * "pve_next"). \
1206 */ \
1207 boolean_t __is_altacct; \
1208 __is_altacct = PVE_NEXT_IS_ALTACCT(*(p)); \
1209 *(p) = PVE_NEXT_PTR(pve_next((e))); \
1210 if (__is_altacct) { \
1211 PVE_NEXT_SET_ALTACCT((p)); \
1212 } else { \
1213 PVE_NEXT_CLR_ALTACCT((p)); \
1214 } \
1215 } \
1216 } while (0)
1217
1218
1219 /* PPATTR Define Macros */
1220
1221 #define ppattr_set_bits(h, b) \
1222 do { \
1223 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
1224 } while (0)
1225
1226 #define ppattr_clear_bits(h, b) \
1227 do { \
1228 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
1229 } while (0)
1230
1231 #define ppattr_test_bits(h, b) \
1232 ((*(pp_attr_t *)(h) & (b)) == (b))
1233
1234 #define pa_set_bits(x, b) \
1235 do { \
1236 if (pa_valid(x)) \
1237 ppattr_set_bits(&pp_attr_table[pa_index(x)], \
1238 (b)); \
1239 } while (0)
1240
1241 #define pa_test_bits(x, b) \
1242 (pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
1243 (b)) : FALSE)
1244
1245 #define pa_clear_bits(x, b) \
1246 do { \
1247 if (pa_valid(x)) \
1248 ppattr_clear_bits(&pp_attr_table[pa_index(x)], \
1249 (b)); \
1250 } while (0)
1251
1252 #define pa_set_modify(x) \
1253 pa_set_bits(x, PP_ATTR_MODIFIED)
1254
1255 #define pa_clear_modify(x) \
1256 pa_clear_bits(x, PP_ATTR_MODIFIED)
1257
1258 #define pa_set_reference(x) \
1259 pa_set_bits(x, PP_ATTR_REFERENCED)
1260
1261 #define pa_clear_reference(x) \
1262 pa_clear_bits(x, PP_ATTR_REFERENCED)
1263
1264 #if XNU_MONITOR
1265 #define pa_set_monitor(x) \
1266 pa_set_bits((x), PP_ATTR_MONITOR)
1267
1268 #define pa_clear_monitor(x) \
1269 pa_clear_bits((x), PP_ATTR_MONITOR)
1270
1271 #define pa_test_monitor(x) \
1272 pa_test_bits((x), PP_ATTR_MONITOR)
1273
1274 #define pa_set_no_monitor(x) \
1275 pa_set_bits((x), PP_ATTR_NO_MONITOR)
1276
1277 #define pa_clear_no_monitor(x) \
1278 pa_clear_bits((x), PP_ATTR_NO_MONITOR)
1279
1280 #define pa_test_no_monitor(x) \
1281 pa_test_bits((x), PP_ATTR_NO_MONITOR)
1282 #endif
1283
1284 #define IS_INTERNAL_PAGE(pai) \
1285 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1286 #define SET_INTERNAL_PAGE(pai) \
1287 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1288 #define CLR_INTERNAL_PAGE(pai) \
1289 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1290
1291 #define IS_REUSABLE_PAGE(pai) \
1292 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1293 #define SET_REUSABLE_PAGE(pai) \
1294 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1295 #define CLR_REUSABLE_PAGE(pai) \
1296 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1297
1298 #define IS_ALTACCT_PAGE(pai, pve_p) \
1299 (((pve_p) == NULL) \
1300 ? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
1301 : PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
1302 #define SET_ALTACCT_PAGE(pai, pve_p) \
1303 if ((pve_p) == NULL) { \
1304 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1305 } else { \
1306 PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
1307 }
1308 #define CLR_ALTACCT_PAGE(pai, pve_p) \
1309 if ((pve_p) == NULL) { \
1310 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1311 } else { \
1312 PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
1313 }
1314
1315 #define IS_REFFAULT_PAGE(pai) \
1316 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1317 #define SET_REFFAULT_PAGE(pai) \
1318 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1319 #define CLR_REFFAULT_PAGE(pai) \
1320 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1321
1322 #define IS_MODFAULT_PAGE(pai) \
1323 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1324 #define SET_MODFAULT_PAGE(pai) \
1325 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1326 #define CLR_MODFAULT_PAGE(pai) \
1327 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1328
1329 #define tte_get_ptd(tte) \
1330 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))
1331
1332
1333 #if (__ARM_VMSA__ == 7)
1334
1335 #define tte_index(pmap, pt_attr, addr) \
1336 ttenum((addr))
1337
1338 #define pte_index(pmap, pt_attr, addr) \
1339 ptenum((addr))
1340
1341 #else
1342
1343 #define ttn_index(pmap, pt_attr, addr, pt_level) \
1344 (((addr) & (pt_attr)->pta_level_info[(pt_level)].index_mask) >> (pt_attr)->pta_level_info[(pt_level)].shift)
1345
1346 #define tt0_index(pmap, pt_attr, addr) \
1347 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L0_LEVEL)
1348
1349 #define tt1_index(pmap, pt_attr, addr) \
1350 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L1_LEVEL)
1351
1352 #define tt2_index(pmap, pt_attr, addr) \
1353 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L2_LEVEL)
1354
1355 #define tt3_index(pmap, pt_attr, addr) \
1356 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L3_LEVEL)
1357
1358 #define tte_index(pmap, pt_attr, addr) \
1359 tt2_index((pmap), (pt_attr), (addr))
1360
1361 #define pte_index(pmap, pt_attr, addr) \
1362 tt3_index((pmap), (pt_attr), (addr))
1363
1364 #endif
1365
1366 /*
1367 * Lock on pmap system
1368 */
1369
1370 lck_grp_t pmap_lck_grp;
1371
1372 #define PMAP_LOCK_INIT(pmap) { \
1373 simple_lock_init(&(pmap)->lock, 0); \
1374 }
1375
1376 #define PMAP_LOCK(pmap) { \
1377 pmap_simple_lock(&(pmap)->lock); \
1378 }
1379
1380 #define PMAP_UNLOCK(pmap) { \
1381 pmap_simple_unlock(&(pmap)->lock); \
1382 }
1383
1384 #if MACH_ASSERT
1385 #define PMAP_ASSERT_LOCKED(pmap) { \
1386 simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED); \
1387 }
1388 #else
1389 #define PMAP_ASSERT_LOCKED(pmap)
1390 #endif
1391
1392 #if defined(__arm64__)
1393 #define PVH_LOCK_WORD 1 /* Assumes little-endian */
1394 #else
1395 #define PVH_LOCK_WORD 0
1396 #endif
1397
1398 #define ASSERT_PVH_LOCKED(index) \
1399 do { \
1400 assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK); \
1401 } while (0)
1402
1403 #define LOCK_PVH(index) \
1404 do { \
1405 pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
1406 } while (0)
1407
1408 #define UNLOCK_PVH(index) \
1409 do { \
1410 ASSERT_PVH_LOCKED(index); \
1411 pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
1412 } while (0)
1413
1414 #define PMAP_UPDATE_TLBS(pmap, s, e, strong) { \
1415 pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (unsigned)(e - s), pmap); \
1416 pmap_sync_tlb(strong); \
1417 }
1418
1419 #define FLUSH_PTE_RANGE(spte, epte) \
1420 __builtin_arm_dmb(DMB_ISH);
1421
1422 #define FLUSH_PTE(pte_p) \
1423 __builtin_arm_dmb(DMB_ISH);
1424
1425 #define FLUSH_PTE_STRONG(pte_p) \
1426 __builtin_arm_dsb(DSB_ISH);
1427
1428 #define FLUSH_PTE_RANGE_STRONG(spte, epte) \
1429 __builtin_arm_dsb(DSB_ISH);
1430
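/*
 * When the VM page size is four times the hardware page size
 * (TEST_PAGE_RATIO_4, e.g. 16K VM pages on 4K hardware pages), one logical
 * PTE store below must fill four consecutive hardware PTEs, bumping the
 * output address by 0x1000 per entry unless the PTE is empty/compressed.
 */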
1431 #define WRITE_PTE_FAST(pte_p, pte_entry) \
1432 __unreachable_ok_push \
1433 if (TEST_PAGE_RATIO_4) { \
1434 if (((unsigned)(pte_p)) & 0x1f) { \
1435 panic("%s: WRITE_PTE_FAST is unaligned, " \
1436 "pte_p=%p, pte_entry=%p", \
1437 __FUNCTION__, \
1438 pte_p, (void*)pte_entry); \
1439 } \
1440 if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
1441 *(pte_p) = (pte_entry); \
1442 *((pte_p)+1) = (pte_entry); \
1443 *((pte_p)+2) = (pte_entry); \
1444 *((pte_p)+3) = (pte_entry); \
1445 } else { \
1446 *(pte_p) = (pte_entry); \
1447 *((pte_p)+1) = (pte_entry) | 0x1000; \
1448 *((pte_p)+2) = (pte_entry) | 0x2000; \
1449 *((pte_p)+3) = (pte_entry) | 0x3000; \
1450 } \
1451 } else { \
1452 *(pte_p) = (pte_entry); \
1453 } \
1454 __unreachable_ok_pop
1455
1456 #define WRITE_PTE(pte_p, pte_entry) \
1457 WRITE_PTE_FAST(pte_p, pte_entry); \
1458 FLUSH_PTE(pte_p);
1459
1460 #define WRITE_PTE_STRONG(pte_p, pte_entry) \
1461 WRITE_PTE_FAST(pte_p, pte_entry); \
1462 FLUSH_PTE_STRONG(pte_p);
1463
1464 /*
1465 * Other useful macros.
1466 */
1467 #define current_pmap() \
1468 (vm_map_pmap(current_thread()->map))
1469
1470 #if XNU_MONITOR
1471 /*
1472 * PPL-related macros.
1473 */
1474 #define ARRAY_ELEM_PTR_IS_VALID(_ptr_, _elem_size_, _array_begin_, _array_end_) \
1475 (((_ptr_) >= (typeof(_ptr_))_array_begin_) && \
1476 ((_ptr_) < (typeof(_ptr_))_array_end_) && \
1477 !((((void *)(_ptr_)) - ((void *)_array_begin_)) % (_elem_size_)))
1478
1479 #define PMAP_PTR_IS_VALID(x) ARRAY_ELEM_PTR_IS_VALID(x, sizeof(struct pmap), pmap_array_begin, pmap_array_end)
1480
1481 #define USER_PMAP_IS_VALID(x) (PMAP_PTR_IS_VALID(x) && (os_atomic_load(&(x)->ref_count, relaxed) > 0))
1482
1483 #define VALIDATE_USER_PMAP(x) \
1484 if (__improbable(!USER_PMAP_IS_VALID(x))) \
1485 panic("%s: invalid pmap %p", __func__, (x));
1486
1487 #define VALIDATE_PMAP(x) \
1488 if (__improbable(((x) != kernel_pmap) && !USER_PMAP_IS_VALID(x))) \
1489 panic("%s: invalid pmap %p", __func__, (x));
1490
1491 #define VALIDATE_LEDGER_PTR(x) \
1492 if (__improbable(!ARRAY_ELEM_PTR_IS_VALID(x, sizeof(void *), pmap_ledger_ptr_array_begin, pmap_ledger_ptr_array_end))) \
1493 panic("%s: invalid ledger ptr %p", __func__, (x));
1494
1495 #define ARRAY_ELEM_INDEX(x, _elem_size_, _array_begin_) ((uint64_t)((((void *)(x)) - (_array_begin_)) / (_elem_size_)))
1496
1497 static uint64_t
1498 pmap_ledger_validate(void * ledger)
1499 {
1500 uint64_t array_index;
1501 pmap_ledger_t ** ledger_ptr_array_ptr = ((pmap_ledger_t*)ledger)->back_ptr;
1502 VALIDATE_LEDGER_PTR(ledger_ptr_array_ptr);
1503 array_index = ARRAY_ELEM_INDEX(ledger_ptr_array_ptr, sizeof(pmap_ledger_t *), pmap_ledger_ptr_array_begin);
1504
1505 if (array_index >= MAX_PMAP_LEDGERS) {
1506 panic("%s: ledger %p array index invalid, index was %#llx", __func__, ledger, array_index);
1507 }
1508
1509 pmap_ledger_t *ledger_ptr = *ledger_ptr_array_ptr;
1510
1511 if (__improbable(ledger_ptr != ledger)) {
1512 panic("%s: ledger pointer mismatch, %p != %p", __func__, ledger, ledger_ptr);
1513 }
1514
1515 return array_index;
1516 }
1517
1518 #else /* XNU_MONITOR */
1519
1520 #define VALIDATE_USER_PMAP(x)
1521 #define VALIDATE_PMAP(x)
1522 #define VALIDATE_LEDGER(x)
1523
1524 #endif
1525
1526 #if DEVELOPMENT || DEBUG
1527
1528 /*
1529  * Trace levels are controlled by a bitmask in which each
1530  * level can be enabled/disabled via its (1 << level) bit
1531  * in the boot-arg mask:
1532 * Level 1: pmap lifecycle (create/destroy/switch)
1533 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
1534 * Level 3: internal state management (tte/attributes/fast-fault)
1535 */
1536
1537 SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;
1538
1539 #define PMAP_TRACE(level, ...) \
1540 if (__improbable((1 << (level)) & pmap_trace_mask)) { \
1541 KDBG_RELEASE(__VA_ARGS__); \
1542 }
1543 #else
1544
1545 #define PMAP_TRACE(level, ...)
1546
1547 #endif
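/*
 * Example (illustrative): a trace mask of 0x6 ((1 << 1) | (1 << 2)) enables
 * level 1 (pmap lifecycle) and level 2 (mapping lifecycle) events while
 * leaving level 3 internal-state tracing disabled.
 */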
1548
1549
1550 /*
1551 * Internal function prototypes (forward declarations).
1552 */
1553
1554 static void pv_init(
1555 void);
1556
1557 static boolean_t pv_alloc(
1558 pmap_t pmap,
1559 unsigned int pai,
1560 pv_entry_t **pvepp);
1561
1562 static void pv_free(
1563 pv_entry_t *pvep);
1564
1565 static void pv_list_free(
1566 pv_entry_t *pvehp,
1567 pv_entry_t *pvetp,
1568 unsigned int cnt);
1569
1570 static void ptd_bootstrap(
1571 pt_desc_t *ptdp, unsigned int ptd_cnt);
1572
1573 static inline pt_desc_t *ptd_alloc_unlinked(bool reclaim);
1574
1575 static pt_desc_t *ptd_alloc(pmap_t pmap, bool reclaim);
1576
1577 static void ptd_deallocate(pt_desc_t *ptdp);
1578
1579 static void ptd_init(
1580 pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);
1581
1582 static void pmap_zone_init(
1583 void);
1584
1585 static void pmap_set_reference(
1586 ppnum_t pn);
1587
1588 ppnum_t pmap_vtophys(
1589 pmap_t pmap, addr64_t va);
1590
1591 void pmap_switch_user_ttb(
1592 pmap_t pmap);
1593
1594 static kern_return_t pmap_expand(
1595 pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
1596
1597 static int pmap_remove_range(
1598 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);
1599
1600 static int pmap_remove_range_options(
1601 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, bool *, int);
1602
1603 static tt_entry_t *pmap_tt1_allocate(
1604 pmap_t, vm_size_t, unsigned int);
1605
1606 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1607
1608 static void pmap_tt1_deallocate(
1609 pmap_t, tt_entry_t *, vm_size_t, unsigned int);
1610
1611 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1612
1613 static kern_return_t pmap_tt_allocate(
1614 pmap_t, tt_entry_t **, unsigned int, unsigned int);
1615
1616 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1617
1618 static void pmap_tte_deallocate(
1619 pmap_t, tt_entry_t *, unsigned int);
1620
1621 #ifdef __ARM64_PMAP_SUBPAGE_L1__
1622 #if (__ARM_VMSA__ <= 7)
1623 #error This is not supported for old-style page tables
1624 #endif /* (__ARM_VMSA__ <= 7) */
1625 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
1626 #else /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
1627 #if (__ARM_VMSA__ <= 7)
1628 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
1629 #else /* (__ARM_VMSA__ > 7) */
1630 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
1631 #endif /* (__ARM_VMSA__ > 7) */
1632 #endif /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
1633
1634 const unsigned int arm_hardware_page_size = ARM_PGBYTES;
1635 const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
1636 const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;
1637
1638 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1639
1640 #if (__ARM_VMSA__ > 7)
1641
1642 static inline tt_entry_t *pmap_tt1e(
1643 pmap_t, vm_map_address_t);
1644
1645 static inline tt_entry_t *pmap_tt2e(
1646 pmap_t, vm_map_address_t);
1647
1648 static inline pt_entry_t *pmap_tt3e(
1649 pmap_t, vm_map_address_t);
1650
1651 static inline pt_entry_t *pmap_ttne(
1652 pmap_t, unsigned int, vm_map_address_t);
1653
1654 static void pmap_unmap_sharedpage(
1655 pmap_t pmap);
1656
1657 static boolean_t
1658 pmap_is_64bit(pmap_t);
1659
1660
1661 #endif
1662 static inline tt_entry_t *pmap_tte(
1663 pmap_t, vm_map_address_t);
1664
1665 static inline pt_entry_t *pmap_pte(
1666 pmap_t, vm_map_address_t);
1667
1668 static void pmap_update_cache_attributes_locked(
1669 ppnum_t, unsigned);
1670
1671 boolean_t arm_clear_fast_fault(
1672 ppnum_t ppnum,
1673 vm_prot_t fault_type);
1674
1675 static pmap_paddr_t pmap_pages_reclaim(
1676 void);
1677
1678 static kern_return_t pmap_pages_alloc(
1679 pmap_paddr_t *pa,
1680 unsigned size,
1681 unsigned option);
1682
1683 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1684 #define PMAP_PAGES_RECLAIM_NOWAIT 0x2
1685
1686 static void pmap_pages_free(
1687 pmap_paddr_t pa,
1688 unsigned size);
1689
1690 static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);
1691
1692 static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);
1693
1694 static void pmap_trim_self(pmap_t pmap);
1695 static void pmap_trim_subord(pmap_t subord);
1696
1697 #if __APRR_SUPPORTED__
1698 static uint64_t pte_to_xprr_perm(pt_entry_t pte);
1699 static pt_entry_t xprr_perm_to_pte(uint64_t perm);
1700 #endif /* __APRR_SUPPORTED__*/
1701
1702 #if XNU_MONITOR
1703 static pmap_paddr_t pmap_alloc_page_for_kern(void);
1704 static void pmap_alloc_page_for_ppl(void);
1705
1706
1707 /*
1708 * This macro generates prototypes for the *_internal functions, which
1709 * represent the PPL interface. When the PPL is enabled, this will also
1710  * generate prototypes for the PPL entrypoints (*_ppl), as well as the
1711  * assembly entrypoint stubs themselves.
1712 */
1713 #define GEN_ASM_NAME(__function_name) _##__function_name##_ppl
1714
1715 #define PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
1716 static __return_type __function_name##_internal __function_args; \
1717 extern __return_type __function_name##_ppl __function_args; \
1718 __asm__ (".text \n" \
1719 ".align 2 \n" \
1720 ".globl " #__assembly_function_name "\n" \
1721 #__assembly_function_name ":\n" \
1722 "mov x15, " #__function_index "\n" \
1723 "b _aprr_ppl_enter\n")
1724
1725 #define PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
1726 PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name)
1727
1728 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
1729 PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, GEN_ASM_NAME(__function_name))
1730 #else /* XNU_MONITOR */
1731 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
1732 static __return_type __function_name##_internal __function_args
1733 #endif /* XNU_MONITOR */
1734
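/*
 * Illustrative expansion of PMAP_SUPPORT_PROTOTYPES (simplified; argument
 * lists elided).  For a use such as
 * PMAP_SUPPORT_PROTOTYPES(kern_return_t, arm_fast_fault, (...), ARM_FAST_FAULT_INDEX),
 * a non-XNU_MONITOR build reduces to a single prototype:
 *
 *     static kern_return_t arm_fast_fault_internal(...);
 *
 * while an XNU_MONITOR build additionally declares arm_fast_fault_ppl(...)
 * and emits a small trampoline that loads the dispatch index into x15 and
 * branches into the PPL:
 *
 *     _arm_fast_fault_ppl:
 *         mov x15, ARM_FAST_FAULT_INDEX
 *         b   _aprr_ppl_enter
 */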
1735 PMAP_SUPPORT_PROTOTYPES(
1736 kern_return_t,
1737 arm_fast_fault, (pmap_t pmap,
1738 vm_map_address_t va,
1739 vm_prot_t fault_type,
1740 bool was_af_fault,
1741 bool from_user), ARM_FAST_FAULT_INDEX);
1742
1743
1744 PMAP_SUPPORT_PROTOTYPES(
1745 boolean_t,
1746 arm_force_fast_fault, (ppnum_t ppnum,
1747 vm_prot_t allow_mode,
1748 int options), ARM_FORCE_FAST_FAULT_INDEX);
1749
1750 PMAP_SUPPORT_PROTOTYPES(
1751 kern_return_t,
1752 mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);
1753
1754 PMAP_SUPPORT_PROTOTYPES(
1755 kern_return_t,
1756 mapping_replenish, (uint32_t kern_target_count, uint32_t user_target_count), MAPPING_REPLENISH_INDEX);
1757
1758 PMAP_SUPPORT_PROTOTYPES(
1759 boolean_t,
1760 pmap_batch_set_cache_attributes, (ppnum_t pn,
1761 unsigned int cacheattr,
1762 unsigned int page_cnt,
1763 unsigned int page_index,
1764 boolean_t doit,
1765 unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);
1766
1767 PMAP_SUPPORT_PROTOTYPES(
1768 void,
1769 pmap_change_wiring, (pmap_t pmap,
1770 vm_map_address_t v,
1771 boolean_t wired), PMAP_CHANGE_WIRING_INDEX);
1772
1773 PMAP_SUPPORT_PROTOTYPES(
1774 pmap_t,
1775 pmap_create_options, (ledger_t ledger,
1776 vm_map_size_t size,
1777 unsigned int flags), PMAP_CREATE_INDEX);
1778
1779 PMAP_SUPPORT_PROTOTYPES(
1780 void,
1781 pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
1782
1783 PMAP_SUPPORT_PROTOTYPES(
1784 kern_return_t,
1785 pmap_enter_options, (pmap_t pmap,
1786 vm_map_address_t v,
1787 ppnum_t pn,
1788 vm_prot_t prot,
1789 vm_prot_t fault_type,
1790 unsigned int flags,
1791 boolean_t wired,
1792 unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
1793
1794 PMAP_SUPPORT_PROTOTYPES(
1795 vm_offset_t,
1796 pmap_extract, (pmap_t pmap,
1797 vm_map_address_t va), PMAP_EXTRACT_INDEX);
1798
1799 PMAP_SUPPORT_PROTOTYPES(
1800 ppnum_t,
1801 pmap_find_phys, (pmap_t pmap,
1802 addr64_t va), PMAP_FIND_PHYS_INDEX);
1803
1804 #if (__ARM_VMSA__ > 7)
1805 PMAP_SUPPORT_PROTOTYPES(
1806 kern_return_t,
1807 pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
1808 #endif
1809
1810
1811 PMAP_SUPPORT_PROTOTYPES(
1812 boolean_t,
1813 pmap_is_empty, (pmap_t pmap,
1814 vm_map_offset_t va_start,
1815 vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
1816
1817
1818 PMAP_SUPPORT_PROTOTYPES(
1819 unsigned int,
1820 pmap_map_cpu_windows_copy, (ppnum_t pn,
1821 vm_prot_t prot,
1822 unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);
1823
1824 PMAP_SUPPORT_PROTOTYPES(
1825 kern_return_t,
1826 pmap_nest, (pmap_t grand,
1827 pmap_t subord,
1828 addr64_t vstart,
1829 addr64_t nstart,
1830 uint64_t size), PMAP_NEST_INDEX);
1831
1832 PMAP_SUPPORT_PROTOTYPES(
1833 void,
1834 pmap_page_protect_options, (ppnum_t ppnum,
1835 vm_prot_t prot,
1836 unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
1837
1838 PMAP_SUPPORT_PROTOTYPES(
1839 void,
1840 pmap_protect_options, (pmap_t pmap,
1841 vm_map_address_t start,
1842 vm_map_address_t end,
1843 vm_prot_t prot,
1844 unsigned int options,
1845 void *args), PMAP_PROTECT_OPTIONS_INDEX);
1846
1847 PMAP_SUPPORT_PROTOTYPES(
1848 kern_return_t,
1849 pmap_query_page_info, (pmap_t pmap,
1850 vm_map_offset_t va,
1851 int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);
1852
1853 PMAP_SUPPORT_PROTOTYPES(
1854 mach_vm_size_t,
1855 pmap_query_resident, (pmap_t pmap,
1856 vm_map_address_t start,
1857 vm_map_address_t end,
1858 mach_vm_size_t * compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);
1859
1860 PMAP_SUPPORT_PROTOTYPES(
1861 void,
1862 pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
1863
1864 PMAP_SUPPORT_PROTOTYPES(
1865 int,
1866 pmap_remove_options, (pmap_t pmap,
1867 vm_map_address_t start,
1868 vm_map_address_t end,
1869 int options), PMAP_REMOVE_OPTIONS_INDEX);
1870
1871 PMAP_SUPPORT_PROTOTYPES(
1872 kern_return_t,
1873 pmap_return, (boolean_t do_panic,
1874 boolean_t do_recurse), PMAP_RETURN_INDEX);
1875
1876 PMAP_SUPPORT_PROTOTYPES(
1877 void,
1878 pmap_set_cache_attributes, (ppnum_t pn,
1879 unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);
1880
1881 PMAP_SUPPORT_PROTOTYPES(
1882 void,
1883 pmap_update_compressor_page, (ppnum_t pn,
1884 unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);
1885
1886 PMAP_SUPPORT_PROTOTYPES(
1887 void,
1888 pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
1889
1890 #if MACH_ASSERT || XNU_MONITOR
1891 PMAP_SUPPORT_PROTOTYPES(
1892 void,
1893 pmap_set_process, (pmap_t pmap,
1894 int pid,
1895 char *procname), PMAP_SET_PROCESS_INDEX);
1896 #endif
1897
1898 PMAP_SUPPORT_PROTOTYPES(
1899 void,
1900 pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);
1901
1902 PMAP_SUPPORT_PROTOTYPES(
1903 kern_return_t,
1904 pmap_unnest_options, (pmap_t grand,
1905 addr64_t vaddr,
1906 uint64_t size,
1907 unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
1908
1909 #if XNU_MONITOR
1910 PMAP_SUPPORT_PROTOTYPES(
1911 void,
1912 pmap_cpu_data_init, (unsigned int cpu_number), PMAP_CPU_DATA_INIT_INDEX);
1913 #endif
1914
1915 PMAP_SUPPORT_PROTOTYPES(
1916 void,
1917 phys_attribute_set, (ppnum_t pn,
1918 unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
1919
1920 #if XNU_MONITOR
1921 PMAP_SUPPORT_PROTOTYPES(
1922 void,
1923 pmap_mark_page_as_ppl_page, (pmap_paddr_t pa), PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX);
1924 #endif
1925
1926 PMAP_SUPPORT_PROTOTYPES(
1927 void,
1928 phys_attribute_clear, (ppnum_t pn,
1929 unsigned int bits,
1930 int options,
1931 void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);
1932
1933 PMAP_SUPPORT_PROTOTYPES(
1934 void,
1935 pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);
1936
1937 PMAP_SUPPORT_PROTOTYPES(
1938 void,
1939 pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);
1940
1941 PMAP_SUPPORT_PROTOTYPES(
1942 void,
1943 pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);
1944
1945 #if XNU_MONITOR
1946 PMAP_SUPPORT_PROTOTYPES(
1947 uint64_t,
1948 pmap_release_ppl_pages_to_kernel, (void), PMAP_RELEASE_PAGES_TO_KERNEL_INDEX);
1949 #endif
1950
1951 PMAP_SUPPORT_PROTOTYPES(
1952 void,
1953 pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);
1954
1955 PMAP_SUPPORT_PROTOTYPES(
1956 void,
1957 pmap_trim, (pmap_t grand,
1958 pmap_t subord,
1959 addr64_t vstart,
1960 addr64_t nstart,
1961 uint64_t size), PMAP_TRIM_INDEX);
1962
1963 #if HAS_APPLE_PAC && XNU_MONITOR
1964 PMAP_SUPPORT_PROTOTYPES(
1965 void *,
1966 pmap_sign_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator), PMAP_SIGN_USER_PTR);
1967 PMAP_SUPPORT_PROTOTYPES(
1968 void *,
1969 pmap_auth_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator), PMAP_AUTH_USER_PTR);
1970 #endif /* HAS_APPLE_PAC && XNU_MONITOR */
1971
1972
1973
1974
1975 #if XNU_MONITOR
1976 static void pmap_mark_page_as_ppl_page(pmap_paddr_t pa);
1977 #endif
1978
1979 void pmap_footprint_suspend(vm_map_t map,
1980 boolean_t suspend);
1981 PMAP_SUPPORT_PROTOTYPES(
1982 void,
1983 pmap_footprint_suspend, (vm_map_t map,
1984 boolean_t suspend),
1985 PMAP_FOOTPRINT_SUSPEND_INDEX);
1986
1987 #if XNU_MONITOR
1988 PMAP_SUPPORT_PROTOTYPES(
1989 void,
1990 pmap_ledger_alloc_init, (size_t),
1991 PMAP_LEDGER_ALLOC_INIT_INDEX);
1992
1993 PMAP_SUPPORT_PROTOTYPES(
1994 ledger_t,
1995 pmap_ledger_alloc, (void),
1996 PMAP_LEDGER_ALLOC_INDEX);
1997
1998 PMAP_SUPPORT_PROTOTYPES(
1999 void,
2000 pmap_ledger_free, (ledger_t),
2001 PMAP_LEDGER_FREE_INDEX);
2002 #endif
2003
2004 #if CONFIG_PGTRACE
2005 boolean_t pgtrace_enabled = 0;
2006
2007 typedef struct {
2008 queue_chain_t chain;
2009
2010 /*
2011  * pmap - pmap for the addresses below
2012 * ova - original va page address
2013 * cva - clone va addresses for pre, target and post pages
2014 * cva_spte - clone saved ptes
2015 * range - trace range in this map
2016  * cloned - whether the pages have been cloned
2017 */
2018 pmap_t pmap;
2019 vm_map_offset_t ova;
2020 vm_map_offset_t cva[3];
2021 pt_entry_t cva_spte[3];
2022 struct {
2023 pmap_paddr_t start;
2024 pmap_paddr_t end;
2025 } range;
2026 bool cloned;
2027 } pmap_pgtrace_map_t;
2028
2029 static void pmap_pgtrace_init(void);
2030 static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
2031 static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
2032 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
2033 #endif
2034
2035 #if (__ARM_VMSA__ > 7)
2036 /*
2037 * The low global vector page is mapped at a fixed alias.
2038 * Since the page size is 16KB for H8 and newer, we map the globals to a
2039 * 16KB-aligned address. Readers of the globals (e.g. lldb, panic server) need
2040 * to check both addresses anyway for backward compatibility. So for now
2041 * we leave H6 and H7 where they were.
2042 */
2043 #if (ARM_PGSHIFT == 14)
2044 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
2045 #else
2046 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
2047 #endif
2048
2049 #else
2050 #define LOWGLOBAL_ALIAS (0xFFFF1000)
2051 #endif
2052
2053 long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
2054 long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
2055 long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
2056 long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;
2057
2058 int pt_fake_zone_index = -1; /* index of pmap fake zone */
2059
2060 #if XNU_MONITOR
2061 /*
2062 * Table of function pointers used for PPL dispatch.
2063 */
2064 const void * const ppl_handler_table[PMAP_COUNT] = {
2065 [ARM_FAST_FAULT_INDEX] = arm_fast_fault_internal,
2066 [ARM_FORCE_FAST_FAULT_INDEX] = arm_force_fast_fault_internal,
2067 [MAPPING_FREE_PRIME_INDEX] = mapping_free_prime_internal,
2068 [MAPPING_REPLENISH_INDEX] = mapping_replenish_internal,
2069 [PHYS_ATTRIBUTE_CLEAR_INDEX] = phys_attribute_clear_internal,
2070 [PHYS_ATTRIBUTE_SET_INDEX] = phys_attribute_set_internal,
2071 [PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX] = pmap_batch_set_cache_attributes_internal,
2072 [PMAP_CHANGE_WIRING_INDEX] = pmap_change_wiring_internal,
2073 [PMAP_CREATE_INDEX] = pmap_create_options_internal,
2074 [PMAP_DESTROY_INDEX] = pmap_destroy_internal,
2075 [PMAP_ENTER_OPTIONS_INDEX] = pmap_enter_options_internal,
2076 [PMAP_EXTRACT_INDEX] = pmap_extract_internal,
2077 [PMAP_FIND_PHYS_INDEX] = pmap_find_phys_internal,
2078 [PMAP_INSERT_SHAREDPAGE_INDEX] = pmap_insert_sharedpage_internal,
2079 [PMAP_IS_EMPTY_INDEX] = pmap_is_empty_internal,
2080 [PMAP_MAP_CPU_WINDOWS_COPY_INDEX] = pmap_map_cpu_windows_copy_internal,
2081 [PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX] = pmap_mark_page_as_ppl_page_internal,
2082 [PMAP_NEST_INDEX] = pmap_nest_internal,
2083 [PMAP_PAGE_PROTECT_OPTIONS_INDEX] = pmap_page_protect_options_internal,
2084 [PMAP_PROTECT_OPTIONS_INDEX] = pmap_protect_options_internal,
2085 [PMAP_QUERY_PAGE_INFO_INDEX] = pmap_query_page_info_internal,
2086 [PMAP_QUERY_RESIDENT_INDEX] = pmap_query_resident_internal,
2087 [PMAP_REFERENCE_INDEX] = pmap_reference_internal,
2088 [PMAP_REMOVE_OPTIONS_INDEX] = pmap_remove_options_internal,
2089 [PMAP_RETURN_INDEX] = pmap_return_internal,
2090 [PMAP_SET_CACHE_ATTRIBUTES_INDEX] = pmap_set_cache_attributes_internal,
2091 [PMAP_UPDATE_COMPRESSOR_PAGE_INDEX] = pmap_update_compressor_page_internal,
2092 [PMAP_SET_NESTED_INDEX] = pmap_set_nested_internal,
2093 [PMAP_SET_PROCESS_INDEX] = pmap_set_process_internal,
2094 [PMAP_SWITCH_INDEX] = pmap_switch_internal,
2095 [PMAP_SWITCH_USER_TTB_INDEX] = pmap_switch_user_ttb_internal,
2096 [PMAP_CLEAR_USER_TTB_INDEX] = pmap_clear_user_ttb_internal,
2097 [PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX] = pmap_unmap_cpu_windows_copy_internal,
2098 [PMAP_UNNEST_OPTIONS_INDEX] = pmap_unnest_options_internal,
2099 [PMAP_FOOTPRINT_SUSPEND_INDEX] = pmap_footprint_suspend_internal,
2100 [PMAP_CPU_DATA_INIT_INDEX] = pmap_cpu_data_init_internal,
2101 [PMAP_RELEASE_PAGES_TO_KERNEL_INDEX] = pmap_release_ppl_pages_to_kernel_internal,
2102 [PMAP_SET_JIT_ENTITLED_INDEX] = pmap_set_jit_entitled_internal,
2103 [PMAP_TRIM_INDEX] = pmap_trim_internal,
2104 [PMAP_LEDGER_ALLOC_INIT_INDEX] = pmap_ledger_alloc_init_internal,
2105 [PMAP_LEDGER_ALLOC_INDEX] = pmap_ledger_alloc_internal,
2106 [PMAP_LEDGER_FREE_INDEX] = pmap_ledger_free_internal,
2107 #if HAS_APPLE_PAC && XNU_MONITOR
2108 [PMAP_SIGN_USER_PTR] = pmap_sign_user_ptr_internal,
2109 [PMAP_AUTH_USER_PTR] = pmap_auth_user_ptr_internal,
2110 #endif /* HAS_APPLE_PAC && XNU_MONITOR */
2111 };
2112
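/*
 * Dispatch note (inferred from the PMAP_SUPPORT_PROTOTYPES trampolines
 * above): each *_ppl entry point loads its __function_index into x15 before
 * branching to _aprr_ppl_enter, which is expected to use that index to pick
 * the matching *_internal handler out of ppl_handler_table once execution
 * has entered the PPL.
 */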
2113 static uint64_t
2114 pmap_get_ppl_cpu_id(void)
2115 {
2116 uint64_t mpidr_el1_value = 0;
2117
2118 /* We identify the CPU based on the constant bits of MPIDR_EL1. */
2119 MRS(mpidr_el1_value, "MPIDR_EL1");
2120
2121 #ifdef CPU_CLUSTER_OFFSETS
2122 uint64_t cluster_id = (mpidr_el1_value & MPIDR_AFF1_MASK) >> MPIDR_AFF1_SHIFT;
2123 assert(cluster_id < (sizeof(pmap_cluster_offsets) / sizeof(pmap_cluster_offsets[0])));
2124
2125 /* For multi-cluster configurations, AFF0 reflects the core number within the cluster. */
2126 mpidr_el1_value = (mpidr_el1_value & MPIDR_AFF0_MASK) + pmap_cluster_offsets[cluster_id];
2127 #else
2128 /*
2129 * AFF2 is not constant (it can change for e-core versus p-core on H9),
2130 * so mask it out.
2131 */
2132 mpidr_el1_value &= MPIDR_AFF0_MASK;
2133 #endif
2134
2135 if (mpidr_el1_value > MAX_CPUS) {
2136 panic("%s: mpidr_el1_value=%#llx > MAX_CPUS=%#x",
2137 __FUNCTION__, mpidr_el1_value, MAX_CPUS);
2138 }
2139
2140 return mpidr_el1_value;
2141 }
2142
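/*
 * Worked example with a hypothetical MPIDR_EL1 value of 0x0101 (chosen for
 * illustration only): AFF1 (cluster) == 1 and AFF0 (core within cluster) == 1.
 * With CPU_CLUSTER_OFFSETS defined, the flat CPU id becomes
 * pmap_cluster_offsets[1] + 1; otherwise only AFF0 is kept and the id is 1.
 * Either way the result is range-checked against MAX_CPUS above.
 */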
2143
2144 #endif
2145
2146
2147 /*
2148 * Allocates and initializes a per-CPU data structure for the pmap.
2149 */
2150 MARK_AS_PMAP_TEXT static void
2151 pmap_cpu_data_init_internal(unsigned int cpu_number)
2152 {
2153 pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();
2154
2155 #if XNU_MONITOR
2156 /* Verify cacheline-aligned */
2157 assert(((vm_offset_t)pmap_cpu_data & ((1 << L2_CLINE) - 1)) == 0);
2158 if (pmap_cpu_data->cpu_number != PMAP_INVALID_CPU_NUM) {
2159 panic("%s: pmap_cpu_data->cpu_number=%u, "
2160 "cpu_number=%u",
2161 __FUNCTION__, pmap_cpu_data->cpu_number,
2162 cpu_number);
2163 }
2164 #endif
2165 pmap_cpu_data->cpu_number = cpu_number;
2166 }
2167
2168 void
2169 pmap_cpu_data_init(void)
2170 {
2171 #if XNU_MONITOR
2172 pmap_cpu_data_init_ppl(cpu_number());
2173 #else
2174 pmap_cpu_data_init_internal(cpu_number());
2175 #endif
2176 }
2177
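/*
 * On XNU_MONITOR configurations, the routine below carves per-CPU PPL stacks
 * out of physical memory at avail_start, maps each stack with PPL read/write
 * permissions while leaving an unmapped guard page between stacks, and then
 * reserves per-CPU register save areas before initializing the boot CPU's
 * pmap_cpu_data.
 */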
2178 static void
2179 pmap_cpu_data_array_init(void)
2180 {
2181 #if XNU_MONITOR
2182 unsigned int i = 0;
2183 pmap_paddr_t ppl_cpu_save_area_cur = 0;
2184 pt_entry_t template, *pte_p;
2185 vm_offset_t stack_va = (vm_offset_t)pmap_stacks_start + ARM_PGBYTES;
2186 assert((pmap_stacks_start != NULL) && (pmap_stacks_end != NULL));
2187 pmap_stacks_start_pa = avail_start;
2188
2189 for (i = 0; i < MAX_CPUS; i++) {
2190 for (vm_offset_t cur_va = stack_va; cur_va < (stack_va + PPL_STACK_SIZE); cur_va += ARM_PGBYTES) {
2191 assert(cur_va < (vm_offset_t)pmap_stacks_end);
2192 pte_p = pmap_pte(kernel_pmap, cur_va);
2193 assert(*pte_p == ARM_PTE_EMPTY);
2194 template = pa_to_pte(avail_start) | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE |
2195 ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) | xprr_perm_to_pte(XPRR_PPL_RW_PERM);
2196 #if __ARM_KERNEL_PROTECT__
2197 template |= ARM_PTE_NG;
2198 #endif /* __ARM_KERNEL_PROTECT__ */
2199 WRITE_PTE(pte_p, template);
2200 __builtin_arm_isb(ISB_SY);
2201 avail_start += ARM_PGBYTES;
2202 }
2203 #if KASAN
2204 kasan_map_shadow(stack_va, PPL_STACK_SIZE, false);
2205 #endif
2206 pmap_cpu_data_array[i].cpu_data.cpu_id = i;
2207 pmap_cpu_data_array[i].cpu_data.cpu_number = PMAP_INVALID_CPU_NUM;
2208 pmap_cpu_data_array[i].cpu_data.ppl_state = PPL_STATE_KERNEL;
2209 pmap_cpu_data_array[i].cpu_data.ppl_stack = (void*)(stack_va + PPL_STACK_SIZE);
2210 stack_va += (PPL_STACK_SIZE + ARM_PGBYTES);
2211 }
2212 sync_tlb_flush();
2213 pmap_stacks_end_pa = avail_start;
2214
2215 ppl_cpu_save_area_start = avail_start;
2216 ppl_cpu_save_area_end = ppl_cpu_save_area_start;
2217 ppl_cpu_save_area_cur = ppl_cpu_save_area_start;
2218
2219 for (i = 0; i < MAX_CPUS; i++) {
2220 while ((ppl_cpu_save_area_end - ppl_cpu_save_area_cur) < sizeof(arm_context_t)) {
2221 avail_start += PAGE_SIZE;
2222 ppl_cpu_save_area_end = avail_start;
2223 }
2224
2225 pmap_cpu_data_array[i].cpu_data.save_area = (arm_context_t *)phystokv(ppl_cpu_save_area_cur);
2226 ppl_cpu_save_area_cur += sizeof(arm_context_t);
2227 }
2228 #endif
2229
2230 pmap_cpu_data_init();
2231 }
2232
2233 pmap_cpu_data_t *
2234 pmap_get_cpu_data(void)
2235 {
2236 pmap_cpu_data_t * pmap_cpu_data = NULL;
2237
2238 #if XNU_MONITOR
2239 uint64_t cpu_id = 0;
2240
2241 cpu_id = pmap_get_ppl_cpu_id();
2242 pmap_cpu_data = &pmap_cpu_data_array[cpu_id].cpu_data;
2243
2244 if (pmap_cpu_data->cpu_id != cpu_id) {
2245 panic("%s: CPU ID mismatch, cpu_id=0x%#llx, pmap_cpu_data->cpu_id=%#llx",
2246 __FUNCTION__, cpu_id, pmap_cpu_data->cpu_id);
2247 }
2248 #else
2249 pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
2250 #endif
2251
2252 return pmap_cpu_data;
2253 }
2254
2255 #if XNU_MONITOR
2256 /*
2257 * pmap_set_range_xprr_perm takes a range (specified using start and end) that
2258 * falls within the physical aperture. All mappings within this range have
2259 * their protections changed from those specified by expected_perm to those
2260 * specified by new_perm.
2261 */
2262 static void
2263 pmap_set_range_xprr_perm(vm_address_t start,
2264 vm_address_t end,
2265 unsigned int expected_perm,
2266 unsigned int new_perm)
2267 {
2268 #if (__ARM_VMSA__ == 7)
2269 #error This function is not supported on older ARM hardware
2270 #else
2271 pmap_t pmap = NULL;
2272
2273 vm_address_t va = 0;
2274 vm_address_t tte_start = 0;
2275 vm_address_t tte_end = 0;
2276
2277 tt_entry_t *tte_p = NULL;
2278 pt_entry_t *pte_p = NULL;
2279 pt_entry_t *cpte_p = NULL;
2280 pt_entry_t *bpte_p = NULL;
2281 pt_entry_t *epte_p = NULL;
2282
2283 tt_entry_t tte = 0;
2284 pt_entry_t cpte = 0;
2285 pt_entry_t template = 0;
2286
2287 pmap = kernel_pmap;
2288
2289 va = start;
2290
2291 /*
2292 * Validate our arguments; any invalid argument will be grounds for a
2293 * panic.
2294 */
2295 if ((start | end) % ARM_PGBYTES) {
2296 panic("%s: start or end not page aligned, "
2297 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2298 __FUNCTION__,
2299 (void *)start, (void *)end, new_perm, expected_perm);
2300 }
2301
2302 if (start > end) {
2303 panic("%s: start > end, "
2304 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2305 __FUNCTION__,
2306 (void *)start, (void *)end, new_perm, expected_perm);
2307 }
2308
2309 if (start < gVirtBase) {
2310 panic("%s: start is before physical aperture, "
2311 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2312 __FUNCTION__,
2313 (void *)start, (void *)end, new_perm, expected_perm);
2314 }
2315
2316 if (end > static_memory_end) {
2317 panic("%s: end is after physical aperture, "
2318 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2319 __FUNCTION__,
2320 (void *)start, (void *)end, new_perm, expected_perm);
2321 }
2322
2323 if ((new_perm > XPRR_MAX_PERM) || (expected_perm > XPRR_MAX_PERM)) {
2324 panic("%s: invalid XPRR index, "
2325 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2326 __FUNCTION__,
2327 (void *)start, (void *)end, new_perm, expected_perm);
2328 }
2329
2330 /*
2331 * Walk over the PTEs for the given range, and set the protections on
2332 * those PTEs.
2333 */
2334 while (va < end) {
2335 tte_start = va;
2336 tte_end = ((va + pt_attr_twig_size(native_pt_attr)) & ~pt_attr_twig_offmask(native_pt_attr));
2337
2338 if (tte_end > end) {
2339 tte_end = end;
2340 }
2341
2342 tte_p = pmap_tte(pmap, va);
2343
2344 /*
2345 * The physical aperture should not have holes.
2346 * The physical aperture should be contiguous.
2347 * Do not make eye contact with the physical aperture.
2348 */
2349 if (tte_p == NULL) {
2350 panic("%s: physical aperture tte is NULL, "
2351 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2352 __FUNCTION__,
2353 (void *)start, (void *)end, new_perm, expected_perm);
2354 }
2355
2356 tte = *tte_p;
2357
2358 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
2359 /*
2360 * Walk over the given L3 page table page and update the
2361 * PTEs.
2362 */
2363 pte_p = (pt_entry_t *)ttetokv(tte);
2364 bpte_p = &pte_p[ptenum(va)];
2365 epte_p = bpte_p + ((tte_end - va) >> pt_attr_leaf_shift(native_pt_attr));
2366
2367 for (cpte_p = bpte_p; cpte_p < epte_p;
2368 cpte_p += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
2369 int pai = (int)pa_index(pte_to_pa(*cpte_p));
2370 LOCK_PVH(pai);
2371 cpte = *cpte_p;
2372
2373 /*
2374 * Every PTE involved should be valid,
2375 * should not have the hint bit set,
2376 * and should have the expected APRR
2377 * index; any violation is grounds
2378 * for a panic.
2379 */
2380 if ((cpte & ARM_PTE_TYPE_MASK) ==
2381 ARM_PTE_TYPE_FAULT) {
2382 panic("%s: physical aperture PTE is invalid, va=%p, "
2383 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2384 __FUNCTION__,
2385 (void *)va,
2386 (void *)start, (void *)end, new_perm, expected_perm);
2387 UNLOCK_PVH(pai);
2388 continue;
2389 }
2390
2391 if (cpte & ARM_PTE_HINT_MASK) {
2392 panic("%s: physical aperture PTE has hint bit set, va=%p, cpte=0x%llx, "
2393 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2394 __FUNCTION__,
2395 (void *)va, cpte,
2396 (void *)start, (void *)end, new_perm, expected_perm);
2397 }
2398
2399 if (pte_to_xprr_perm(cpte) != expected_perm) {
2400 panic("%s: perm=%llu does not match expected_perm, cpte=0x%llx, "
2401 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2402 __FUNCTION__,
2403 pte_to_xprr_perm(cpte), cpte,
2404 (void *)start, (void *)end, new_perm, expected_perm);
2405 }
2406
2407 template = cpte;
2408 template &= ~ARM_PTE_XPRR_MASK;
2409 template |= xprr_perm_to_pte(new_perm);
2410
2411 WRITE_PTE_STRONG(cpte_p, template);
2412 UNLOCK_PVH(pai);
2413 }
2414 } else {
2415 panic("%s: tte=0x%llx is not a table type entry, "
2416 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2417 __FUNCTION__,
2418 tte,
2419 (void *)start, (void *)end, new_perm, expected_perm);
2420 }
2421
2422 va = tte_end;
2423 }
2424
2425 PMAP_UPDATE_TLBS(pmap, start, end, false);
2426 #endif /* (__ARM_VMSA__ == 7) */
2427 }
2428
2429 /*
2430 * A convenience function for setting protections on a single page.
2431 */
2432 static inline void
2433 pmap_set_xprr_perm(vm_address_t page_kva,
2434 unsigned int expected_perm,
2435 unsigned int new_perm)
2436 {
2437 pmap_set_range_xprr_perm(page_kva, page_kva + PAGE_SIZE, expected_perm, new_perm);
2438 }
2439 #endif /* XNU_MONITOR */
2440
2441
2442 /* TODO */
2443 pmap_paddr_t
2444 pmap_pages_reclaim(
2445 void)
2446 {
2447 boolean_t found_page;
2448 unsigned i;
2449 pt_desc_t *ptdp;
2450
2451 /*
2452 * pmap_pages_reclaim() returns a page by freeing an active pt page.
2453 * To be eligible, a pt page must be assigned to a user pmap, must not contain
2454 * any wired pte entries, and must contain at least one valid pte entry.
2455 *
2456 * In a loop, check for a page in the reclaimed pt page list.
2457 * If one is present, unlink that page and return its physical address.
2458 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
2459 * If one is found, invoke pmap_remove_range() on its pmap and address range,
2460 * then deallocate that pt page. This ends up adding the pt page to the
2461 * reclaimed pt page list.
2462 * If no eligible page is found in the pt page list, panic.
2463 */
2464
2465 pmap_simple_lock(&pmap_pages_lock);
2466 pmap_pages_request_count++;
2467 pmap_pages_request_acum++;
2468
2469 while (1) {
2470 if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
2471 page_free_entry_t *page_entry;
2472
2473 page_entry = pmap_pages_reclaim_list;
2474 pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
2475 pmap_simple_unlock(&pmap_pages_lock);
2476
2477 return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
2478 }
2479
2480 pmap_simple_unlock(&pmap_pages_lock);
2481
2482 pmap_simple_lock(&pt_pages_lock);
2483 ptdp = (pt_desc_t *)queue_first(&pt_page_list);
2484 found_page = FALSE;
2485
2486 while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
2487 if ((ptdp->pmap->nested == FALSE)
2488 && (pmap_simple_lock_try(&ptdp->pmap->lock))) {
2489 assert(ptdp->pmap != kernel_pmap);
2490 unsigned refcnt_acc = 0;
2491 unsigned wiredcnt_acc = 0;
2492
2493 for (i = 0; i < PT_INDEX_MAX; i++) {
2494 if (ptdp->ptd_info[i].refcnt == PT_DESC_REFCOUNT) {
2495 /* Do not attempt to free a page that contains an L2 table */
2496 refcnt_acc = 0;
2497 break;
2498 }
2499 refcnt_acc += ptdp->ptd_info[i].refcnt;
2500 wiredcnt_acc += ptdp->ptd_info[i].wiredcnt;
2501 }
2502 if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
2503 found_page = TRUE;
2504 /* Leave ptdp->pmap locked here. We're about to reclaim
2505 * a tt page from it, so we don't want anyone else messing
2506 * with it while we do that. */
2507 break;
2508 }
2509 pmap_simple_unlock(&ptdp->pmap->lock);
2510 }
2511 ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
2512 }
2513 if (!found_page) {
2514 panic("%s: No eligible page in pt_page_list", __FUNCTION__);
2515 } else {
2516 int remove_count = 0;
2517 bool need_strong_sync = false;
2518 vm_map_address_t va;
2519 pmap_t pmap;
2520 pt_entry_t *bpte, *epte;
2521 pt_entry_t *pte_p;
2522 tt_entry_t *tte_p;
2523 uint32_t rmv_spte = 0;
2524
2525 pmap_simple_unlock(&pt_pages_lock);
2526 pmap = ptdp->pmap;
2527 PMAP_ASSERT_LOCKED(pmap); // pmap lock should be held from loop above
2528
2529 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
2530
2531 for (i = 0; i < PT_INDEX_MAX; i++) {
2532 va = ptdp->ptd_info[i].va;
2533
2534 /* If the VA is bogus, this may represent an unallocated region
2535 * or one which is in transition (already being freed or expanded).
2536 * Don't try to remove mappings here. */
2537 if (va == (vm_offset_t)-1) {
2538 continue;
2539 }
2540
2541 tte_p = pmap_tte(pmap, va);
2542 if ((tte_p != (tt_entry_t *) NULL)
2543 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
2544 pte_p = (pt_entry_t *) ttetokv(*tte_p);
2545 bpte = &pte_p[pte_index(pmap, pt_attr, va)];
2546 epte = bpte + PAGE_SIZE / sizeof(pt_entry_t);
2547 /*
2548 * Use PMAP_OPTIONS_REMOVE to clear any
2549 * "compressed" markers and update the
2550 * "compressed" counter in pmap->stats.
2551 * This means that we lose accounting for
2552 * any compressed pages in this range
2553 * but the alternative is to not be able
2554 * to account for their future decompression,
2555 * which could cause the counter to drift
2556 * more and more.
2557 */
2558 remove_count += pmap_remove_range_options(
2559 pmap, va, bpte, epte,
2560 &rmv_spte, &need_strong_sync, PMAP_OPTIONS_REMOVE);
2561 if (ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt != 0) {
2562 panic("%s: ptdp %p, count %d", __FUNCTION__, ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
2563 }
2564
2565 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_TWIG_LEVEL);
2566
2567 if (remove_count > 0) {
2568 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, (unsigned int)pt_attr_leaf_table_size(pt_attr), pmap);
2569 } else {
2570 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(va, pmap);
2571 }
2572 }
2573 }
2574 // Undo the lock we grabbed when we found ptdp above
2575 PMAP_UNLOCK(pmap);
2576 pmap_sync_tlb(need_strong_sync);
2577 }
2578 pmap_simple_lock(&pmap_pages_lock);
2579 }
2580 }
2581
2582 #if XNU_MONITOR
2583 /*
2584 * Return a PPL page to the free list.
2585 */
2586 static void
2587 pmap_give_free_ppl_page(pmap_paddr_t paddr)
2588 {
2589 assert((paddr & ARM_PGMASK) == 0);
2590 void ** new_head = (void **)phystokv(paddr);
2591 pmap_simple_lock(&pmap_ppl_free_page_lock);
2592
2593 void * cur_head = pmap_ppl_free_page_list;
2594 *new_head = cur_head;
2595 pmap_ppl_free_page_list = new_head;
2596 pmap_ppl_free_page_count++;
2597
2598 pmap_simple_unlock(&pmap_ppl_free_page_lock);
2599 }
2600
2601 /*
2602 * Get a PPL page from the free list.
2603 */
2604 static pmap_paddr_t
2605 pmap_get_free_ppl_page(void)
2606 {
2607 pmap_paddr_t result = 0;
2608
2609 pmap_simple_lock(&pmap_ppl_free_page_lock);
2610
2611 if (pmap_ppl_free_page_list != NULL) {
2612 void ** new_head = NULL;
2613 new_head = *((void**)pmap_ppl_free_page_list);
2614 result = kvtophys((vm_offset_t)pmap_ppl_free_page_list);
2615 pmap_ppl_free_page_list = new_head;
2616 pmap_ppl_free_page_count--;
2617 } else {
2618 result = 0L;
2619 }
2620
2621 pmap_simple_unlock(&pmap_ppl_free_page_lock);
2622 assert((result & ARM_PGMASK) == 0);
2623
2624 return result;
2625 }
2626
2627 /*
2628 * pmap_mark_page_as_ppl_page claims a page on behalf of the PPL by marking it
2629 * as PPL-owned and only allowing the PPL to write to it.
2630 */
2631 MARK_AS_PMAP_TEXT static void
2632 pmap_mark_page_as_ppl_page_internal(pmap_paddr_t pa)
2633 {
2634 vm_offset_t kva = 0;
2635 unsigned int pai = 0;
2636 pp_attr_t attr;
2637
2638 /*
2639 * Mark each page that we allocate as belonging to the monitor, as we
2640 * intend to use it for monitor-y stuff (page tables, table pages, that
2641 * sort of thing).
2642 */
2643 assert(!TEST_PAGE_RATIO_4);
2644
2645 if (!pa_valid(pa)) {
2646 panic("%s: bad address, "
2647 "pa=%p",
2648 __func__,
2649 (void *)pa);
2650 }
2651
2652 pai = (unsigned int)pa_index(pa);
2653 LOCK_PVH(pai);
2654
2655 /* A page that the PPL already owns can't be given to the PPL. */
2656 if (pa_test_monitor(pa)) {
2657 panic("%s: page already belongs to PPL, "
2658 "pa=0x%llx",
2659 __FUNCTION__,
2660 pa);
2661 }
2662 /* The page cannot be mapped outside of the physical aperture. */
2663 if (!pmap_verify_free((ppnum_t)atop(pa))) {
2664 panic("%s: page is not free, "
2665 "pa=0x%llx",
2666 __FUNCTION__,
2667 pa);
2668 }
2669
2670 do {
2671 attr = pp_attr_table[pai];
2672 if (attr & PP_ATTR_NO_MONITOR) {
2673 panic("%s: page excluded from PPL, "
2674 "pa=0x%llx",
2675 __FUNCTION__,
2676 pa);
2677 }
2678 } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_MONITOR, &pp_attr_table[pai]));
2679
2680 UNLOCK_PVH(pai);
2681
2682 kva = phystokv(pa);
2683 pmap_set_xprr_perm(kva, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
2684 bzero((void *)(kva & ~PAGE_MASK), PAGE_SIZE);
2685
2686 pmap_give_free_ppl_page(pa);
2687 }
2688
2689 static void
2690 pmap_mark_page_as_ppl_page(pmap_paddr_t pa)
2691 {
2692 pmap_mark_page_as_ppl_page_ppl(pa);
2693 }
2694
2695 static void
2696 pmap_mark_page_as_kernel_page(pmap_paddr_t pa)
2697 {
2698 vm_offset_t kva = 0;
2699 unsigned int pai = 0;
2700
2701 pai = (unsigned int)pa_index(pa);
2702 LOCK_PVH(pai);
2703
2704 if (!pa_test_monitor(pa)) {
2705 panic("%s: page is not a PPL page, "
2706 "pa=%p",
2707 __FUNCTION__,
2708 (void *)pa);
2709 }
2710
2711 pa_clear_monitor(pa);
2712 UNLOCK_PVH(pai);
2713
2714 kva = phystokv(pa);
2715 pmap_set_xprr_perm(kva, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
2716 }
2717
2718 MARK_AS_PMAP_TEXT static pmap_paddr_t
2719 pmap_release_ppl_pages_to_kernel_internal(void)
2720 {
2721 pmap_paddr_t pa = 0;
2722
2723 if (pmap_ppl_free_page_count <= PMAP_MIN_FREE_PPL_PAGES) {
2724 goto done;
2725 }
2726
2727 pa = pmap_get_free_ppl_page();
2728
2729 if (!pa) {
2730 goto done;
2731 }
2732
2733 pmap_mark_page_as_kernel_page(pa);
2734
2735 done:
2736 return pa;
2737 }
2738
2739 static uint64_t
2740 pmap_release_ppl_pages_to_kernel(void)
2741 {
2742 pmap_paddr_t pa = 0;
2743 vm_page_t m = VM_PAGE_NULL;
2744 vm_page_t local_freeq = VM_PAGE_NULL;
2745 uint64_t pmap_ppl_pages_returned_to_kernel_count = 0;
2746
2747 while (pmap_ppl_free_page_count > PMAP_MIN_FREE_PPL_PAGES) {
2748 pa = pmap_release_ppl_pages_to_kernel_ppl();
2749
2750 if (!pa) {
2751 break;
2752 }
2753
2754 /* If we retrieved a page, add it to the free queue. */
2755 vm_object_lock(pmap_object);
2756 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
2757 assert(m != VM_PAGE_NULL);
2758 assert(VM_PAGE_WIRED(m));
2759
2760 m->vmp_busy = TRUE;
2761 m->vmp_snext = local_freeq;
2762 local_freeq = m;
2763 pmap_ppl_pages_returned_to_kernel_count++;
2764 pmap_ppl_pages_returned_to_kernel_count_total++;
2765
2766 vm_object_unlock(pmap_object);
2767 }
2768
2769 if (local_freeq) {
2770 /* We need to hold the object lock for freeing pages. */
2771 vm_object_lock(pmap_object);
2772 vm_page_free_list(local_freeq, TRUE);
2773 vm_object_unlock(pmap_object);
2774 }
2775
2776 return pmap_ppl_pages_returned_to_kernel_count;
2777 }
2778 #endif
2779
2780 static kern_return_t
2781 pmap_pages_alloc(
2782 pmap_paddr_t *pa,
2783 unsigned size,
2784 unsigned option)
2785 {
2786 #if XNU_MONITOR
2787 if (size != PAGE_SIZE) {
2788 panic("%s: size != PAGE_SIZE, "
2789 "pa=%p, size=%u, option=%u",
2790 __FUNCTION__,
2791 pa, size, option);
2792 }
2793
2794 if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
2795 *pa = pmap_pages_reclaim();
2796 assert(*pa);
2797 return KERN_SUCCESS;
2798 }
2799
2800 assert(option & PMAP_PAGES_ALLOCATE_NOWAIT);
2801
2802 *pa = pmap_get_free_ppl_page();
2803
2804 if (*pa == 0) {
2805 return KERN_RESOURCE_SHORTAGE;
2806 } else {
2807 return KERN_SUCCESS;
2808 }
2809 #else
2810 vm_page_t m = VM_PAGE_NULL, m_prev;
2811
2812 if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
2813 assert(size == PAGE_SIZE);
2814 *pa = pmap_pages_reclaim();
2815 return KERN_SUCCESS;
2816 }
2817 if (size == PAGE_SIZE) {
2818 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
2819 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
2820 return KERN_RESOURCE_SHORTAGE;
2821 }
2822
2823 VM_PAGE_WAIT();
2824 }
2825 vm_page_lock_queues();
2826 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
2827 vm_page_unlock_queues();
2828 }
2829 if (size == 2 * PAGE_SIZE) {
2830 while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
2831 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
2832 return KERN_RESOURCE_SHORTAGE;
2833 }
2834
2835 VM_PAGE_WAIT();
2836 }
2837 }
2838
2839 *pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
2840
2841 vm_object_lock(pmap_object);
2842 while (m != VM_PAGE_NULL) {
2843 vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
2844 m_prev = m;
2845 m = NEXT_PAGE(m_prev);
2846 *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
2847 }
2848 vm_object_unlock(pmap_object);
2849
2850 OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
2851 OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);
2852
2853 return KERN_SUCCESS;
2854 #endif
2855 }
2856
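/*
 * Typical caller pattern (as used by pv_alloc() and ptd_alloc_unlinked()
 * later in this file): try a non-blocking allocation first, and fall back to
 * reclaiming an active page table page only when the free pool is exhausted.
 *
 *     pmap_paddr_t pa;
 *     kern_return_t ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
 *     if (ret == KERN_RESOURCE_SHORTAGE) {
 *         ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
 *     }
 */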
2857 #if XNU_MONITOR
2858 static pmap_paddr_t
2859 pmap_alloc_page_for_kern(void)
2860 {
2861 pmap_paddr_t paddr = 0;
2862 vm_page_t m, m_prev;
2863
2864 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
2865 VM_PAGE_WAIT();
2866 }
2867
2868 vm_page_lock_queues();
2869 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
2870 vm_page_unlock_queues();
2871
2872 paddr = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
2873
2874 if (paddr == 0) {
2875 panic("%s: paddr is 0",
2876 __FUNCTION__);
2877 }
2878
2879 vm_object_lock(pmap_object);
2880
2881 while (m != VM_PAGE_NULL) {
2882 vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
2883 m_prev = m;
2884 m = NEXT_PAGE(m_prev);
2885 *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
2886 }
2887
2888 vm_object_unlock(pmap_object);
2889
2890 OSAddAtomic(1, &inuse_pmap_pages_count);
2891 OSAddAtomic64(1, &alloc_pmap_pages_count);
2892
2893 return paddr;
2894 }
2895
2896 static void
2897 pmap_alloc_page_for_ppl(void)
2898 {
2899 pmap_mark_page_as_ppl_page(pmap_alloc_page_for_kern());
2900 }
2901
2902 static pmap_t
2903 pmap_alloc_pmap(void)
2904 {
2905 pmap_t pmap = PMAP_NULL;
2906
2907 pmap_simple_lock(&pmap_free_list_lock);
2908
2909 if (pmap_free_list != PMAP_NULL) {
2910 pmap = pmap_free_list;
2911 pmap_free_list = *((pmap_t *)pmap);
2912
2913 if (!PMAP_PTR_IS_VALID(pmap)) {
2914 panic("%s: allocated pmap is not valid, pmap=%p",
2915 __FUNCTION__, pmap);
2916 }
2917 }
2918
2919 pmap_simple_unlock(&pmap_free_list_lock);
2920
2921 return pmap;
2922 }
2923
2924 static void
2925 pmap_free_pmap(pmap_t pmap)
2926 {
2927 if (!PMAP_PTR_IS_VALID(pmap)) {
2928 panic("%s: pmap is not valid, "
2929 "pmap=%p",
2930 __FUNCTION__,
2931 pmap);
2932 }
2933
2934 pmap_simple_lock(&pmap_free_list_lock);
2935 *((pmap_t *)pmap) = pmap_free_list;
2936 pmap_free_list = pmap;
2937 pmap_simple_unlock(&pmap_free_list_lock);
2938 }
2939
2940 static void
2941 pmap_bootstrap_pmap_free_list(void)
2942 {
2943 pmap_t cur_head = PMAP_NULL;
2944 unsigned long i = 0;
2945
2946 simple_lock_init(&pmap_free_list_lock, 0);
2947
2948 for (i = 0; i < pmap_array_count; i++) {
2949 *((pmap_t *)(&pmap_array[i])) = cur_head;
2950 cur_head = &pmap_array[i];
2951 }
2952
2953 pmap_free_list = cur_head;
2954 }
2955 #endif
2956
2957 static void
2958 pmap_pages_free(
2959 pmap_paddr_t pa,
2960 unsigned size)
2961 {
2962 pmap_simple_lock(&pmap_pages_lock);
2963
2964 if (pmap_pages_request_count != 0) {
2965 page_free_entry_t *page_entry;
2966
2967 pmap_pages_request_count--;
2968 page_entry = (page_free_entry_t *)phystokv(pa);
2969 page_entry->next = pmap_pages_reclaim_list;
2970 pmap_pages_reclaim_list = page_entry;
2971 pmap_simple_unlock(&pmap_pages_lock);
2972
2973 return;
2974 }
2975
2976 pmap_simple_unlock(&pmap_pages_lock);
2977
2978 #if XNU_MONITOR
2979 (void)size;
2980
2981 pmap_give_free_ppl_page(pa);
2982 #else
2983 vm_page_t m;
2984 pmap_paddr_t pa_max;
2985
2986 OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);
2987
2988 for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
2989 vm_object_lock(pmap_object);
2990 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
2991 assert(m != VM_PAGE_NULL);
2992 assert(VM_PAGE_WIRED(m));
2993 vm_page_lock_queues();
2994 vm_page_free(m);
2995 vm_page_unlock_queues();
2996 vm_object_unlock(pmap_object);
2997 }
2998 #endif
2999 }
3000
3001 static inline void
3002 PMAP_ZINFO_PALLOC(
3003 pmap_t pmap, int bytes)
3004 {
3005 pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
3006 }
3007
3008 static inline void
3009 PMAP_ZINFO_PFREE(
3010 pmap_t pmap,
3011 int bytes)
3012 {
3013 pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
3014 }
3015
3016 static inline void
3017 pmap_tt_ledger_credit(
3018 pmap_t pmap,
3019 vm_size_t size)
3020 {
3021 if (pmap != kernel_pmap) {
3022 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
3023 pmap_ledger_credit(pmap, task_ledgers.page_table, size);
3024 }
3025 }
3026
3027 static inline void
3028 pmap_tt_ledger_debit(
3029 pmap_t pmap,
3030 vm_size_t size)
3031 {
3032 if (pmap != kernel_pmap) {
3033 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
3034 pmap_ledger_debit(pmap, task_ledgers.page_table, size);
3035 }
3036 }
3037
3038 static bool
3039 alloc_asid(pmap_t pmap)
3040 {
3041 int vasid;
3042 uint16_t hw_asid;
3043
3044 pmap_simple_lock(&asid_lock);
3045 vasid = bitmap_first(&asid_bitmap[0], MAX_ASID);
3046 if (vasid < 0) {
3047 pmap_simple_unlock(&asid_lock);
3048 return false;
3049 }
3050 assert(vasid < MAX_ASID);
3051 bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
3052 pmap_simple_unlock(&asid_lock);
3053 // bitmap_first() returns highest-order bits first, but a 0-based scheme works
3054 // slightly better with the collision detection scheme used by pmap_switch_internal().
3055 vasid = MAX_ASID - 1 - vasid;
3056 hw_asid = vasid % MAX_HW_ASID;
3057 pmap->sw_asid = vasid / MAX_HW_ASID;
3058 hw_asid += 1; // Account for ASID 0, which is reserved for the kernel
3059 #if __ARM_KERNEL_PROTECT__
3060 hw_asid <<= 1; // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
3061 #endif
3062 pmap->hw_asid = hw_asid;
3063 return true;
3064 }
3065
3066 static void
3067 free_asid(pmap_t pmap)
3068 {
3069 unsigned int vasid;
3070 uint16_t hw_asid = pmap->hw_asid;
3071 assert(hw_asid != 0); // Should not try to free kernel ASID
3072
3073 #if __ARM_KERNEL_PROTECT__
3074 hw_asid >>= 1;
3075 #endif
3076 hw_asid -= 1;
3077
3078 vasid = ((unsigned int)pmap->sw_asid * MAX_HW_ASID) + hw_asid;
3079 vasid = MAX_ASID - 1 - vasid;
3080
3081 pmap_simple_lock(&asid_lock);
3082 assert(!bitmap_test(&asid_bitmap[0], vasid));
3083 bitmap_set(&asid_bitmap[0], vasid);
3084 pmap_simple_unlock(&asid_lock);
3085 }
3086
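/*
 * Illustration of the ASID packing above (the MAX_HW_ASID value below is
 * assumed purely for this example): with MAX_HW_ASID == 255 and a slot that
 * maps to vasid == 300, hw_asid == 300 % 255 == 45 (incremented by one to
 * skip the kernel's reserved ASID 0, and shifted left by one under
 * __ARM_KERNEL_PROTECT__ to cover the EL0/EL1 pair) and
 * sw_asid == 300 / 255 == 1.  free_asid() reverses the same arithmetic to
 * return the slot to asid_bitmap.
 */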
3087
3088 #ifndef PMAP_PV_LOAD_FACTOR
3089 #define PMAP_PV_LOAD_FACTOR 1
3090 #endif
3091
3092 #define PV_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
3093 #define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
3094 #define PV_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
3095 #define PV_KERN_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
3096 #define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
3097 #define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
3098
3099 uint32_t pv_free_count MARK_AS_PMAP_DATA = 0;
3100 uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
3101 uint32_t pv_kern_free_count MARK_AS_PMAP_DATA = 0;
3102
3103 uint32_t pv_low_water_mark MARK_AS_PMAP_DATA;
3104 uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA;
3105 uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA;
3106 uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA;
3107
3108 thread_t mapping_replenish_thread;
3109 event_t mapping_replenish_event;
3110 volatile uint32_t mappingrecurse = 0;
3111
3112 unsigned pmap_mapping_thread_wakeups;
3113 unsigned pmap_reserve_replenish_stat MARK_AS_PMAP_DATA;
3114 unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
3115
3116
3117 static void
3118 pv_init(
3119 void)
3120 {
3121 simple_lock_init(&pv_free_list_lock, 0);
3122 simple_lock_init(&pv_kern_free_list_lock, 0);
3123 pv_free_list = PV_ENTRY_NULL;
3124 pv_free_count = 0x0U;
3125 pv_kern_free_list = PV_ENTRY_NULL;
3126 pv_kern_free_count = 0x0U;
3127 }
3128
3129 static inline void PV_ALLOC(pv_entry_t **pv_ep);
3130 static inline void PV_KERN_ALLOC(pv_entry_t **pv_e);
3131 static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target);
3132
3133 static boolean_t
3134 pv_alloc(
3135 pmap_t pmap,
3136 unsigned int pai,
3137 pv_entry_t **pvepp)
3138 {
3139 if (pmap != NULL) {
3140 PMAP_ASSERT_LOCKED(pmap);
3141 }
3142 ASSERT_PVH_LOCKED(pai);
3143 PV_ALLOC(pvepp);
3144 if (PV_ENTRY_NULL == *pvepp) {
3145 if ((pmap == NULL) || (kernel_pmap == pmap)) {
3146 PV_KERN_ALLOC(pvepp);
3147
3148 if (PV_ENTRY_NULL == *pvepp) {
3149 pv_entry_t *pv_e;
3150 pv_entry_t *pv_eh;
3151 pv_entry_t *pv_et;
3152 int pv_cnt;
3153 unsigned j;
3154 pmap_paddr_t pa;
3155 kern_return_t ret;
3156
3157 UNLOCK_PVH(pai);
3158 if (pmap != NULL) {
3159 PMAP_UNLOCK(pmap);
3160 }
3161
3162 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
3163
3164 if (ret == KERN_RESOURCE_SHORTAGE) {
3165 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
3166 }
3167
3168 if (ret != KERN_SUCCESS) {
3169 panic("%s: failed to alloc page for kernel, ret=%d, "
3170 "pmap=%p, pai=%u, pvepp=%p",
3171 __FUNCTION__, ret,
3172 pmap, pai, pvepp);
3173 }
3174
3175 pv_page_count++;
3176
3177 pv_e = (pv_entry_t *)phystokv(pa);
3178 pv_cnt = 0;
3179 pv_eh = pv_et = PV_ENTRY_NULL;
3180 *pvepp = pv_e;
3181 pv_e++;
3182
3183 for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
3184 pv_e->pve_next = pv_eh;
3185 pv_eh = pv_e;
3186
3187 if (pv_et == PV_ENTRY_NULL) {
3188 pv_et = pv_e;
3189 }
3190 pv_cnt++;
3191 pv_e++;
3192 }
3193 PV_FREE_LIST(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);
3194 if (pmap != NULL) {
3195 PMAP_LOCK(pmap);
3196 }
3197 LOCK_PVH(pai);
3198 return FALSE;
3199 }
3200 } else {
3201 UNLOCK_PVH(pai);
3202 PMAP_UNLOCK(pmap);
3203
3204 pv_entry_t *pv_e;
3205 pv_entry_t *pv_eh;
3206 pv_entry_t *pv_et;
3207 int pv_cnt;
3208 unsigned j;
3209 pmap_paddr_t pa;
3210 kern_return_t ret;
3211
3212 #if XNU_MONITOR
3213 /*
3214 * The PPL has no guarantee that its allocation
3215 * will succeed, so steal pages if necessary to
3216 * ensure that we can free up a PV allocation.
3217 */
3218 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
3219
3220 if (ret == KERN_RESOURCE_SHORTAGE) {
3221 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
3222 }
3223 #else
3224 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
3225 #endif
3226
3227 if (ret != KERN_SUCCESS) {
3228 panic("%s: failed to alloc page, ret=%d, "
3229 "pmap=%p, pai=%u, pvepp=%p",
3230 __FUNCTION__, ret,
3231 pmap, pai, pvepp);
3232 }
3233
3234 pv_page_count++;
3235
3236 pv_e = (pv_entry_t *)phystokv(pa);
3237 pv_cnt = 0;
3238 pv_eh = pv_et = PV_ENTRY_NULL;
3239 *pvepp = pv_e;
3240 pv_e++;
3241
3242 for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
3243 pv_e->pve_next = pv_eh;
3244 pv_eh = pv_e;
3245
3246 if (pv_et == PV_ENTRY_NULL) {
3247 pv_et = pv_e;
3248 }
3249 pv_cnt++;
3250 pv_e++;
3251 }
3252
3253 PV_FREE_LIST(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);
3254
3255 PMAP_LOCK(pmap);
3256 LOCK_PVH(pai);
3257 return FALSE;
3258 }
3259 }
3260 assert(PV_ENTRY_NULL != *pvepp);
3261 return TRUE;
3262 }
3263
3264 static void
3265 pv_free(
3266 pv_entry_t *pvep)
3267 {
3268 PV_FREE_LIST(pvep, pvep, 1, pv_kern_low_water_mark);
3269 }
3270
3271 static void
3272 pv_list_free(
3273 pv_entry_t *pvehp,
3274 pv_entry_t *pvetp,
3275 unsigned int cnt)
3276 {
3277 PV_FREE_LIST(pvehp, pvetp, cnt, pv_kern_low_water_mark);
3278 }
3279
3280 static inline void
3281 pv_water_mark_check(void)
3282 {
3283 if (__improbable((pv_free_count < pv_low_water_mark) || (pv_kern_free_count < pv_kern_low_water_mark))) {
3284 if (!mappingrecurse && os_atomic_cmpxchg(&mappingrecurse, 0, 1, acq_rel)) {
3285 thread_wakeup(&mapping_replenish_event);
3286 }
3287 }
3288 }
3289
3290 static inline void
3291 PV_ALLOC(pv_entry_t **pv_ep)
3292 {
3293 assert(*pv_ep == PV_ENTRY_NULL);
3294 #if !XNU_MONITOR
3295 if (pv_kern_free_count < pv_kern_low_water_mark) {
3296 /*
3297 * If the kernel reserved pool is low, let non-kernel mappings wait for a page
3298 * from the VM.
3299 */
3300 return;
3301 }
3302 #endif
3303 pmap_simple_lock(&pv_free_list_lock);
3304
3305 if ((*pv_ep = pv_free_list) != 0) {
3306 pv_free_list = (pv_entry_t *)(*pv_ep)->pve_next;
3307 (*pv_ep)->pve_next = PV_ENTRY_NULL;
3308 pv_free_count--;
3309 }
3310
3311 pmap_simple_unlock(&pv_free_list_lock);
3312 }
3313
3314 static inline void
3315 PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target)
3316 {
3317 bool use_kernel_list = false;
3318 pmap_simple_lock(&pv_kern_free_list_lock);
3319 if (pv_kern_free_count < kern_target) {
3320 pv_et->pve_next = pv_kern_free_list;
3321 pv_kern_free_list = pv_eh;
3322 pv_kern_free_count += pv_cnt;
3323 use_kernel_list = true;
3324 }
3325 pmap_simple_unlock(&pv_kern_free_list_lock);
3326
3327 if (!use_kernel_list) {
3328 pmap_simple_lock(&pv_free_list_lock);
3329 pv_et->pve_next = (pv_entry_t *)pv_free_list;
3330 pv_free_list = pv_eh;
3331 pv_free_count += pv_cnt;
3332 pmap_simple_unlock(&pv_free_list_lock);
3333 }
3334 }
3335
3336 static inline void
3337 PV_KERN_ALLOC(pv_entry_t **pv_e)
3338 {
3339 assert(*pv_e == PV_ENTRY_NULL);
3340 pmap_simple_lock(&pv_kern_free_list_lock);
3341
3342 if ((*pv_e = pv_kern_free_list) != 0) {
3343 pv_kern_free_list = (pv_entry_t *)(*pv_e)->pve_next;
3344 (*pv_e)->pve_next = PV_ENTRY_NULL;
3345 pv_kern_free_count--;
3346 pmap_kern_reserve_alloc_stat++;
3347 }
3348
3349 pmap_simple_unlock(&pv_kern_free_list_lock);
3350 }
3351
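/*
 * Summary of the allocation helpers above: pv_entry_t objects live on two
 * free lists.  PV_ALLOC serves general mappings from pv_free_list, but on
 * non-XNU_MONITOR builds it backs off when the kernel reserve drops below
 * pv_kern_low_water_mark; PV_KERN_ALLOC draws from the kernel-only
 * pv_kern_free_list.  PV_FREE_LIST returns a batch to the kernel list when
 * that list is below the given target, and to the general list otherwise.
 */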
3352 /*
3353 * Creates a target number of free pv_entry_t objects for the kernel free list
3354 * and the general free list.
3355 */
3356 MARK_AS_PMAP_TEXT static kern_return_t
3357 mapping_free_prime_internal(void)
3358 {
3359 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_called = FALSE;
3360 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_done = FALSE;
3361
3362 if (mapping_free_prime_internal_done) {
3363 return KERN_FAILURE;
3364 }
3365
3366 if (!mapping_free_prime_internal_called) {
3367 mapping_free_prime_internal_called = TRUE;
3368
3369 pv_low_water_mark = PV_LOW_WATER_MARK_DEFAULT;
3370
3371 /* Alterable via sysctl */
3372 pv_kern_low_water_mark = PV_KERN_LOW_WATER_MARK_DEFAULT;
3373
3374 pv_kern_alloc_chunk = PV_KERN_ALLOC_CHUNK_INITIAL;
3375 pv_alloc_chunk = PV_ALLOC_CHUNK_INITIAL;
3376 }
3377
3378 return mapping_replenish_internal(PV_KERN_ALLOC_INITIAL_TARGET, PV_ALLOC_INITIAL_TARGET);
3379 }
3380
3381 void
3382 mapping_free_prime(void)
3383 {
3384 kern_return_t kr = KERN_FAILURE;
3385
3386 #if XNU_MONITOR
3387 unsigned int i = 0;
3388
3389 /*
3390 * Allocate the needed PPL pages up front, to minimize the chance that
3391 * we will need to call into the PPL multiple times.
3392 */
3393 for (i = 0; i < PV_ALLOC_INITIAL_TARGET; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
3394 pmap_alloc_page_for_ppl();
3395 }
3396
3397 for (i = 0; i < PV_KERN_ALLOC_INITIAL_TARGET; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
3398 pmap_alloc_page_for_ppl();
3399 }
3400
3401 while ((kr = mapping_free_prime_ppl()) == KERN_RESOURCE_SHORTAGE) {
3402 pmap_alloc_page_for_ppl();
3403 }
3404 #else
3405 kr = mapping_free_prime_internal();
3406 #endif
3407
3408 if (kr != KERN_SUCCESS) {
3409 panic("%s: failed, kr=%d",
3410 __FUNCTION__, kr);
3411 }
3412 }
3413
3414 void mapping_replenish(void);
3415
3416 void
3417 mapping_adjust(void)
3418 {
3419 kern_return_t mres;
3420
3421 mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
3422 if (mres != KERN_SUCCESS) {
3423 panic("%s: mapping_replenish thread creation failed",
3424 __FUNCTION__);
3425 }
3426 thread_deallocate(mapping_replenish_thread);
3427 }
3428
3429 /*
3430 * Fills the kernel and general PV free lists back up to the given target counts.
3431 */
3432 MARK_AS_PMAP_TEXT static kern_return_t
3433 mapping_replenish_internal(uint32_t kern_target_count, uint32_t user_target_count)
3434 {
3435 pv_entry_t *pv_e;
3436 pv_entry_t *pv_eh;
3437 pv_entry_t *pv_et;
3438 int pv_cnt;
3439 unsigned j;
3440 pmap_paddr_t pa;
3441 kern_return_t ret = KERN_SUCCESS;
3442
3443 while ((pv_free_count < user_target_count) || (pv_kern_free_count < kern_target_count)) {
3444 pv_cnt = 0;
3445 pv_eh = pv_et = PV_ENTRY_NULL;
3446
3447 #if XNU_MONITOR
3448 if ((ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT)) != KERN_SUCCESS) {
3449 return ret;
3450 }
3451 #else
3452 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
3453 assert(ret == KERN_SUCCESS);
3454 #endif
3455
3456 pv_page_count++;
3457
3458 pv_e = (pv_entry_t *)phystokv(pa);
3459
3460 for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
3461 pv_e->pve_next = pv_eh;
3462 pv_eh = pv_e;
3463
3464 if (pv_et == PV_ENTRY_NULL) {
3465 pv_et = pv_e;
3466 }
3467 pv_cnt++;
3468 pv_e++;
3469 }
3470 pmap_reserve_replenish_stat += pv_cnt;
3471 PV_FREE_LIST(pv_eh, pv_et, pv_cnt, kern_target_count);
3472 }
3473
3474 return ret;
3475 }
3476
3477 /*
3478 * Continuation function that keeps the PV free lists from running out of free
3479 * elements.
3480 */
3481 __attribute__((noreturn))
3482 void
3483 mapping_replenish(void)
3484 {
3485 kern_return_t kr;
3486
3487 /* We qualify for VM privileges...*/
3488 current_thread()->options |= TH_OPT_VMPRIV;
3489
3490 for (;;) {
3491 #if XNU_MONITOR
3492
3493 while ((kr = mapping_replenish_ppl(pv_kern_low_water_mark, pv_low_water_mark)) == KERN_RESOURCE_SHORTAGE) {
3494 pmap_alloc_page_for_ppl();
3495 }
3496 #else
3497 kr = mapping_replenish_internal(pv_kern_low_water_mark, pv_low_water_mark);
3498 #endif
3499
3500 if (kr != KERN_SUCCESS) {
3501 panic("%s: failed, kr=%d", __FUNCTION__, kr);
3502 }
3503
3504 /* Check if the kernel pool has been depleted since the
3505 * first pass, to reduce refill latency.
3506 */
3507 if (pv_kern_free_count < pv_kern_low_water_mark) {
3508 continue;
3509 }
3510 /* Block sans continuation to avoid yielding kernel stack */
3511 assert_wait(&mapping_replenish_event, THREAD_UNINT);
3512 mappingrecurse = 0;
3513 thread_block(THREAD_CONTINUE_NULL);
3514 pmap_mapping_thread_wakeups++;
3515 }
3516 }
3517
3518
3519 static void
3520 ptd_bootstrap(
3521 pt_desc_t *ptdp,
3522 unsigned int ptd_cnt)
3523 {
3524 simple_lock_init(&ptd_free_list_lock, 0);
3525 while (ptd_cnt != 0) {
3526 (*(void **)ptdp) = (void *)ptd_free_list;
3527 ptd_free_list = ptdp;
3528 ptdp++;
3529 ptd_cnt--;
3530 ptd_free_count++;
3531 }
3532 ptd_preboot = FALSE;
3533 }
3534
3535 static pt_desc_t*
3536 ptd_alloc_unlinked(bool reclaim)
3537 {
3538 pt_desc_t *ptdp;
3539 unsigned i;
3540
3541 if (!ptd_preboot) {
3542 pmap_simple_lock(&ptd_free_list_lock);
3543 }
3544
3545 if (ptd_free_count == 0) {
3546 unsigned int ptd_cnt;
3547 pt_desc_t *ptdp_next;
3548
3549 if (ptd_preboot) {
3550 ptdp = (pt_desc_t *)avail_start;
3551 avail_start += ARM_PGBYTES;
3552 ptdp_next = ptdp;
3553 ptd_cnt = ARM_PGBYTES / sizeof(pt_desc_t);
3554 } else {
3555 pmap_paddr_t pa;
3556 kern_return_t ret;
3557
3558 pmap_simple_unlock(&ptd_free_list_lock);
3559
3560 if (pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
3561 if (reclaim) {
3562 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
3563 assert(ret == KERN_SUCCESS);
3564 } else {
3565 return NULL;
3566 }
3567 }
3568 ptdp = (pt_desc_t *)phystokv(pa);
3569
3570 pmap_simple_lock(&ptd_free_list_lock);
3571 ptdp_next = ptdp;
3572 ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
3573 }
3574
3575 while (ptd_cnt != 0) {
3576 (*(void **)ptdp_next) = (void *)ptd_free_list;
3577 ptd_free_list = ptdp_next;
3578 ptdp_next++;
3579 ptd_cnt--;
3580 ptd_free_count++;
3581 }
3582 }
3583
3584 if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
3585 ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
3586 ptd_free_count--;
3587 } else {
3588 panic("%s: out of ptd entry",
3589 __FUNCTION__);
3590 }
3591
3592 if (!ptd_preboot) {
3593 pmap_simple_unlock(&ptd_free_list_lock);
3594 }
3595
3596 ptdp->pt_page.next = NULL;
3597 ptdp->pt_page.prev = NULL;
3598 ptdp->pmap = NULL;
3599
3600 for (i = 0; i < PT_INDEX_MAX; i++) {
3601 ptdp->ptd_info[i].va = (vm_offset_t)-1;
3602 ptdp->ptd_info[i].refcnt = 0;
3603 ptdp->ptd_info[i].wiredcnt = 0;
3604 }
3605
3606 return ptdp;
3607 }
3608
3609 static inline pt_desc_t*
3610 ptd_alloc(pmap_t pmap, bool reclaim)
3611 {
3612 pt_desc_t *ptdp = ptd_alloc_unlinked(reclaim);
3613
3614 if (ptdp == NULL) {
3615 return NULL;
3616 }
3617
3618 ptdp->pmap = pmap;
3619 if (pmap != kernel_pmap) {
3620 /* We should never try to reclaim kernel pagetable pages in
3621 * pmap_pages_reclaim(), so don't enter them into the list. */
3622 pmap_simple_lock(&pt_pages_lock);
3623 queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
3624 pmap_simple_unlock(&pt_pages_lock);
3625 }
3626
3627 pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
3628 return ptdp;
3629 }
3630
3631 static void
3632 ptd_deallocate(pt_desc_t *ptdp)
3633 {
3634 pmap_t pmap = ptdp->pmap;
3635
3636 if (ptd_preboot) {
3637 panic("%s: early boot, "
3638 "ptdp=%p",
3639 __FUNCTION__,
3640 ptdp);
3641 }
3642
3643 if (ptdp->pt_page.next != NULL) {
3644 pmap_simple_lock(&pt_pages_lock);
3645 queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
3646 pmap_simple_unlock(&pt_pages_lock);
3647 }
3648 pmap_simple_lock(&ptd_free_list_lock);
3649 (*(void **)ptdp) = (void *)ptd_free_list;
3650 ptd_free_list = (pt_desc_t *)ptdp;
3651 ptd_free_count++;
3652 pmap_simple_unlock(&ptd_free_list_lock);
3653 if (pmap != NULL) {
3654 pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
3655 }
3656 }
3657
3658 static void
3659 ptd_init(
3660 pt_desc_t *ptdp,
3661 pmap_t pmap,
3662 vm_map_address_t va,
3663 unsigned int level,
3664 pt_entry_t *pte_p)
3665 {
3666 if (ptdp->pmap != pmap) {
3667 panic("%s: pmap mismatch, "
3668 "ptdp=%p, pmap=%p, va=%p, level=%u, pte_p=%p",
3669 __FUNCTION__,
3670 ptdp, pmap, (void*)va, level, pte_p);
3671 }
3672
3673 #if (__ARM_VMSA__ == 7)
3674 assert(level == 2);
3675 ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(ARM_TT_L1_PT_OFFMASK);
3676 #else
3677 assert(level > pt_attr_root_level(pmap_get_pt_attr(pmap)));
3678 ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(pt_attr_ln_offmask(pmap_get_pt_attr(pmap), level - 1));
3679 #endif
3680 if (level < PMAP_TT_MAX_LEVEL) {
3681 ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt = PT_DESC_REFCOUNT;
3682 }
3683 }
3684
3685
3686 boolean_t
3687 pmap_valid_address(
3688 pmap_paddr_t addr)
3689 {
3690 return pa_valid(addr);
3691 }
3692
3693 #if (__ARM_VMSA__ == 7)
3694
3695 /*
3696 * Given an offset and a map, compute the address of the
3697 * corresponding translation table entry.
3698 */
3699 static inline tt_entry_t *
3700 pmap_tte(pmap_t pmap,
3701 vm_map_address_t addr)
3702 {
3703 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3704
3705 if (!(tte_index(pmap, pt_attr, addr) < pmap->tte_index_max)) {
3706 return (tt_entry_t *)NULL;
3707 }
3708 return &pmap->tte[tte_index(pmap, pt_attr, addr)];
3709 }
3710
3711
3712 /*
3713 * Given an offset and a map, compute the address of the
3714 * pte. If the address is invalid with respect to the map
3715 * then PT_ENTRY_NULL is returned (and the map may need to grow).
3716 *
3717 * This is only used internally.
3718 */
3719 static inline pt_entry_t *
3720 pmap_pte(
3721 pmap_t pmap,
3722 vm_map_address_t addr)
3723 {
3724 pt_entry_t *ptp;
3725 tt_entry_t *ttp;
3726 tt_entry_t tte;
3727
3728 ttp = pmap_tte(pmap, addr);
3729 if (ttp == (tt_entry_t *)NULL) {
3730 return PT_ENTRY_NULL;
3731 }
3732 tte = *ttp;
3733 #if MACH_ASSERT
3734 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
3735 panic("%s: Attempt to demote L1 block, tte=0x%lx, "
3736 "pmap=%p, addr=%p",
3737 __FUNCTION__, (unsigned long)tte,
3738 pmap, (void*)addr);
3739 }
3740 #endif
3741 if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
3742 return PT_ENTRY_NULL;
3743 }
3744 ptp = (pt_entry_t *) ttetokv(tte) + ptenum(addr);
3745 return ptp;
3746 }
3747
3748 __unused static inline tt_entry_t *
3749 pmap_ttne(pmap_t pmap,
3750 unsigned int target_level,
3751 vm_map_address_t addr)
3752 {
3753 tt_entry_t * ret_ttep = NULL;
3754
3755 switch (target_level) {
3756 case 1:
3757 ret_ttep = pmap_tte(pmap, addr);
3758 break;
3759 case 2:
3760 ret_ttep = (tt_entry_t *)pmap_pte(pmap, addr);
3761 break;
3762 default:
3763 panic("%s: bad level, "
3764 "pmap=%p, target_level=%u, addr=%p",
3765 __FUNCTION__,
3766 pmap, target_level, (void *)addr);
3767 }
3768
3769 return ret_ttep;
3770 }
3771
3772 #else
3773
3774 static inline tt_entry_t *
3775 pmap_ttne(pmap_t pmap,
3776 unsigned int target_level,
3777 vm_map_address_t addr)
3778 {
3779 tt_entry_t * ttp = NULL;
3780 tt_entry_t * ttep = NULL;
3781 tt_entry_t tte = ARM_TTE_EMPTY;
3782 unsigned int cur_level;
3783
3784 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3785
3786 ttp = pmap->tte;
3787
3788 assert(target_level <= pt_attr->pta_max_level);
3789
3790 for (cur_level = pt_attr->pta_root_level; cur_level <= target_level; cur_level++) {
3791 ttep = &ttp[ttn_index(pmap, pt_attr, addr, cur_level)];
3792
3793 if (cur_level == target_level) {
3794 break;
3795 }
3796
3797 tte = *ttep;
3798
3799 #if MACH_ASSERT
3800 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
3801 panic("%s: Attempt to demote L%u block, tte=0x%llx, "
3802 "pmap=%p, target_level=%u, addr=%p",
3803 __FUNCTION__, cur_level, tte,
3804 pmap, target_level, (void*)addr);
3805 }
3806 #endif
3807 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
3808 return TT_ENTRY_NULL;
3809 }
3810
3811 ttp = (tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
3812 }
3813
3814 return ttep;
3815 }
3816
3817 /*
3818 * Given an offset and a map, compute the address of the level 1 translation table entry.
3819 * If the translation is invalid then TT_ENTRY_NULL is returned.
3820 */
3821 static inline tt_entry_t *
3822 pmap_tt1e(pmap_t pmap,
3823 vm_map_address_t addr)
3824 {
3825 return pmap_ttne(pmap, PMAP_TT_L1_LEVEL, addr);
3826 }
3827
3828 /*
3829 * Given an offset and a map, compute the address of the level 2 translation table entry.
3830 * If the translation is invalid then TT_ENTRY_NULL is returned.
3831 */
3832 static inline tt_entry_t *
3833 pmap_tt2e(pmap_t pmap,
3834 vm_map_address_t addr)
3835 {
3836 return pmap_ttne(pmap, PMAP_TT_L2_LEVEL, addr);
3837 }
3838
3839
3840 /*
3841 * Given an offset and a map, compute the address of the level 3 translation table entry.
3842 * If the translation is invalid then PT_ENTRY_NULL is returned.
3843 */
3844 static inline pt_entry_t *
3845 pmap_tt3e(
3846 pmap_t pmap,
3847 vm_map_address_t addr)
3848 {
3849 return (pt_entry_t*)pmap_ttne(pmap, PMAP_TT_L3_LEVEL, addr);
3850 }
3851
3852 static inline tt_entry_t *
3853 pmap_tte(
3854 pmap_t pmap,
3855 vm_map_address_t addr)
3856 {
3857 return pmap_tt2e(pmap, addr);
3858 }
3859
3860 static inline pt_entry_t *
3861 pmap_pte(
3862 pmap_t pmap,
3863 vm_map_address_t addr)
3864 {
3865 return pmap_tt3e(pmap, addr);
3866 }
3867
3868 #endif
3869
3870 #if __APRR_SUPPORTED__
3871 /*
3872 * Indicates whether the given PTE has special restrictions due to the current
3873 * APRR settings.
3874 */
3875 static boolean_t
3876 is_pte_aprr_protected(pt_entry_t pte)
3877 {
3878 uint64_t aprr_el0_value;
3879 uint64_t aprr_el1_value;
3880 uint64_t aprr_index;
3881
3882 MRS(aprr_el0_value, APRR_EL0);
3883 MRS(aprr_el1_value, APRR_EL1);
3884 aprr_index = PTE_TO_APRR_INDEX(pte);
3885
3886 /* Check to see if this mapping had APRR restrictions. */
3887 if ((APRR_EXTRACT_IDX_ATTR(aprr_el0_value, aprr_index) != APRR_EXTRACT_IDX_ATTR(APRR_EL0_RESET, aprr_index)) ||
3888 (APRR_EXTRACT_IDX_ATTR(aprr_el1_value, aprr_index) != APRR_EXTRACT_IDX_ATTR(APRR_EL1_RESET, aprr_index))
3889 ) {
3890 return TRUE;
3891 }
3892
3893 return FALSE;
3894 }
3895 #endif /* __APRR_SUPPORTED__ */
3896
3897
3898 #if __APRR_SUPPORTED__
3899 static boolean_t
3900 is_pte_xprr_protected(pt_entry_t pte)
3901 {
3902 #if __APRR_SUPPORTED__
3903 return is_pte_aprr_protected(pte);
3904 #else /* __APRR_SUPPORTED__ */
3905 #error "XPRR configuration error"
3906 #endif /* __APRR_SUPPORTED__ */
3907 }
3908 #endif /* __APRR_SUPPORTED__*/
3909
3910 #if __APRR_SUPPORTED__
3911 static uint64_t
3912 __unused pte_to_xprr_perm(pt_entry_t pte)
3913 {
3914 #if __APRR_SUPPORTED__
3915 switch (PTE_TO_APRR_INDEX(pte)) {
3916 case APRR_FIRM_RX_INDEX: return XPRR_FIRM_RX_PERM;
3917 case APRR_FIRM_RO_INDEX: return XPRR_FIRM_RO_PERM;
3918 case APRR_PPL_RW_INDEX: return XPRR_PPL_RW_PERM;
3919 case APRR_KERN_RW_INDEX: return XPRR_KERN_RW_PERM;
3920 case APRR_FIRM_RW_INDEX: return XPRR_FIRM_RW_PERM;
3921 case APRR_KERN0_RW_INDEX: return XPRR_KERN0_RW_PERM;
3922 case APRR_USER_JIT_INDEX: return XPRR_USER_JIT_PERM;
3923 case APRR_USER_RW_INDEX: return XPRR_USER_RW_PERM;
3924 case APRR_PPL_RX_INDEX: return XPRR_PPL_RX_PERM;
3925 case APRR_KERN_RX_INDEX: return XPRR_KERN_RX_PERM;
3926 case APRR_USER_XO_INDEX: return XPRR_USER_XO_PERM;
3927 case APRR_KERN_RO_INDEX: return XPRR_KERN_RO_PERM;
3928 case APRR_KERN0_RX_INDEX: return XPRR_KERN0_RO_PERM;
3929 case APRR_KERN0_RO_INDEX: return XPRR_KERN0_RO_PERM;
3930 case APRR_USER_RX_INDEX: return XPRR_USER_RX_PERM;
3931 case APRR_USER_RO_INDEX: return XPRR_USER_RO_PERM;
3932 default: return XPRR_MAX_PERM;
3933 }
3934 #else
3935 #error "XPRR configuration error"
3936 #endif /**/
3937 }
3938
3939 #if __APRR_SUPPORTED__
3940 static uint64_t
3941 xprr_perm_to_aprr_index(uint64_t perm)
3942 {
3943 switch (perm) {
3944 case XPRR_FIRM_RX_PERM: return APRR_FIRM_RX_INDEX;
3945 case XPRR_FIRM_RO_PERM: return APRR_FIRM_RO_INDEX;
3946 case XPRR_PPL_RW_PERM: return APRR_PPL_RW_INDEX;
3947 case XPRR_KERN_RW_PERM: return APRR_KERN_RW_INDEX;
3948 case XPRR_FIRM_RW_PERM: return APRR_FIRM_RW_INDEX;
3949 case XPRR_KERN0_RW_PERM: return APRR_KERN0_RW_INDEX;
3950 case XPRR_USER_JIT_PERM: return APRR_USER_JIT_INDEX;
3951 case XPRR_USER_RW_PERM: return APRR_USER_RW_INDEX;
3952 case XPRR_PPL_RX_PERM: return APRR_PPL_RX_INDEX;
3953 case XPRR_KERN_RX_PERM: return APRR_KERN_RX_INDEX;
3954 case XPRR_USER_XO_PERM: return APRR_USER_XO_INDEX;
3955 case XPRR_KERN_RO_PERM: return APRR_KERN_RO_INDEX;
3956 case XPRR_KERN0_RX_PERM: return APRR_KERN0_RO_INDEX;
3957 case XPRR_KERN0_RO_PERM: return APRR_KERN0_RO_INDEX;
3958 case XPRR_USER_RX_PERM: return APRR_USER_RX_INDEX;
3959 case XPRR_USER_RO_PERM: return APRR_USER_RO_INDEX;
3960 default: return APRR_MAX_INDEX;
3961 }
3962 }
3963 #endif /* __APRR_SUPPORTED__ */
3964
3965 static pt_entry_t
3966 __unused xprr_perm_to_pte(uint64_t perm)
3967 {
3968 #if __APRR_SUPPORTED__
3969 return APRR_INDEX_TO_PTE(xprr_perm_to_aprr_index(perm));
3970 #else
3971 #error "XPRR configuration error"
3972 #endif /**/
3973 }
3974 #endif /* __APRR_SUPPORTED__*/
3975
3976
3977 /*
3978 * Map memory at initialization. The physical addresses being
3979 * mapped are not managed and are never unmapped.
3980 *
3981 * For now, the VM is already on; we only need to map the
3982 * specified memory.
3983 */
3984 vm_map_address_t
3985 pmap_map(
3986 vm_map_address_t virt,
3987 vm_offset_t start,
3988 vm_offset_t end,
3989 vm_prot_t prot,
3990 unsigned int flags)
3991 {
3992 kern_return_t kr;
3993 vm_size_t ps;
3994
3995 ps = PAGE_SIZE;
3996 while (start < end) {
3997 kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
3998 prot, VM_PROT_NONE, flags, FALSE);
3999
4000 if (kr != KERN_SUCCESS) {
4001 panic("%s: failed pmap_enter, "
4002 "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
4003 __FUNCTION__,
4004 (void *) virt, (void *) start, (void *) end, prot, flags);
4005 }
4006
4007 virt += ps;
4008 start += ps;
4009 }
4010 return virt;
4011 }
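/*
 * Illustrative usage sketch (hypothetical values, not from this file),
 * assuming a 4KB PAGE_SIZE: mapping three physical pages read-only at a
 * kernel virtual address advances and returns the virtual cursor:
 *
 *     vm_map_address_t next;
 *     next = pmap_map(virt, 0x80000000, 0x80003000, VM_PROT_READ, 0);
 *     // next == virt + 3 * PAGE_SIZE; pmap_map panics if any pmap_enter fails
 */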
4012
4013 vm_map_address_t
4014 pmap_map_bd_with_options(
4015 vm_map_address_t virt,
4016 vm_offset_t start,
4017 vm_offset_t end,
4018 vm_prot_t prot,
4019 int32_t options)
4020 {
4021 pt_entry_t tmplate;
4022 pt_entry_t *ptep;
4023 vm_map_address_t vaddr;
4024 vm_offset_t paddr;
4025 pt_entry_t mem_attr;
4026
4027 switch (options & PMAP_MAP_BD_MASK) {
4028 case PMAP_MAP_BD_WCOMB:
4029 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
4030 #if (__ARM_VMSA__ > 7)
4031 mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
4032 #else
4033 mem_attr |= ARM_PTE_SH;
4034 #endif
4035 break;
4036 case PMAP_MAP_BD_POSTED:
4037 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
4038 break;
4039 case PMAP_MAP_BD_POSTED_REORDERED:
4040 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
4041 break;
4042 case PMAP_MAP_BD_POSTED_COMBINED_REORDERED:
4043 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
4044 break;
4045 default:
4046 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
4047 break;
4048 }
4049
4050 tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
4051 mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
4052 #if __ARM_KERNEL_PROTECT__
4053 tmplate |= ARM_PTE_NG;
4054 #endif /* __ARM_KERNEL_PROTECT__ */
4055
4056 vaddr = virt;
4057 paddr = start;
4058 while (paddr < end) {
4059 ptep = pmap_pte(kernel_pmap, vaddr);
4060 if (ptep == PT_ENTRY_NULL) {
4061 panic("%s: no PTE for vaddr=%p, "
4062 "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
4063 __FUNCTION__, (void*)vaddr,
4064 (void*)virt, (void*)start, (void*)end, prot, options);
4065 }
4066
4067 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
4068 WRITE_PTE_STRONG(ptep, tmplate);
4069
4070 pte_increment_pa(tmplate);
4071 vaddr += PAGE_SIZE;
4072 paddr += PAGE_SIZE;
4073 }
4074
4075 if (end >= start) {
4076 flush_mmu_tlb_region(virt, (unsigned)(end - start));
4077 }
4078
4079 return vaddr;
4080 }
4081
4082 /*
4083 * Back-door routine for mapping kernel VM at initialization.
4084 * Useful for mapping memory outside the range
4085 * [vm_first_phys, vm_last_phys] (i.e., devices).
4086 * Otherwise like pmap_map.
4087 */
4088 vm_map_address_t
4089 pmap_map_bd(
4090 vm_map_address_t virt,
4091 vm_offset_t start,
4092 vm_offset_t end,
4093 vm_prot_t prot)
4094 {
4095 pt_entry_t tmplate;
4096 pt_entry_t *ptep;
4097 vm_map_address_t vaddr;
4098 vm_offset_t paddr;
4099
4100 /* not cacheable and not buffered */
4101 tmplate = pa_to_pte(start)
4102 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
4103 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
4104 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
4105 #if __ARM_KERNEL_PROTECT__
4106 tmplate |= ARM_PTE_NG;
4107 #endif /* __ARM_KERNEL_PROTECT__ */
4108
4109 vaddr = virt;
4110 paddr = start;
4111 while (paddr < end) {
4112 ptep = pmap_pte(kernel_pmap, vaddr);
4113 if (ptep == PT_ENTRY_NULL) {
4114 panic("pmap_map_bd");
4115 }
4116 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
4117 WRITE_PTE_STRONG(ptep, tmplate);
4118
4119 pte_increment_pa(tmplate);
4120 vaddr += PAGE_SIZE;
4121 paddr += PAGE_SIZE;
4122 }
4123
4124 if (end >= start) {
4125 flush_mmu_tlb_region(virt, (unsigned)(end - start));
4126 }
4127
4128 return vaddr;
4129 }
4130
4131 /*
4132 * Back-door routine for mapping kernel VM at initialization.
4133 * Useful for mapping memory at specific physical addresses in early
4134 * boot (i.e., before kernel_map is initialized).
4135 *
4136 * Maps are in the VM_HIGH_KERNEL_WINDOW area.
4137 */
4138
4139 vm_map_address_t
4140 pmap_map_high_window_bd(
4141 vm_offset_t pa_start,
4142 vm_size_t len,
4143 vm_prot_t prot)
4144 {
4145 pt_entry_t *ptep, pte;
4146 #if (__ARM_VMSA__ == 7)
4147 vm_map_address_t va_start = VM_HIGH_KERNEL_WINDOW;
4148 vm_map_address_t va_max = VM_MAX_KERNEL_ADDRESS;
4149 #else
4150 vm_map_address_t va_start = VREGION1_START;
4151 vm_map_address_t va_max = VREGION1_START + VREGION1_SIZE;
4152 #endif
4153 vm_map_address_t va_end;
4154 vm_map_address_t va;
4155 vm_size_t offset;
4156
4157 offset = pa_start & PAGE_MASK;
4158 pa_start -= offset;
4159 len += offset;
4160
4161 if (len > (va_max - va_start)) {
4162 panic("%s: area too large, "
4163 "pa_start=%p, len=%p, prot=0x%x",
4164 __FUNCTION__,
4165 (void*)pa_start, (void*)len, prot);
4166 }
4167
4168 scan:
4169 for (; va_start < va_max; va_start += PAGE_SIZE) {
4170 ptep = pmap_pte(kernel_pmap, va_start);
4171 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
4172 if (*ptep == ARM_PTE_TYPE_FAULT) {
4173 break;
4174 }
4175 }
4176 if (va_start > va_max) {
4177 panic("%s: insufficient pages, "
4178 "pa_start=%p, len=%p, prot=0x%x",
4179 __FUNCTION__,
4180 (void*)pa_start, (void*)len, prot);
4181 }
4182
4183 for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
4184 ptep = pmap_pte(kernel_pmap, va_end);
4185 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
4186 if (*ptep != ARM_PTE_TYPE_FAULT) {
4187 va_start = va_end + PAGE_SIZE;
4188 goto scan;
4189 }
4190 }
4191
4192 for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
4193 ptep = pmap_pte(kernel_pmap, va);
4194 pte = pa_to_pte(pa_start)
4195 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
4196 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
4197 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
4198 #if (__ARM_VMSA__ > 7)
4199 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
4200 #else
4201 pte |= ARM_PTE_SH;
4202 #endif
4203 #if __ARM_KERNEL_PROTECT__
4204 pte |= ARM_PTE_NG;
4205 #endif /* __ARM_KERNEL_PROTECT__ */
4206 WRITE_PTE_STRONG(ptep, pte);
4207 }
4208 PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len, false);
4209 #if KASAN
4210 kasan_notify_address(va_start, len);
4211 #endif
4212 return va_start;
4213 }
4214
4215 #define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))
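/*
 * Worked example for PMAP_ALIGN (illustrative, not in the original source):
 * because '+' binds more tightly than '&', the macro evaluates as
 * ((addr) + ((align) - 1)) & ~((align) - 1), i.e. it rounds addr up to the
 * next multiple of a power-of-two alignment. With a hypothetical addr of
 * 0x1003 and align of 0x10:
 *     0x1003 + 0xF = 0x1012;  0x1012 & ~0xF = 0x1010
 * An already-aligned addr is returned unchanged.
 */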
4216
4217 static vm_size_t
4218 pmap_compute_io_rgns(void)
4219 {
4220 DTEntry entry;
4221 pmap_io_range_t *ranges;
4222 uint64_t rgn_end;
4223 void *prop = NULL;
4224 int err;
4225 unsigned int prop_size;
4226
4227 err = DTLookupEntry(NULL, "/defaults", &entry);
4228 assert(err == kSuccess);
4229
4230 if (kSuccess != DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
4231 return 0;
4232 }
4233
4234 ranges = prop;
4235 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
4236 if (ranges[i].addr & PAGE_MASK) {
4237 panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
4238 }
4239 if (ranges[i].len & PAGE_MASK) {
4240 panic("pmap I/O region %u length 0x%llx is not page-aligned", i, ranges[i].len);
4241 }
4242 if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end)) {
4243 panic("pmap I/O region %u addr 0x%llx length 0x%llx wraps around", i, ranges[i].addr, ranges[i].len);
4244 }
4245 if (((ranges[i].addr <= gPhysBase) && (rgn_end > gPhysBase)) ||
4246 ((ranges[i].addr < avail_end) && (rgn_end >= avail_end)) ||
4247 ((ranges[i].addr > gPhysBase) && (rgn_end < avail_end))) {
4248 panic("pmap I/O region %u addr 0x%llx length 0x%llx overlaps physical memory", i, ranges[i].addr, ranges[i].len);
4249 }
4250
4251 ++num_io_rgns;
4252 }
4253
4254 return num_io_rgns * sizeof(*ranges);
4255 }
4256
4257 /*
4258 * return < 0 for a < b
4259 * 0 for a == b
4260 * > 0 for a > b
4261 */
4262 typedef int (*cmpfunc_t)(const void *a, const void *b);
4263
4264 extern void
4265 qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
4266
4267 static int
4268 cmp_io_rgns(const void *a, const void *b)
4269 {
4270 const pmap_io_range_t *range_a = a;
4271 const pmap_io_range_t *range_b = b;
4272 if ((range_b->addr + range_b->len) <= range_a->addr) {
4273 return 1;
4274 } else if ((range_a->addr + range_a->len) <= range_b->addr) {
4275 return -1;
4276 } else {
4277 return 0;
4278 }
4279 }
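/*
 * Illustrative note (not in the original source): cmp_io_rgns() orders
 * disjoint I/O ranges by address and treats overlapping ranges as equal.
 * With hypothetical ranges a = {addr 0x1000, len 0x1000} and
 * b = {addr 0x3000, len 0x1000}, a sorts before b (the comparator returns -1);
 * if b were {addr 0x1800, len 0x1000} instead, the ranges overlap and the
 * comparator returns 0, so a later lookup keyed on an address inside a range
 * can be treated as an equality match against the sorted io_attr_table.
 */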
4280
4281 static void
4282 pmap_load_io_rgns(void)
4283 {
4284 DTEntry entry;
4285 pmap_io_range_t *ranges;
4286 void *prop = NULL;
4287 int err;
4288 unsigned int prop_size;
4289
4290 if (num_io_rgns == 0) {
4291 return;
4292 }
4293
4294 err = DTLookupEntry(NULL, "/defaults", &entry);
4295 assert(err == kSuccess);
4296
4297 err = DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
4298 assert(err == kSuccess);
4299
4300 ranges = prop;
4301 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
4302 io_attr_table[i] = ranges[i];
4303 }
4304
4305 qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
4306 }
4307
4308 #if __arm64__
4309 /*
4310 * pmap_get_arm64_prot
4311 *
4312 * Return the effective ARMv8 VMSA block/page protections, including
4313 * table AP/PXN/XN overrides, for a pmap entry.
4314 *
4315 */
4316
4317 uint64_t
4318 pmap_get_arm64_prot(
4319 pmap_t pmap,
4320 vm_offset_t addr)
4321 {
4322 tt_entry_t tte = 0;
4323 unsigned int level = 0;
4324 uint64_t tte_type = 0;
4325 uint64_t effective_prot_bits = 0;
4326 uint64_t aggregate_tte = 0;
4327 uint64_t table_ap_bits = 0, table_xn = 0, table_pxn = 0;
4328 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4329
4330 for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
4331 tte = *pmap_ttne(pmap, level, addr);
4332
4333 if (!(tte & ARM_TTE_VALID)) {
4334 return 0;
4335 }
4336
4337 tte_type = tte & ARM_TTE_TYPE_MASK;
4338
4339 if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
4340 (level == pt_attr->pta_max_level)) {
4341 /* Block or page mapping; both have the same protection bit layout. */
4342 break;
4343 } else if (tte_type == ARM_TTE_TYPE_TABLE) {
4344 /* All of the table bits we care about are overrides, so just OR them together. */
4345 aggregate_tte |= tte;
4346 }
4347 }
4348
4349 table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
4350 table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
4351 table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);
4352
4353 /* Start with the PTE bits. */
4354 effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);
4355
4356 /* Table AP bits mask out block/page AP bits */
4357 effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));
4358
4359 /* XN/PXN bits can be OR'd in. */
4360 effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
4361 effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);
4362
4363 return effective_prot_bits;
4364 }
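/*
 * Illustrative note (not in the original source): because the table-level
 * XN/PXN overrides are OR'd into the result, a leaf PTE that permits
 * execution is still reported as non-executable here if any table
 * descriptor on the walk had ARM_TTE_TABLE_XN set (or ARM_TTE_TABLE_PXN,
 * for privileged execution).
 */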
4365 #endif /* __arm64__ */
4366
4367
4368 /*
4369 * Bootstrap the system enough to run with virtual memory.
4370 *
4371 * The early VM initialization code has already allocated
4372 * the first CPU's translation table and made entries for
4373 * all the one-to-one mappings to be found there.
4374 *
4375 * We must set up the kernel pmap structures, the
4376 * physical-to-virtual translation lookup tables for the
4377 * physical memory to be managed (between avail_start and
4378 * avail_end).
4379 *
4380 * Map the kernel's code and data, and allocate the system page table.
4381 * Page_size must already be set.
4382 *
4383 * Parameters:
4384 * first_avail first available physical page -
4385 * after kernel page tables
4386 * avail_start PA of first managed physical page
4387 * avail_end PA of last managed physical page
4388 */
4389
4390 void
4391 pmap_bootstrap(
4392 vm_offset_t vstart)
4393 {
4394 pmap_paddr_t pmap_struct_start;
4395 vm_size_t pv_head_size;
4396 vm_size_t ptd_root_table_size;
4397 vm_size_t pp_attr_table_size;
4398 vm_size_t io_attr_table_size;
4399 unsigned int npages;
4400 vm_map_offset_t maxoffset;
4401
4402 lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);
4403
4404 #if XNU_MONITOR
4405
4406 #if DEVELOPMENT || DEBUG
4407 PE_parse_boot_argn("-unsafe_kernel_text", &pmap_ppl_disable, sizeof(pmap_ppl_disable));
4408 #endif
4409
4410 simple_lock_init(&pmap_ppl_free_page_lock, 0);
4411
4412 #if __APRR_SUPPORTED__
4413 if (((uintptr_t)(&ppl_trampoline_start)) % PAGE_SIZE) {
4414 panic("%s: ppl_trampoline_start is not page aligned, "
4415 "vstart=%#lx",
4416 __FUNCTION__,
4417 vstart);
4418 }
4419
4420 if (((uintptr_t)(&ppl_trampoline_end)) % PAGE_SIZE) {
4421 panic("%s: ppl_trampoline_end is not page aligned, "
4422 "vstart=%#lx",
4423 __FUNCTION__,
4424 vstart);
4425 }
4426 #endif /* __APRR_SUPPORTED__ */
4427 #endif /* XNU_MONITOR */
4428
4429 #if DEVELOPMENT || DEBUG
4430 if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
4431 kprintf("Kernel traces for pmap operations enabled\n");
4432 }
4433 #endif
4434
4435 /*
4436 * Initialize the kernel pmap.
4437 */
4438 pmap_stamp = 1;
4439 #if ARM_PARAMETERIZED_PMAP
4440 kernel_pmap->pmap_pt_attr = native_pt_attr;
4441 #endif /* ARM_PARAMETERIZED_PMAP */
4442 #if HAS_APPLE_PAC
4443 kernel_pmap->disable_jop = 0;
4444 #endif /* HAS_APPLE_PAC */
4445 kernel_pmap->tte = cpu_tte;
4446 kernel_pmap->ttep = cpu_ttep;
4447 #if (__ARM_VMSA__ > 7)
4448 kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
4449 #else
4450 kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
4451 #endif
4452 kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
4453 os_atomic_init(&kernel_pmap->ref_count, 1);
4454 kernel_pmap->gc_status = 0;
4455 kernel_pmap->nx_enabled = TRUE;
4456 #ifdef __arm64__
4457 kernel_pmap->is_64bit = TRUE;
4458 #else
4459 kernel_pmap->is_64bit = FALSE;
4460 #endif
4461 kernel_pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);
4462
4463 kernel_pmap->nested_region_grand_addr = 0x0ULL;
4464 kernel_pmap->nested_region_subord_addr = 0x0ULL;
4465 kernel_pmap->nested_region_size = 0x0ULL;
4466 kernel_pmap->nested_region_asid_bitmap = NULL;
4467 kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;
4468
4469 #if (__ARM_VMSA__ == 7)
4470 kernel_pmap->tte_index_max = 4 * NTTES;
4471 #endif
4472 kernel_pmap->hw_asid = 0;
4473 kernel_pmap->sw_asid = 0;
4474
4475 PMAP_LOCK_INIT(kernel_pmap);
4476 memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));
4477
4478 /* allocate space for and initialize the bookkeeping structures */
4479 io_attr_table_size = pmap_compute_io_rgns();
4480 npages = (unsigned int)atop(mem_size);
4481 pp_attr_table_size = npages * sizeof(pp_attr_t);
4482 pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
4483 // allocate enough initial PTDs to map twice the available physical memory
4484 ptd_root_table_size = sizeof(pt_desc_t) * (mem_size / ((PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES)) * 2;
4485
4486 pmap_struct_start = avail_start;
4487
4488 pp_attr_table = (pp_attr_t *) phystokv(avail_start);
4489 avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
4490 io_attr_table = (pmap_io_range_t *) phystokv(avail_start);
4491 avail_start = PMAP_ALIGN(avail_start + io_attr_table_size, __alignof(pv_entry_t*));
4492 pv_head_table = (pv_entry_t **) phystokv(avail_start);
4493 avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
4494 ptd_root_table = (pt_desc_t *)phystokv(avail_start);
4495 avail_start = round_page(avail_start + ptd_root_table_size);
4496
4497 memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);
4498
4499 pmap_load_io_rgns();
4500 ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));
4501
4502 #if XNU_MONITOR
4503 pmap_array_begin = (void *)phystokv(avail_start);
4504 pmap_array = pmap_array_begin;
4505 avail_start += round_page(MAX_ASID * sizeof(struct pmap));
4506 pmap_array_end = (void *)phystokv(avail_start);
4507
4508 pmap_array_count = ((pmap_array_end - pmap_array_begin) / sizeof(struct pmap));
4509
4510 pmap_bootstrap_pmap_free_list();
4511
4512 pmap_ledger_ptr_array_begin = (void *)phystokv(avail_start);
4513 pmap_ledger_ptr_array = pmap_ledger_ptr_array_begin;
4514 avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(void*));
4515 pmap_ledger_ptr_array_end = (void *)phystokv(avail_start);
4516
4517 pmap_ledger_refcnt_begin = (void *)phystokv(avail_start);
4518 pmap_ledger_refcnt = pmap_ledger_refcnt_begin;
4519 avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(os_refcnt_t));
4520 pmap_ledger_refcnt_end = (void *)phystokv(avail_start);
4521
4522 simple_lock_init(&pmap_ledger_lock, 0);
4523 #endif
4524 pmap_cpu_data_array_init();
4525
4526 vm_first_phys = gPhysBase;
4527 vm_last_phys = trunc_page(avail_end);
4528
4529 simple_lock_init(&pmaps_lock, 0);
4530 simple_lock_init(&asid_lock, 0);
4531 simple_lock_init(&tt1_lock, 0);
4532 queue_init(&map_pmap_list);
4533 queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
4534 free_page_size_tt_list = TT_FREE_ENTRY_NULL;
4535 free_page_size_tt_count = 0;
4536 free_page_size_tt_max = 0;
4537 free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
4538 free_two_page_size_tt_count = 0;
4539 free_two_page_size_tt_max = 0;
4540 free_tt_list = TT_FREE_ENTRY_NULL;
4541 free_tt_count = 0;
4542 free_tt_max = 0;
4543
4544 simple_lock_init(&pt_pages_lock, 0);
4545 queue_init(&pt_page_list);
4546
4547 simple_lock_init(&pmap_pages_lock, 0);
4548 pmap_pages_request_count = 0;
4549 pmap_pages_request_acum = 0;
4550 pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;
4551
4552 virtual_space_start = vstart;
4553 virtual_space_end = VM_MAX_KERNEL_ADDRESS;
4554
4555 bitmap_full(&asid_bitmap[0], MAX_ASID);
4556
4557
4558
4559 if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
4560 maxoffset = trunc_page(maxoffset);
4561 if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
4562 && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
4563 arm_pmap_max_offset_default = maxoffset;
4564 }
4565 }
4566 #if defined(__arm64__)
4567 if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
4568 maxoffset = trunc_page(maxoffset);
4569 if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
4570 && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
4571 arm64_pmap_max_offset_default = maxoffset;
4572 }
4573 }
4574 #endif
4575
4576 #if DEVELOPMENT || DEBUG
4577 PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute, sizeof(panic_on_unsigned_execute));
4578 #endif /* DEVELOPMENT || DEBUG */
4579
4580 pmap_nesting_size_min = ARM_NESTING_SIZE_MIN;
4581 pmap_nesting_size_max = ARM_NESTING_SIZE_MAX;
4582
4583 simple_lock_init(&phys_backup_lock, 0);
4584
4585
4586 #if MACH_ASSERT
4587 PE_parse_boot_argn("pmap_stats_assert",
4588 &pmap_stats_assert,
4589 sizeof(pmap_stats_assert));
4590 PE_parse_boot_argn("vm_footprint_suspend_allowed",
4591 &vm_footprint_suspend_allowed,
4592 sizeof(vm_footprint_suspend_allowed));
4593 #endif /* MACH_ASSERT */
4594
4595 #if KASAN
4596 /* Shadow the CPU copy windows, as they fall outside of the physical aperture */
4597 kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
4598 #endif /* KASAN */
4599 }
4600
4601 #if XNU_MONITOR
4602
4603 static inline void
4604 pa_set_range_monitor(pmap_paddr_t start_pa, pmap_paddr_t end_pa)
4605 {
4606 pmap_paddr_t cur_pa;
4607 for (cur_pa = start_pa; cur_pa < end_pa; cur_pa += ARM_PGBYTES) {
4608 assert(pa_valid(cur_pa));
4609 pa_set_monitor(cur_pa);
4610 }
4611 }
4612
4613 static void
4614 pa_set_range_xprr_perm(pmap_paddr_t start_pa,
4615 pmap_paddr_t end_pa,
4616 unsigned int expected_perm,
4617 unsigned int new_perm)
4618 {
4619 vm_offset_t start_va = phystokv(start_pa);
4620 vm_offset_t end_va = start_va + (end_pa - start_pa);
4621
4622 pa_set_range_monitor(start_pa, end_pa);
4623 pmap_set_range_xprr_perm(start_va, end_va, expected_perm, new_perm);
4624 }
4625
4626 void
4627 pmap_static_allocations_done(void)
4628 {
4629 pmap_paddr_t monitor_start_pa;
4630 pmap_paddr_t monitor_end_pa;
4631
4632 /*
4633 * We allocate memory for bootstrap starting at topOfKernelData (which
4634 * is at the end of the device tree and ramdisk data, if applicable).
4635 * We use avail_start as a pointer to the first address that has not
4636 * been reserved for bootstrap, so we know which pages to give to the
4637 * virtual memory layer.
4638 *
4639 * These bootstrap allocations will be used primarily for page tables.
4640 * If we wish to secure the page tables, we need to start by marking
4641 * these bootstrap allocations as pages that we want to protect.
4642 */
4643 monitor_start_pa = BootArgs->topOfKernelData;
4644 monitor_end_pa = BootArgs->topOfKernelData + BOOTSTRAP_TABLE_SIZE;
4645
4646 /*
4647 * The bootstrap page tables are mapped RO at bootstrap.
4648 *
4649 * Note that this function call requests switching XPRR permissions from
4650 * XPRR_KERN_RO_PERM to XPRR_KERN_RO_PERM. Whilst this may seem redundant,
4651 * pa_set_range_xprr_perm() does other things too, such as calling
4652 * pa_set_range_monitor() on the requested address range and performing a number
4653 * of integrity checks on the PTEs. We should still
4654 * call this function for all PPL-owned memory, regardless of whether
4655 * permissions are required to be changed or not.
4656 */
4657 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
4658
4659 monitor_start_pa = BootArgs->topOfKernelData + BOOTSTRAP_TABLE_SIZE;
4660 monitor_end_pa = avail_start;
4661
4662 /* The other bootstrap allocations are mapped RW at bootstrap. */
4663 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
4664
4665 /*
4666 * The RO page tables are mapped RW at bootstrap and remain RW after the call
4667 * to pa_set_range_xprr_perm(). We do this, as opposed to using XPRR_PPL_RW_PERM,
4668 * to work around a functional issue on H11 devices where CTRR shifts the APRR
4669 * lookup table index to USER_XO before APRR is applied, hence causing the hardware
4670 * to believe we are dealing with a user XO page upon performing a translation.
4671 *
4672 * Note that this workaround does not pose a security risk, because the RO
4673 * page tables still remain read-only, due to KTRR/CTRR, and further protecting
4674 * them at the APRR level would be unnecessary.
4675 */
4676 monitor_start_pa = kvtophys((vm_offset_t)&ropagetable_begin);
4677 monitor_end_pa = monitor_start_pa + ((vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin);
4678 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_KERN_RW_PERM);
4679
4680 monitor_start_pa = kvtophys(segPPLDATAB);
4681 monitor_end_pa = monitor_start_pa + segSizePPLDATA;
4682
4683 /* PPL data is RW for the PPL, RO for the kernel. */
4684 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
4685
4686 monitor_start_pa = kvtophys(segPPLTEXTB);
4687 monitor_end_pa = monitor_start_pa + segSizePPLTEXT;
4688
4689 /* PPL text is RX for the PPL, RO for the kernel. */
4690 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
4691
4692 #if __APRR_SUPPORTED__
4693 monitor_start_pa = kvtophys(segPPLTRAMPB);
4694 monitor_end_pa = monitor_start_pa + segSizePPLTRAMP;
4695
4696 /*
4697 * The PPLTRAMP pages will be a mix of PPL RX/kernel RO and
4698 * PPL RX/kernel RX. However, all of these pages belong to the PPL.
4699 */
4700 pa_set_range_monitor(monitor_start_pa, monitor_end_pa);
4701 #endif
4702
4703 /*
4704 * In order to support DTrace, the save areas for the PPL must be
4705 * writable. This is due to the fact that DTrace will try to update
4706 * register state.
4707 */
4708 if (pmap_ppl_disable) {
4709 vm_offset_t monitor_start_va = phystokv(ppl_cpu_save_area_start);
4710 vm_offset_t monitor_end_va = monitor_start_va + (ppl_cpu_save_area_end - ppl_cpu_save_area_start);
4711
4712 pmap_set_range_xprr_perm(monitor_start_va, monitor_end_va, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
4713 }
4714
4715 #if __APRR_SUPPORTED__
4716 /* The trampoline must also be specially protected. */
4717 pmap_set_range_xprr_perm((vm_offset_t)&ppl_trampoline_start, (vm_offset_t)&ppl_trampoline_end, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
4718 #endif
4719
4720 if (segSizePPLDATACONST > 0) {
4721 monitor_start_pa = kvtophys(segPPLDATACONSTB);
4722 monitor_end_pa = monitor_start_pa + segSizePPLDATACONST;
4723
4724 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
4725 }
4726
4727 /*
4728 * Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
4729 * precaution. The real RW mappings are at a different location with guard pages.
4730 */
4731 pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_KERN_RO_PERM);
4732 }
4733
4734
4735 void
4736 pmap_lockdown_ppl(void)
4737 {
4738 /* Mark the PPL as being locked down. */
4739
4740 #if __APRR_SUPPORTED__
4741 pmap_ppl_locked_down = TRUE;
4742 /* Force a trap into the PPL to update APRR_EL1. */
4743 pmap_return(FALSE, FALSE);
4744 #else
4745 #error "XPRR configuration error"
4746 #endif /* __APRR_SUPPORTED__ */
4747
4748 }
4749 #endif /* XNU_MONITOR */
4750
4751 void
4752 pmap_virtual_space(
4753 vm_offset_t *startp,
4754 vm_offset_t *endp
4755 )
4756 {
4757 *startp = virtual_space_start;
4758 *endp = virtual_space_end;
4759 }
4760
4761
4762 boolean_t
4763 pmap_virtual_region(
4764 unsigned int region_select,
4765 vm_map_offset_t *startp,
4766 vm_map_size_t *size
4767 )
4768 {
4769 boolean_t ret = FALSE;
4770 #if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
4771 if (region_select == 0) {
4772 /*
4773 * In this config, the bootstrap mappings should occupy their own L2
4774 * TTs, as they should be immutable after boot. Having the associated
4775 * TTEs and PTEs in their own pages allows us to lock down those pages,
4776 * while allowing the rest of the kernel address range to be remapped.
4777 */
4778 #if (__ARM_VMSA__ > 7)
4779 *startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
4780 #else
4781 #error Unsupported configuration
4782 #endif
4783 *size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
4784 ret = TRUE;
4785 }
4786 #else
4787 #if (__ARM_VMSA__ > 7)
4788 unsigned long low_global_vr_mask = 0;
4789 vm_map_size_t low_global_vr_size = 0;
4790 #endif
4791
4792 if (region_select == 0) {
4793 #if (__ARM_VMSA__ == 7)
4794 *startp = gVirtBase & 0xFFC00000;
4795 *size = ((virtual_space_start - (gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
4796 #else
4797 /* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
4798 if (!TEST_PAGE_SIZE_4K) {
4799 *startp = gVirtBase & 0xFFFFFFFFFE000000;
4800 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
4801 } else {
4802 *startp = gVirtBase & 0xFFFFFFFFFF800000;
4803 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
4804 }
4805 #endif
4806 ret = TRUE;
4807 }
4808 if (region_select == 1) {
4809 *startp = VREGION1_START;
4810 *size = VREGION1_SIZE;
4811 ret = TRUE;
4812 }
4813 #if (__ARM_VMSA__ > 7)
4814 /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
4815 if (!TEST_PAGE_SIZE_4K) {
4816 low_global_vr_mask = 0xFFFFFFFFFE000000;
4817 low_global_vr_size = 0x2000000;
4818 } else {
4819 low_global_vr_mask = 0xFFFFFFFFFF800000;
4820 low_global_vr_size = 0x800000;
4821 }
4822
4823 if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
4824 *startp = LOW_GLOBAL_BASE_ADDRESS;
4825 *size = low_global_vr_size;
4826 ret = TRUE;
4827 }
4828
4829 if (region_select == 3) {
4830 /* In this config, we allow the bootstrap mappings to occupy the same
4831 * page table pages as the heap.
4832 */
4833 *startp = VM_MIN_KERNEL_ADDRESS;
4834 *size = LOW_GLOBAL_BASE_ADDRESS - *startp;
4835 ret = TRUE;
4836 }
4837 #endif
4838 #endif
4839 return ret;
4840 }
4841
4842 unsigned int
4843 pmap_free_pages(
4844 void)
4845 {
4846 return (unsigned int)atop(avail_end - first_avail);
4847 }
4848
4849
4850 boolean_t
4851 pmap_next_page_hi(
4852 ppnum_t * pnum,
4853 __unused boolean_t might_free)
4854 {
4855 return pmap_next_page(pnum);
4856 }
4857
4858
4859 boolean_t
4860 pmap_next_page(
4861 ppnum_t *pnum)
4862 {
4863 if (first_avail != avail_end) {
4864 *pnum = (ppnum_t)atop(first_avail);
4865 first_avail += PAGE_SIZE;
4866 return TRUE;
4867 }
4868 return FALSE;
4869 }
4870
4871
4872 /*
4873 * Initialize the pmap module.
4874 * Called by vm_init, to initialize any structures that the pmap
4875 * system needs to map virtual memory.
4876 */
4877 void
4878 pmap_init(
4879 void)
4880 {
4881 /*
4882 * Protect page zero in the kernel map.
4883 * (can be overruled by permanent translation
4884 * table entries at page zero - see arm_vm_init).
4885 */
4886 vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);
4887
4888 pmap_initialized = TRUE;
4889
4890 pmap_zone_init();
4891
4892
4893 /*
4894 * Initialize the pmap object (for tracking the vm_page_t
4895 * structures for pages we allocate to be page tables in
4896 * pmap_expand()).
4897 */
4898 _vm_object_allocate(mem_size, pmap_object);
4899 pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
4900
4901 pv_init();
4902
4903 /*
4904 * The values of [hard_]maxproc may have been scaled; make sure
4905 * they are still no greater than the value of MAX_ASID.
4906 */
4907 if (maxproc > MAX_ASID) {
4908 maxproc = MAX_ASID;
4909 }
4910 if (hard_maxproc > MAX_ASID) {
4911 hard_maxproc = MAX_ASID;
4912 }
4913
4914 #if CONFIG_PGTRACE
4915 pmap_pgtrace_init();
4916 #endif
4917 }
4918
4919 boolean_t
4920 pmap_verify_free(
4921 ppnum_t ppnum)
4922 {
4923 pv_entry_t **pv_h;
4924 int pai;
4925 pmap_paddr_t phys = ptoa(ppnum);
4926
4927 assert(phys != vm_page_fictitious_addr);
4928
4929 if (!pa_valid(phys)) {
4930 return FALSE;
4931 }
4932
4933 pai = (int)pa_index(phys);
4934 pv_h = pai_to_pvh(pai);
4935
4936 return pvh_test_type(pv_h, PVH_TYPE_NULL);
4937 }
4938
4939 #if MACH_ASSERT
4940 void
4941 pmap_assert_free(ppnum_t ppnum)
4942 {
4943 assertf(pmap_verify_free(ppnum), "page = 0x%x", ppnum);
4944 (void)ppnum;
4945 }
4946 #endif
4947
4948
4949 /*
4950 * Initialize zones used by pmap.
4951 */
4952 static void
4953 pmap_zone_init(
4954 void)
4955 {
4956 /*
4957 * Create the zone of physical maps
4958 * and the physical-to-virtual entries.
4959 */
4960 pmap_zone = zinit((vm_size_t) sizeof(struct pmap), (vm_size_t) sizeof(struct pmap) * 256,
4961 PAGE_SIZE, "pmap");
4962 }
4963
4964 #if XNU_MONITOR
4965 MARK_AS_PMAP_TEXT static void
4966 pmap_ledger_alloc_init_internal(size_t size)
4967 {
4968 pmap_simple_lock(&pmap_ledger_lock);
4969
4970 if (pmap_ledger_alloc_initialized) {
4971 panic("%s: already initialized, "
4972 "size=%lu",
4973 __func__,
4974 size);
4975 }
4976
4977 if (size != sizeof(pmap_ledger_data_t)) {
4978 panic("%s: size mismatch, expected %lu, "
4979 "size=%lu",
4980 __func__, PMAP_LEDGER_DATA_BYTES,
4981 size);
4982 }
4983
4984 pmap_ledger_alloc_initialized = true;
4985
4986 pmap_simple_unlock(&pmap_ledger_lock);
4987 }
4988
4989 MARK_AS_PMAP_TEXT static ledger_t
4990 pmap_ledger_alloc_internal(void)
4991 {
4992 pmap_paddr_t paddr;
4993 uint64_t vaddr, vstart, vend;
4994 uint64_t index;
4995
4996 ledger_t new_ledger;
4997 uint64_t array_index;
4998
4999 pmap_simple_lock(&pmap_ledger_lock);
5000 if (pmap_ledger_free_list == NULL) {
5001 paddr = pmap_get_free_ppl_page();
5002
5003 if (paddr == 0) {
5004 pmap_simple_unlock(&pmap_ledger_lock);
5005 return NULL;
5006 }
5007
5008 vstart = phystokv(paddr);
5009 vend = vstart + PAGE_SIZE;
5010
5011 for (vaddr = vstart; (vaddr < vend) && ((vaddr + sizeof(pmap_ledger_t)) <= vend); vaddr += sizeof(pmap_ledger_t)) {
5012 pmap_ledger_t *free_ledger;
5013
5014 index = pmap_ledger_ptr_array_free_index++;
5015
5016 if (index >= MAX_PMAP_LEDGERS) {
5017 panic("%s: pmap_ledger_ptr_array is full, index=%llu",
5018 __func__, index);
5019 }
5020
5021 free_ledger = (pmap_ledger_t*)vaddr;
5022
5023 pmap_ledger_ptr_array[index] = free_ledger;
5024 free_ledger->back_ptr = &pmap_ledger_ptr_array[index];
5025
5026 free_ledger->next = pmap_ledger_free_list;
5027 pmap_ledger_free_list = free_ledger;
5028 }
5029
5030 pa_set_range_xprr_perm(paddr, paddr + PAGE_SIZE, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
5031 }
5032
5033 new_ledger = (ledger_t)pmap_ledger_free_list;
5034 pmap_ledger_free_list = pmap_ledger_free_list->next;
5035
5036 array_index = pmap_ledger_validate(new_ledger);
5037 os_ref_init(&pmap_ledger_refcnt[array_index], NULL);
5038
5039 pmap_simple_unlock(&pmap_ledger_lock);
5040
5041 return new_ledger;
5042 }
5043
5044 MARK_AS_PMAP_TEXT static void
5045 pmap_ledger_free_internal(ledger_t ledger)
5046 {
5047 pmap_ledger_t* free_ledger;
5048
5049 free_ledger = (pmap_ledger_t*)ledger;
5050
5051 pmap_simple_lock(&pmap_ledger_lock);
5052 uint64_t array_index = pmap_ledger_validate(ledger);
5053
5054 if (os_ref_release(&pmap_ledger_refcnt[array_index]) != 0) {
5055 panic("%s: ledger still referenced, "
5056 "ledger=%p",
5057 __func__,
5058 ledger);
5059 }
5060
5061 free_ledger->next = pmap_ledger_free_list;
5062 pmap_ledger_free_list = free_ledger;
5063 pmap_simple_unlock(&pmap_ledger_lock);
5064 }
5065
5066
5067 static void
5068 pmap_ledger_retain(ledger_t ledger)
5069 {
5070 pmap_simple_lock(&pmap_ledger_lock);
5071 uint64_t array_index = pmap_ledger_validate(ledger);
5072 os_ref_retain(&pmap_ledger_refcnt[array_index]);
5073 pmap_simple_unlock(&pmap_ledger_lock);
5074 }
5075
5076 static void
5077 pmap_ledger_release(ledger_t ledger)
5078 {
5079 pmap_simple_lock(&pmap_ledger_lock);
5080 uint64_t array_index = pmap_ledger_validate(ledger);
5081 os_ref_release_live(&pmap_ledger_refcnt[array_index]);
5082 pmap_simple_unlock(&pmap_ledger_lock);
5083 }
5084
5085 void
5086 pmap_ledger_alloc_init(size_t size)
5087 {
5088 pmap_ledger_alloc_init_ppl(size);
5089 }
5090
5091 ledger_t
5092 pmap_ledger_alloc(void)
5093 {
5094 ledger_t retval = NULL;
5095
5096 while ((retval = pmap_ledger_alloc_ppl()) == NULL) {
5097 pmap_alloc_page_for_ppl();
5098 }
5099
5100 return retval;
5101 }
5102
5103 void
5104 pmap_ledger_free(ledger_t ledger)
5105 {
5106 pmap_ledger_free_ppl(ledger);
5107 }
5108 #else /* XNU_MONITOR */
5109 __dead2
5110 void
5111 pmap_ledger_alloc_init(size_t size)
5112 {
5113 panic("%s: unsupported, "
5114 "size=%lu",
5115 __func__, size);
5116 }
5117
5118 __dead2
5119 ledger_t
5120 pmap_ledger_alloc(void)
5121 {
5122 panic("%s: unsupported",
5123 __func__);
5124 }
5125
5126 __dead2
5127 void
5128 pmap_ledger_free(ledger_t ledger)
5129 {
5130 panic("%s: unsupported, "
5131 "ledger=%p",
5132 __func__, ledger);
5133 }
5134 #endif /* XNU_MONITOR */
5135
5136 /*
5137 * Create and return a physical map.
5138 *
5139 * If the size specified for the map
5140 * is zero, the map is an actual physical
5141 * map, and may be referenced by the
5142 * hardware.
5143 *
5144 * If the size specified is non-zero,
5145 * the map will be used in software only, and
5146 * is bounded by that size.
5147 */
5148 MARK_AS_PMAP_TEXT static pmap_t
5149 pmap_create_options_internal(
5150 ledger_t ledger,
5151 vm_map_size_t size,
5152 unsigned int flags)
5153 {
5154 unsigned i;
5155 unsigned tte_index_max;
5156 pmap_t p;
5157 bool is_64bit = flags & PMAP_CREATE_64BIT;
5158 #if defined(HAS_APPLE_PAC)
5159 bool disable_jop = flags & PMAP_CREATE_DISABLE_JOP;
5160 #endif /* defined(HAS_APPLE_PAC) */
5161
5162 /*
5163 * A software use-only map doesn't even need a pmap.
5164 */
5165 if (size != 0) {
5166 return PMAP_NULL;
5167 }
5168
5169 #if XNU_MONITOR
5170 if ((p = pmap_alloc_pmap()) == PMAP_NULL) {
5171 return PMAP_NULL;
5172 }
5173
5174 if (ledger) {
5175 pmap_ledger_validate(ledger);
5176 pmap_ledger_retain(ledger);
5177 }
5178 #else
5179 /*
5180 * Allocate a pmap struct from the pmap_zone. Then allocate
5181 * the translation table of the right size for the pmap.
5182 */
5183 if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
5184 return PMAP_NULL;
5185 }
5186 #endif
5187
5188 p->ledger = ledger;
5189
5190 if (flags & PMAP_CREATE_64BIT) {
5191 p->min = MACH_VM_MIN_ADDRESS;
5192 p->max = MACH_VM_MAX_ADDRESS;
5193 } else {
5194 p->min = VM_MIN_ADDRESS;
5195 p->max = VM_MAX_ADDRESS;
5196 }
5197
5198 #if defined(HAS_APPLE_PAC)
5199 p->disable_jop = disable_jop;
5200 #endif /* defined(HAS_APPLE_PAC) */
5201
5202 p->nested_region_true_start = 0;
5203 p->nested_region_true_end = ~0;
5204
5205 os_atomic_init(&p->ref_count, 1);
5206 p->gc_status = 0;
5207 p->stamp = os_atomic_inc(&pmap_stamp, relaxed);
5208 p->nx_enabled = TRUE;
5209 p->is_64bit = is_64bit;
5210 p->nested = FALSE;
5211 p->nested_pmap = PMAP_NULL;
5212
5213 #if ARM_PARAMETERIZED_PMAP
5214 p->pmap_pt_attr = native_pt_attr;
5215 #endif /* ARM_PARAMETERIZED_PMAP */
5216
5217 if (!pmap_get_pt_ops(p)->alloc_id(p)) {
5218 goto id_alloc_fail;
5219 }
5220
5221
5222
5223 PMAP_LOCK_INIT(p);
5224 memset((void *) &p->stats, 0, sizeof(p->stats));
5225
5226 p->tt_entry_free = (tt_entry_t *)0;
5227 tte_index_max = PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t);
5228
5229 #if (__ARM_VMSA__ == 7)
5230 p->tte_index_max = tte_index_max;
5231 #endif
5232
5233 #if XNU_MONITOR
5234 p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, PMAP_TT_ALLOCATE_NOWAIT);
5235 #else
5236 p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
5237 #endif
5238 if (!(p->tte)) {
5239 goto tt1_alloc_fail;
5240 }
5241
5242 p->ttep = ml_static_vtop((vm_offset_t)p->tte);
5243 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);
5244
5245 /* nullify the translation table */
5246 for (i = 0; i < tte_index_max; i++) {
5247 p->tte[i] = ARM_TTE_TYPE_FAULT;
5248 }
5249
5250 FLUSH_PTE_RANGE(p->tte, p->tte + tte_index_max);
5251
5252 /*
5253 * initialize the rest of the structure
5254 */
5255 p->nested_region_grand_addr = 0x0ULL;
5256 p->nested_region_subord_addr = 0x0ULL;
5257 p->nested_region_size = 0x0ULL;
5258 p->nested_region_asid_bitmap = NULL;
5259 p->nested_region_asid_bitmap_size = 0x0UL;
5260
5261 p->nested_has_no_bounds_ref = false;
5262 p->nested_no_bounds_refcnt = 0;
5263 p->nested_bounds_set = false;
5264
5265
5266 #if MACH_ASSERT
5267 p->pmap_stats_assert = TRUE;
5268 p->pmap_pid = 0;
5269 strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
5270 #endif /* MACH_ASSERT */
5271 #if DEVELOPMENT || DEBUG
5272 p->footprint_was_suspended = FALSE;
5273 #endif /* DEVELOPMENT || DEBUG */
5274
5275 pmap_simple_lock(&pmaps_lock);
5276 queue_enter(&map_pmap_list, p, pmap_t, pmaps);
5277 pmap_simple_unlock(&pmaps_lock);
5278
5279 return p;
5280
5281 tt1_alloc_fail:
5282 pmap_get_pt_ops(p)->free_id(p);
5283 id_alloc_fail:
5284 #if XNU_MONITOR
5285 pmap_free_pmap(p);
5286
5287 if (ledger) {
5288 pmap_ledger_release(ledger);
5289 }
5290 #else
5291 zfree(pmap_zone, p);
5292 #endif
5293 return PMAP_NULL;
5294 }
5295
5296 pmap_t
5297 pmap_create_options(
5298 ledger_t ledger,
5299 vm_map_size_t size,
5300 unsigned int flags)
5301 {
5302 pmap_t pmap;
5303
5304 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);
5305
5306 ledger_reference(ledger);
5307
5308 #if XNU_MONITOR
5309 /*
5310 * TODO: It should be valid for pmap_create_options_internal to fail; we could
5311 * be out of ASIDs.
5312 */
5313 while ((pmap = pmap_create_options_ppl(ledger, size, flags)) == PMAP_NULL) {
5314 pmap_alloc_page_for_ppl();
5315 }
5316 #else
5317 pmap = pmap_create_options_internal(ledger, size, flags);
5318 #endif
5319
5320 if (pmap == PMAP_NULL) {
5321 ledger_dereference(ledger);
5322 }
5323
5324 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
5325
5326 return pmap;
5327 }
5328
5329 #if XNU_MONITOR
5330 /*
5331 * This symbol remains in place when the PPL is enabled so that the dispatch
5332 * table does not change from development to release configurations.
5333 */
5334 #endif
5335 #if MACH_ASSERT || XNU_MONITOR
5336 MARK_AS_PMAP_TEXT static void
5337 pmap_set_process_internal(
5338 __unused pmap_t pmap,
5339 __unused int pid,
5340 __unused char *procname)
5341 {
5342 #if MACH_ASSERT
5343 if (pmap == NULL) {
5344 return;
5345 }
5346
5347 VALIDATE_PMAP(pmap);
5348
5349 pmap->pmap_pid = pid;
5350 strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
5351 if (pmap_ledgers_panic_leeway) {
5352 /*
5353 * XXX FBDP
5354 * Some processes somehow trigger some issues that make
5355 * the pmap stats and ledgers go off track, causing
5356 * some assertion failures and ledger panics.
5357 * Turn off the sanity checks if we allow some ledger leeway
5358 * because of that. We'll still do a final check in
5359 * pmap_check_ledgers() for discrepancies larger than the
5360 * allowed leeway after the address space has been fully
5361 * cleaned up.
5362 */
5363 pmap->pmap_stats_assert = FALSE;
5364 ledger_disable_panic_on_negative(pmap->ledger,
5365 task_ledgers.phys_footprint);
5366 ledger_disable_panic_on_negative(pmap->ledger,
5367 task_ledgers.internal);
5368 ledger_disable_panic_on_negative(pmap->ledger,
5369 task_ledgers.internal_compressed);
5370 ledger_disable_panic_on_negative(pmap->ledger,
5371 task_ledgers.iokit_mapped);
5372 ledger_disable_panic_on_negative(pmap->ledger,
5373 task_ledgers.alternate_accounting);
5374 ledger_disable_panic_on_negative(pmap->ledger,
5375 task_ledgers.alternate_accounting_compressed);
5376 }
5377 #endif /* MACH_ASSERT */
5378 }
5379 #endif /* MACH_ASSERT || XNU_MONITOR */
5380
5381 #if MACH_ASSERT
5382 void
5383 pmap_set_process(
5384 pmap_t pmap,
5385 int pid,
5386 char *procname)
5387 {
5388 #if XNU_MONITOR
5389 pmap_set_process_ppl(pmap, pid, procname);
5390 #else
5391 pmap_set_process_internal(pmap, pid, procname);
5392 #endif
5393 }
5394 #endif /* MACH_ASSERT */
5395
5396 /*
5397 * We maintain stats and ledgers so that a task's physical footprint is:
5398 * phys_footprint = ((internal - alternate_accounting)
5399 * + (internal_compressed - alternate_accounting_compressed)
5400 * + iokit_mapped
5401 * + purgeable_nonvolatile
5402 * + purgeable_nonvolatile_compressed
5403 * + page_table)
5404 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
5405 */
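/*
 * Worked example with hypothetical ledger values (illustrative only):
 * internal = 100, alternate_accounting = 10, internal_compressed = 40,
 * alternate_accounting_compressed = 5, iokit_mapped = 10,
 * purgeable_nonvolatile = 8, purgeable_nonvolatile_compressed = 2,
 * page_table = 4 (all in pages) gives
 *     phys_footprint = (100 - 10) + (40 - 5) + 10 + 8 + 2 + 4 = 149 pages.
 */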
5406
5407
5408 /*
5409 * Retire the given physical map from service.
5410 * Should only be called if the map contains
5411 * no valid mappings.
5412 */
5413 MARK_AS_PMAP_TEXT static void
5414 pmap_destroy_internal(
5415 pmap_t pmap)
5416 {
5417 if (pmap == PMAP_NULL) {
5418 return;
5419 }
5420
5421 VALIDATE_PMAP(pmap);
5422
5423 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5424
5425 int32_t ref_count = os_atomic_dec(&pmap->ref_count, relaxed);
5426 if (ref_count > 0) {
5427 return;
5428 } else if (ref_count < 0) {
5429 panic("pmap %p: refcount underflow", pmap);
5430 } else if (pmap == kernel_pmap) {
5431 panic("pmap %p: attempt to destroy kernel pmap", pmap);
5432 }
5433
5434 pt_entry_t *ttep;
5435
5436 #if (__ARM_VMSA__ > 7)
5437 pmap_unmap_sharedpage(pmap);
5438 #endif /* (__ARM_VMSA__ > 7) */
5439
5440 pmap_simple_lock(&pmaps_lock);
5441 while (pmap->gc_status & PMAP_GC_INFLIGHT) {
5442 pmap->gc_status |= PMAP_GC_WAIT;
5443 assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
5444 pmap_simple_unlock(&pmaps_lock);
5445 (void) thread_block(THREAD_CONTINUE_NULL);
5446 pmap_simple_lock(&pmaps_lock);
5447 }
5448 queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
5449 pmap_simple_unlock(&pmaps_lock);
5450
5451 pmap_trim_self(pmap);
5452
5453 /*
5454 * Free the memory maps, then the
5455 * pmap structure.
5456 */
5457 #if (__ARM_VMSA__ == 7)
5458 unsigned int i = 0;
5459
5460 PMAP_LOCK(pmap);
5461 for (i = 0; i < pmap->tte_index_max; i++) {
5462 ttep = &pmap->tte[i];
5463 if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5464 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
5465 }
5466 }
5467 PMAP_UNLOCK(pmap);
5468 #else /* (__ARM_VMSA__ == 7) */
5469 vm_map_address_t c;
5470 unsigned int level;
5471
5472 for (level = pt_attr->pta_max_level - 1; level >= pt_attr->pta_root_level; level--) {
5473 for (c = pmap->min; c < pmap->max; c += pt_attr_ln_size(pt_attr, level)) {
5474 ttep = pmap_ttne(pmap, level, c);
5475
5476 if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5477 PMAP_LOCK(pmap);
5478 pmap_tte_deallocate(pmap, ttep, level);
5479 PMAP_UNLOCK(pmap);
5480 }
5481 }
5482 }
5483 #endif /* (__ARM_VMSA__ == 7) */
5484
5485
5486
5487 if (pmap->tte) {
5488 #if (__ARM_VMSA__ == 7)
5489 pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
5490 pmap->tte_index_max = 0;
5491 #else /* (__ARM_VMSA__ == 7) */
5492 pmap_tt1_deallocate(pmap, pmap->tte, PMAP_ROOT_ALLOC_SIZE, 0);
5493 #endif /* (__ARM_VMSA__ == 7) */
5494 pmap->tte = (tt_entry_t *) NULL;
5495 pmap->ttep = 0;
5496 }
5497
5498 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
5499
5500 pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
5501 sync_tlb_flush();
5502
5503 /* return its asid to the pool */
5504 pmap_get_pt_ops(pmap)->free_id(pmap);
5505 pmap_check_ledgers(pmap);
5506
5507 if (pmap->nested_region_asid_bitmap) {
5508 #if XNU_MONITOR
5509 pmap_pages_free(kvtophys((vm_offset_t)(pmap->nested_region_asid_bitmap)), PAGE_SIZE);
5510 #else
5511 kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
5512 #endif
5513 }
5514
5515 #if XNU_MONITOR
5516 if (pmap->ledger) {
5517 pmap_ledger_release(pmap->ledger);
5518 }
5519
5520 pmap_free_pmap(pmap);
5521 #else
5522 zfree(pmap_zone, pmap);
5523 #endif
5524 }
5525
5526 void
5527 pmap_destroy(
5528 pmap_t pmap)
5529 {
5530 ledger_t ledger;
5531
5532 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
5533
5534 ledger = pmap->ledger;
5535
5536 #if XNU_MONITOR
5537 pmap_destroy_ppl(pmap);
5538
5539 pmap_check_ledger_fields(ledger);
5540 #else
5541 pmap_destroy_internal(pmap);
5542 #endif
5543
5544 ledger_dereference(ledger);
5545
5546 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
5547 }
5548
5549
5550 /*
5551 * Add a reference to the specified pmap.
5552 */
5553 MARK_AS_PMAP_TEXT static void
5554 pmap_reference_internal(
5555 pmap_t pmap)
5556 {
5557 if (pmap != PMAP_NULL) {
5558 VALIDATE_PMAP(pmap);
5559 os_atomic_inc(&pmap->ref_count, relaxed);
5560 }
5561 }
5562
5563 void
5564 pmap_reference(
5565 pmap_t pmap)
5566 {
5567 #if XNU_MONITOR
5568 pmap_reference_ppl(pmap);
5569 #else
5570 pmap_reference_internal(pmap);
5571 #endif
5572 }
5573
5574 static tt_entry_t *
5575 pmap_tt1_allocate(
5576 pmap_t pmap,
5577 vm_size_t size,
5578 unsigned option)
5579 {
5580 tt_entry_t *tt1 = NULL;
5581 tt_free_entry_t *tt1_free;
5582 pmap_paddr_t pa;
5583 vm_address_t va;
5584 vm_address_t va_end;
5585 kern_return_t ret;
5586
5587 pmap_simple_lock(&tt1_lock);
5588 if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
5589 free_page_size_tt_count--;
5590 tt1 = (tt_entry_t *)free_page_size_tt_list;
5591 free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
5592 } else if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
5593 free_two_page_size_tt_count--;
5594 tt1 = (tt_entry_t *)free_two_page_size_tt_list;
5595 free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
5596 } else if ((size < PAGE_SIZE) && (free_tt_count != 0)) {
5597 free_tt_count--;
5598 tt1 = (tt_entry_t *)free_tt_list;
5599 free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
5600 }
5601
5602 pmap_simple_unlock(&tt1_lock);
5603
5604 if (tt1 != NULL) {
5605 pmap_tt_ledger_credit(pmap, size);
5606 return (tt_entry_t *)tt1;
5607 }
5608
5609 ret = pmap_pages_alloc(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));
5610
5611 if (ret == KERN_RESOURCE_SHORTAGE) {
5612 return (tt_entry_t *)0;
5613 }
5614
5615 #if XNU_MONITOR
5616 assert(pa);
5617 #endif
5618
5619 if (size < PAGE_SIZE) {
5620 va = phystokv(pa) + size;
5621 tt_free_entry_t *local_free_list = (tt_free_entry_t*)va;
5622 tt_free_entry_t *next_free = NULL;
5623 for (va_end = phystokv(pa) + PAGE_SIZE; va < va_end; va = va + size) {
5624 tt1_free = (tt_free_entry_t *)va;
5625 tt1_free->next = next_free;
5626 next_free = tt1_free;
5627 }
5628 pmap_simple_lock(&tt1_lock);
5629 local_free_list->next = free_tt_list;
5630 free_tt_list = next_free;
5631 free_tt_count += ((PAGE_SIZE / size) - 1);
5632 if (free_tt_count > free_tt_max) {
5633 free_tt_max = free_tt_count;
5634 }
5635 pmap_simple_unlock(&tt1_lock);
5636 }
5637
5641 /* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained via the sysctl arm_pt_root_size.
5639 * Depending on the device, this can vary between 512 bytes and 16KB. */
5640 OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
5641 OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
5642 pmap_tt_ledger_credit(pmap, size);
5643
5644 return (tt_entry_t *) phystokv(pa);
5645 }
5646
5647 static void
5648 pmap_tt1_deallocate(
5649 pmap_t pmap,
5650 tt_entry_t *tt,
5651 vm_size_t size,
5652 unsigned option)
5653 {
5654 tt_free_entry_t *tt_entry;
5655
5656 tt_entry = (tt_free_entry_t *)tt;
5657 assert(not_in_kdp);
5658 pmap_simple_lock(&tt1_lock);
5659
5660 if (size < PAGE_SIZE) {
5661 free_tt_count++;
5662 if (free_tt_count > free_tt_max) {
5663 free_tt_max = free_tt_count;
5664 }
5665 tt_entry->next = free_tt_list;
5666 free_tt_list = tt_entry;
5667 }
5668
5669 if (size == PAGE_SIZE) {
5670 free_page_size_tt_count++;
5671 if (free_page_size_tt_count > free_page_size_tt_max) {
5672 free_page_size_tt_max = free_page_size_tt_count;
5673 }
5674 tt_entry->next = free_page_size_tt_list;
5675 free_page_size_tt_list = tt_entry;
5676 }
5677
5678 if (size == 2 * PAGE_SIZE) {
5679 free_two_page_size_tt_count++;
5680 if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
5681 free_two_page_size_tt_max = free_two_page_size_tt_count;
5682 }
5683 tt_entry->next = free_two_page_size_tt_list;
5684 free_two_page_size_tt_list = tt_entry;
5685 }
5686
5687 if (option & PMAP_TT_DEALLOCATE_NOBLOCK) {
5688 pmap_simple_unlock(&tt1_lock);
5689 pmap_tt_ledger_debit(pmap, size);
5690 return;
5691 }
5692
5693 while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
5694 free_page_size_tt_count--;
5695 tt = (tt_entry_t *)free_page_size_tt_list;
5696 free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
5697
5698 pmap_simple_unlock(&tt1_lock);
5699
5700 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);
5701
5702 OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
5703
5704 pmap_simple_lock(&tt1_lock);
5705 }
5706
5707 while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
5708 free_two_page_size_tt_count--;
5709 tt = (tt_entry_t *)free_two_page_size_tt_list;
5710 free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
5711
5712 pmap_simple_unlock(&tt1_lock);
5713
5714 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);
5715
5716 OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
5717
5718 pmap_simple_lock(&tt1_lock);
5719 }
5720 pmap_simple_unlock(&tt1_lock);
5721 pmap_tt_ledger_debit(pmap, size);
5722 }
5723
5724 static kern_return_t
5725 pmap_tt_allocate(
5726 pmap_t pmap,
5727 tt_entry_t **ttp,
5728 unsigned int level,
5729 unsigned int options)
5730 {
5731 pmap_paddr_t pa;
5732 *ttp = NULL;
5733
5734 PMAP_LOCK(pmap);
5735 if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
5736 tt_free_entry_t *tt_free_next;
5737
5738 tt_free_next = ((tt_free_entry_t *)pmap->tt_entry_free)->next;
5739 *ttp = (tt_entry_t *)pmap->tt_entry_free;
5740 pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
5741 }
5742 PMAP_UNLOCK(pmap);
5743
5744 if (*ttp == NULL) {
5745 pt_desc_t *ptdp;
5746
5747 /*
5748 * Allocate a VM page for the level x page table entries.
5749 */
5750 while (pmap_pages_alloc(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
5751 if (options & PMAP_OPTIONS_NOWAIT) {
5752 return KERN_RESOURCE_SHORTAGE;
5753 }
5754 VM_PAGE_WAIT();
5755 }
5756
5757 while ((ptdp = ptd_alloc(pmap, false)) == NULL) {
5758 if (options & PMAP_OPTIONS_NOWAIT) {
5759 pmap_pages_free(pa, PAGE_SIZE);
5760 return KERN_RESOURCE_SHORTAGE;
5761 }
5762 VM_PAGE_WAIT();
5763 }
5764
5765 if (level < PMAP_TT_MAX_LEVEL) {
5766 OSAddAtomic64(1, &alloc_ttepages_count);
5767 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
5768 } else {
5769 OSAddAtomic64(1, &alloc_ptepages_count);
5770 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
5771 }
5772
5773 pmap_tt_ledger_credit(pmap, PAGE_SIZE);
5774
5775 PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);
5776
5777 pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);
5778
5779 __unreachable_ok_push
5780 if (TEST_PAGE_RATIO_4) {
5781 vm_address_t va;
5782 vm_address_t va_end;
5783
5784 PMAP_LOCK(pmap);
5785
5786 for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + ARM_PGBYTES; va < va_end; va = va + ARM_PGBYTES) {
5787 ((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
5788 pmap->tt_entry_free = (tt_entry_t *)va;
5789 }
5790 PMAP_UNLOCK(pmap);
5791 }
5792 __unreachable_ok_pop
5793
5794 *ttp = (tt_entry_t *)phystokv(pa);
5795 }
5796
5797 #if XNU_MONITOR
5798 assert(*ttp);
5799 #endif
5800
5801 return KERN_SUCCESS;
5802 }
5803
5804
5805 static void
5806 pmap_tt_deallocate(
5807 pmap_t pmap,
5808 tt_entry_t *ttp,
5809 unsigned int level)
5810 {
5811 pt_desc_t *ptdp;
5812 unsigned pt_acc_cnt;
5813 unsigned i, max_pt_index = PAGE_RATIO;
5814 vm_offset_t free_page = 0;
5815
5816 PMAP_LOCK(pmap);
5817
5818 ptdp = ptep_get_ptd((vm_offset_t)ttp);
5819
5820 ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].va = (vm_offset_t)-1;
5821
5822 if ((level < PMAP_TT_MAX_LEVEL) && (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt == PT_DESC_REFCOUNT)) {
5823 ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
5824 }
5825
5826 if (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt != 0) {
5827 panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt);
5828 }
5829
5830 ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
5831
5832 for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
5833 pt_acc_cnt += ptdp->ptd_info[i].refcnt;
5834 }
5835
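	/*
	 * Descriptive note (editorial): no sub-block of this page-table page
	 * holds any references.  The code below walks the pmap's tt_entry_free
	 * list to see whether every other chunk of the page is already free;
	 * if so, it unlinks them all and returns the whole page, otherwise it
	 * just pushes this chunk onto the free list.
	 */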
5836 if (pt_acc_cnt == 0) {
5837 tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
5838 unsigned pt_free_entry_cnt = 1;
5839
5840 while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
5841 tt_free_entry_t *tt_free_list_next;
5842
5843 tt_free_list_next = tt_free_list->next;
5844 if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
5845 pt_free_entry_cnt++;
5846 }
5847 tt_free_list = tt_free_list_next;
5848 }
5849 if (pt_free_entry_cnt == max_pt_index) {
5850 tt_free_entry_t *tt_free_list_cur;
5851
5852 free_page = (vm_offset_t)ttp & ~PAGE_MASK;
5853 tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
5854 tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;
5855
5856 while (tt_free_list_cur) {
5857 tt_free_entry_t *tt_free_list_next;
5858
5859 tt_free_list_next = tt_free_list_cur->next;
5860 if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
5861 tt_free_list->next = tt_free_list_next->next;
5862 } else {
5863 tt_free_list = tt_free_list_next;
5864 }
5865 tt_free_list_cur = tt_free_list_next;
5866 }
5867 } else {
5868 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
5869 pmap->tt_entry_free = ttp;
5870 }
5871 } else {
5872 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
5873 pmap->tt_entry_free = ttp;
5874 }
5875
5876 PMAP_UNLOCK(pmap);
5877
5878 if (free_page != 0) {
5879 ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
5880 *(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
5881 pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
5882 if (level < PMAP_TT_MAX_LEVEL) {
5883 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
5884 } else {
5885 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
5886 }
5887 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
5888 pmap_tt_ledger_debit(pmap, PAGE_SIZE);
5889 }
5890 }
5891
5892 static void
5893 pmap_tte_remove(
5894 pmap_t pmap,
5895 tt_entry_t *ttep,
5896 unsigned int level)
5897 {
5898 tt_entry_t tte = *ttep;
5899
5900 if (tte == 0) {
5901 panic("pmap_tte_deallocate(): null tt_entry ttep==%p\n", ttep);
5902 }
5903
5904 if (((level + 1) == PMAP_TT_MAX_LEVEL) && (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt != 0)) {
5905 panic("pmap_tte_deallocate(): pmap=%p ttep=%p ptd=%p refcnt=0x%x \n", pmap, ttep,
5906 tte_get_ptd(tte), (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt));
5907 }
5908
5909 #if (__ARM_VMSA__ == 7)
5910 {
5911 tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
5912 unsigned i;
5913
5914 for (i = 0; i < 4; i++, ttep_4M++) {
5915 *ttep_4M = (tt_entry_t) 0;
5916 }
5917 FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
5918 }
5919 #else
5920 *ttep = (tt_entry_t) 0;
5921 FLUSH_PTE_STRONG(ttep);
5922 #endif
5923 }
5924
5925 static void
5926 pmap_tte_deallocate(
5927 pmap_t pmap,
5928 tt_entry_t *ttep,
5929 unsigned int level)
5930 {
5931 pmap_paddr_t pa;
5932 tt_entry_t tte;
5933
5934 PMAP_ASSERT_LOCKED(pmap);
5935
5936 tte = *ttep;
5937
5938 #if MACH_ASSERT
5939 if (tte_get_ptd(tte)->pmap != pmap) {
5940 panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
5941 tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
5942 }
5943 #endif
5944
5945 pmap_tte_remove(pmap, ttep, level);
5946
5947 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5948 #if MACH_ASSERT
5949 {
5950 pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~ARM_PGMASK));
5951 unsigned i;
5952
5953 for (i = 0; i < (ARM_PGBYTES / sizeof(*pte_p)); i++, pte_p++) {
5954 if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
5955 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
5956 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
5957 } else if (((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT) {
5958 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
5959 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
5960 }
5961 }
5962 }
5963 #endif
5964 PMAP_UNLOCK(pmap);
5965
5966 /* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
5967 * aligned on 1K boundaries. We clear the surrounding "chunk" of 4 TTEs above. */
5968 pa = tte_to_pa(tte) & ~ARM_PGMASK;
5969 pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
5970 PMAP_LOCK(pmap);
5971 }
5972 }
5973
5974 /*
5975 * Remove a range of hardware page-table entries.
5976 * The entries given are the first (inclusive)
5977 * and last (exclusive) entries for the VM pages.
5978 * The virtual address is the va for the first pte.
5979 *
5980 * The pmap must be locked.
5981 * If the pmap is not the kernel pmap, the range must lie
5982 * entirely within one pte-page. This is NOT checked.
5983 * Assumes that the pte-page exists.
5984 *
5985 * Returns the number of PTEs changed, and sets *rmv_cnt
5986 * to the number of SPTEs changed.
5987 */
5988 static int
5989 pmap_remove_range(
5990 pmap_t pmap,
5991 vm_map_address_t va,
5992 pt_entry_t *bpte,
5993 pt_entry_t *epte,
5994 uint32_t *rmv_cnt)
5995 {
5996 bool need_strong_sync = false;
5997 int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
5998 &need_strong_sync, PMAP_OPTIONS_REMOVE);
5999 if (num_changed > 0) {
6000 PMAP_UPDATE_TLBS(pmap, va, va + (PAGE_SIZE * (epte - bpte)), need_strong_sync);
6001 }
6002 return num_changed;
6003 }
6004
6005
6006 #ifdef PVH_FLAG_EXEC
6007
6008 /*
6009 * Update the access protection bits of the physical aperture mapping for a page.
6010 * This is useful, for example, in guaranteeing that a verified executable page
6011 * has no writable mappings anywhere in the system, including the physical
6012 * aperture. flush_tlb_async can be set to true to avoid unnecessary TLB
6013 * synchronization overhead in cases where the call to this function is
6014 * guaranteed to be followed by other TLB operations.
6015 */
6016 static void
6017 pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
6018 {
6019 #if __ARM_PTE_PHYSMAP__
6020 ASSERT_PVH_LOCKED(pai);
6021 vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
6022 pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
6023
6024 pt_entry_t tmplate = *pte_p;
6025 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
6026 return;
6027 }
6028 tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
6029 #if (__ARM_VMSA__ > 7)
6030 if (tmplate & ARM_PTE_HINT_MASK) {
6031 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
6032 __func__, pte_p, (void *)kva, tmplate);
6033 }
6034 #endif
6035 WRITE_PTE_STRONG(pte_p, tmplate);
6036 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
6037 if (!flush_tlb_async) {
6038 sync_tlb_flush();
6039 }
6040 #endif
6041 }
6042
6043 #endif /* defined(PVH_FLAG_EXEC) */
6044
6045 static void
6046 pmap_remove_pv(
6047 pmap_t pmap,
6048 pt_entry_t *cpte,
6049 int pai,
6050 int *num_internal,
6051 int *num_alt_internal,
6052 int *num_reusable,
6053 int *num_external)
6054 {
6055 pv_entry_t **pv_h, **pve_pp;
6056 pv_entry_t *pve_p;
6057
6058 ASSERT_PVH_LOCKED(pai);
6059 pv_h = pai_to_pvh(pai);
6060 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
6061
6062 #if XNU_MONITOR
6063 if (pvh_flags & PVH_FLAG_LOCKDOWN) {
6064 panic("%d is locked down (%#lx), cannot remove", pai, pvh_flags);
6065 }
6066 #endif
6067
6068 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
6069 if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0)) {
6070 panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
6071 }
6072 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
6073 assert(IS_INTERNAL_PAGE(pai));
6074 (*num_internal)++;
6075 (*num_alt_internal)++;
6076 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
6077 } else if (IS_INTERNAL_PAGE(pai)) {
6078 if (IS_REUSABLE_PAGE(pai)) {
6079 (*num_reusable)++;
6080 } else {
6081 (*num_internal)++;
6082 }
6083 } else {
6084 (*num_external)++;
6085 }
6086 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
6087 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
6088 pve_pp = pv_h;
6089 pve_p = pvh_list(pv_h);
6090
6091 while (pve_p != PV_ENTRY_NULL &&
6092 (pve_get_ptep(pve_p) != cpte)) {
6093 pve_pp = pve_link_field(pve_p);
6094 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
6095 }
6096
6097 if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0)) {
6098 panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
6099 }
6100
6101 #if MACH_ASSERT
6102 if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
6103 pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
6104 while (check_pve_p != PV_ENTRY_NULL) {
6105 if (pve_get_ptep(check_pve_p) == cpte) {
6106 panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
6107 __func__, cpte, pmap, pv_h, pve_p, pai);
6108 }
6109 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
6110 }
6111 }
6112 #endif
6113
6114 if (IS_ALTACCT_PAGE(pai, pve_p)) {
6115 assert(IS_INTERNAL_PAGE(pai));
6116 (*num_internal)++;
6117 (*num_alt_internal)++;
6118 CLR_ALTACCT_PAGE(pai, pve_p);
6119 } else if (IS_INTERNAL_PAGE(pai)) {
6120 if (IS_REUSABLE_PAGE(pai)) {
6121 (*num_reusable)++;
6122 } else {
6123 (*num_internal)++;
6124 }
6125 } else {
6126 (*num_external)++;
6127 }
6128
6129 pvh_remove(pv_h, pve_pp, pve_p);
6130 pv_free(pve_p);
6131 if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
6132 pvh_set_flags(pv_h, pvh_flags);
6133 }
6134 } else {
6135 panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
6136 __func__, *pv_h, cpte, pmap, pv_h, pai);
6137 }
6138
6139 #ifdef PVH_FLAG_EXEC
6140 if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
6141 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
6142 }
6143 #endif
6144 }
6145
6146 static int
6147 pmap_remove_range_options(
6148 pmap_t pmap,
6149 vm_map_address_t va,
6150 pt_entry_t *bpte,
6151 pt_entry_t *epte,
6152 uint32_t *rmv_cnt,
6153 bool *need_strong_sync __unused,
6154 int options)
6155 {
6156 pt_entry_t *cpte;
6157 int num_removed, num_unwired;
6158 int num_pte_changed;
6159 int pai = 0;
6160 pmap_paddr_t pa;
6161 int num_external, num_internal, num_reusable;
6162 int num_alt_internal;
6163 uint64_t num_compressed, num_alt_compressed;
6164
6165 PMAP_ASSERT_LOCKED(pmap);
6166
6167 num_removed = 0;
6168 num_unwired = 0;
6169 num_pte_changed = 0;
6170 num_external = 0;
6171 num_internal = 0;
6172 num_reusable = 0;
6173 num_compressed = 0;
6174 num_alt_internal = 0;
6175 num_alt_compressed = 0;
6176
6177 for (cpte = bpte; cpte < epte;
6178 cpte += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
6179 pt_entry_t spte;
6180 boolean_t managed = FALSE;
6181
6182 spte = *cpte;
6183
6184 #if CONFIG_PGTRACE
6185 if (pgtrace_enabled) {
6186 pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
6187 }
6188 #endif
6189
6190 while (!managed) {
6191 if (pmap != kernel_pmap &&
6192 (options & PMAP_OPTIONS_REMOVE) &&
6193 (ARM_PTE_IS_COMPRESSED(spte, cpte))) {
6194 /*
6195 * "pmap" must be locked at this point,
6196 * so this should not race with another
6197 * pmap_remove_range() or pmap_enter().
6198 */
6199
6200 /* one less "compressed"... */
6201 num_compressed++;
6202 if (spte & ARM_PTE_COMPRESSED_ALT) {
6203 /* ... but it used to be "ALTACCT" */
6204 num_alt_compressed++;
6205 }
6206
6207 /* clear marker */
6208 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
6209 /*
6210 * "refcnt" also accounts for
6211 * our "compressed" markers,
6212 * so let's update it here.
6213 */
6214 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
6215 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
6216 }
6217 spte = *cpte;
6218 }
6219 /*
6220 * It may be possible for the pte to transition from managed
6221 * to unmanaged in this timeframe; for now, elide the assert.
6222 * We should break out as a consequence of checking pa_valid.
6223 */
6224 //assert(!ARM_PTE_IS_COMPRESSED(spte));
6225 pa = pte_to_pa(spte);
6226 if (!pa_valid(pa)) {
6227 #if XNU_MONITOR || HAS_MILD_DSB
6228 unsigned int cacheattr = pmap_cache_attributes((ppnum_t)atop(pa));
6229 #endif
6230 #if XNU_MONITOR
6231 if (!pmap_ppl_disable && (cacheattr & PP_ATTR_MONITOR)) {
6232 panic("%s: attempt to remove mapping of PPL-protected I/O address 0x%llx", __func__, (uint64_t)pa);
6233 }
6234 #endif
6235 break;
6236 }
6237 pai = (int)pa_index(pa);
6238 LOCK_PVH(pai);
6239 spte = *cpte;
6240 pa = pte_to_pa(spte);
6241 if (pai == (int)pa_index(pa)) {
6242 managed = TRUE;
6243 break; // Leave pai locked as we will unlock it after we free the PV entry
6244 }
6245 UNLOCK_PVH(pai);
6246 }
6247
6248 if (ARM_PTE_IS_COMPRESSED(*cpte, cpte)) {
6249 /*
6250 * There used to be a valid mapping here but it
6251 * has already been removed when the page was
6252 * sent to the VM compressor, so nothing left to
6253 * remove now...
6254 */
6255 continue;
6256 }
6257
6258 /* remove the translation, do not flush the TLB */
6259 if (*cpte != ARM_PTE_TYPE_FAULT) {
6260 assertf(!ARM_PTE_IS_COMPRESSED(*cpte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
6261 assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
6262 #if MACH_ASSERT
6263 if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
6264 panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
6265 cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
6266 }
6267 #endif
6268 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
6269 num_pte_changed++;
6270 }
6271
6272 if ((spte != ARM_PTE_TYPE_FAULT) &&
6273 (pmap != kernel_pmap)) {
6274 assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
6275 assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
6276 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
6277 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
6278 }
6279 if (rmv_cnt) {
6280 (*rmv_cnt)++;
6281 }
6282 }
6283
6284 if (pte_is_wired(spte)) {
6285 pte_set_wired(cpte, 0);
6286 num_unwired++;
6287 }
6288 /*
6289 * if not managed, we're done
6290 */
6291 if (!managed) {
6292 continue;
6293 }
6294 /*
6295 * find and remove the mapping from the chain for this
6296 * physical address.
6297 */
6298
6299 pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);
6300
6301 UNLOCK_PVH(pai);
6302 num_removed++;
6303 }
6304
6305 /*
6306 * Update the counts
6307 */
6308 OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
6309 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
6310
6311 if (pmap != kernel_pmap) {
6312 /* sanity checks... */
6313 #if MACH_ASSERT
6314 if (pmap->stats.internal < num_internal) {
6315 if ((!pmap_stats_assert ||
6316 !pmap->pmap_stats_assert)) {
6317 printf("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
6318 pmap->pmap_pid,
6319 pmap->pmap_procname,
6320 pmap,
6321 (uint64_t) va,
6322 bpte,
6323 epte,
6324 options,
6325 num_internal,
6326 num_removed,
6327 num_unwired,
6328 num_external,
6329 num_reusable,
6330 num_compressed,
6331 num_alt_internal,
6332 num_alt_compressed,
6333 num_pte_changed,
6334 pmap->stats.internal,
6335 pmap->stats.reusable);
6336 } else {
6337 panic("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
6338 pmap->pmap_pid,
6339 pmap->pmap_procname,
6340 pmap,
6341 (uint64_t) va,
6342 bpte,
6343 epte,
6344 options,
6345 num_internal,
6346 num_removed,
6347 num_unwired,
6348 num_external,
6349 num_reusable,
6350 num_compressed,
6351 num_alt_internal,
6352 num_alt_compressed,
6353 num_pte_changed,
6354 pmap->stats.internal,
6355 pmap->stats.reusable);
6356 }
6357 }
6358 #endif /* MACH_ASSERT */
6359 PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
6360 pmap,
6361 "pmap=%p num_external=%d stats.external=%d",
6362 pmap, num_external, pmap->stats.external);
6363 PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
6364 pmap,
6365 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
6366 pmap,
6367 num_internal, pmap->stats.internal,
6368 num_reusable, pmap->stats.reusable);
6369 PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
6370 pmap,
6371 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
6372 pmap,
6373 num_internal, pmap->stats.internal,
6374 num_reusable, pmap->stats.reusable);
6375 PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
6376 pmap,
6377 "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
6378 pmap, num_compressed, num_alt_compressed,
6379 pmap->stats.compressed);
6380
6381 /* update pmap stats... */
6382 OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
6383 if (num_external) {
6384 OSAddAtomic(-num_external, &pmap->stats.external);
6385 }
6386 if (num_internal) {
6387 OSAddAtomic(-num_internal, &pmap->stats.internal);
6388 }
6389 if (num_reusable) {
6390 OSAddAtomic(-num_reusable, &pmap->stats.reusable);
6391 }
6392 if (num_compressed) {
6393 OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
6394 }
6395 /* ... and ledgers */
6396 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
6397 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
6398 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
6399 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
6400 pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
6401 /* make needed adjustments to phys_footprint */
6402 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
6403 machine_ptob((num_internal -
6404 num_alt_internal) +
6405 (num_compressed -
6406 num_alt_compressed)));
6407 }
6408
6409 /* flush the ptable entries we have written */
6410 if (num_pte_changed > 0) {
6411 FLUSH_PTE_RANGE_STRONG(bpte, epte);
6412 }
6413
6414 return num_pte_changed;
6415 }
6416
6417
6418 /*
6419 * Remove the given range of addresses
6420 * from the specified map.
6421 *
6422 * It is assumed that the start and end are properly
6423 * rounded to the hardware page size.
6424 */
6425 void
6426 pmap_remove(
6427 pmap_t pmap,
6428 vm_map_address_t start,
6429 vm_map_address_t end)
6430 {
6431 pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
6432 }
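/*
 * Illustrative (hypothetical) usage sketch, not taken from this file: both
 * addresses must already be page-aligned, e.g.
 *
 *	pmap_remove(map, trunc_page(addr), round_page(addr + len));
 */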
6433
6434 MARK_AS_PMAP_TEXT static int
6435 pmap_remove_options_internal(
6436 pmap_t pmap,
6437 vm_map_address_t start,
6438 vm_map_address_t end,
6439 int options)
6440 {
6441 int remove_count = 0;
6442 pt_entry_t *bpte, *epte;
6443 pt_entry_t *pte_p;
6444 tt_entry_t *tte_p;
6445 uint32_t rmv_spte = 0;
6446 bool need_strong_sync = false;
6447 bool flush_tte = false;
6448
6449 if (__improbable(end < start)) {
6450 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
6451 }
6452
6453 VALIDATE_PMAP(pmap);
6454
6455 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6456
6457 PMAP_LOCK(pmap);
6458
6459 tte_p = pmap_tte(pmap, start);
6460
6461 if (tte_p == (tt_entry_t *) NULL) {
6462 goto done;
6463 }
6464
6465 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
6466 pte_p = (pt_entry_t *) ttetokv(*tte_p);
6467 bpte = &pte_p[ptenum(start)];
6468 epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));
6469
6470 remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
6471 &rmv_spte, &need_strong_sync, options);
6472
6473 if (rmv_spte && (ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
6474 (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
6475 pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
6476 flush_tte = true;
6477 }
6478 }
6479
6480 done:
6481 PMAP_UNLOCK(pmap);
6482
6483 if (remove_count > 0) {
6484 PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
6485 } else if (flush_tte > 0) {
6486 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(start, pmap);
6487 sync_tlb_flush();
6488 }
6489 return remove_count;
6490 }
6491
6492 void
6493 pmap_remove_options(
6494 pmap_t pmap,
6495 vm_map_address_t start,
6496 vm_map_address_t end,
6497 int options)
6498 {
6499 int remove_count = 0;
6500 vm_map_address_t va;
6501
6502 if (pmap == PMAP_NULL) {
6503 return;
6504 }
6505
6506 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6507
6508 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
6509 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
6510 VM_KERNEL_ADDRHIDE(end));
6511
6512 #if MACH_ASSERT
6513 if ((start | end) & PAGE_MASK) {
6514 panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
6515 pmap, (uint64_t)start, (uint64_t)end);
6516 }
6517 if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
6518 panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
6519 pmap, (uint64_t)start, (uint64_t)end);
6520 }
6521 #endif
6522
6523 /*
6524 * Remove the mappings (and invalidate the TLBs) one twig-sized chunk at a time.
6525 */
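	/*
	 * Illustrative sketch of the chunking arithmetic below, assuming a
	 * hypothetical geometry in which a twig (L2) entry spans 2MB: for
	 * start = 0x12345000, the first chunk ends at
	 * l = (0x12345000 + 0x200000) & ~0x1FFFFF = 0x12400000, so the loop
	 * peels off [0x12345000, 0x12400000) and then continues in full 2MB
	 * strides until it reaches end.
	 */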
6526 va = start;
6527 while (va < end) {
6528 vm_map_address_t l;
6529
6530 l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
6531 if (l > end) {
6532 l = end;
6533 }
6534
6535 #if XNU_MONITOR
6536 remove_count += pmap_remove_options_ppl(pmap, va, l, options);
6537
6538 pmap_ledger_check_balance(pmap);
6539 #else
6540 remove_count += pmap_remove_options_internal(pmap, va, l, options);
6541 #endif
6542
6543 va = l;
6544 }
6545
6546 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
6547 }
6548
6549
6550 /*
6551 * Remove phys addr if mapped in specified map
6552 */
6553 void
6554 pmap_remove_some_phys(
6555 __unused pmap_t map,
6556 __unused ppnum_t pn)
6557 {
6558 /* Implement to support working set code */
6559 }
6560
6561 void
6562 pmap_set_pmap(
6563 pmap_t pmap,
6564 #if !__ARM_USER_PROTECT__
6565 __unused
6566 #endif
6567 thread_t thread)
6568 {
6569 pmap_switch(pmap);
6570 #if __ARM_USER_PROTECT__
6571 if (pmap->tte_index_max == NTTES) {
6572 thread->machine.uptw_ttc = 2;
6573 } else {
6574 thread->machine.uptw_ttc = 1;
6575 }
6576 thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
6577 thread->machine.asid = pmap->hw_asid;
6578 #endif
6579 }
6580
6581 static void
6582 pmap_flush_core_tlb_asid(pmap_t pmap)
6583 {
6584 #if (__ARM_VMSA__ == 7)
6585 flush_core_tlb_asid(pmap->hw_asid);
6586 #else
6587 flush_core_tlb_asid(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
6588 #endif
6589 }
6590
6591 MARK_AS_PMAP_TEXT static void
6592 pmap_switch_internal(
6593 pmap_t pmap)
6594 {
6595 VALIDATE_PMAP(pmap);
6596 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
6597 uint16_t asid_index = pmap->hw_asid;
6598 boolean_t do_asid_flush = FALSE;
6599
6600 #if __ARM_KERNEL_PROTECT__
6601 asid_index >>= 1;
6602 #endif
6603
6604 #if (__ARM_VMSA__ > 7)
6605 pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
6606 #endif
6607
6608 #if MAX_ASID > MAX_HW_ASID
6609 if (asid_index > 0) {
6610 asid_index -= 1;
6611 /* Paranoia. */
6612 assert(asid_index < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));
6613
6614 /* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
6615 uint8_t asid_high_bits = pmap->sw_asid;
6616 uint8_t last_asid_high_bits = cpu_data_ptr->cpu_asid_high_bits[asid_index];
6617
6618 if (asid_high_bits != last_asid_high_bits) {
6619 /*
6620 * If the virtual ASID of the new pmap does not match the virtual ASID
6621 * last seen on this CPU for the physical ASID (that was a mouthful),
6622 * then this switch runs the risk of aliasing. We need to flush the
6623 * TLB for this physical ASID in this case.
6624 */
6625 cpu_data_ptr->cpu_asid_high_bits[asid_index] = asid_high_bits;
6626 do_asid_flush = TRUE;
6627 }
6628 }
6629 #endif /* MAX_ASID > MAX_HW_ASID */
6630
6631 pmap_switch_user_ttb_internal(pmap);
6632
6633 #if (__ARM_VMSA__ > 7)
6634 /* If we're switching to a different nested pmap (i.e. shared region), we'll need
6635 * to flush the userspace mappings for that region. Those mappings are global
6636 * and will not be protected by the ASID. It should also be cheaper to flush the
6637 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
6638 if ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap)) {
6639 flush_core_tlb();
6640 } else
6641 #endif
6642 if (do_asid_flush) {
6643 pmap_flush_core_tlb_asid(pmap);
6644 #if DEVELOPMENT || DEBUG
6645 os_atomic_inc(&pmap_asid_flushes, relaxed);
6646 #endif
6647 }
6648 }
6649
6650 void
6651 pmap_switch(
6652 pmap_t pmap)
6653 {
6654 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
6655 #if XNU_MONITOR
6656 pmap_switch_ppl(pmap);
6657 #else
6658 pmap_switch_internal(pmap);
6659 #endif
6660 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
6661 }
6662
6663 void
6664 pmap_page_protect(
6665 ppnum_t ppnum,
6666 vm_prot_t prot)
6667 {
6668 pmap_page_protect_options(ppnum, prot, 0, NULL);
6669 }
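/*
 * Illustrative (hypothetical) usage sketch: the VM layer typically calls this
 * to revoke access from every existing mapping of a physical page, e.g.
 *
 *	pmap_page_protect(pn, VM_PROT_READ);	// downgrade all mappings to read-only
 *	pmap_page_protect(pn, VM_PROT_NONE);	// or remove every mapping outright
 *
 * VM_PROT_ALL is a no-op, since this routine never raises permissions.
 */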
6670
6671 /*
6672 * Routine: pmap_page_protect_options
6673 *
6674 * Function:
6675 * Lower the permission for all mappings to a given
6676 * page.
6677 */
6678 MARK_AS_PMAP_TEXT static void
6679 pmap_page_protect_options_internal(
6680 ppnum_t ppnum,
6681 vm_prot_t prot,
6682 unsigned int options)
6683 {
6684 pmap_paddr_t phys = ptoa(ppnum);
6685 pv_entry_t **pv_h;
6686 pv_entry_t **pve_pp;
6687 pv_entry_t *pve_p;
6688 pv_entry_t *pveh_p;
6689 pv_entry_t *pvet_p;
6690 pt_entry_t *pte_p;
6691 pv_entry_t *new_pve_p;
6692 pt_entry_t *new_pte_p;
6693 vm_offset_t pvh_flags;
6694 int pai;
6695 boolean_t remove;
6696 boolean_t set_NX;
6697 boolean_t tlb_flush_needed = FALSE;
6698 unsigned int pvh_cnt = 0;
6699
6700 assert(ppnum != vm_page_fictitious_addr);
6701
6702 /* Only work with managed pages. */
6703 if (!pa_valid(phys)) {
6704 return;
6705 }
6706
6707 /*
6708 * Determine the new protection.
6709 */
6710 switch (prot) {
6711 case VM_PROT_ALL:
6712 return; /* nothing to do */
6713 case VM_PROT_READ:
6714 case VM_PROT_READ | VM_PROT_EXECUTE:
6715 remove = FALSE;
6716 break;
6717 default:
6718 remove = TRUE;
6719 break;
6720 }
6721
6722 pai = (int)pa_index(phys);
6723 LOCK_PVH(pai);
6724 pv_h = pai_to_pvh(pai);
6725 pvh_flags = pvh_get_flags(pv_h);
6726
6727 #if XNU_MONITOR
6728 if (remove && (pvh_flags & PVH_FLAG_LOCKDOWN)) {
6729 panic("%d is locked down (%#llx), cannot remove", pai, pvh_get_flags(pv_h));
6730 }
6731 #endif
6732
6733 pte_p = PT_ENTRY_NULL;
6734 pve_p = PV_ENTRY_NULL;
6735 pve_pp = pv_h;
6736 pveh_p = PV_ENTRY_NULL;
6737 pvet_p = PV_ENTRY_NULL;
6738 new_pve_p = PV_ENTRY_NULL;
6739 new_pte_p = PT_ENTRY_NULL;
6740 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
6741 pte_p = pvh_ptep(pv_h);
6742 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
6743 pve_p = pvh_list(pv_h);
6744 pveh_p = pve_p;
6745 }
6746
6747 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
6748 vm_map_address_t va;
6749 pmap_t pmap;
6750 pt_entry_t tmplate;
6751 boolean_t update = FALSE;
6752
6753 if (pve_p != PV_ENTRY_NULL) {
6754 pte_p = pve_get_ptep(pve_p);
6755 }
6756
6757 #ifdef PVH_FLAG_IOMMU
6758 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
6759 #if XNU_MONITOR
6760 if (pvh_flags & PVH_FLAG_LOCKDOWN) {
6761 panic("pmap_page_protect: ppnum 0x%x locked down, cannot be owned by iommu 0x%llx, pve_p=%p",
6762 ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
6763 }
6764 #endif
6765 if (remove) {
6766 if (options & PMAP_OPTIONS_COMPRESSOR) {
6767 panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
6768 ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
6769 }
6770 if (pve_p != PV_ENTRY_NULL) {
6771 pv_entry_t *temp_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
6772 pvh_remove(pv_h, pve_pp, pve_p);
6773 pveh_p = pvh_list(pv_h);
6774 pve_next(pve_p) = new_pve_p;
6775 new_pve_p = pve_p;
6776 pve_p = temp_pve_p;
6777 continue;
6778 } else {
6779 new_pte_p = pte_p;
6780 break;
6781 }
6782 }
6783 goto protect_skip_pve;
6784 }
6785 #endif
6786 pmap = ptep_get_pmap(pte_p);
6787 va = ptep_get_va(pte_p);
6788
6789 if (pte_p == PT_ENTRY_NULL) {
6790 panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
6791 pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
6792 } else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
6793 #if MACH_ASSERT
6794 if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
6795 pv_entry_t *check_pve_p = pveh_p;
6796 while (check_pve_p != PV_ENTRY_NULL) {
6797 if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
6798 panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
6799 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
6800 }
6801 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
6802 }
6803 }
6804 #endif
6805 panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
6806 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
6807 }
6808
6809 #if DEVELOPMENT || DEBUG
6810 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
6811 #else
6812 if ((prot & VM_PROT_EXECUTE))
6813 #endif
6814 { set_NX = FALSE;} else {
6815 set_NX = TRUE;
6816 }
6817
6818 /* Remove the mapping if new protection is NONE */
6819 if (remove) {
6820 boolean_t is_altacct = FALSE;
6821
6822 if (IS_ALTACCT_PAGE(pai, pve_p)) {
6823 is_altacct = TRUE;
6824 } else {
6825 is_altacct = FALSE;
6826 }
6827
6828 if (pte_is_wired(*pte_p)) {
6829 pte_set_wired(pte_p, 0);
6830 if (pmap != kernel_pmap) {
6831 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6832 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
6833 }
6834 }
6835
6836 if (*pte_p != ARM_PTE_TYPE_FAULT &&
6837 pmap != kernel_pmap &&
6838 (options & PMAP_OPTIONS_COMPRESSOR) &&
6839 IS_INTERNAL_PAGE(pai)) {
6840 assert(!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p));
6841 /* mark this PTE as having been "compressed" */
6842 tmplate = ARM_PTE_COMPRESSED;
6843 if (is_altacct) {
6844 tmplate |= ARM_PTE_COMPRESSED_ALT;
6845 is_altacct = TRUE;
6846 }
6847 } else {
6848 tmplate = ARM_PTE_TYPE_FAULT;
6849 }
6850
6851 if ((*pte_p != ARM_PTE_TYPE_FAULT) &&
6852 tmplate == ARM_PTE_TYPE_FAULT &&
6853 (pmap != kernel_pmap)) {
6854 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt)) <= 0) {
6855 panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
6856 }
6857 }
6858
6859 if (*pte_p != tmplate) {
6860 WRITE_PTE_STRONG(pte_p, tmplate);
6861 update = TRUE;
6862 }
6863 pvh_cnt++;
6864 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
6865 OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
6866
6867 #if MACH_ASSERT
6868 /*
6869 * We only ever compress internal pages.
6870 */
6871 if (options & PMAP_OPTIONS_COMPRESSOR) {
6872 assert(IS_INTERNAL_PAGE(pai));
6873 }
6874 #endif
6875
6876 if (pmap != kernel_pmap) {
6877 if (IS_REUSABLE_PAGE(pai) &&
6878 IS_INTERNAL_PAGE(pai) &&
6879 !is_altacct) {
6880 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
6881 OSAddAtomic(-1, &pmap->stats.reusable);
6882 } else if (IS_INTERNAL_PAGE(pai)) {
6883 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
6884 OSAddAtomic(-1, &pmap->stats.internal);
6885 } else {
6886 PMAP_STATS_ASSERTF(pmap->stats.external > 0, pmap, "stats.external %d", pmap->stats.external);
6887 OSAddAtomic(-1, &pmap->stats.external);
6888 }
6889 if ((options & PMAP_OPTIONS_COMPRESSOR) &&
6890 IS_INTERNAL_PAGE(pai)) {
6891 /* adjust "compressed" stats */
6892 OSAddAtomic64(+1, &pmap->stats.compressed);
6893 PMAP_STATS_PEAK(pmap->stats.compressed);
6894 pmap->stats.compressed_lifetime++;
6895 }
6896
6897 if (IS_ALTACCT_PAGE(pai, pve_p)) {
6898 assert(IS_INTERNAL_PAGE(pai));
6899 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
6900 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
6901 if (options & PMAP_OPTIONS_COMPRESSOR) {
6902 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
6903 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
6904 }
6905
6906 /*
6907 * Clean up our marker before
6908 * we free this pv_entry.
6909 */
6910 CLR_ALTACCT_PAGE(pai, pve_p);
6911 } else if (IS_REUSABLE_PAGE(pai)) {
6912 assert(IS_INTERNAL_PAGE(pai));
6913 if (options & PMAP_OPTIONS_COMPRESSOR) {
6914 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
6915 /* was not in footprint, but is now */
6916 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6917 }
6918 } else if (IS_INTERNAL_PAGE(pai)) {
6919 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
6920
6921 /*
6922 * Update all stats related to physical footprint, which only
6923 * deals with internal pages.
6924 */
6925 if (options & PMAP_OPTIONS_COMPRESSOR) {
6926 /*
6927 * This removal is only being done so we can send this page to
6928 * the compressor; therefore it mustn't affect total task footprint.
6929 */
6930 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
6931 } else {
6932 /*
6933 * This internal page isn't going to the compressor, so adjust stats to keep
6934 * phys_footprint up to date.
6935 */
6936 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6937 }
6938 } else {
6939 /* external page: no impact on ledgers */
6940 }
6941 }
6942
6943 if (pve_p != PV_ENTRY_NULL) {
6944 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
6945 }
6946 } else {
6947 pt_entry_t spte;
6948 const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
6949
6950 spte = *pte_p;
6951
6952 if (pmap == kernel_pmap) {
6953 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
6954 } else {
6955 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
6956 }
6957
6958 pte_set_was_writeable(tmplate, false);
6959 /*
6960 * While the naive implementation of this would serve to add execute
6961 * permission, this is not how the VM uses this interface, or how
6962 * x86_64 implements it. So ignore requests to add execute permissions.
6963 */
6964 if (set_NX) {
6965 tmplate |= pt_attr_leaf_xn(pt_attr);
6966 }
6967
6968 #if __APRR_SUPPORTED__
6969 if (__improbable(is_pte_xprr_protected(spte))) {
6970 panic("pmap_page_protect: modifying an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx ppnum: 0x%x",
6971 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)spte, (uint64_t)tmplate, (uint64_t)va, ppnum);
6972 }
6973
6974 if (__improbable(is_pte_xprr_protected(tmplate))) {
6975 panic("pmap_page_protect: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx ppnum: 0x%x",
6976 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)spte, (uint64_t)tmplate, (uint64_t)va, ppnum);
6977 }
6978 #endif /* __APRR_SUPPORTED__*/
6979
6980 if (*pte_p != ARM_PTE_TYPE_FAULT &&
6981 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p) &&
6982 *pte_p != tmplate) {
6983 WRITE_PTE_STRONG(pte_p, tmplate);
6984 update = TRUE;
6985 }
6986 }
6987
6988 /* Invalidate TLBs for all CPUs using it */
6989 if (update) {
6990 tlb_flush_needed = TRUE;
6991 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
6992 }
6993
6994 #ifdef PVH_FLAG_IOMMU
6995 protect_skip_pve:
6996 #endif
6997 pte_p = PT_ENTRY_NULL;
6998 pvet_p = pve_p;
6999 if (pve_p != PV_ENTRY_NULL) {
7000 if (remove) {
7001 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
7002 }
7003 pve_pp = pve_link_field(pve_p);
7004 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
7005 }
7006 }
7007
7008 #ifdef PVH_FLAG_EXEC
7009 if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
7010 pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
7011 }
7012 #endif
7013 if (tlb_flush_needed) {
7014 sync_tlb_flush();
7015 }
7016
7017 /* if we removed a bunch of entries, take care of them now */
7018 if (remove) {
7019 if (new_pve_p != PV_ENTRY_NULL) {
7020 pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
7021 pvh_set_flags(pv_h, pvh_flags);
7022 } else if (new_pte_p != PT_ENTRY_NULL) {
7023 pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
7024 pvh_set_flags(pv_h, pvh_flags);
7025 } else {
7026 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
7027 }
7028 }
7029
7030 UNLOCK_PVH(pai);
7031
7032 if (remove && (pvet_p != PV_ENTRY_NULL)) {
7033 pv_list_free(pveh_p, pvet_p, pvh_cnt);
7034 }
7035 }
7036
7037 void
7038 pmap_page_protect_options(
7039 ppnum_t ppnum,
7040 vm_prot_t prot,
7041 unsigned int options,
7042 __unused void *arg)
7043 {
7044 pmap_paddr_t phys = ptoa(ppnum);
7045
7046 assert(ppnum != vm_page_fictitious_addr);
7047
7048 /* Only work with managed pages. */
7049 if (!pa_valid(phys)) {
7050 return;
7051 }
7052
7053 /*
7054 * Determine the new protection.
7055 */
7056 if (prot == VM_PROT_ALL) {
7057 return; /* nothing to do */
7058 }
7059
7060 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
7061
7062 #if XNU_MONITOR
7063 pmap_page_protect_options_ppl(ppnum, prot, options);
7064 #else
7065 pmap_page_protect_options_internal(ppnum, prot, options);
7066 #endif
7067
7068 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
7069 }
7070
7071 /*
7072 * Indicates if the pmap layer enforces some additional restrictions on the
7073 * given set of protections.
7074 */
7075 bool
7076 pmap_has_prot_policy(__unused vm_prot_t prot)
7077 {
7078 return FALSE;
7079 }
7080
7081 /*
7082 * Set the physical protection on the
7083 * specified range of this map as requested.
7084 * VERY IMPORTANT: Will not increase permissions.
7085 * VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
7086 */
7087 void
7088 pmap_protect(
7089 pmap_t pmap,
7090 vm_map_address_t b,
7091 vm_map_address_t e,
7092 vm_prot_t prot)
7093 {
7094 pmap_protect_options(pmap, b, e, prot, 0, NULL);
7095 }
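/*
 * Illustrative (hypothetical) usage sketch: downgrade a writable range to
 * read-only without touching execute permission:
 *
 *	pmap_protect(map, start, end, VM_PROT_READ);
 *
 * Requests for VM_PROT_READ | VM_PROT_WRITE or VM_PROT_ALL are no-ops, and
 * any other combination is handled as a remove (see pmap_protect_options()).
 */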
7096
7097 MARK_AS_PMAP_TEXT static void
7098 pmap_protect_options_internal(
7099 pmap_t pmap,
7100 vm_map_address_t start,
7101 vm_map_address_t end,
7102 vm_prot_t prot,
7103 unsigned int options,
7104 __unused void *args)
7105 {
7106 const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
7107 tt_entry_t *tte_p;
7108 pt_entry_t *bpte_p, *epte_p;
7109 pt_entry_t *pte_p;
7110 boolean_t set_NX = TRUE;
7111 #if (__ARM_VMSA__ > 7)
7112 boolean_t set_XO = FALSE;
7113 #endif
7114 boolean_t should_have_removed = FALSE;
7115 bool need_strong_sync = false;
7116
7117 if (__improbable(end < start)) {
7118 panic("%s called with bogus range: %p, %p", __func__, (void*)start, (void*)end);
7119 }
7120
7121 #if DEVELOPMENT || DEBUG
7122 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
7123 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
7124 should_have_removed = TRUE;
7125 }
7126 } else
7127 #endif
7128 {
7129 /* Determine the new protection. */
7130 switch (prot) {
7131 #if (__ARM_VMSA__ > 7)
7132 case VM_PROT_EXECUTE:
7133 set_XO = TRUE;
7134 /* fall through */
7135 #endif
7136 case VM_PROT_READ:
7137 case VM_PROT_READ | VM_PROT_EXECUTE:
7138 break;
7139 case VM_PROT_READ | VM_PROT_WRITE:
7140 case VM_PROT_ALL:
7141 return; /* nothing to do */
7142 default:
7143 should_have_removed = TRUE;
7144 }
7145 }
7146
7147 if (should_have_removed) {
7148 panic("%s: should have been a remove operation, "
7149 "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
7150 __FUNCTION__,
7151 pmap, (void *)start, (void *)end, prot, options, args);
7152 }
7153
7154 #if DEVELOPMENT || DEBUG
7155 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
7156 #else
7157 if ((prot & VM_PROT_EXECUTE))
7158 #endif
7159 {
7160 set_NX = FALSE;
7161 } else {
7162 set_NX = TRUE;
7163 }
7164
7165 VALIDATE_PMAP(pmap);
7166 PMAP_LOCK(pmap);
7167 tte_p = pmap_tte(pmap, start);
7168
7169 if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
7170 bpte_p = (pt_entry_t *) ttetokv(*tte_p);
7171 bpte_p = &bpte_p[ptenum(start)];
7172 epte_p = bpte_p + arm_atop(end - start);
7173 pte_p = bpte_p;
7174
7175 for (pte_p = bpte_p;
7176 pte_p < epte_p;
7177 pte_p += PAGE_SIZE / ARM_PGBYTES) {
7178 pt_entry_t spte;
7179 #if DEVELOPMENT || DEBUG
7180 boolean_t force_write = FALSE;
7181 #endif
7182
7183 spte = *pte_p;
7184
7185 if ((spte == ARM_PTE_TYPE_FAULT) ||
7186 ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
7187 continue;
7188 }
7189
7190 pmap_paddr_t pa;
7191 int pai = 0;
7192 boolean_t managed = FALSE;
7193
7194 while (!managed) {
7195 /*
7196 * It may be possible for the pte to transition from managed
7197 * to unmanaged in this timeframe; for now, elide the assert.
7198 * We should break out as a consequence of checking pa_valid.
7199 */
7200 // assert(!ARM_PTE_IS_COMPRESSED(spte));
7201 pa = pte_to_pa(spte);
7202 if (!pa_valid(pa)) {
7203 break;
7204 }
7205 pai = (int)pa_index(pa);
7206 LOCK_PVH(pai);
7207 spte = *pte_p;
7208 pa = pte_to_pa(spte);
7209 if (pai == (int)pa_index(pa)) {
7210 managed = TRUE;
7211 break; // Leave the PVH locked as we will unlock it after we update the PTE
7212 }
7213 UNLOCK_PVH(pai);
7214 }
7215
7216 if ((spte == ARM_PTE_TYPE_FAULT) ||
7217 ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
7218 continue;
7219 }
7220
7221 pt_entry_t tmplate;
7222
7223 if (pmap == kernel_pmap) {
7224 #if DEVELOPMENT || DEBUG
7225 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
7226 force_write = TRUE;
7227 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
7228 } else
7229 #endif
7230 {
7231 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
7232 }
7233 } else {
7234 #if DEVELOPMENT || DEBUG
7235 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
7236 force_write = TRUE;
7237 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pt_attr));
7238 } else
7239 #endif
7240 {
7241 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
7242 }
7243 }
7244
7245 /*
7246 * XXX Removing "NX" would
7247 * grant "execute" access
7248 * immediately, bypassing any
7249 * checks VM might want to do
7250 * in its soft fault path.
7251 * pmap_protect() and co. are
7252 * not allowed to increase
7253 * access permissions.
7254 */
7255 if (set_NX) {
7256 tmplate |= pt_attr_leaf_xn(pt_attr);
7257 } else {
7258 #if (__ARM_VMSA__ > 7)
7259 if (pmap == kernel_pmap) {
7260 /* do NOT clear "PNX"! */
7261 tmplate |= ARM_PTE_NX;
7262 } else {
7263 /* do NOT clear "NX"! */
7264 tmplate |= pt_attr_leaf_x(pt_attr);
7265 if (set_XO) {
7266 tmplate &= ~ARM_PTE_APMASK;
7267 tmplate |= pt_attr_leaf_rona(pt_attr);
7268 }
7269 }
7270 #endif
7271 }
7272
7273 #if DEVELOPMENT || DEBUG
7274 if (force_write) {
7275 /*
7276 * TODO: Run CS/Monitor checks here.
7277 */
7278 if (managed) {
7279 /*
7280 * We are marking the page as writable,
7281 * so we consider it to be modified and
7282 * referenced.
7283 */
7284 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
7285 tmplate |= ARM_PTE_AF;
7286
7287 if (IS_REFFAULT_PAGE(pai)) {
7288 CLR_REFFAULT_PAGE(pai);
7289 }
7290
7291 if (IS_MODFAULT_PAGE(pai)) {
7292 CLR_MODFAULT_PAGE(pai);
7293 }
7294 }
7295 } else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
7296 /*
7297 * An immediate request for anything other than
7298 * write should still mark the page as
7299 * referenced if managed.
7300 */
7301 if (managed) {
7302 pa_set_bits(pa, PP_ATTR_REFERENCED);
7303 tmplate |= ARM_PTE_AF;
7304
7305 if (IS_REFFAULT_PAGE(pai)) {
7306 CLR_REFFAULT_PAGE(pai);
7307 }
7308 }
7309 }
7310 #endif
7311
7312 /* We do not expect to write fast fault the entry. */
7313 pte_set_was_writeable(tmplate, false);
7314
7315 #if __APRR_SUPPORTED__
7316 if (__improbable(is_pte_xprr_protected(spte) && (pte_to_xprr_perm(spte) != XPRR_USER_JIT_PERM)
7317 && (pte_to_xprr_perm(spte) != XPRR_USER_XO_PERM))) {
7318 /* Only test for PPL protection here; user JIT mappings may be mutated by this function. */
7319 panic("%s: modifying a PPL mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
7320 __func__, pte_p, pmap, prot, options, (uint64_t)spte, (uint64_t)tmplate);
7321 }
7322
7323 if (__improbable(is_pte_xprr_protected(tmplate) && (pte_to_xprr_perm(tmplate) != XPRR_USER_XO_PERM))) {
7324 panic("%s: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
7325 __func__, pte_p, pmap, prot, options, (uint64_t)spte, (uint64_t)tmplate);
7326 }
7327 #endif /* __APRR_SUPPORTED__*/
7328 WRITE_PTE_FAST(pte_p, tmplate);
7329
7330 if (managed) {
7331 ASSERT_PVH_LOCKED(pai);
7332 UNLOCK_PVH(pai);
7333 }
7334 }
7335 FLUSH_PTE_RANGE_STRONG(bpte_p, epte_p);
7336 PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
7337 }
7338
7339 PMAP_UNLOCK(pmap);
7340 }
7341
7342 void
7343 pmap_protect_options(
7344 pmap_t pmap,
7345 vm_map_address_t b,
7346 vm_map_address_t e,
7347 vm_prot_t prot,
7348 unsigned int options,
7349 __unused void *args)
7350 {
7351 vm_map_address_t l, beg;
7352
7353 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7354
7355 if ((b | e) & PAGE_MASK) {
7356 panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
7357 pmap, (uint64_t)b, (uint64_t)e);
7358 }
7359
7360 #if DEVELOPMENT || DEBUG
7361 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
7362 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
7363 pmap_remove_options(pmap, b, e, options);
7364 return;
7365 }
7366 } else
7367 #endif
7368 {
7369 /* Determine the new protection. */
7370 switch (prot) {
7371 case VM_PROT_EXECUTE:
7372 case VM_PROT_READ:
7373 case VM_PROT_READ | VM_PROT_EXECUTE:
7374 break;
7375 case VM_PROT_READ | VM_PROT_WRITE:
7376 case VM_PROT_ALL:
7377 return; /* nothing to do */
7378 default:
7379 pmap_remove_options(pmap, b, e, options);
7380 return;
7381 }
7382 }
7383
7384 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
7385 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
7386 VM_KERNEL_ADDRHIDE(e));
7387
7388 beg = b;
7389
7390 while (beg < e) {
7391 l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
7392
7393 if (l > e) {
7394 l = e;
7395 }
7396
7397 #if XNU_MONITOR
7398 pmap_protect_options_ppl(pmap, beg, l, prot, options, args);
7399 #else
7400 pmap_protect_options_internal(pmap, beg, l, prot, options, args);
7401 #endif
7402
7403 beg = l;
7404 }
7405
7406 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
7407 }
7408
7409 /* Map a (possibly) autogenerated block of physically contiguous pages */
7410 kern_return_t
7411 pmap_map_block(
7412 pmap_t pmap,
7413 addr64_t va,
7414 ppnum_t pa,
7415 uint32_t size,
7416 vm_prot_t prot,
7417 int attr,
7418 __unused unsigned int flags)
7419 {
7420 kern_return_t kr;
7421 addr64_t original_va = va;
7422 uint32_t page;
7423
7424 for (page = 0; page < size; page++) {
7425 kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
7426
7427 if (kr != KERN_SUCCESS) {
7428 /*
7429 * This will panic for now, as it is unclear that
7430 * removing the mappings is correct.
7431 */
7432 panic("%s: failed pmap_enter, "
7433 "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
7434 __FUNCTION__,
7435 pmap, va, pa, size, prot, flags);
7436
7437 pmap_remove(pmap, original_va, va - original_va);
7438 return kr;
7439 }
7440
7441 va += PAGE_SIZE;
7442 pa++;
7443 }
7444
7445 return KERN_SUCCESS;
7446 }
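/*
 * Illustrative (hypothetical) usage sketch: map 'npages' physically contiguous
 * pages starting at physical page 'pn' into the kernel pmap; every page is
 * entered wired, with 'attr' supplying the WIMG cache attribute:
 *
 *	kr = pmap_map_block(kernel_pmap, va, pn, npages,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_USE_DEFAULT, 0);
 *
 * VM_WIMG_USE_DEFAULT is assumed here purely for illustration.
 */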
7447
7448 /*
7449 * Insert the given physical page (p) at
7450 * the specified virtual address (v) in the
7451 * target physical map with the protection requested.
7452 *
7453 * If specified, the page will be wired down, meaning
7454 * that the related pte cannot be reclaimed.
7455 *
7456 * NB: This is the only routine which MAY NOT lazy-evaluate
7457 * or lose information. That is, this routine must actually
7458 * insert this page into the given map eventually (must make
7459 * forward progress eventually).
7460 */
7461 kern_return_t
7462 pmap_enter(
7463 pmap_t pmap,
7464 vm_map_address_t v,
7465 ppnum_t pn,
7466 vm_prot_t prot,
7467 vm_prot_t fault_type,
7468 unsigned int flags,
7469 boolean_t wired)
7470 {
7471 return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
7472 }
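/*
 * Illustrative (hypothetical) usage sketch: establish a single wired, writable
 * kernel mapping of physical page 'pn' at 'va':
 *
 *	kr = pmap_enter(kernel_pmap, va, pn,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
 *	    VM_WIMG_USE_DEFAULT, TRUE);
 *
 * The VM_PROT_NONE fault_type and TRUE 'wired' argument mirror what
 * pmap_map_block() passes above; VM_WIMG_USE_DEFAULT is assumed for illustration.
 */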
7473
7474
7475 static inline void
7476 pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
7477 {
7478 if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
7479 SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
7480 if (pte & ARM_PTE_WIRED) {
7481 OSAddAtomic16(1, ptd_wiredcnt_ptr);
7482 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
7483 OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
7484 } else {
7485 OSAddAtomic16(-1, ptd_wiredcnt_ptr);
7486 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
7487 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
7488 }
7489 }
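	/*
	 * Descriptive note (editorial): replacing a live (valid, non-compressed)
	 * mapping requires a strong PTE write followed by TLB invalidation; a
	 * previously invalid or compressed entry only needs the plain write plus
	 * an ISB.
	 */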
7490 if (*pte_p != ARM_PTE_TYPE_FAULT &&
7491 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
7492 WRITE_PTE_STRONG(pte_p, pte);
7493 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE, false);
7494 } else {
7495 WRITE_PTE(pte_p, pte);
7496 __builtin_arm_isb(ISB_SY);
7497 }
7498
7499 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + PAGE_SIZE), pte);
7500 }
7501
7502 MARK_AS_PMAP_TEXT static pt_entry_t
7503 wimg_to_pte(unsigned int wimg)
7504 {
7505 pt_entry_t pte;
7506
7507 switch (wimg & (VM_WIMG_MASK)) {
7508 case VM_WIMG_IO:
7509 case VM_WIMG_RT:
7510 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
7511 pte |= ARM_PTE_NX | ARM_PTE_PNX;
7512 break;
7513 case VM_WIMG_POSTED:
7514 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
7515 pte |= ARM_PTE_NX | ARM_PTE_PNX;
7516 break;
7517 case VM_WIMG_POSTED_REORDERED:
7518 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
7519 pte |= ARM_PTE_NX | ARM_PTE_PNX;
7520 break;
7521 case VM_WIMG_POSTED_COMBINED_REORDERED:
7522 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
7523 pte |= ARM_PTE_NX | ARM_PTE_PNX;
7524 break;
7525 case VM_WIMG_WCOMB:
7526 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
7527 pte |= ARM_PTE_NX | ARM_PTE_PNX;
7528 break;
7529 case VM_WIMG_WTHRU:
7530 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
7531 #if (__ARM_VMSA__ > 7)
7532 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
7533 #else
7534 pte |= ARM_PTE_SH;
7535 #endif
7536 break;
7537 case VM_WIMG_COPYBACK:
7538 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
7539 #if (__ARM_VMSA__ > 7)
7540 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
7541 #else
7542 pte |= ARM_PTE_SH;
7543 #endif
7544 break;
7545 case VM_WIMG_INNERWBACK:
7546 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
7547 #if (__ARM_VMSA__ > 7)
7548 pte |= ARM_PTE_SH(SH_INNER_MEMORY);
7549 #else
7550 pte |= ARM_PTE_SH;
7551 #endif
7552 break;
7553 default:
7554 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
7555 #if (__ARM_VMSA__ > 7)
7556 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
7557 #else
7558 pte |= ARM_PTE_SH;
7559 #endif
7560 }
7561
7562 return pte;
7563 }
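
/*
 * For reference, the enter path consumes this helper through the pmap's
 * pt_ops indirection rather than calling it directly. A condensed
 * restatement of that pattern, as used in pmap_enter_options_internal()
 * below, is:
 *
 *	unsigned int wimg_bits = pmap_cache_attributes(pn);
 *
 *	pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
 *	pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
 */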
7564
7565 static boolean_t
7566 pmap_enter_pv(
7567 pmap_t pmap,
7568 pt_entry_t *pte_p,
7569 int pai,
7570 unsigned int options,
7571 pv_entry_t **pve_p,
7572 boolean_t *is_altacct)
7573 {
7574 pv_entry_t **pv_h;
7575 pv_h = pai_to_pvh(pai);
7576 boolean_t first_cpu_mapping;
7577
7578 ASSERT_PVH_LOCKED(pai);
7579
7580 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
7581
7582 #if XNU_MONITOR
7583 if (pvh_flags & PVH_FLAG_LOCKDOWN) {
7584 panic("%d is locked down (%#lx), cannot enter", pai, pvh_flags);
7585 }
7586 #endif
7587
7588 #ifdef PVH_FLAG_CPU
7589 /* An IOMMU mapping may already be present for a page that hasn't yet
7590 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
7591 * if this is the first CPU mapping. We base internal/reusable
7592 * accounting on the options specified for the first CPU mapping.
7593 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
7594 * as there are *any* mappings of the page. The accounting for a
7595 * page should not need to change until the page is recycled by the
7596 * VM layer, and we assert that there are no mappings when a page
7597 * is recycled. An IOMMU mapping of a freed/recycled page is
7598 * considered a security violation and a potential DMA corruption path. */
7599 first_cpu_mapping = ((pmap != NULL) && !(pvh_flags & PVH_FLAG_CPU));
7600 if (first_cpu_mapping) {
7601 pvh_flags |= PVH_FLAG_CPU;
7602 }
7603 #else
7604 first_cpu_mapping = pvh_test_type(pv_h, PVH_TYPE_NULL);
7605 #endif
7606
7607 if (first_cpu_mapping) {
7608 if (options & PMAP_OPTIONS_INTERNAL) {
7609 SET_INTERNAL_PAGE(pai);
7610 } else {
7611 CLR_INTERNAL_PAGE(pai);
7612 }
7613 if ((options & PMAP_OPTIONS_INTERNAL) &&
7614 (options & PMAP_OPTIONS_REUSABLE)) {
7615 SET_REUSABLE_PAGE(pai);
7616 } else {
7617 CLR_REUSABLE_PAGE(pai);
7618 }
7619 }
7620 if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
7621 pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
7622 if (pmap != NULL && pmap != kernel_pmap &&
7623 ((options & PMAP_OPTIONS_ALT_ACCT) ||
7624 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
7625 IS_INTERNAL_PAGE(pai)) {
7626 /*
7627 * Make a note to ourselves that this mapping is using alternative
7628 * accounting. We'll need this in order to know which ledger to
7629 * debit when the mapping is removed.
7630 *
7631 * The altacct bit must be set while the pv head is locked. Defer
7632 * the ledger accounting until after we've dropped the lock.
7633 */
7634 SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
7635 *is_altacct = TRUE;
7636 } else {
7637 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
7638 }
7639 } else {
7640 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7641 pt_entry_t *pte1_p;
7642
7643 /*
7644 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
7645 */
7646 pte1_p = pvh_ptep(pv_h);
7647 pvh_set_flags(pv_h, pvh_flags);
7648 if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
7649 return FALSE;
7650 }
7651
7652 pve_set_ptep(*pve_p, pte1_p);
7653 (*pve_p)->pve_next = PV_ENTRY_NULL;
7654
7655 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
7656 /*
7657 * transfer "altacct" from
7658 * pp_attr to this pve
7659 */
7660 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
7661 SET_ALTACCT_PAGE(pai, *pve_p);
7662 }
7663 pvh_update_head(pv_h, *pve_p, PVH_TYPE_PVEP);
7664 *pve_p = PV_ENTRY_NULL;
7665 } else if (!pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7666 panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
7667 __func__, *pv_h, pte_p, pmap, pv_h);
7668 }
7669 /*
7670 * Set up pv_entry for this new mapping and then
7671 * add it to the list for this physical page.
7672 */
7673 pvh_set_flags(pv_h, pvh_flags);
7674 if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
7675 return FALSE;
7676 }
7677
7678 pve_set_ptep(*pve_p, pte_p);
7679 (*pve_p)->pve_next = PV_ENTRY_NULL;
7680
7681 pvh_add(pv_h, *pve_p);
7682
7683 if (pmap != NULL && pmap != kernel_pmap &&
7684 ((options & PMAP_OPTIONS_ALT_ACCT) ||
7685 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
7686 IS_INTERNAL_PAGE(pai)) {
7687 /*
7688 * Make a note to ourselves that this
7689 * mapping is using alternative
7690 * accounting. We'll need this in order
7691 * to know which ledger to debit when
7692 * the mapping is removed.
7693 *
7694 * The altacct bit must be set while
7695 * the pv head is locked. Defer the
7696 * ledger accounting until after we've
7697 * dropped the lock.
7698 */
7699 SET_ALTACCT_PAGE(pai, *pve_p);
7700 *is_altacct = TRUE;
7701 }
7702
7703 *pve_p = PV_ENTRY_NULL;
7704 }
7705
7706 pvh_set_flags(pv_h, pvh_flags);
7707
7708 return TRUE;
7709 }
7710
7711 MARK_AS_PMAP_TEXT static kern_return_t
7712 pmap_enter_options_internal(
7713 pmap_t pmap,
7714 vm_map_address_t v,
7715 ppnum_t pn,
7716 vm_prot_t prot,
7717 vm_prot_t fault_type,
7718 unsigned int flags,
7719 boolean_t wired,
7720 unsigned int options)
7721 {
7722 pmap_paddr_t pa = ptoa(pn);
7723 pt_entry_t pte;
7724 pt_entry_t spte;
7725 pt_entry_t *pte_p;
7726 pv_entry_t *pve_p;
7727 boolean_t set_NX;
7728 boolean_t set_XO = FALSE;
7729 boolean_t refcnt_updated;
7730 boolean_t wiredcnt_updated;
7731 unsigned int wimg_bits;
7732 boolean_t was_compressed, was_alt_compressed;
7733 kern_return_t kr = KERN_SUCCESS;
7734
7735 VALIDATE_PMAP(pmap);
7736
7737 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7738
7739 if ((v) & PAGE_MASK) {
7740 panic("pmap_enter_options() pmap %p v 0x%llx\n",
7741 pmap, (uint64_t)v);
7742 }
7743
7744 if ((prot & VM_PROT_EXECUTE) && (prot & VM_PROT_WRITE) && (pmap == kernel_pmap)) {
7745 panic("pmap_enter_options(): WX request on kernel_pmap");
7746 }
7747
7748 #if DEVELOPMENT || DEBUG
7749 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
7750 #else
7751 if ((prot & VM_PROT_EXECUTE))
7752 #endif
7753 { set_NX = FALSE; } else {
7754 set_NX = TRUE;
7755 }
7756
7757 #if (__ARM_VMSA__ > 7)
7758 if (prot == VM_PROT_EXECUTE) {
7759 set_XO = TRUE;
7760 }
7761 #endif
7762
7763 assert(pn != vm_page_fictitious_addr);
7764
7765 refcnt_updated = FALSE;
7766 wiredcnt_updated = FALSE;
7767 pve_p = PV_ENTRY_NULL;
7768 was_compressed = FALSE;
7769 was_alt_compressed = FALSE;
7770
7771 PMAP_LOCK(pmap);
7772
7773 /*
7774 * Expand pmap to include this pte. Assume that
7775 * pmap is always expanded to include enough hardware
7776 * pages to map one VM page.
7777 */
7778 while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
7779 /* Must unlock to expand the pmap. */
7780 PMAP_UNLOCK(pmap);
7781
7782 kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);
7783
7784 if (kr != KERN_SUCCESS) {
7785 return kr;
7786 }
7787
7788 PMAP_LOCK(pmap);
7789 }
7790
7791 if (options & PMAP_OPTIONS_NOENTER) {
7792 PMAP_UNLOCK(pmap);
7793 return KERN_SUCCESS;
7794 }
7795
7796 Pmap_enter_retry:
7797
7798 spte = *pte_p;
7799
7800 if (ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
7801 /*
7802 * "pmap" should be locked at this point, so this should
7803 * not race with another pmap_enter() or pmap_remove_range().
7804 */
7805 assert(pmap != kernel_pmap);
7806
7807 /* one less "compressed" */
7808 OSAddAtomic64(-1, &pmap->stats.compressed);
7809 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
7810 PAGE_SIZE);
7811
7812 was_compressed = TRUE;
7813 if (spte & ARM_PTE_COMPRESSED_ALT) {
7814 was_alt_compressed = TRUE;
7815 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
7816 } else {
7817 /* was part of the footprint */
7818 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
7819 }
7820
7821 /* clear "compressed" marker */
7822 /* XXX is it necessary, since we're about to overwrite it? */
7823 WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
7824 spte = ARM_PTE_TYPE_FAULT;
7825
7826 /*
7827 * We're replacing a "compressed" marker with a valid PTE,
7828 * so no change for "refcnt".
7829 */
7830 refcnt_updated = TRUE;
7831 }
7832
7833 if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
7834 pmap_remove_range(pmap, v, pte_p, pte_p + 1, 0);
7835 }
7836
7837 pte = pa_to_pte(pa) | ARM_PTE_TYPE;
7838
7839 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
7840 * wired memory statistics for user pmaps, but kernel PTEs are assumed
7841 * to be wired in nearly all cases. For VM layer functionality, the wired
7842 * count in vm_page_t is sufficient. */
7843 if (wired && pmap != kernel_pmap) {
7844 pte |= ARM_PTE_WIRED;
7845 }
7846
7847 if (set_NX) {
7848 pte |= pt_attr_leaf_xn(pt_attr);
7849 } else {
7850 #if (__ARM_VMSA__ > 7)
7851 if (pmap == kernel_pmap) {
7852 pte |= ARM_PTE_NX;
7853 } else {
7854 pte |= pt_attr_leaf_x(pt_attr);
7855 }
7856 #endif
7857 }
7858
7859 if (pmap == kernel_pmap) {
7860 #if __ARM_KERNEL_PROTECT__
7861 pte |= ARM_PTE_NG;
7862 #endif /* __ARM_KERNEL_PROTECT__ */
7863 if (prot & VM_PROT_WRITE) {
7864 pte |= ARM_PTE_AP(AP_RWNA);
7865 pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
7866 } else {
7867 pte |= ARM_PTE_AP(AP_RONA);
7868 pa_set_bits(pa, PP_ATTR_REFERENCED);
7869 }
7870 #if (__ARM_VMSA__ == 7)
7871 if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH)) {
7872 pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
7873 }
7874 #endif
7875 } else {
7876 if (!pmap->nested) {
7877 pte |= ARM_PTE_NG;
7878 } else if ((pmap->nested_region_asid_bitmap)
7879 && (v >= pmap->nested_region_subord_addr)
7880 && (v < (pmap->nested_region_subord_addr + pmap->nested_region_size))) {
7881 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));
7882
7883 if ((pmap->nested_region_asid_bitmap)
7884 && testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
7885 pte |= ARM_PTE_NG;
7886 }
7887 }
7888 #if MACH_ASSERT
7889 if (pmap->nested_pmap != NULL) {
7890 vm_map_address_t nest_vaddr;
7891 pt_entry_t *nest_pte_p;
7892
7893 nest_vaddr = v - pmap->nested_region_grand_addr + pmap->nested_region_subord_addr;
7894
7895 if ((nest_vaddr >= pmap->nested_region_subord_addr)
7896 && (nest_vaddr < (pmap->nested_region_subord_addr + pmap->nested_region_size))
7897 && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
7898 && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
7899 && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p, nest_pte_p))
7900 && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
7901 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));
7902
7903 if ((pmap->nested_pmap->nested_region_asid_bitmap)
7904 && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
7905 panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
7906 nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
7907 }
7908 }
7909 }
7910 #endif
7911 if (prot & VM_PROT_WRITE) {
7912 if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
7913 if (fault_type & VM_PROT_WRITE) {
7914 if (set_XO) {
7915 pte |= pt_attr_leaf_rwna(pt_attr);
7916 } else {
7917 pte |= pt_attr_leaf_rw(pt_attr);
7918 }
7919 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
7920 } else {
7921 if (set_XO) {
7922 pte |= pt_attr_leaf_rona(pt_attr);
7923 } else {
7924 pte |= pt_attr_leaf_ro(pt_attr);
7925 }
7926 pa_set_bits(pa, PP_ATTR_REFERENCED);
7927 pte_set_was_writeable(pte, true);
7928 }
7929 } else {
7930 if (set_XO) {
7931 pte |= pt_attr_leaf_rwna(pt_attr);
7932 } else {
7933 pte |= pt_attr_leaf_rw(pt_attr);
7934 }
7935 pa_set_bits(pa, PP_ATTR_REFERENCED);
7936 }
7937 } else {
7938 if (set_XO) {
7939 pte |= pt_attr_leaf_rona(pt_attr);
7940 } else {
7941 pte |= pt_attr_leaf_ro(pt_attr);
7942 }
7943 pa_set_bits(pa, PP_ATTR_REFERENCED);
7944 }
7945 }
7946
7947 pte |= ARM_PTE_AF;
7948
7949 volatile uint16_t *refcnt = NULL;
7950 volatile uint16_t *wiredcnt = NULL;
7951 if (pmap != kernel_pmap) {
7952 refcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
7953 wiredcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
7954 /* Bump the wired count to keep the PTE page from being reclaimed. We need this because
7955 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
7956 * a new PV entry. */
7957 if (!wiredcnt_updated) {
7958 OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
7959 wiredcnt_updated = TRUE;
7960 }
7961 if (!refcnt_updated) {
7962 OSAddAtomic16(1, (volatile int16_t*)refcnt);
7963 refcnt_updated = TRUE;
7964 }
7965 }
7966
7967 if (pa_valid(pa)) {
7968 int pai;
7969 boolean_t is_altacct, is_internal;
7970
7971 is_internal = FALSE;
7972 is_altacct = FALSE;
7973
7974 pai = (int)pa_index(pa);
7975
7976 LOCK_PVH(pai);
7977
7978 Pmap_enter_loop:
7979 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
7980 wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
7981 } else {
7982 wimg_bits = pmap_cache_attributes(pn);
7983 }
7984
7985 /* We may be retrying this operation after dropping the PVH lock.
7986 * Cache attributes for the physical page may have changed while the lock
7987 * was dropped, so clear any cache attributes we may have previously set
7988 * in the PTE template. */
7989 pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
7990 pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
7991
7992 #if XNU_MONITOR
7993 /* The regular old kernel is not allowed to remap PPL pages. */
7994 if (pa_test_monitor(pa)) {
7995 panic("%s: page belongs to PPL, "
7996 "pmap=%p, v=0x%llx, pn=%u, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
7997 __FUNCTION__,
7998 pmap, v, pn, prot, fault_type, flags, wired, options);
7999 }
8000
8001 if (pvh_get_flags(pai_to_pvh(pai)) & PVH_FLAG_LOCKDOWN) {
8002 panic("%s: page locked down, "
8003 "pmap=%p, v=0x%llx, pn=%u, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
8004 __FUNCTION__,
8005 pmap, v, pn, prot, fault_type, flags, wired, options);
8006 }
8007 #endif
8008
8009
8010 if (pte == *pte_p) {
8011 /*
8012 * This pmap_enter operation has been completed by another thread;
8013 * undo the refcnt on the page table and return.
8014 */
8015 UNLOCK_PVH(pai);
8016 goto Pmap_enter_cleanup;
8017 } else if (pte_to_pa(*pte_p) == pa) {
8018 pmap_enter_pte(pmap, pte_p, pte, v);
8019 UNLOCK_PVH(pai);
8020 goto Pmap_enter_cleanup;
8021 } else if (*pte_p != ARM_PTE_TYPE_FAULT) {
8022 /*
8023 * The pte has been modified by another thread;
8024 * hold the refcnt on the page table and retry the pmap_enter operation.
8025 */
8026 UNLOCK_PVH(pai);
8027 goto Pmap_enter_retry;
8028 }
8029 if (!pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct)) {
8030 goto Pmap_enter_loop;
8031 }
8032
8033 pmap_enter_pte(pmap, pte_p, pte, v);
8034
8035 if (pmap != kernel_pmap) {
8036 if (IS_REUSABLE_PAGE(pai) &&
8037 !is_altacct) {
8038 assert(IS_INTERNAL_PAGE(pai));
8039 OSAddAtomic(+1, &pmap->stats.reusable);
8040 PMAP_STATS_PEAK(pmap->stats.reusable);
8041 } else if (IS_INTERNAL_PAGE(pai)) {
8042 OSAddAtomic(+1, &pmap->stats.internal);
8043 PMAP_STATS_PEAK(pmap->stats.internal);
8044 is_internal = TRUE;
8045 } else {
8046 OSAddAtomic(+1, &pmap->stats.external);
8047 PMAP_STATS_PEAK(pmap->stats.external);
8048 }
8049 }
8050
8051 UNLOCK_PVH(pai);
8052
8053 if (pmap != kernel_pmap) {
8054 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
8055
8056 if (is_internal) {
8057 /*
8058 * Make corresponding adjustments to
8059 * phys_footprint statistics.
8060 */
8061 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
8062 if (is_altacct) {
8063 /*
8064 * If this page is internal and
8065 * in an IOKit region, credit
8066 * the task's total count of
8067 * dirty, internal IOKit pages.
8068 * It should *not* count towards
8069 * the task's total physical
8070 * memory footprint, because
8071 * this entire region was
8072 * already billed to the task
8073 * at the time the mapping was
8074 * created.
8075 *
8076 * Put another way, this is
8077 * internal++ and
8078 * alternate_accounting++, so
8079 * net effect on phys_footprint
8080 * is 0. That means: don't
8081 * touch phys_footprint here.
8082 */
8083 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
8084 } else {
8085 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
8086 }
8087 }
8088 }
8089
8090 OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
8091 if (pmap->stats.resident_count > pmap->stats.resident_max) {
8092 pmap->stats.resident_max = pmap->stats.resident_count;
8093 }
8094 } else {
8095 if (prot & VM_PROT_EXECUTE) {
8096 kr = KERN_FAILURE;
8097 goto Pmap_enter_cleanup;
8098 }
8099
8100 wimg_bits = pmap_cache_attributes(pn);
8101 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
8102 wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
8103 }
8104
8105 pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
8106
8107 #if XNU_MONITOR
8108 if (!pmap_ppl_disable && (wimg_bits & PP_ATTR_MONITOR)) {
8109 uint64_t xprr_perm = pte_to_xprr_perm(pte);
8110 switch (xprr_perm) {
8111 case XPRR_KERN_RO_PERM:
8112 break;
8113 case XPRR_KERN_RW_PERM:
8114 pte &= ~ARM_PTE_XPRR_MASK;
8115 pte |= xprr_perm_to_pte(XPRR_PPL_RW_PERM);
8116 break;
8117 default:
8118 panic("Unsupported xPRR perm %llu for pte 0x%llx", xprr_perm, (uint64_t)pte);
8119 }
8120 }
8121 #endif
8122 pmap_enter_pte(pmap, pte_p, pte, v);
8123 }
8124
8125 goto Pmap_enter_return;
8126
8127 Pmap_enter_cleanup:
8128
8129 if (refcnt != NULL) {
8130 assert(refcnt_updated);
8131 if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
8132 panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
8133 }
8134 }
8135
8136 Pmap_enter_return:
8137
8138 #if CONFIG_PGTRACE
8139 if (pgtrace_enabled) {
8140 // Clone and invalidate original mapping if eligible
8141 for (int i = 0; i < PAGE_RATIO; i++) {
8142 pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES * i, 0, 0);
8143 }
8144 }
8145 #endif
8146
8147 if (pve_p != PV_ENTRY_NULL) {
8148 pv_free(pve_p);
8149 }
8150
8151 if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
8152 panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
8153 }
8154
8155 PMAP_UNLOCK(pmap);
8156
8157 return kr;
8158 }
8159
8160 kern_return_t
8161 pmap_enter_options(
8162 pmap_t pmap,
8163 vm_map_address_t v,
8164 ppnum_t pn,
8165 vm_prot_t prot,
8166 vm_prot_t fault_type,
8167 unsigned int flags,
8168 boolean_t wired,
8169 unsigned int options,
8170 __unused void *arg)
8171 {
8172 kern_return_t kr = KERN_FAILURE;
8173
8174 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
8175 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);
8176
8177 #if XNU_MONITOR
8178 if (options & PMAP_OPTIONS_NOWAIT) {
8179 /* If NOWAIT was requested, just return the result. */
8180 kr = pmap_enter_options_ppl(pmap, v, pn, prot, fault_type, flags, wired, options);
8181 } else {
8182 /*
8183 * If NOWAIT was not requested, loop until the enter does not
8184 * fail due to lack of resources.
8185 */
8186 while ((kr = pmap_enter_options_ppl(pmap, v, pn, prot, fault_type, flags, wired, options | PMAP_OPTIONS_NOWAIT)) == KERN_RESOURCE_SHORTAGE) {
8187 pv_water_mark_check();
8188 pmap_alloc_page_for_ppl();
8189 }
8190 }
8191
8192 pmap_ledger_check_balance(pmap);
8193 #else
8194 kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);
8195 #endif
8196 pv_water_mark_check();
8197
8198 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
8199
8200 return kr;
8201 }
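
/*
 * Hypothetical caller-side sketch: requesting a non-blocking enter. With
 * PMAP_OPTIONS_NOWAIT, page-table expansion that would otherwise block
 * returns KERN_RESOURCE_SHORTAGE instead, and the caller is expected to
 * retry later. The pmap, VA and page number below are placeholders.
 *
 *	kern_return_t kr;
 *
 *	kr = pmap_enter_options(target_pmap, va, pn,
 *	    VM_PROT_READ, VM_PROT_NONE,
 *	    VM_WIMG_USE_DEFAULT, FALSE,
 *	    PMAP_OPTIONS_NOWAIT, NULL);
 *	if (kr == KERN_RESOURCE_SHORTAGE) {
 *		// could not expand the page tables without waiting; retry later
 *	}
 */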
8202
8203 /*
8204 * Routine: pmap_change_wiring
8205 * Function: Change the wiring attribute for a map/virtual-address
8206 * pair.
8207 * In/out conditions:
8208 * The mapping must already exist in the pmap.
8209 */
8210 MARK_AS_PMAP_TEXT static void
8211 pmap_change_wiring_internal(
8212 pmap_t pmap,
8213 vm_map_address_t v,
8214 boolean_t wired)
8215 {
8216 pt_entry_t *pte_p;
8217 pmap_paddr_t pa;
8218
8219 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
8220 * wired memory statistics for user pmaps, but kernel PTEs are assumed
8221 * to be wired in nearly all cases. For VM layer functionality, the wired
8222 * count in vm_page_t is sufficient. */
8223 if (pmap == kernel_pmap) {
8224 return;
8225 }
8226 VALIDATE_USER_PMAP(pmap);
8227
8228 PMAP_LOCK(pmap);
8229 pte_p = pmap_pte(pmap, v);
8230 assert(pte_p != PT_ENTRY_NULL);
8231 pa = pte_to_pa(*pte_p);
8232
8233 while (pa_valid(pa)) {
8234 pmap_paddr_t new_pa;
8235
8236 LOCK_PVH((int)pa_index(pa));
8237 new_pa = pte_to_pa(*pte_p);
8238
8239 if (pa == new_pa) {
8240 break;
8241 }
8242
8243 UNLOCK_PVH((int)pa_index(pa));
8244 pa = new_pa;
8245 }
8246
8247 if (wired && !pte_is_wired(*pte_p)) {
8248 pte_set_wired(pte_p, wired);
8249 OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
8250 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
8251 } else if (!wired && pte_is_wired(*pte_p)) {
8252 PMAP_STATS_ASSERTF(pmap->stats.wired_count >= 1, pmap, "stats.wired_count %d", pmap->stats.wired_count);
8253 pte_set_wired(pte_p, wired);
8254 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
8255 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
8256 }
8257
8258 if (pa_valid(pa)) {
8259 UNLOCK_PVH((int)pa_index(pa));
8260 }
8261
8262 PMAP_UNLOCK(pmap);
8263 }
8264
8265 void
8266 pmap_change_wiring(
8267 pmap_t pmap,
8268 vm_map_address_t v,
8269 boolean_t wired)
8270 {
8271 #if XNU_MONITOR
8272 pmap_change_wiring_ppl(pmap, v, wired);
8273
8274 pmap_ledger_check_balance(pmap);
8275 #else
8276 pmap_change_wiring_internal(pmap, v, wired);
8277 #endif
8278 }
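
/*
 * Illustrative sketch: wiring and later unwiring an existing user mapping.
 * Per the in/out conditions above, the mapping must already exist; the
 * pmap and address here are placeholders.
 *
 *	pmap_change_wiring(user_pmap, uva, TRUE);   // wire the mapping
 *	...
 *	pmap_change_wiring(user_pmap, uva, FALSE);  // unwire it again
 */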
8279
8280 MARK_AS_PMAP_TEXT static ppnum_t
8281 pmap_find_phys_internal(
8282 pmap_t pmap,
8283 addr64_t va)
8284 {
8285 ppnum_t ppn = 0;
8286
8287 VALIDATE_PMAP(pmap);
8288
8289 if (pmap != kernel_pmap) {
8290 PMAP_LOCK(pmap);
8291 }
8292
8293 ppn = pmap_vtophys(pmap, va);
8294
8295 if (pmap != kernel_pmap) {
8296 PMAP_UNLOCK(pmap);
8297 }
8298
8299 return ppn;
8300 }
8301
8302 ppnum_t
8303 pmap_find_phys(
8304 pmap_t pmap,
8305 addr64_t va)
8306 {
8307 pmap_paddr_t pa = 0;
8308
8309 if (pmap == kernel_pmap) {
8310 pa = mmu_kvtop(va);
8311 } else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
8312 pa = mmu_uvtop(va);
8313 }
8314
8315 if (pa) {
8316 return (ppnum_t)(pa >> PAGE_SHIFT);
8317 }
8318
8319 if (not_in_kdp) {
8320 #if XNU_MONITOR
8321 return pmap_find_phys_ppl(pmap, va);
8322 #else
8323 return pmap_find_phys_internal(pmap, va);
8324 #endif
8325 } else {
8326 return pmap_vtophys(pmap, va);
8327 }
8328 }
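
/*
 * Illustrative sketch: translating a kernel virtual address into a
 * physical page number. A return value of 0 means no valid translation
 * was found; "buf" is a placeholder.
 *
 *	ppnum_t pn = pmap_find_phys(kernel_pmap, (addr64_t)(uintptr_t)buf);
 *	if (pn == 0) {
 *		// no mapping for this address
 *	}
 */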
8329
8330 pmap_paddr_t
8331 kvtophys(
8332 vm_offset_t va)
8333 {
8334 pmap_paddr_t pa;
8335
8336 pa = mmu_kvtop(va);
8337 if (pa) {
8338 return pa;
8339 }
8340 pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va)) << PAGE_SHIFT;
8341 if (pa) {
8342 pa |= (va & PAGE_MASK);
8343 }
8344
8345 return (pmap_paddr_t)pa;
8346 }
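
/*
 * Illustrative sketch: unlike pmap_find_phys(), kvtophys() preserves the
 * page offset, so it is convenient when a byte-accurate physical address
 * is needed; "ptr" is a placeholder.
 *
 *	pmap_paddr_t pa = kvtophys((vm_offset_t)ptr);
 *	if (pa == 0) {
 *		// address is not mapped in the kernel pmap
 *	}
 */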
8347
8348 ppnum_t
8349 pmap_vtophys(
8350 pmap_t pmap,
8351 addr64_t va)
8352 {
8353 if ((va < pmap->min) || (va >= pmap->max)) {
8354 return 0;
8355 }
8356
8357 #if (__ARM_VMSA__ == 7)
8358 tt_entry_t *tte_p, tte;
8359 pt_entry_t *pte_p;
8360 ppnum_t ppn;
8361
8362 tte_p = pmap_tte(pmap, va);
8363 if (tte_p == (tt_entry_t *) NULL) {
8364 return (ppnum_t) 0;
8365 }
8366
8367 tte = *tte_p;
8368 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
8369 pte_p = (pt_entry_t *) ttetokv(tte) + ptenum(va);
8370 ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
8371 #if DEVELOPMENT || DEBUG
8372 if (ppn != 0 &&
8373 ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
8374 panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
8375 pmap, va, pte_p, (uint64_t) (*pte_p), ppn);
8376 }
8377 #endif /* DEVELOPMENT || DEBUG */
8378 } else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
8379 if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER) {
8380 ppn = (ppnum_t) atop(suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK));
8381 } else {
8382 ppn = (ppnum_t) atop(sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK));
8383 }
8384 } else {
8385 ppn = 0;
8386 }
8387 #else
8388 tt_entry_t *ttp;
8389 tt_entry_t tte;
8390 ppnum_t ppn = 0;
8391
8392 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
8393
8394 /* Level 0 currently unused */
8395
8396 /* Get first-level (1GB) entry */
8397 ttp = pmap_tt1e(pmap, va);
8398 tte = *ttp;
8399 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
8400 return ppn;
8401 }
8402
8403 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, va)];
8404
8405 if ((tte & ARM_TTE_VALID) != (ARM_TTE_VALID)) {
8406 return ppn;
8407 }
8408
8409 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
8410 ppn = (ppnum_t) atop((tte & ARM_TTE_BLOCK_L2_MASK) | (va & ARM_TT_L2_OFFMASK));
8411 return ppn;
8412 }
8413 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, va)];
8414 ppn = (ppnum_t) atop((tte & ARM_PTE_MASK) | (va & ARM_TT_L3_OFFMASK));
8415 #endif
8416
8417 return ppn;
8418 }
8419
8420 MARK_AS_PMAP_TEXT static vm_offset_t
8421 pmap_extract_internal(
8422 pmap_t pmap,
8423 vm_map_address_t va)
8424 {
8425 pmap_paddr_t pa = 0;
8426 ppnum_t ppn = 0;
8427
8428 if (pmap == NULL) {
8429 return 0;
8430 }
8431
8432 VALIDATE_PMAP(pmap);
8433
8434 PMAP_LOCK(pmap);
8435
8436 ppn = pmap_vtophys(pmap, va);
8437
8438 if (ppn != 0) {
8439 pa = ptoa(ppn) | ((va) & PAGE_MASK);
8440 }
8441
8442 PMAP_UNLOCK(pmap);
8443
8444 return pa;
8445 }
8446
8447 /*
8448 * Routine: pmap_extract
8449 * Function:
8450 * Extract the physical page address associated
8451 * with the given map/virtual_address pair.
8452 *
8453 */
8454 vm_offset_t
8455 pmap_extract(
8456 pmap_t pmap,
8457 vm_map_address_t va)
8458 {
8459 pmap_paddr_t pa = 0;
8460
8461 if (pmap == kernel_pmap) {
8462 pa = mmu_kvtop(va);
8463 } else if (pmap == vm_map_pmap(current_thread()->map)) {
8464 pa = mmu_uvtop(va);
8465 }
8466
8467 if (pa) {
8468 return pa;
8469 }
8470
8471 #if XNU_MONITOR
8472 return pmap_extract_ppl(pmap, va);
8473 #else
8474 return pmap_extract_internal(pmap, va);
8475 #endif
8476 }
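
/*
 * Illustrative sketch: pmap_extract() returns the full physical address
 * (page frame plus page offset) or 0 if nothing is mapped; the map below
 * is a placeholder.
 *
 *	vm_offset_t pa = pmap_extract(vm_map_pmap(some_map), va);
 *	if (pa == 0) {
 *		// nothing mapped at "va"
 *	}
 */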
8477
8478 /*
8479 * pmap_init_pte_page - Initialize a page table page.
8480 */
8481 void
8482 pmap_init_pte_page(
8483 pmap_t pmap,
8484 pt_entry_t *pte_p,
8485 vm_offset_t va,
8486 unsigned int ttlevel,
8487 boolean_t alloc_ptd,
8488 boolean_t clear)
8489 {
8490 pt_desc_t *ptdp = NULL;
8491 vm_offset_t *pvh;
8492
8493 pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));
8494
8495 if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
8496 if (alloc_ptd) {
8497 /*
8498 * This path should only be invoked from arm_vm_init. If we are emulating 16KB pages
8499 * on 4KB hardware, we may already have allocated a page table descriptor for a
8500 * bootstrap request, so we check for an existing PTD here.
8501 */
8502 ptdp = ptd_alloc(pmap, true);
8503 pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
8504 } else {
8505 panic("pmap_init_pte_page(): pte_p %p", pte_p);
8506 }
8507 } else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
8508 ptdp = (pt_desc_t*)(pvh_list(pvh));
8509 } else {
8510 panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
8511 }
8512
8513 if (clear) {
8514 bzero(pte_p, ARM_PGBYTES);
8515 // The barrier below ensures the page zeroing is visible to the PTW
8516 // before this page is linked into the previous level's table entry.
8517 __builtin_arm_dmb(DMB_ISHST);
8518 }
8519 ptd_init(ptdp, pmap, va, ttlevel, pte_p);
8520 }
8521
8522 /*
8523 * Routine: pmap_expand
8524 *
8525 * Expands a pmap to be able to map the specified virtual address.
8526 *
8527 * Allocates new memory for the default (COARSE) translation table
8528 * entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
8529 * also allocates space for the corresponding pv entries.
8530 *
8531 * Nothing should be locked.
8532 */
8533 static kern_return_t
8534 pmap_expand(
8535 pmap_t pmap,
8536 vm_map_address_t v,
8537 unsigned int options,
8538 unsigned int level)
8539 {
8540 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
8541
8542 #if (__ARM_VMSA__ == 7)
8543 vm_offset_t pa;
8544 tt_entry_t *tte_p;
8545 tt_entry_t *tt_p;
8546 unsigned int i;
8547
8548 #if DEVELOPMENT || DEBUG
8549 /*
8550 * We no longer support root level expansion; panic in case something
8551 * still attempts to trigger it.
8552 */
8553 i = tte_index(pmap, pt_attr, v);
8554
8555 if (i >= pmap->tte_index_max) {
8556 panic("%s: index out of range, index=%u, max=%u, "
8557 "pmap=%p, addr=%p, options=%u, level=%u",
8558 __func__, i, pmap->tte_index_max,
8559 pmap, (void *)v, options, level);
8560 }
8561 #endif /* DEVELOPMENT || DEBUG */
8562
8563 if (level == 1) {
8564 return KERN_SUCCESS;
8565 }
8566
8567 {
8568 tt_entry_t *tte_next_p;
8569
8570 PMAP_LOCK(pmap);
8571 pa = 0;
8572 if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
8573 PMAP_UNLOCK(pmap);
8574 return KERN_SUCCESS;
8575 }
8576 tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
8577 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
8578 if (tte_to_pa(*tte_next_p)) {
8579 pa = tte_to_pa(*tte_next_p);
8580 break;
8581 }
8582 tte_next_p++;
8583 }
8584 pa = pa & ~PAGE_MASK;
8585 if (pa) {
8586 tte_p = &pmap->tte[ttenum(v)];
8587 *tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
8588 FLUSH_PTE(tte_p);
8589 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
8590 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
8591 PMAP_UNLOCK(pmap);
8592 return KERN_SUCCESS;
8593 }
8594 PMAP_UNLOCK(pmap);
8595 }
8596 v = v & ~ARM_TT_L1_PT_OFFMASK;
8597
8598
8599 while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
8600 /*
8601 * Allocate a VM page for the level 2 page table entries.
8602 */
8603 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
8604 if (options & PMAP_OPTIONS_NOWAIT) {
8605 return KERN_RESOURCE_SHORTAGE;
8606 }
8607 VM_PAGE_WAIT();
8608 }
8609
8610 PMAP_LOCK(pmap);
8611 /*
8612 * See if someone else expanded us first
8613 */
8614 if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
8615 tt_entry_t *tte_next_p;
8616
8617 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE, TRUE);
8618 pa = kvtophys((vm_offset_t)tt_p);
8619 tte_p = &pmap->tte[ttenum(v)];
8620 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
8621 *tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
8622 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
8623 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
8624 tte_next_p++;
8625 pa = pa + 0x400;
8626 }
8627 FLUSH_PTE_RANGE(tte_p, tte_p + 4);
8628
8629 pa = 0x0ULL;
8630 tt_p = (tt_entry_t *)NULL;
8631 }
8632 PMAP_UNLOCK(pmap);
8633 if (tt_p != (tt_entry_t *)NULL) {
8634 pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
8635 tt_p = (tt_entry_t *)NULL;
8636 }
8637 }
8638 return KERN_SUCCESS;
8639 #else
8640 pmap_paddr_t pa;
8641 unsigned int ttlevel = pt_attr_root_level(pt_attr);
8642 tt_entry_t *tte_p;
8643 tt_entry_t *tt_p;
8644
8645 pa = 0x0ULL;
8646 tt_p = (tt_entry_t *)NULL;
8647
8648 for (; ttlevel < level; ttlevel++) {
8649 PMAP_LOCK(pmap);
8650
8651 if (pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL) {
8652 PMAP_UNLOCK(pmap);
8653 while (pmap_tt_allocate(pmap, &tt_p, ttlevel + 1, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
8654 if (options & PMAP_OPTIONS_NOWAIT) {
8655 return KERN_RESOURCE_SHORTAGE;
8656 }
8657 #if XNU_MONITOR
8658 panic("%s: failed to allocate tt, "
8659 "pmap=%p, v=%p, options=0x%x, level=%u",
8660 __FUNCTION__,
8661 pmap, (void *)v, options, level);
8662 #else
8663 VM_PAGE_WAIT();
8664 #endif
8665 }
8666 PMAP_LOCK(pmap);
8667 if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
8668 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE, TRUE);
8669 pa = kvtophys((vm_offset_t)tt_p);
8670 tte_p = pmap_ttne(pmap, ttlevel, v);
8671 *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
8672 PMAP_TRACE(ttlevel + 1, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
8673 VM_KERNEL_ADDRHIDE((v & ~pt_attr_ln_offmask(pt_attr, ttlevel)) + pt_attr_ln_size(pt_attr, ttlevel)), *tte_p);
8674 pa = 0x0ULL;
8675 tt_p = (tt_entry_t *)NULL;
8676 }
8677 }
8678
8679 PMAP_UNLOCK(pmap);
8680
8681 if (tt_p != (tt_entry_t *)NULL) {
8682 pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
8683 tt_p = (tt_entry_t *)NULL;
8684 }
8685 }
8686
8687 return KERN_SUCCESS;
8688 #endif
8689 }
8690
8691 /*
8692 * Routine: pmap_collect
8693 * Function:
8694 * Garbage collects the physical map system for
8695 * pages which are no longer used.
8696 * Success need not be guaranteed -- that is, there
8697 * may well be pages which are not referenced, but
8698 * others may be collected.
8699 */
8700 void
8701 pmap_collect(pmap_t pmap)
8702 {
8703 if (pmap == PMAP_NULL) {
8704 return;
8705 }
8706
8707 #if 0
8708 PMAP_LOCK(pmap);
8709 if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
8710 /* TODO: Scan for vm page assigned to top level page tables with no reference */
8711 }
8712 PMAP_UNLOCK(pmap);
8713 #endif
8714
8715 return;
8716 }
8717
8718 /*
8719 * Routine: pmap_gc
8720 * Function:
8721 * Pmap garbage collection
8722 * Called by the pageout daemon when pages are scarce.
8723 *
8724 */
8725 void
8726 pmap_gc(
8727 void)
8728 {
8729 #if XNU_MONITOR
8730 /*
8731 * We cannot invoke the scheduler from the PPL, so for now we elide the
8732 * GC logic if the PPL is enabled.
8733 */
8734 #endif
8735 #if !XNU_MONITOR
8736 pmap_t pmap, pmap_next;
8737 boolean_t gc_wait;
8738
8739 if (pmap_gc_allowed &&
8740 (pmap_gc_allowed_by_time_throttle ||
8741 pmap_gc_forced)) {
8742 pmap_gc_forced = FALSE;
8743 pmap_gc_allowed_by_time_throttle = FALSE;
8744 pmap_simple_lock(&pmaps_lock);
8745 pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
8746 while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
8747 if (!(pmap->gc_status & PMAP_GC_INFLIGHT)) {
8748 pmap->gc_status |= PMAP_GC_INFLIGHT;
8749 }
8750 pmap_simple_unlock(&pmaps_lock);
8751
8752 pmap_collect(pmap);
8753
8754 pmap_simple_lock(&pmaps_lock);
8755 gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
8756 pmap->gc_status &= ~(PMAP_GC_INFLIGHT | PMAP_GC_WAIT);
8757 pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
8758 if (gc_wait) {
8759 if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next)) {
8760 pmap_next->gc_status |= PMAP_GC_INFLIGHT;
8761 }
8762 pmap_simple_unlock(&pmaps_lock);
8763 thread_wakeup((event_t) &pmap->gc_status);
8764 pmap_simple_lock(&pmaps_lock);
8765 }
8766 pmap = pmap_next;
8767 }
8768 pmap_simple_unlock(&pmaps_lock);
8769 }
8770 #endif
8771 }
8772
8773 /*
8774 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
8775 */
8776 uint64_t
8777 pmap_release_pages_fast(void)
8778 {
8779 #if XNU_MONITOR
8780 return pmap_release_ppl_pages_to_kernel();
8781 #else /* XNU_MONITOR */
8782 return 0;
8783 #endif
8784 }
8785
8786 /*
8787 * By default, don't attempt pmap GC more frequently
8788 * than once per minute.
8789 */
8790
8791 void
8792 compute_pmap_gc_throttle(
8793 void *arg __unused)
8794 {
8795 pmap_gc_allowed_by_time_throttle = TRUE;
8796 }
8797
8798 /*
8799 * pmap_attribute_cache_sync(ppnum_t pp, vm_size_t size, ...)
8800 *
8801 * Invalidates all of the instruction cache on a physical page and
8802 * pushes any dirty data from the data cache for the same physical page
8803 */
8804
8805 kern_return_t
8806 pmap_attribute_cache_sync(
8807 ppnum_t pp,
8808 vm_size_t size,
8809 __unused vm_machine_attribute_t attribute,
8810 __unused vm_machine_attribute_val_t * value)
8811 {
8812 if (size > PAGE_SIZE) {
8813 panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
8814 } else {
8815 cache_sync_page(pp);
8816 }
8817
8818 return KERN_SUCCESS;
8819 }
8820
8821 /*
8822 * pmap_sync_page_data_phys(ppnum_t pp)
8823 *
8824 * Invalidates all of the instruction cache on a physical page and
8825 * pushes any dirty data from the data cache for the same physical page
8826 */
8827 void
8828 pmap_sync_page_data_phys(
8829 ppnum_t pp)
8830 {
8831 cache_sync_page(pp);
8832 }
8833
8834 /*
8835 * pmap_sync_page_attributes_phys(ppnum_t pp)
8836 *
8837 * Write back and invalidate all cachelines on a physical page.
8838 */
8839 void
8840 pmap_sync_page_attributes_phys(
8841 ppnum_t pp)
8842 {
8843 flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
8844 }
8845
8846 #if CONFIG_COREDUMP
8847 /* temporary workaround */
8848 boolean_t
8849 coredumpok(
8850 vm_map_t map,
8851 vm_offset_t va)
8852 {
8853 pt_entry_t *pte_p;
8854 pt_entry_t spte;
8855
8856 pte_p = pmap_pte(map->pmap, va);
8857 if (0 == pte_p) {
8858 return FALSE;
8859 }
8860 spte = *pte_p;
8861 return (spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
8862 }
8863 #endif
8864
8865 void
8866 fillPage(
8867 ppnum_t pn,
8868 unsigned int fill)
8869 {
8870 unsigned int *addr;
8871 int count;
8872
8873 addr = (unsigned int *) phystokv(ptoa(pn));
8874 count = PAGE_SIZE / sizeof(unsigned int);
8875 while (count--) {
8876 *addr++ = fill;
8877 }
8878 }
8879
8880 extern void mapping_set_mod(ppnum_t pn);
8881
8882 void
8883 mapping_set_mod(
8884 ppnum_t pn)
8885 {
8886 pmap_set_modify(pn);
8887 }
8888
8889 extern void mapping_set_ref(ppnum_t pn);
8890
8891 void
8892 mapping_set_ref(
8893 ppnum_t pn)
8894 {
8895 pmap_set_reference(pn);
8896 }
8897
8898 /*
8899 * Clear specified attribute bits.
8900 *
8901 * Try to force an arm_fast_fault() for all mappings of
8902 * the page - to force attributes to be set again at fault time.
8903 * If the forcing succeeds, clear the cached bits at the head.
8904 * Otherwise, something must have been wired, so leave the cached
8905 * attributes alone.
8906 */
8907 MARK_AS_PMAP_TEXT static void
8908 phys_attribute_clear_internal(
8909 ppnum_t pn,
8910 unsigned int bits,
8911 int options,
8912 void *arg)
8913 {
8914 pmap_paddr_t pa = ptoa(pn);
8915 vm_prot_t allow_mode = VM_PROT_ALL;
8916
8917 #if XNU_MONITOR
8918 if (bits & PP_ATTR_PPL_OWNED_BITS) {
8919 panic("%s: illegal request, "
8920 "pn=%u, bits=%#x, options=%#x, arg=%p",
8921 __FUNCTION__,
8922 pn, bits, options, arg);
8923 }
8924 #endif
8925
8926 if ((bits & PP_ATTR_MODIFIED) &&
8927 (options & PMAP_OPTIONS_NOFLUSH) &&
8928 (arg == NULL)) {
8929 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
8930 "should not clear 'modified' without flushing TLBs\n",
8931 pn, bits, options, arg);
8932 }
8933
8934 assert(pn != vm_page_fictitious_addr);
8935
8936 if (options & PMAP_OPTIONS_CLEAR_WRITE) {
8937 assert(bits == PP_ATTR_MODIFIED);
8938
8939 pmap_page_protect_options_internal(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0);
8940 /*
8941 * We short circuit this case; it should not need to
8942 * invoke arm_force_fast_fault, so just clear the modified bit.
8943 * pmap_page_protect has taken care of resetting
8944 * the state so that we'll see the next write as a fault to
8945 * the VM (i.e. we don't want a fast fault).
8946 */
8947 pa_clear_bits(pa, bits);
8948 return;
8949 }
8950 if (bits & PP_ATTR_REFERENCED) {
8951 allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
8952 }
8953 if (bits & PP_ATTR_MODIFIED) {
8954 allow_mode &= ~VM_PROT_WRITE;
8955 }
8956
8957 if (bits == PP_ATTR_NOENCRYPT) {
8958 /*
8959 * We short circuit this case; it should not need to
8960 * invoke arm_force_fast_fault, so just clear and
8961 * return. On ARM, this bit is just a debugging aid.
8962 */
8963 pa_clear_bits(pa, bits);
8964 return;
8965 }
8966
8967 if (arm_force_fast_fault_internal(pn, allow_mode, options)) {
8968 pa_clear_bits(pa, bits);
8969 }
8970 return;
8971 }
8972
8973 static void
8974 phys_attribute_clear(
8975 ppnum_t pn,
8976 unsigned int bits,
8977 int options,
8978 void *arg)
8979 {
8980 /*
8981 * Do we really want this tracepoint? It will be extremely chatty.
8982 * Also, should we have a corresponding trace point for the set path?
8983 */
8984 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
8985
8986 #if XNU_MONITOR
8987 phys_attribute_clear_ppl(pn, bits, options, arg);
8988 #else
8989 phys_attribute_clear_internal(pn, bits, options, arg);
8990 #endif
8991
8992 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
8993 }
8994
8995 /*
8996 * Set specified attribute bits.
8997 *
8998 * Set cached value in the pv head because we have
8999 * no per-mapping hardware support for referenced and
9000 * modify bits.
9001 */
9002 MARK_AS_PMAP_TEXT static void
9003 phys_attribute_set_internal(
9004 ppnum_t pn,
9005 unsigned int bits)
9006 {
9007 pmap_paddr_t pa = ptoa(pn);
9008 assert(pn != vm_page_fictitious_addr);
9009
9010 #if XNU_MONITOR
9011 if (bits & PP_ATTR_PPL_OWNED_BITS) {
9012 panic("%s: illegal request, "
9013 "pn=%u, bits=%#x",
9014 __FUNCTION__,
9015 pn, bits);
9016 }
9017 #endif
9018
9019 pa_set_bits(pa, bits);
9020
9021 return;
9022 }
9023
9024 static void
9025 phys_attribute_set(
9026 ppnum_t pn,
9027 unsigned int bits)
9028 {
9029 #if XNU_MONITOR
9030 phys_attribute_set_ppl(pn, bits);
9031 #else
9032 phys_attribute_set_internal(pn, bits);
9033 #endif
9034 }
9035
9036
9037 /*
9038 * Check specified attribute bits.
9039 *
9040 * use the software cached bits (since no hw support).
9041 */
9042 static boolean_t
9043 phys_attribute_test(
9044 ppnum_t pn,
9045 unsigned int bits)
9046 {
9047 pmap_paddr_t pa = ptoa(pn);
9048 assert(pn != vm_page_fictitious_addr);
9049 return pa_test_bits(pa, bits);
9050 }
9051
9052
9053 /*
9054 * Set the modify/reference bits on the specified physical page.
9055 */
9056 void
9057 pmap_set_modify(ppnum_t pn)
9058 {
9059 phys_attribute_set(pn, PP_ATTR_MODIFIED);
9060 }
9061
9062
9063 /*
9064 * Clear the modify bits on the specified physical page.
9065 */
9066 void
9067 pmap_clear_modify(
9068 ppnum_t pn)
9069 {
9070 phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
9071 }
9072
9073
9074 /*
9075 * pmap_is_modified:
9076 *
9077 * Return whether or not the specified physical page is modified
9078 * by any physical maps.
9079 */
9080 boolean_t
9081 pmap_is_modified(
9082 ppnum_t pn)
9083 {
9084 return phys_attribute_test(pn, PP_ATTR_MODIFIED);
9085 }
9086
9087
9088 /*
9089 * Set the reference bit on the specified physical page.
9090 */
9091 static void
9092 pmap_set_reference(
9093 ppnum_t pn)
9094 {
9095 phys_attribute_set(pn, PP_ATTR_REFERENCED);
9096 }
9097
9098 /*
9099 * Clear the reference bits on the specified physical page.
9100 */
9101 void
9102 pmap_clear_reference(
9103 ppnum_t pn)
9104 {
9105 phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
9106 }
9107
9108
9109 /*
9110 * pmap_is_referenced:
9111 *
9112 * Return whether or not the specified physical page is referenced
9113 * by any physical maps.
9114 */
9115 boolean_t
9116 pmap_is_referenced(
9117 ppnum_t pn)
9118 {
9119 return phys_attribute_test(pn, PP_ATTR_REFERENCED);
9120 }
9121
9122 /*
9123 * pmap_get_refmod(phys)
9124 * returns the referenced and modified bits of the specified
9125 * physical page.
9126 */
9127 unsigned int
9128 pmap_get_refmod(
9129 ppnum_t pn)
9130 {
9131 return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
9132 | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
9133 }
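
/*
 * Illustrative sketch: decoding the machine-independent ref/mod status
 * returned above; "pn" is a placeholder page number.
 *
 *	unsigned int refmod = pmap_get_refmod(pn);
 *
 *	if (refmod & VM_MEM_MODIFIED) {
 *		// page is dirty and must be cleaned before reuse
 *	}
 *	if (refmod & VM_MEM_REFERENCED) {
 *		// page has been touched since the bit was last cleared
 *	}
 */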
9134
9135 /*
9136 * pmap_clear_refmod(phys, mask)
9137 * clears the referenced and modified bits as specified by the mask
9138 * of the specified physical page.
9139 */
9140 void
9141 pmap_clear_refmod_options(
9142 ppnum_t pn,
9143 unsigned int mask,
9144 unsigned int options,
9145 void *arg)
9146 {
9147 unsigned int bits;
9148
9149 bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
9150 ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
9151 phys_attribute_clear(pn, bits, options, arg);
9152 }
9153
9154 void
9155 pmap_clear_refmod(
9156 ppnum_t pn,
9157 unsigned int mask)
9158 {
9159 pmap_clear_refmod_options(pn, mask, 0, NULL);
9160 }
9161
9162 unsigned int
9163 pmap_disconnect_options(
9164 ppnum_t pn,
9165 unsigned int options,
9166 void *arg)
9167 {
9168 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
9169 /*
9170 * On ARM, the "modified" bit is managed by software, so
9171 * we know up-front if the physical page is "modified",
9172 * without having to scan all the PTEs pointing to it.
9173 * The caller should have made the VM page "busy" so no one
9174 * should be able to establish any new mapping and "modify"
9175 * the page behind us.
9176 */
9177 if (pmap_is_modified(pn)) {
9178 /*
9179 * The page has been modified and will be sent to
9180 * the VM compressor.
9181 */
9182 options |= PMAP_OPTIONS_COMPRESSOR;
9183 } else {
9184 /*
9185 * The page hasn't been modified and will be freed
9186 * instead of compressed.
9187 */
9188 }
9189 }
9190
9191 /* disconnect the page */
9192 pmap_page_protect_options(pn, 0, options, arg);
9193
9194 /* return ref/chg status */
9195 return pmap_get_refmod(pn);
9196 }
9197
9198 /*
9199 * Routine:
9200 * pmap_disconnect
9201 *
9202 * Function:
9203 * Disconnect all mappings for this page and return reference and change status
9204 * in generic format.
9205 *
9206 */
9207 unsigned int
9208 pmap_disconnect(
9209 ppnum_t pn)
9210 {
9211 pmap_page_protect(pn, 0); /* disconnect the page */
9212 return pmap_get_refmod(pn); /* return ref/chg status */
9213 }
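
/*
 * Illustrative sketch: a pageout-style caller removing all mappings of a
 * page and acting on the returned ref/chg status; "pn" is a placeholder.
 *
 *	unsigned int refmod = pmap_disconnect(pn);
 *
 *	if (refmod & VM_MEM_MODIFIED) {
 *		// contents changed since the last clean; write them back
 *	}
 */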
9214
9215 boolean_t
9216 pmap_has_managed_page(ppnum_t first, ppnum_t last)
9217 {
9218 if (ptoa(first) >= vm_last_phys) {
9219 return FALSE;
9220 }
9221 if (ptoa(last) < vm_first_phys) {
9222 return FALSE;
9223 }
9224
9225 return TRUE;
9226 }
9227
9228 /*
9229 * The state maintained by the noencrypt functions is used as a
9230 * debugging aid on ARM. This incurs some overhead on the part
9231 * of the caller. A special case check in phys_attribute_clear
9232 * (the most expensive path) currently minimizes this overhead,
9233 * but stubbing these functions out on RELEASE kernels yields
9234 * further wins.
9235 */
9236 boolean_t
9237 pmap_is_noencrypt(
9238 ppnum_t pn)
9239 {
9240 #if DEVELOPMENT || DEBUG
9241 boolean_t result = FALSE;
9242
9243 if (!pa_valid(ptoa(pn))) {
9244 return FALSE;
9245 }
9246
9247 result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));
9248
9249 return result;
9250 #else
9251 #pragma unused(pn)
9252 return FALSE;
9253 #endif
9254 }
9255
9256 void
9257 pmap_set_noencrypt(
9258 ppnum_t pn)
9259 {
9260 #if DEVELOPMENT || DEBUG
9261 if (!pa_valid(ptoa(pn))) {
9262 return;
9263 }
9264
9265 phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
9266 #else
9267 #pragma unused(pn)
9268 #endif
9269 }
9270
9271 void
9272 pmap_clear_noencrypt(
9273 ppnum_t pn)
9274 {
9275 #if DEVELOPMENT || DEBUG
9276 if (!pa_valid(ptoa(pn))) {
9277 return;
9278 }
9279
9280 phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
9281 #else
9282 #pragma unused(pn)
9283 #endif
9284 }
9285
9286 #if XNU_MONITOR
9287 boolean_t
9288 pmap_is_monitor(ppnum_t pn)
9289 {
9290 assert(pa_valid(ptoa(pn)));
9291 return phys_attribute_test(pn, PP_ATTR_MONITOR);
9292 }
9293 #endif
9294
9295 void
9296 pmap_lock_phys_page(ppnum_t pn)
9297 {
9298 #if !XNU_MONITOR
9299 int pai;
9300 pmap_paddr_t phys = ptoa(pn);
9301
9302 if (pa_valid(phys)) {
9303 pai = (int)pa_index(phys);
9304 LOCK_PVH(pai);
9305 } else
9306 #else
9307 (void)pn;
9308 #endif
9309 { simple_lock(&phys_backup_lock, LCK_GRP_NULL);}
9310 }
9311
9312
9313 void
9314 pmap_unlock_phys_page(ppnum_t pn)
9315 {
9316 #if !XNU_MONITOR
9317 int pai;
9318 pmap_paddr_t phys = ptoa(pn);
9319
9320 if (pa_valid(phys)) {
9321 pai = (int)pa_index(phys);
9322 UNLOCK_PVH(pai);
9323 } else
9324 #else
9325 (void)pn;
9326 #endif
9327 { simple_unlock(&phys_backup_lock);}
9328 }
9329
9330 MARK_AS_PMAP_TEXT static void
9331 pmap_switch_user_ttb_internal(
9332 pmap_t pmap)
9333 {
9334 VALIDATE_PMAP(pmap);
9335 pmap_cpu_data_t *cpu_data_ptr;
9336 cpu_data_ptr = pmap_get_cpu_data();
9337
9338 #if (__ARM_VMSA__ == 7)
9339 cpu_data_ptr->cpu_user_pmap = pmap;
9340 cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
9341
9342 #if MACH_ASSERT && __ARM_USER_PROTECT__
9343 {
9344 unsigned int ttbr0_val, ttbr1_val;
9345 __asm__ volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
9346 __asm__ volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
9347 if (ttbr0_val != ttbr1_val) {
9348 panic("Misaligned ttbr0 %08X\n", ttbr0_val);
9349 }
9350 }
9351 #endif
9352 if (pmap->tte_index_max == NTTES) {
9353 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x40000000 */
9354 __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(2));
9355 __builtin_arm_isb(ISB_SY);
9356 #if !__ARM_USER_PROTECT__
9357 set_mmu_ttb(pmap->ttep);
9358 #endif
9359 } else {
9360 #if !__ARM_USER_PROTECT__
9361 set_mmu_ttb(pmap->ttep);
9362 #endif
9363 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x80000000 */
9364 __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(1));
9365 __builtin_arm_isb(ISB_SY);
9366 #if MACH_ASSERT && __ARM_USER_PROTECT__
9367 if (pmap->ttep & 0x1000) {
9368 panic("Misaligned ttbr0 %08X\n", pmap->ttep);
9369 }
9370 #endif
9371 }
9372
9373 #if !__ARM_USER_PROTECT__
9374 set_context_id(pmap->hw_asid);
9375 #endif
9376
9377 #else /* (__ARM_VMSA__ == 7) */
9378
9379 if (pmap != kernel_pmap) {
9380 cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
9381 }
9382
9383 if (pmap == kernel_pmap) {
9384 pmap_clear_user_ttb_internal();
9385 } else {
9386 set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->hw_asid) << TTBR_ASID_SHIFT));
9387 }
9388
9389 #if defined(HAS_APPLE_PAC) && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__)
9390 if (!(BootArgs->bootFlags & kBootFlagsDisableJOP) && !(BootArgs->bootFlags & kBootFlagsDisableUserJOP)) {
9391 uint64_t sctlr = __builtin_arm_rsr64("SCTLR_EL1");
9392 bool jop_enabled = sctlr & SCTLR_JOP_KEYS_ENABLED;
9393 if (!jop_enabled && !pmap->disable_jop) {
9394 // turn on JOP
9395 sctlr |= SCTLR_JOP_KEYS_ENABLED;
9396 __builtin_arm_wsr64("SCTLR_EL1", sctlr);
9397 // no ISB necessary because this won't take effect until eret returns to EL0
9398 } else if (jop_enabled && pmap->disable_jop) {
9399 // turn off JOP
9400 sctlr &= ~SCTLR_JOP_KEYS_ENABLED;
9401 __builtin_arm_wsr64("SCTLR_EL1", sctlr);
9402 }
9403 }
9404 #endif /* HAS_APPLE_PAC && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__) */
9405 #endif /* (__ARM_VMSA__ == 7) */
9406 }
9407
9408 void
9409 pmap_switch_user_ttb(
9410 pmap_t pmap)
9411 {
9412 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
9413 #if XNU_MONITOR
9414 pmap_switch_user_ttb_ppl(pmap);
9415 #else
9416 pmap_switch_user_ttb_internal(pmap);
9417 #endif
9418 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
9419 }
9420
9421 MARK_AS_PMAP_TEXT static void
9422 pmap_clear_user_ttb_internal(void)
9423 {
9424 #if (__ARM_VMSA__ > 7)
9425 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
9426 #else
9427 set_mmu_ttb(kernel_pmap->ttep);
9428 #endif
9429 }
9430
9431 void
9432 pmap_clear_user_ttb(void)
9433 {
9434 #if XNU_MONITOR
9435 pmap_clear_user_ttb_ppl();
9436 #else
9437 pmap_clear_user_ttb_internal();
9438 #endif
9439 }
9440
9441 /*
9442 * Routine: arm_force_fast_fault
9443 *
9444 * Function:
9445 * Force all mappings for this page to fault according
9446 * to the access modes allowed, so we can gather ref/modify
9447 * bits again.
9448 */
9449 MARK_AS_PMAP_TEXT static boolean_t
9450 arm_force_fast_fault_internal(
9451 ppnum_t ppnum,
9452 vm_prot_t allow_mode,
9453 int options)
9454 {
9455 pmap_paddr_t phys = ptoa(ppnum);
9456 pv_entry_t *pve_p;
9457 pt_entry_t *pte_p;
9458 int pai;
9459 boolean_t result;
9460 pv_entry_t **pv_h;
9461 boolean_t is_reusable, is_internal;
9462 boolean_t tlb_flush_needed = FALSE;
9463 boolean_t ref_fault;
9464 boolean_t mod_fault;
9465
9466 assert(ppnum != vm_page_fictitious_addr);
9467
9468 if (!pa_valid(phys)) {
9469 return FALSE; /* Not a managed page. */
9470 }
9471
9472 result = TRUE;
9473 ref_fault = FALSE;
9474 mod_fault = FALSE;
9475 pai = (int)pa_index(phys);
9476 LOCK_PVH(pai);
9477 pv_h = pai_to_pvh(pai);
9478
9479 pte_p = PT_ENTRY_NULL;
9480 pve_p = PV_ENTRY_NULL;
9481 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
9482 pte_p = pvh_ptep(pv_h);
9483 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
9484 pve_p = pvh_list(pv_h);
9485 }
9486
9487 is_reusable = IS_REUSABLE_PAGE(pai);
9488 is_internal = IS_INTERNAL_PAGE(pai);
9489
9490 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
9491 vm_map_address_t va;
9492 pt_entry_t spte;
9493 pt_entry_t tmplate;
9494 pmap_t pmap;
9495 boolean_t update_pte;
9496
9497 if (pve_p != PV_ENTRY_NULL) {
9498 pte_p = pve_get_ptep(pve_p);
9499 }
9500
9501 if (pte_p == PT_ENTRY_NULL) {
9502 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
9503 }
9504 #ifdef PVH_FLAG_IOMMU
9505 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
9506 goto fff_skip_pve;
9507 }
9508 #endif
9509 if (*pte_p == ARM_PTE_EMPTY) {
9510 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
9511 }
9512 if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
9513 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
9514 }
9515
9516 pmap = ptep_get_pmap(pte_p);
9517 va = ptep_get_va(pte_p);
9518
9519 assert(va >= pmap->min && va < pmap->max);
9520
9521 if (pte_is_wired(*pte_p) || pmap == kernel_pmap) {
9522 result = FALSE;
9523 break;
9524 }
9525
9526 spte = *pte_p;
9527 tmplate = spte;
9528 update_pte = FALSE;
9529
9530 if ((allow_mode & VM_PROT_READ) != VM_PROT_READ) {
9531 /* read protection sets the pte to fault */
9532 tmplate = tmplate & ~ARM_PTE_AF;
9533 update_pte = TRUE;
9534 ref_fault = TRUE;
9535 }
9536 if ((allow_mode & VM_PROT_WRITE) != VM_PROT_WRITE) {
9537 /* take away write permission if set */
9538 if (pmap == kernel_pmap) {
9539 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) {
9540 tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
9541 pte_set_was_writeable(tmplate, true);
9542 update_pte = TRUE;
9543 mod_fault = TRUE;
9544 }
9545 } else {
9546 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWRW)) {
9547 tmplate = ((tmplate & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pmap_get_pt_attr(pmap)));
9548 pte_set_was_writeable(tmplate, true);
9549 update_pte = TRUE;
9550 mod_fault = TRUE;
9551 }
9552 }
9553 }
9554
9555 #if MACH_ASSERT && XNU_MONITOR
9556 if (is_pte_xprr_protected(spte)) {
9557 if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
9558 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
9559 "ppnum=0x%x, options=0x%x, allow_mode=0x%x",
9560 __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
9561 ppnum, options, allow_mode);
9562 }
9563 }
9564 #endif /* MACH_ASSERT && XNU_MONITOR */
9565
9566 if (update_pte) {
9567 if (*pte_p != ARM_PTE_TYPE_FAULT &&
9568 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
9569 WRITE_PTE_STRONG(pte_p, tmplate);
9570 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
9571 tlb_flush_needed = TRUE;
9572 } else {
9573 WRITE_PTE(pte_p, tmplate);
9574 __builtin_arm_isb(ISB_SY);
9575 }
9576 }
9577
9578 /* update pmap stats and ledgers */
9579 if (IS_ALTACCT_PAGE(pai, pve_p)) {
9580 /*
9581 * We do not track "reusable" status for
9582 * "alternate accounting" mappings.
9583 */
9584 } else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
9585 is_reusable &&
9586 is_internal &&
9587 pmap != kernel_pmap) {
9588 /* one less "reusable" */
9589 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
9590 OSAddAtomic(-1, &pmap->stats.reusable);
9591 /* one more "internal" */
9592 OSAddAtomic(+1, &pmap->stats.internal);
9593 PMAP_STATS_PEAK(pmap->stats.internal);
9594 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
9595 pmap_ledger_credit(pmap, task_ledgers.internal, machine_ptob(1));
9596 assert(!IS_ALTACCT_PAGE(pai, pve_p));
9597 assert(IS_INTERNAL_PAGE(pai));
9598 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
9599
9600 /*
9601 * Avoid the cost of another trap to handle the fast
9602 * fault when we next write to this page: let's just
9603 * handle that now since we already have all the
9604 * necessary information.
9605 */
9606 {
9607 arm_clear_fast_fault(ppnum, VM_PROT_WRITE);
9608 }
9609 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
9610 !is_reusable &&
9611 is_internal &&
9612 pmap != kernel_pmap) {
9613 /* one more "reusable" */
9614 OSAddAtomic(+1, &pmap->stats.reusable);
9615 PMAP_STATS_PEAK(pmap->stats.reusable);
9616 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
9617 /* one less "internal" */
9618 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
9619 OSAddAtomic(-1, &pmap->stats.internal);
9620 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(1));
9621 assert(!IS_ALTACCT_PAGE(pai, pve_p));
9622 assert(IS_INTERNAL_PAGE(pai));
9623 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
9624 }
9625
9626 #ifdef PVH_FLAG_IOMMU
9627 fff_skip_pve:
9628 #endif
9629 pte_p = PT_ENTRY_NULL;
9630 if (pve_p != PV_ENTRY_NULL) {
9631 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
9632 }
9633 }
9634
9635 if (tlb_flush_needed) {
9636 sync_tlb_flush();
9637 }
9638
9639 /* update global "reusable" status for this page */
9640 if (is_internal) {
9641 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
9642 is_reusable) {
9643 CLR_REUSABLE_PAGE(pai);
9644 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
9645 !is_reusable) {
9646 SET_REUSABLE_PAGE(pai);
9647 }
9648 }
9649
9650 if (mod_fault) {
9651 SET_MODFAULT_PAGE(pai);
9652 }
9653 if (ref_fault) {
9654 SET_REFFAULT_PAGE(pai);
9655 }
9656
9657 UNLOCK_PVH(pai);
9658 return result;
9659 }
9660
9661 boolean_t
9662 arm_force_fast_fault(
9663 ppnum_t ppnum,
9664 vm_prot_t allow_mode,
9665 int options,
9666 __unused void *arg)
9667 {
9668 pmap_paddr_t phys = ptoa(ppnum);
9669
9670 assert(ppnum != vm_page_fictitious_addr);
9671
9672 if (!pa_valid(phys)) {
9673 return FALSE; /* Not a managed page. */
9674 }
9675
9676 #if XNU_MONITOR
9677 return arm_force_fast_fault_ppl(ppnum, allow_mode, options);
9678 #else
9679 return arm_force_fast_fault_internal(ppnum, allow_mode, options);
9680 #endif
9681 }
9682
9683 /*
9684 * Routine: arm_clear_fast_fault
9685 *
9686 * Function:
9687 * Clear the pending force fault for all mappings of this page based on
9688 * the observed fault type, and update the ref/modify bits.
9689 */
9690 boolean_t
9691 arm_clear_fast_fault(
9692 ppnum_t ppnum,
9693 vm_prot_t fault_type)
9694 {
9695 pmap_paddr_t pa = ptoa(ppnum);
9696 pv_entry_t *pve_p;
9697 pt_entry_t *pte_p;
9698 int pai;
9699 boolean_t result;
9700 boolean_t tlb_flush_needed = FALSE;
9701 pv_entry_t **pv_h;
9702
9703 assert(ppnum != vm_page_fictitious_addr);
9704
9705 if (!pa_valid(pa)) {
9706 return FALSE; /* Not a managed page. */
9707 }
9708
9709 result = FALSE;
9710 pai = (int)pa_index(pa);
9711 ASSERT_PVH_LOCKED(pai);
9712 pv_h = pai_to_pvh(pai);
9713
9714 pte_p = PT_ENTRY_NULL;
9715 pve_p = PV_ENTRY_NULL;
9716 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
9717 pte_p = pvh_ptep(pv_h);
9718 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
9719 pve_p = pvh_list(pv_h);
9720 }
9721
9722 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
9723 vm_map_address_t va;
9724 pt_entry_t spte;
9725 pt_entry_t tmplate;
9726 pmap_t pmap;
9727
9728 if (pve_p != PV_ENTRY_NULL) {
9729 pte_p = pve_get_ptep(pve_p);
9730 }
9731
9732 if (pte_p == PT_ENTRY_NULL) {
9733 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
9734 }
9735 #ifdef PVH_FLAG_IOMMU
9736 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
9737 goto cff_skip_pve;
9738 }
9739 #endif
9740 if (*pte_p == ARM_PTE_EMPTY) {
9741 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
9742 }
9743
9744 pmap = ptep_get_pmap(pte_p);
9745 va = ptep_get_va(pte_p);
9746
9747 assert(va >= pmap->min && va < pmap->max);
9748
9749 spte = *pte_p;
9750 tmplate = spte;
9751
9752 if ((fault_type & VM_PROT_WRITE) && (pte_was_writeable(spte))) {
9753 {
9754 if (pmap == kernel_pmap) {
9755 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
9756 } else {
9757 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap)));
9758 }
9759 }
9760
9761 tmplate |= ARM_PTE_AF;
9762
9763 pte_set_was_writeable(tmplate, false);
9764 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
9765 } else if ((fault_type & VM_PROT_READ) && ((spte & ARM_PTE_AF) != ARM_PTE_AF)) {
9766 tmplate = spte | ARM_PTE_AF;
9767
9768 {
9769 pa_set_bits(pa, PP_ATTR_REFERENCED);
9770 }
9771 }
9772
9773 #if MACH_ASSERT && XNU_MONITOR
9774 if (is_pte_xprr_protected(spte)) {
9775 if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
9776 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
9777 "ppnum=0x%x, fault_type=0x%x",
9778 __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
9779 ppnum, fault_type);
9780 }
9781 }
9782 #endif /* MACH_ASSERT && XNU_MONITOR */
9783
9784 if (spte != tmplate) {
9785 if (spte != ARM_PTE_TYPE_FAULT) {
9786 WRITE_PTE_STRONG(pte_p, tmplate);
9787 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
9788 tlb_flush_needed = TRUE;
9789 } else {
9790 WRITE_PTE(pte_p, tmplate);
9791 __builtin_arm_isb(ISB_SY);
9792 }
9793 result = TRUE;
9794 }
9795
9796 #ifdef PVH_FLAG_IOMMU
9797 cff_skip_pve:
9798 #endif
9799 pte_p = PT_ENTRY_NULL;
9800 if (pve_p != PV_ENTRY_NULL) {
9801 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
9802 }
9803 }
9804 if (tlb_flush_needed) {
9805 sync_tlb_flush();
9806 }
9807 return result;
9808 }
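
/*
 * Illustrative sketch (not compiled): how the two halves of software
 * ref/mod tracking fit together for a managed page.  'pn' is a
 * hypothetical page number; the locking mirrors the callers above.
 */
#if 0
static void
arm_fast_fault_example(ppnum_t pn)
{
	/*
	 * Arm the page: clear AF and revoke write permission in every
	 * mapping so that the next access takes a fast fault.
	 */
	(void) arm_force_fast_fault(pn, VM_PROT_NONE, 0, NULL);

	/*
	 * Later, once a write fault is observed, restore access and record
	 * the referenced/modified state.  arm_clear_fast_fault() expects
	 * the PV head lock for the page to be held, as in the callers above.
	 */
	int pai = (int)pa_index(ptoa(pn));
	LOCK_PVH(pai);
	(void) arm_clear_fast_fault(pn, VM_PROT_WRITE);
	UNLOCK_PVH(pai);
}
#endif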
9809
9810 /*
9811 * Determine if the fault was induced by software tracking of
9812 * modify/reference bits. If so, re-enable the mapping (and set
9813 * the appropriate bits).
9814 *
9815 * Returns KERN_SUCCESS if the fault was induced and was
9816 * successfully handled.
9817 *
9818 * Returns KERN_FAILURE if the fault was not induced and
9819 * the function was unable to deal with it.
9820 *
9821 * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
9822 * disallows this type of access.
9823 */
9824 MARK_AS_PMAP_TEXT static kern_return_t
9825 arm_fast_fault_internal(
9826 pmap_t pmap,
9827 vm_map_address_t va,
9828 vm_prot_t fault_type,
9829 __unused bool was_af_fault,
9830 __unused bool from_user)
9831 {
9832 kern_return_t result = KERN_FAILURE;
9833 pt_entry_t *ptep;
9834 pt_entry_t spte = ARM_PTE_TYPE_FAULT;
9835 int pai;
9836 pmap_paddr_t pa;
9837 VALIDATE_PMAP(pmap);
9838
9839 PMAP_LOCK(pmap);
9840
9841 /*
9842 * If the entry doesn't exist, is completely invalid, or is already
9843 * valid, we can't fix it here.
9844 */
9845
9846 ptep = pmap_pte(pmap, va);
9847 if (ptep != PT_ENTRY_NULL) {
9848 while (true) {
9849 spte = *ptep;
9850
9851 pa = pte_to_pa(spte);
9852
9853 if ((spte == ARM_PTE_TYPE_FAULT) ||
9854 ARM_PTE_IS_COMPRESSED(spte, ptep)) {
9855 PMAP_UNLOCK(pmap);
9856 return result;
9857 }
9858
9859 if (!pa_valid(pa)) {
9860 PMAP_UNLOCK(pmap);
9861 #if XNU_MONITOR
9862 if (pmap_cache_attributes((ppnum_t)atop(pa)) & PP_ATTR_MONITOR) {
9863 return KERN_PROTECTION_FAILURE;
9864 } else
9865 #endif
9866 return result;
9867 }
9868 pai = (int)pa_index(pa);
9869 LOCK_PVH(pai);
9870 #if __APRR_SUPPORTED__
9871 if (*ptep == spte) {
9872 /*
9873 * Double-check the spte value, as we care
9874 * about the AF bit.
9875 */
9876 break;
9877 }
9878 UNLOCK_PVH(pai);
9879 #else /* !__APRR_SUPPORTED__ */
9880 break;
9881 #endif /* !__APRR_SUPPORTED__ */
9882 }
9883 } else {
9884 PMAP_UNLOCK(pmap);
9885 return result;
9886 }
9887
9888 #if __APRR_SUPPORTED__
9889 /* Check to see if this mapping had APRR restrictions. */
9890 if (is_pte_xprr_protected(spte)) {
9891 /*
9892 * We have faulted on an XPRR managed mapping; decide if the access should be
9893 * reattempted or if it should cause an exception. Now that all JIT entitled
9894 * task threads always have MPRR enabled we're only here because of
9895 * an AF fault or an actual permission fault. AF faults will have result
9896 * changed to KERN_SUCCESS below upon arm_clear_fast_fault return.
9897 */
9898 if (was_af_fault && (spte & ARM_PTE_AF)) {
9899 result = KERN_SUCCESS;
9900 goto out;
9901 } else {
9902 result = KERN_PROTECTION_FAILURE;
9903 }
9904 }
9905 #endif /* __APRR_SUPPORTED__ */
9906
9907 if ((IS_REFFAULT_PAGE(pai)) ||
9908 ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
9909 /*
9910 * An attempted access will always clear ref/mod fault state, as
9911 * appropriate for the fault type. arm_clear_fast_fault will
9912 * update the associated PTEs for the page as appropriate; if
9913 * any PTEs are updated, we redrive the access. If the mapping
9914 * does not actually allow for the attempted access, the
9915 * following fault will (hopefully) fail to update any PTEs, and
9916 * thus cause arm_fast_fault to decide that it failed to handle
9917 * the fault.
9918 */
9919 if (IS_REFFAULT_PAGE(pai)) {
9920 CLR_REFFAULT_PAGE(pai);
9921 }
9922 if ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)) {
9923 CLR_MODFAULT_PAGE(pai);
9924 }
9925
9926 if (arm_clear_fast_fault((ppnum_t)atop(pa), fault_type)) {
9927 /*
9928 * Should this preserve KERN_PROTECTION_FAILURE? The
9929 * cost of not doing so is another fault in a case
9930 * that should already result in an exception.
9931 */
9932 result = KERN_SUCCESS;
9933 }
9934 }
9935
9936 #if __APRR_SUPPORTED__
9937 out:
9938 #endif /* __APRR_SUPPORTED__ */
9939 UNLOCK_PVH(pai);
9940 PMAP_UNLOCK(pmap);
9941 return result;
9942 }
9943
9944 kern_return_t
9945 arm_fast_fault(
9946 pmap_t pmap,
9947 vm_map_address_t va,
9948 vm_prot_t fault_type,
9949 bool was_af_fault,
9950 __unused bool from_user)
9951 {
9952 kern_return_t result = KERN_FAILURE;
9953
9954 if (va < pmap->min || va >= pmap->max) {
9955 return result;
9956 }
9957
9958 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
9959 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
9960 from_user);
9961
9962 #if (__ARM_VMSA__ == 7)
9963 if (pmap != kernel_pmap) {
9964 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
9965 pmap_t cur_pmap;
9966 pmap_t cur_user_pmap;
9967
9968 cur_pmap = current_pmap();
9969 cur_user_pmap = cpu_data_ptr->cpu_user_pmap;
9970
9971 if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
9972 if (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp) {
9973 pmap_set_pmap(pmap, current_thread());
9974 result = KERN_SUCCESS;
9975 goto done;
9976 }
9977 }
9978 }
9979 #endif
9980
9981 #if XNU_MONITOR
9982 result = arm_fast_fault_ppl(pmap, va, fault_type, was_af_fault, from_user);
9983 #else
9984 result = arm_fast_fault_internal(pmap, va, fault_type, was_af_fault, from_user);
9985 #endif
9986
9987 #if (__ARM_VMSA__ == 7)
9988 done:
9989 #endif
9990
9991 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);
9992
9993 return result;
9994 }
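
/*
 * Illustrative sketch (not compiled): how a hypothetical caller in the
 * data-abort path might consume arm_fast_fault()'s return value; the
 * surrounding exception-handling details are omitted.
 */
#if 0
static void
fast_fault_dispatch_example(pmap_t map, vm_map_address_t fault_va, vm_prot_t fault_type, bool was_af_fault)
{
	kern_return_t kr = arm_fast_fault(map, fault_va, fault_type, was_af_fault, TRUE);

	if (kr == KERN_SUCCESS) {
		/* Induced ref/mod fault handled; simply retry the access. */
	} else if (kr == KERN_PROTECTION_FAILURE) {
		/* The pmap layer explicitly disallows the access; raise an exception. */
	} else {
		/* Not an induced fault; hand the fault to the VM layer. */
	}
}
#endif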
9995
9996 void
9997 pmap_copy_page(
9998 ppnum_t psrc,
9999 ppnum_t pdst)
10000 {
10001 bcopy_phys((addr64_t) (ptoa(psrc)),
10002 (addr64_t) (ptoa(pdst)),
10003 PAGE_SIZE);
10004 }
10005
10006
10007 /*
10008 * pmap_copy_part_page copies part of the specified (machine independent) page.
10009 */
10010 void
10011 pmap_copy_part_page(
10012 ppnum_t psrc,
10013 vm_offset_t src_offset,
10014 ppnum_t pdst,
10015 vm_offset_t dst_offset,
10016 vm_size_t len)
10017 {
10018 bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
10019 (addr64_t) (ptoa(pdst) + dst_offset),
10020 len);
10021 }
10022
10023
10024 /*
10025 * pmap_zero_page zeros the specified (machine independent) page.
10026 */
10027 void
10028 pmap_zero_page(
10029 ppnum_t pn)
10030 {
10031 assert(pn != vm_page_fictitious_addr);
10032 bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
10033 }
10034
10035 /*
10036 * pmap_zero_part_page
10037 * zeros the specified (machine independent) part of a page.
10038 */
10039 void
10040 pmap_zero_part_page(
10041 ppnum_t pn,
10042 vm_offset_t offset,
10043 vm_size_t len)
10044 {
10045 assert(pn != vm_page_fictitious_addr);
10046 assert(offset + len <= PAGE_SIZE);
10047 bzero_phys((addr64_t) (ptoa(pn) + offset), len);
10048 }
10049
10050
10051 /*
10052 * nop in current arm implementation
10053 */
10054 void
10055 inval_copy_windows(
10056 __unused thread_t t)
10057 {
10058 }
10059
10060 void
10061 pmap_map_globals(
10062 void)
10063 {
10064 pt_entry_t *ptep, pte;
10065
10066 ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
10067 assert(ptep != PT_ENTRY_NULL);
10068 assert(*ptep == ARM_PTE_EMPTY);
10069
10070 pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
10071 #if __ARM_KERNEL_PROTECT__
10072 pte |= ARM_PTE_NG;
10073 #endif /* __ARM_KERNEL_PROTECT__ */
10074 pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
10075 #if (__ARM_VMSA__ > 7)
10076 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
10077 #else
10078 pte |= ARM_PTE_SH;
10079 #endif
10080 *ptep = pte;
10081 FLUSH_PTE_RANGE(ptep, (ptep + 1));
10082 PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE, false);
10083 }
10084
10085 vm_offset_t
10086 pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
10087 {
10088 if (__improbable(index >= CPUWINDOWS_MAX)) {
10089 panic("%s: invalid index %u", __func__, index);
10090 }
10091 return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
10092 }
10093
10094 MARK_AS_PMAP_TEXT static unsigned int
10095 pmap_map_cpu_windows_copy_internal(
10096 ppnum_t pn,
10097 vm_prot_t prot,
10098 unsigned int wimg_bits)
10099 {
10100 pt_entry_t *ptep = NULL, pte;
10101 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
10102 unsigned int cpu_num;
10103 unsigned int i;
10104 vm_offset_t cpu_copywindow_vaddr = 0;
10105 bool need_strong_sync = false;
10106
10107 #if XNU_MONITOR || HAS_MILD_DSB
10108 unsigned int cacheattr = (!pa_valid(ptoa(pn)) ? pmap_cache_attributes(pn) : 0);
10109 need_strong_sync = ((cacheattr & PMAP_IO_RANGE_STRONG_SYNC) != 0);
10110 #endif
10111
10112 #if XNU_MONITOR
10113 #ifdef __ARM_COHERENT_IO__
10114 if (pa_valid(ptoa(pn)) && !pmap_ppl_disable) {
10115 panic("%s: attempted to map a managed page, "
10116 "pn=%u, prot=0x%x, wimg_bits=0x%x",
10117 __FUNCTION__,
10118 pn, prot, wimg_bits);
10119 }
10120 if (!pmap_ppl_disable && (cacheattr & PP_ATTR_MONITOR)) {
10121 panic("%s: attempt to map PPL-protected I/O address 0x%llx", __func__, (uint64_t)ptoa(pn));
10122 }
10123
10124 #else /* __ARM_COHERENT_IO__ */
10125 #error CPU copy windows are not properly supported with both the PPL and incoherent IO
10126 #endif /* __ARM_COHERENT_IO__ */
10127 #endif /* XNU_MONITOR */
10128 cpu_num = pmap_cpu_data->cpu_number;
10129
10130 for (i = 0; i < CPUWINDOWS_MAX; i++) {
10131 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
10132 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
10133 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
10134 if (*ptep == ARM_PTE_TYPE_FAULT) {
10135 break;
10136 }
10137 }
10138 if (i == CPUWINDOWS_MAX) {
10139 panic("pmap_map_cpu_windows_copy: out of window\n");
10140 }
10141
10142 pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
10143 #if __ARM_KERNEL_PROTECT__
10144 pte |= ARM_PTE_NG;
10145 #endif /* __ARM_KERNEL_PROTECT__ */
10146
10147 pte |= wimg_to_pte(wimg_bits);
10148
10149 if (prot & VM_PROT_WRITE) {
10150 pte |= ARM_PTE_AP(AP_RWNA);
10151 } else {
10152 pte |= ARM_PTE_AP(AP_RONA);
10153 }
10154
10155 WRITE_PTE_FAST(ptep, pte);
10156 /*
10157 * Invalidate tlb. Cover nested cpu_copywindow_vaddr usage with the interrupted context
10158 * in pmap_unmap_cpu_windows_copy() after clearing the pte and before tlb invalidate.
10159 */
10160 FLUSH_PTE_STRONG(ptep);
10161 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[i]);
10162 pmap_cpu_data->copywindow_strong_sync[i] = need_strong_sync;
10163
10164 return i;
10165 }
10166
10167 unsigned int
10168 pmap_map_cpu_windows_copy(
10169 ppnum_t pn,
10170 vm_prot_t prot,
10171 unsigned int wimg_bits)
10172 {
10173 #if XNU_MONITOR
10174 return pmap_map_cpu_windows_copy_ppl(pn, prot, wimg_bits);
10175 #else
10176 return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
10177 #endif
10178 }
10179
10180 MARK_AS_PMAP_TEXT static void
10181 pmap_unmap_cpu_windows_copy_internal(
10182 unsigned int index)
10183 {
10184 pt_entry_t *ptep;
10185 unsigned int cpu_num;
10186 vm_offset_t cpu_copywindow_vaddr = 0;
10187 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
10188
10189 cpu_num = pmap_cpu_data->cpu_number;
10190
10191 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
10192 /* Issue full-system DSB to ensure prior operations on the per-CPU window
10193 * (which are likely to have been on I/O memory) are complete before
10194 * tearing down the mapping. */
10195 __builtin_arm_dsb(DSB_SY);
10196 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
10197 WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
10198 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index]);
10199 }
10200
10201 void
10202 pmap_unmap_cpu_windows_copy(
10203 unsigned int index)
10204 {
10205 #if XNU_MONITOR
10206 return pmap_unmap_cpu_windows_copy_ppl(index);
10207 #else
10208 return pmap_unmap_cpu_windows_copy_internal(index);
10209 #endif
10210 }
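
/*
 * Illustrative sketch (not compiled): typical use of the per-CPU copy
 * windows to read a physical page that is not otherwise mapped.  'pn',
 * 'buf' and 'len' are hypothetical, and preemption must stay disabled so
 * the window remains bound to this CPU.
 */
#if 0
static void
copy_window_example(ppnum_t pn, void *buf, vm_size_t len)
{
	mp_disable_preemption();

	unsigned int index = pmap_map_cpu_windows_copy(pn, VM_PROT_READ, VM_WIMG_DEFAULT);
	vm_offset_t window = pmap_cpu_windows_copy_addr(cpu_number(), index);

	bcopy((const void *)window, buf, len);

	pmap_unmap_cpu_windows_copy(index);

	mp_enable_preemption();
}
#endif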
10211
10212 /*
10213 * Indicate that a pmap is intended to be used as a nested pmap
10214 * within one or more larger address spaces. This must be set
10215 * before pmap_nest() is called with this pmap as the 'subordinate'.
10216 */
10217 MARK_AS_PMAP_TEXT static void
10218 pmap_set_nested_internal(
10219 pmap_t pmap)
10220 {
10221 VALIDATE_PMAP(pmap);
10222 pmap->nested = TRUE;
10223 }
10224
10225 void
10226 pmap_set_nested(
10227 pmap_t pmap)
10228 {
10229 #if XNU_MONITOR
10230 pmap_set_nested_ppl(pmap);
10231 #else
10232 pmap_set_nested_internal(pmap);
10233 #endif
10234 }
10235
10236 /*
10237 * pmap_trim_range(pmap, start, end)
10238 *
10239 * pmap = pmap to operate on
10240 * start = start of the range
10241 * end = end of the range
10242 *
10243 * Attempts to deallocate TTEs for the given range within the nested region.
10244 */
10245 MARK_AS_PMAP_TEXT static void
10246 pmap_trim_range(
10247 pmap_t pmap,
10248 addr64_t start,
10249 addr64_t end)
10250 {
10251 addr64_t cur;
10252 addr64_t nested_region_start;
10253 addr64_t nested_region_end;
10254 addr64_t adjusted_start;
10255 addr64_t adjusted_end;
10256 addr64_t adjust_offmask;
10257 tt_entry_t * tte_p;
10258 pt_entry_t * pte_p;
10259 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
10260
10261 if (__improbable(end < start)) {
10262 panic("%s: invalid address range, "
10263 "pmap=%p, start=%p, end=%p",
10264 __func__,
10265 pmap, (void*)start, (void*)end);
10266 }
10267
10268 nested_region_start = pmap->nested ? pmap->nested_region_subord_addr : pmap->nested_region_grand_addr;
10269 nested_region_end = nested_region_start + pmap->nested_region_size;
10270
10271 if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
10272 panic("%s: range outside nested region %p-%p, "
10273 "pmap=%p, start=%p, end=%p",
10274 __func__, (void *)nested_region_start, (void *)nested_region_end,
10275 pmap, (void*)start, (void*)end);
10276 }
10277
10278 /* Contract the range to TT page boundaries. */
10279 adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
10280 adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
10281 adjusted_end = end & ~adjust_offmask;
10282 bool modified = false;
10283
10284 /* Iterate over the range, trying to remove TTEs. */
10285 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
10286 PMAP_LOCK(pmap);
10287
10288 tte_p = pmap_tte(pmap, cur);
10289
10290 if (tte_p == (tt_entry_t *) NULL) {
10291 goto done;
10292 }
10293
10294 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
10295 pte_p = (pt_entry_t *) ttetokv(*tte_p);
10296
10297 if ((ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
10298 (pmap != kernel_pmap)) {
10299 if (pmap->nested == TRUE) {
10300 /* Deallocate for the nested map. */
10301 pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
10302 } else {
10303 /* Just remove for the parent map. */
10304 pmap_tte_remove(pmap, tte_p, pt_attr_twig_level(pt_attr));
10305 }
10306
10307 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(cur, pmap);
10308 modified = true;
10309 }
10310 }
10311
10312 done:
10313 PMAP_UNLOCK(pmap);
10314 }
10315
10316 if (modified) {
10317 sync_tlb_flush();
10318 }
10319
10320 #if (__ARM_VMSA__ > 7)
10321 /* Remove empty L2 TTs. */
10322 adjusted_start = ((start + ARM_TT_L1_OFFMASK) & ~ARM_TT_L1_OFFMASK);
10323 adjusted_end = end & ~ARM_TT_L1_OFFMASK;
10324
10325 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += ARM_TT_L1_SIZE) {
10326 /* For each L1 entry in our range... */
10327 PMAP_LOCK(pmap);
10328
10329 bool remove_tt1e = true;
10330 tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
10331 tt_entry_t * tt2e_start;
10332 tt_entry_t * tt2e_end;
10333 tt_entry_t * tt2e_p;
10334 tt_entry_t tt1e;
10335
10336 if (tt1e_p == NULL) {
10337 PMAP_UNLOCK(pmap);
10338 continue;
10339 }
10340
10341 tt1e = *tt1e_p;
10342
10343 if (tt1e == ARM_TTE_TYPE_FAULT) {
10344 PMAP_UNLOCK(pmap);
10345 continue;
10346 }
10347
10348 tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
10349 tt2e_end = &tt2e_start[TTE_PGENTRIES];
10350
10351 for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
10352 if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
10353 /*
10354 * If any TTEs are populated, don't remove the
10355 * L1 TT.
10356 */
10357 remove_tt1e = false;
10358 }
10359 }
10360
10361 if (remove_tt1e) {
10362 pmap_tte_deallocate(pmap, tt1e_p, PMAP_TT_L1_LEVEL);
10363 PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE, false);
10364 }
10365
10366 PMAP_UNLOCK(pmap);
10367 }
10368 #endif /* (__ARM_VMSA__ > 7) */
10369 }
10370
10371 /*
10372 * pmap_trim_internal(grand, subord, vstart, nstart, size)
10373 *
10374 * grand = pmap subord is nested in
10375 * subord = nested pmap
10376 * vstart = start of the used range in grand
10377 * nstart = start of the used range in subord
10378 * size = size of the used range
10379 *
10380 * Attempts to trim the shared region page tables down to only cover the given
10381 * range in subord and grand.
10382 */
10383 MARK_AS_PMAP_TEXT static void
10384 pmap_trim_internal(
10385 pmap_t grand,
10386 pmap_t subord,
10387 addr64_t vstart,
10388 addr64_t nstart,
10389 uint64_t size)
10390 {
10391 addr64_t vend, nend;
10392 addr64_t adjust_offmask;
10393
10394 if (__improbable(os_add_overflow(vstart, size, &vend))) {
10395 panic("%s: grand addr wraps around, "
10396 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10397 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
10398 }
10399
10400 if (__improbable(os_add_overflow(nstart, size, &nend))) {
10401 panic("%s: nested addr wraps around, "
10402 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10403 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
10404 }
10405
10406 VALIDATE_PMAP(grand);
10407 VALIDATE_PMAP(subord);
10408
10409 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
10410
10411 PMAP_LOCK(subord);
10412
10413 if (!subord->nested) {
10414 panic("%s: subord is not nestable, "
10415 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10416 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
10417 }
10418
10419 if (grand->nested) {
10420 panic("%s: grand is nestable, "
10421 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10422 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
10423 }
10424
10425 if (grand->nested_pmap != subord) {
10426 panic("%s: grand->nested != subord, "
10427 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10428 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
10429 }
10430
10431 if (size != 0) {
10432 if ((vstart < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
10433 panic("%s: grand range not in nested region, "
10434 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10435 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
10436 }
10437
10438 if ((nstart < grand->nested_region_grand_addr) || (nend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
10439 panic("%s: subord range not in nested region, "
10440 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10441 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
10442 }
10443 }
10444
10445
10446 if (!grand->nested_has_no_bounds_ref) {
10447 assert(subord->nested_bounds_set);
10448
10449 if (!grand->nested_bounds_set) {
10450 /* Inherit the bounds from subord. */
10451 grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
10452 grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
10453 grand->nested_bounds_set = true;
10454 }
10455
10456 PMAP_UNLOCK(subord);
10457 return;
10458 }
10459
10460 if ((!subord->nested_bounds_set) && size) {
10461 adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
10462
10463 subord->nested_region_true_start = nstart;
10464 subord->nested_region_true_end = nend;
10465 subord->nested_region_true_start &= ~adjust_offmask;
10466
10467 if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
10468 panic("%s: padded true end wraps around, "
10469 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
10470 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
10471 }
10472
10473 subord->nested_region_true_end &= ~adjust_offmask;
10474 subord->nested_bounds_set = true;
10475 }
10476
10477 if (subord->nested_bounds_set) {
10478 /* Inherit the bounds from subord. */
10479 grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
10480 grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
10481 grand->nested_bounds_set = true;
10482
10483 /* If we know the bounds, we can trim the pmap. */
10484 grand->nested_has_no_bounds_ref = false;
10485 PMAP_UNLOCK(subord);
10486 } else {
10487 /* Don't trim if we don't know the bounds. */
10488 PMAP_UNLOCK(subord);
10489 return;
10490 }
10491
10492 /* Trim grand to only cover the given range. */
10493 pmap_trim_range(grand, grand->nested_region_grand_addr, grand->nested_region_true_start);
10494 pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_grand_addr + grand->nested_region_size));
10495
10496 /* Try to trim subord. */
10497 pmap_trim_subord(subord);
10498 }
10499
10500 MARK_AS_PMAP_TEXT static void
10501 pmap_trim_self(pmap_t pmap)
10502 {
10503 if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
10504 /* If we have a no bounds ref, we need to drop it. */
10505 PMAP_LOCK(pmap->nested_pmap);
10506 pmap->nested_has_no_bounds_ref = false;
10507 boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
10508 vm_map_offset_t nested_region_true_start = (pmap->nested_pmap->nested_region_true_start - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
10509 vm_map_offset_t nested_region_true_end = (pmap->nested_pmap->nested_region_true_end - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
10510 PMAP_UNLOCK(pmap->nested_pmap);
10511
10512 if (nested_bounds_set) {
10513 pmap_trim_range(pmap, pmap->nested_region_grand_addr, nested_region_true_start);
10514 pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_grand_addr + pmap->nested_region_size));
10515 }
10516 /*
10517 * Try trimming the nested pmap, in case we had the
10518 * last reference.
10519 */
10520 pmap_trim_subord(pmap->nested_pmap);
10521 }
10522 }
10523
10524 /*
10525 * pmap_trim_subord(subord)
10526 *
10527 * subord = nested pmap we are attempting to trim
10529 *
10530 * Trims subord if possible
10531 */
10532 MARK_AS_PMAP_TEXT static void
10533 pmap_trim_subord(pmap_t subord)
10534 {
10535 bool contract_subord = false;
10536
10537 PMAP_LOCK(subord);
10538
10539 subord->nested_no_bounds_refcnt--;
10540
10541 if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
10542 /* If this was the last no bounds reference, trim subord. */
10543 contract_subord = true;
10544 }
10545
10546 PMAP_UNLOCK(subord);
10547
10548 if (contract_subord) {
10549 pmap_trim_range(subord, subord->nested_region_subord_addr, subord->nested_region_true_start);
10550 pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_subord_addr + subord->nested_region_size);
10551 }
10552 }
10553
10554 void
10555 pmap_trim(
10556 pmap_t grand,
10557 pmap_t subord,
10558 addr64_t vstart,
10559 addr64_t nstart,
10560 uint64_t size)
10561 {
10562 #if XNU_MONITOR
10563 pmap_trim_ppl(grand, subord, vstart, nstart, size);
10564
10565 pmap_ledger_check_balance(grand);
10566 pmap_ledger_check_balance(subord);
10567 #else
10568 pmap_trim_internal(grand, subord, vstart, nstart, size);
10569 #endif
10570 }
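
/*
 * Illustrative sketch (not compiled): once the used portion of the shared
 * region is known, the VM layer can ask the pmap layer to drop page tables
 * outside of it.  The pmaps, start address, and size are hypothetical; the
 * common case uses the same start in both address spaces.
 */
#if 0
static void
trim_shared_region_example(pmap_t task_pmap, pmap_t shared_region_pmap,
    addr64_t used_start, uint64_t used_size)
{
	pmap_trim(task_pmap, shared_region_pmap, used_start, used_start, used_size);
}
#endif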
10571
10572 #if HAS_APPLE_PAC && XNU_MONITOR
10573 static void *
10574 pmap_sign_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator)
10575 {
10576 void *res = NULL;
10577 boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);
10578
10579 ml_set_kernelkey_enabled(FALSE);
10580 switch (key) {
10581 case ptrauth_key_asia:
10582 res = ptrauth_sign_unauthenticated(value, ptrauth_key_asia, discriminator);
10583 break;
10584 case ptrauth_key_asda:
10585 res = ptrauth_sign_unauthenticated(value, ptrauth_key_asda, discriminator);
10586 break;
10587 default:
10588 panic("attempt to sign user pointer without process independent key");
10589 }
10590 ml_set_kernelkey_enabled(TRUE);
10591
10592 ml_set_interrupts_enabled(current_intr_state);
10593
10594 return res;
10595 }
10596
10597 void *
10598 pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t discriminator)
10599 {
10600 return pmap_sign_user_ptr_internal(value, key, discriminator);
10601 }
10602
10603 static void *
10604 pmap_auth_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator)
10605 {
10606 if ((key != ptrauth_key_asia) && (key != ptrauth_key_asda)) {
10607 panic("attempt to auth user pointer without process independent key");
10608 }
10609
10610 void *res = NULL;
10611 boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);
10612
10613 ml_set_kernelkey_enabled(FALSE);
10614 res = ml_auth_ptr_unchecked(value, key, discriminator);
10615 ml_set_kernelkey_enabled(TRUE);
10616
10617 ml_set_interrupts_enabled(current_intr_state);
10618
10619 return res;
10620 }
10621
10622 void *
10623 pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t discriminator)
10624 {
10625 return pmap_auth_user_ptr_internal(value, key, discriminator);
10626 }
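
/*
 * Illustrative sketch (not compiled): signing a user pointer with a
 * process-independent key and later authenticating it.  'uptr' and 'disc'
 * are hypothetical.
 */
#if 0
static void *
user_ptr_pac_example(void *uptr, uint64_t disc)
{
	void *signed_ptr = pmap_sign_user_ptr(uptr, ptrauth_key_asia, disc);

	/* ... the signed pointer is stored and later reloaded ... */

	return pmap_auth_user_ptr(signed_ptr, ptrauth_key_asia, disc);
}
#endif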
10627 #endif /* HAS_APPLE_PAC && XNU_MONITOR */
10628
10629 /*
10630 * kern_return_t pmap_nest(grand, subord, vstart, size)
10631 *
10632 * grand = the pmap that we will nest subord into
10633 * subord = the pmap that goes into the grand
10634 * vstart = start of the range in grand to be nested
10635 * nstart = start of the corresponding range in subord
10636 * size = Size of nest area (up to 16TB)
10637 *
10638 * Inserts a pmap into another. This is used to implement shared segments.
10639 *
10640 */
10641
10642 MARK_AS_PMAP_TEXT static kern_return_t
10643 pmap_nest_internal(
10644 pmap_t grand,
10645 pmap_t subord,
10646 addr64_t vstart,
10647 addr64_t nstart,
10648 uint64_t size)
10649 {
10650 kern_return_t kr = KERN_FAILURE;
10651 vm_map_offset_t vaddr, nvaddr;
10652 tt_entry_t *stte_p;
10653 tt_entry_t *gtte_p;
10654 unsigned int i;
10655 unsigned int num_tte;
10656 unsigned int nested_region_asid_bitmap_size;
10657 unsigned int* nested_region_asid_bitmap;
10658 int expand_options = 0;
10659
10660 addr64_t vend, nend;
10661 if (__improbable(os_add_overflow(vstart, size, &vend))) {
10662 panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
10663 }
10664 if (__improbable(os_add_overflow(nstart, size, &nend))) {
10665 panic("%s: %p nested addr wraps around: 0x%llx + 0x%llx", __func__, subord, nstart, size);
10666 }
10667
10668 VALIDATE_PMAP(grand);
10669 VALIDATE_PMAP(subord);
10670
10671 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
10672 assert(pmap_get_pt_attr(subord) == pt_attr);
10673
10674 #if XNU_MONITOR
10675 expand_options |= PMAP_TT_ALLOCATE_NOWAIT;
10676 #endif
10677
10678 if (((size | vstart | nstart) & (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL) {
10679 panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx\n", grand, vstart, nstart, size);
10680 }
10681
10682 if (!subord->nested) {
10683 panic("%s: subordinate pmap %p is not nestable", __func__, subord);
10684 }
10685
10686 if ((grand->nested_pmap != PMAP_NULL) && (grand->nested_pmap != subord)) {
10687 panic("pmap_nest() pmap %p has a nested pmap\n", grand);
10688 }
10689
10690 if (subord->nested_region_asid_bitmap == NULL) {
10691 nested_region_asid_bitmap_size = (unsigned int)(size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY);
10692
10693 #if XNU_MONITOR
10694 pmap_paddr_t pa = 0;
10695
10696 if ((nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE) {
10697 panic("%s: nested_region_asid_bitmap_size=%u will not fit in a page, "
10698 "grand=%p, subord=%p, vstart=0x%llx, nstart=0x%llx, size=%llx",
10699 __FUNCTION__,
10700 nested_region_asid_bitmap_size,
10701 grand, subord, vstart, nstart, size);
10702 }
10703
10704 kr = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
10705
10706 if (kr != KERN_SUCCESS) {
10707 return kr;
10708 }
10709
10710 assert(pa);
10711
10712 nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
10713 #else
10714 nested_region_asid_bitmap = kalloc(nested_region_asid_bitmap_size * sizeof(unsigned int));
10715 #endif
10716 bzero(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
10717
10718 PMAP_LOCK(subord);
10719 if (subord->nested_region_asid_bitmap == NULL) {
10720 subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
10721 subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
10722 subord->nested_region_subord_addr = nstart;
10723 subord->nested_region_size = (mach_vm_offset_t) size;
10724 nested_region_asid_bitmap = NULL;
10725 }
10726 PMAP_UNLOCK(subord);
10727 if (nested_region_asid_bitmap != NULL) {
10728 #if XNU_MONITOR
10729 pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
10730 #else
10731 kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
10732 #endif
10733 }
10734 }
10735 if ((subord->nested_region_subord_addr + subord->nested_region_size) < nend) {
10736 uint64_t new_size;
10737 unsigned int new_nested_region_asid_bitmap_size;
10738 unsigned int* new_nested_region_asid_bitmap;
10739
10740 nested_region_asid_bitmap = NULL;
10741 nested_region_asid_bitmap_size = 0;
10742 new_size = nend - subord->nested_region_subord_addr;
10743
10744 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
10745 new_nested_region_asid_bitmap_size = (unsigned int)((new_size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY)) + 1;
10746
10747 #if XNU_MONITOR
10748 pmap_paddr_t pa = 0;
10749
10750 if ((new_nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE) {
10751 panic("%s: new_nested_region_asid_bitmap_size=%u will not fit in a page, "
10752 "grand=%p, subord=%p, vstart=0x%llx, nstart=0x%llx, size=%llx",
10753 __FUNCTION__,
10754 new_nested_region_asid_bitmap_size,
10755 grand, subord, vstart, nstart, size);
10756 }
10757
10758 kr = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
10759
10760 if (kr != KERN_SUCCESS) {
10761 return kr;
10762 }
10763
10764 assert(pa);
10765
10766 new_nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
10767 #else
10768 new_nested_region_asid_bitmap = kalloc(new_nested_region_asid_bitmap_size * sizeof(unsigned int));
10769 #endif
10770 PMAP_LOCK(subord);
10771 if (subord->nested_region_size < new_size) {
10772 bzero(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));
10773 bcopy(subord->nested_region_asid_bitmap, new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
10774 nested_region_asid_bitmap_size = subord->nested_region_asid_bitmap_size;
10775 nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
10776 subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
10777 subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
10778 subord->nested_region_size = new_size;
10779 new_nested_region_asid_bitmap = NULL;
10780 }
10781 PMAP_UNLOCK(subord);
10782 if (nested_region_asid_bitmap != NULL)
10783 #if XNU_MONITOR
10784 {pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);}
10785 #else
10786 { kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));}
10787 #endif
10788 if (new_nested_region_asid_bitmap != NULL)
10789 #if XNU_MONITOR
10790 {pmap_pages_free(kvtophys((vm_offset_t)new_nested_region_asid_bitmap), PAGE_SIZE);}
10791 #else
10792 { kfree(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));}
10793 #endif
10794 }
10795
10796 PMAP_LOCK(subord);
10797 if (grand->nested_pmap == PMAP_NULL) {
10798 grand->nested_pmap = subord;
10799
10800 if (!subord->nested_bounds_set) {
10801 /*
10802 * We are nesting without the shared region's bounds
10803 * being known. We'll have to trim the pmap later.
10804 */
10805 grand->nested_has_no_bounds_ref = true;
10806 subord->nested_no_bounds_refcnt++;
10807 }
10808
10809 grand->nested_region_grand_addr = vstart;
10810 grand->nested_region_subord_addr = nstart;
10811 grand->nested_region_size = (mach_vm_offset_t) size;
10812 } else {
10813 if ((grand->nested_region_grand_addr > vstart)) {
10814 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand);
10815 } else if ((grand->nested_region_grand_addr + grand->nested_region_size) < vend) {
10816 grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_grand_addr + size);
10817 }
10818 }
10819
10820 #if (__ARM_VMSA__ == 7)
10821 nvaddr = (vm_map_offset_t) nstart;
10822 vaddr = (vm_map_offset_t) vstart;
10823 num_tte = size >> ARM_TT_L1_SHIFT;
10824
10825 for (i = 0; i < num_tte; i++) {
10826 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
10827 goto expand_next;
10828 }
10829
10830 stte_p = pmap_tte(subord, nvaddr);
10831 if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
10832 PMAP_UNLOCK(subord);
10833 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_L2_LEVEL);
10834
10835 if (kr != KERN_SUCCESS) {
10836 PMAP_LOCK(grand);
10837 goto done;
10838 }
10839
10840 PMAP_LOCK(subord);
10841 }
10842 PMAP_UNLOCK(subord);
10843 PMAP_LOCK(grand);
10844 stte_p = pmap_tte(grand, vaddr);
10845 if (stte_p == (tt_entry_t *)NULL) {
10846 PMAP_UNLOCK(grand);
10847 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);
10848
10849 if (kr != KERN_SUCCESS) {
10850 PMAP_LOCK(grand);
10851 goto done;
10852 }
10853 } else {
10854 PMAP_UNLOCK(grand);
10855 kr = KERN_SUCCESS;
10856 }
10857 PMAP_LOCK(subord);
10858
10859 expand_next:
10860 nvaddr += ARM_TT_L1_SIZE;
10861 vaddr += ARM_TT_L1_SIZE;
10862 }
10863
10864 #else
10865 nvaddr = (vm_map_offset_t) nstart;
10866 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
10867
10868 for (i = 0; i < num_tte; i++) {
10869 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
10870 goto expand_next;
10871 }
10872
10873 stte_p = pmap_tte(subord, nvaddr);
10874 if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
10875 PMAP_UNLOCK(subord);
10876 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_LEAF_LEVEL);
10877
10878 if (kr != KERN_SUCCESS) {
10879 PMAP_LOCK(grand);
10880 goto done;
10881 }
10882
10883 PMAP_LOCK(subord);
10884 }
10885 expand_next:
10886 nvaddr += pt_attr_twig_size(pt_attr);
10887 }
10888 #endif
10889 PMAP_UNLOCK(subord);
10890
10891 /*
10892 * copy tte's from subord pmap into grand pmap
10893 */
10894
10895 PMAP_LOCK(grand);
10896 nvaddr = (vm_map_offset_t) nstart;
10897 vaddr = (vm_map_offset_t) vstart;
10898
10899
10900 #if (__ARM_VMSA__ == 7)
10901 for (i = 0; i < num_tte; i++) {
10902 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
10903 goto nest_next;
10904 }
10905
10906 stte_p = pmap_tte(subord, nvaddr);
10907 gtte_p = pmap_tte(grand, vaddr);
10908 *gtte_p = *stte_p;
10909
10910 nest_next:
10911 nvaddr += ARM_TT_L1_SIZE;
10912 vaddr += ARM_TT_L1_SIZE;
10913 }
10914 #else
10915 for (i = 0; i < num_tte; i++) {
10916 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
10917 goto nest_next;
10918 }
10919
10920 stte_p = pmap_tte(subord, nvaddr);
10921 gtte_p = pmap_tte(grand, vaddr);
10922 if (gtte_p == PT_ENTRY_NULL) {
10923 PMAP_UNLOCK(grand);
10924 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_TWIG_LEVEL);
10925 PMAP_LOCK(grand);
10926
10927 if (kr != KERN_SUCCESS) {
10928 goto done;
10929 }
10930
10931 gtte_p = pmap_tt2e(grand, vaddr);
10932 }
10933 *gtte_p = *stte_p;
10934
10935 nest_next:
10936 vaddr += pt_attr_twig_size(pt_attr);
10937 nvaddr += pt_attr_twig_size(pt_attr);
10938 }
10939 #endif
10940
10941 kr = KERN_SUCCESS;
10942 done:
10943
10944 stte_p = pmap_tte(grand, vstart);
10945 FLUSH_PTE_RANGE_STRONG(stte_p, stte_p + num_tte);
10946
10947 #if (__ARM_VMSA__ > 7)
10948 /*
10949 * check for overflow on LP64 arch
10950 */
10951 assert((size & 0xFFFFFFFF00000000ULL) == 0);
10952 #endif
10953 PMAP_UPDATE_TLBS(grand, vstart, vend, false);
10954
10955 PMAP_UNLOCK(grand);
10956 return kr;
10957 }
10958
10959 kern_return_t
10960 pmap_nest(
10961 pmap_t grand,
10962 pmap_t subord,
10963 addr64_t vstart,
10964 addr64_t nstart,
10965 uint64_t size)
10966 {
10967 kern_return_t kr = KERN_FAILURE;
10968
10969 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
10970 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
10971 VM_KERNEL_ADDRHIDE(vstart));
10972
10973 #if XNU_MONITOR
10974 while ((kr = pmap_nest_ppl(grand, subord, vstart, nstart, size)) == KERN_RESOURCE_SHORTAGE) {
10975 pmap_alloc_page_for_ppl();
10976 }
10977
10978 pmap_ledger_check_balance(grand);
10979 pmap_ledger_check_balance(subord);
10980 #else
10981 kr = pmap_nest_internal(grand, subord, vstart, nstart, size);
10982 #endif
10983
10984 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);
10985
10986 return kr;
10987 }
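
/*
 * Illustrative sketch (not compiled): inserting a shared-region pmap into a
 * task pmap.  The subordinate must be marked nested before the first
 * pmap_nest() call; 'base' and 'size' are hypothetical and must be aligned
 * to the twig table size.
 */
#if 0
static kern_return_t
nest_shared_region_example(pmap_t task_pmap, pmap_t shared_region_pmap,
    addr64_t base, uint64_t size)
{
	pmap_set_nested(shared_region_pmap);

	/* The shared region uses the same base address in both pmaps. */
	return pmap_nest(task_pmap, shared_region_pmap, base, base, size);
}
#endif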
10988
10989 /*
10990 * kern_return_t pmap_unnest(grand, vaddr)
10991 *
10992 * grand = the pmap that will have the virtual range unnested
10993 * vaddr = start of range in pmap to be unnested
10994 * size = size of range in pmap to be unnested
10995 *
10996 */
10997
10998 kern_return_t
10999 pmap_unnest(
11000 pmap_t grand,
11001 addr64_t vaddr,
11002 uint64_t size)
11003 {
11004 return pmap_unnest_options(grand, vaddr, size, 0);
11005 }
11006
11007 MARK_AS_PMAP_TEXT static kern_return_t
11008 pmap_unnest_options_internal(
11009 pmap_t grand,
11010 addr64_t vaddr,
11011 uint64_t size,
11012 unsigned int option)
11013 {
11014 vm_map_offset_t start;
11015 vm_map_offset_t addr;
11016 tt_entry_t *tte_p;
11017 unsigned int current_index;
11018 unsigned int start_index;
11019 unsigned int max_index;
11020 unsigned int num_tte;
11021 unsigned int i;
11022
11023 addr64_t vend;
11024 if (__improbable(os_add_overflow(vaddr, size, &vend))) {
11025 panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__, grand, vaddr, size);
11026 }
11027
11028 VALIDATE_PMAP(grand);
11029
11030 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
11031
11032 if (((size | vaddr) & pt_attr_twig_offmask(pt_attr)) != 0x0ULL) {
11033 panic("pmap_unnest(): unaligned request");
11034 }
11035
11036 if ((option & PMAP_UNNEST_CLEAN) == 0) {
11037 if (grand->nested_pmap == NULL) {
11038 panic("%s: %p has no nested pmap", __func__, grand);
11039 }
11040
11041 if ((vaddr < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
11042 panic("%s: %p: unnest request for region [%p, %p) not fully within the nested region", __func__, grand, (void*)vaddr, (void*)vend);
11043 }
11044
11045 PMAP_LOCK(grand->nested_pmap);
11046
11047 start = vaddr - grand->nested_region_grand_addr + grand->nested_region_subord_addr;
11048 start_index = (unsigned int)((vaddr - grand->nested_region_grand_addr) >> pt_attr_twig_shift(pt_attr));
11049 max_index = (unsigned int)(start_index + (size >> pt_attr_twig_shift(pt_attr)));
11050 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
11051
11052 for (current_index = start_index, addr = start; current_index < max_index; current_index++, addr += pt_attr_twig_size(pt_attr)) {
11053 pt_entry_t *bpte, *epte, *cpte;
11054
11055 if (addr < grand->nested_pmap->nested_region_true_start) {
11056 /* We haven't reached the interesting range. */
11057 continue;
11058 }
11059
11060 if (addr >= grand->nested_pmap->nested_region_true_end) {
11061 /* We're done with the interesting range. */
11062 break;
11063 }
11064
11065 bpte = pmap_pte(grand->nested_pmap, addr);
11066 epte = bpte + (pt_attr_leaf_index_mask(pt_attr) >> pt_attr_leaf_shift(pt_attr));
11067
11068 if (!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap)) {
11069 setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);
11070
11071 for (cpte = bpte; cpte <= epte; cpte++) {
11072 pmap_paddr_t pa;
11073 int pai = 0;
11074 boolean_t managed = FALSE;
11075 pt_entry_t spte;
11076
11077 if ((*cpte != ARM_PTE_TYPE_FAULT)
11078 && (!ARM_PTE_IS_COMPRESSED(*cpte, cpte))) {
11079 spte = *cpte;
11080 while (!managed) {
11081 pa = pte_to_pa(spte);
11082 if (!pa_valid(pa)) {
11083 break;
11084 }
11085 pai = (int)pa_index(pa);
11086 LOCK_PVH(pai);
11087 spte = *cpte;
11088 pa = pte_to_pa(spte);
11089 if (pai == (int)pa_index(pa)) {
11090 managed = TRUE;
11091 break; // Leave the PVH locked as we'll unlock it after we update the PTE
11092 }
11093 UNLOCK_PVH(pai);
11094 }
11095
11096 if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
11097 WRITE_PTE_FAST(cpte, (spte | ARM_PTE_NG));
11098 }
11099
11100 if (managed) {
11101 ASSERT_PVH_LOCKED(pai);
11102 UNLOCK_PVH(pai);
11103 }
11104 }
11105 }
11106 }
11107
11108 FLUSH_PTE_RANGE_STRONG(bpte, epte);
11109 flush_mmu_tlb_region_asid_async(start, (unsigned)size, grand->nested_pmap);
11110 }
11111
11112 sync_tlb_flush();
11113
11114 PMAP_UNLOCK(grand->nested_pmap);
11115 }
11116
11117 PMAP_LOCK(grand);
11118
11119 /*
11120 * invalidate all TTEs for the segment at vaddr in pmap grand
11121 */
11122 start = vaddr;
11123 addr = vaddr;
11124
11125 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
11126
11127 for (i = 0; i < num_tte; i++, addr += pt_attr_twig_size(pt_attr)) {
11128 if (addr < grand->nested_pmap->nested_region_true_start) {
11129 /* We haven't reached the interesting range. */
11130 continue;
11131 }
11132
11133 if (addr >= grand->nested_pmap->nested_region_true_end) {
11134 /* We're done with the interesting range. */
11135 break;
11136 }
11137
11138 tte_p = pmap_tte(grand, addr);
11139 *tte_p = ARM_TTE_TYPE_FAULT;
11140 }
11141
11142 tte_p = pmap_tte(grand, start);
11143 FLUSH_PTE_RANGE_STRONG(tte_p, tte_p + num_tte);
11144 PMAP_UPDATE_TLBS(grand, start, vend, false);
11145
11146 PMAP_UNLOCK(grand);
11147
11148 return KERN_SUCCESS;
11149 }
11150
11151 kern_return_t
11152 pmap_unnest_options(
11153 pmap_t grand,
11154 addr64_t vaddr,
11155 uint64_t size,
11156 unsigned int option)
11157 {
11158 kern_return_t kr = KERN_FAILURE;
11159
11160 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
11161 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
11162
11163 #if XNU_MONITOR
11164 kr = pmap_unnest_options_ppl(grand, vaddr, size, option);
11165 #else
11166 kr = pmap_unnest_options_internal(grand, vaddr, size, option);
11167 #endif
11168
11169 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);
11170
11171 return kr;
11172 }
11173
11174 boolean_t
11175 pmap_adjust_unnest_parameters(
11176 __unused pmap_t p,
11177 __unused vm_map_offset_t *s,
11178 __unused vm_map_offset_t *e)
11179 {
11180 return TRUE; /* to get to log_unnest_badness()... */
11181 }
11182
11183 /*
11184 * disable no-execute capability on
11185 * the specified pmap
11186 */
11187 #if DEVELOPMENT || DEBUG
11188 void
11189 pmap_disable_NX(
11190 pmap_t pmap)
11191 {
11192 pmap->nx_enabled = FALSE;
11193 }
11194 #else
11195 void
11196 pmap_disable_NX(
11197 __unused pmap_t pmap)
11198 {
11199 }
11200 #endif
11201
11202 void
11203 pt_fake_zone_init(
11204 int zone_index)
11205 {
11206 pt_fake_zone_index = zone_index;
11207 }
11208
11209 void
11210 pt_fake_zone_info(
11211 int *count,
11212 vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
11213 uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
11214 {
11215 *count = inuse_pmap_pages_count;
11216 *cur_size = PAGE_SIZE * (inuse_pmap_pages_count);
11217 *max_size = PAGE_SIZE * (inuse_pmap_pages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
11218 *elem_size = PAGE_SIZE;
11219 *alloc_size = PAGE_SIZE;
11220 *sum_size = (alloc_pmap_pages_count) * PAGE_SIZE;
11221
11222 *collectable = 1;
11223 *exhaustable = 0;
11224 *caller_acct = 1;
11225 }
11226
11227 /*
11228 * flush a range of hardware TLB entries.
11229 * NOTE: assumes the smallest TLB entry in use will be for
11230 * an ARM small page (4K).
11231 */
11232
11233 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
11234
11235 #if __ARM_RANGE_TLBI__
11236 #define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
11237 #define ARM64_FULL_TLB_FLUSH_THRESHOLD ARM64_16K_TLB_RANGE_PAGES
11238 #else
11239 #define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
11240 #endif // __ARM_RANGE_TLBI__
11241
11242 static void
11243 flush_mmu_tlb_region_asid_async(
11244 vm_offset_t va,
11245 unsigned length,
11246 pmap_t pmap)
11247 {
11248 #if (__ARM_VMSA__ == 7)
11249 vm_offset_t end = va + length;
11250 uint32_t asid;
11251
11252 asid = pmap->hw_asid;
11253
11254 if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
11255 boolean_t flush_all = FALSE;
11256
11257 if ((asid == 0) || (pmap->nested == TRUE)) {
11258 flush_all = TRUE;
11259 }
11260 if (flush_all) {
11261 flush_mmu_tlb_async();
11262 } else {
11263 flush_mmu_tlb_asid_async(asid);
11264 }
11265
11266 return;
11267 }
11268 if (pmap->nested == TRUE) {
11269 #if !__ARM_MP_EXT__
11270 flush_mmu_tlb();
11271 #else
11272 va = arm_trunc_page(va);
11273 while (va < end) {
11274 flush_mmu_tlb_mva_entries_async(va);
11275 va += ARM_SMALL_PAGE_SIZE;
11276 }
11277 #endif
11278 return;
11279 }
11280 va = arm_trunc_page(va) | (asid & 0xff);
11281 flush_mmu_tlb_entries_async(va, end);
11282
11283 #else
11284 unsigned npages = length >> pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
11285 uint32_t asid;
11286
11287 asid = pmap->hw_asid;
11288
11289 if (npages > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
11290 boolean_t flush_all = FALSE;
11291
11292 if ((asid == 0) || (pmap->nested == TRUE)) {
11293 flush_all = TRUE;
11294 }
11295 if (flush_all) {
11296 flush_mmu_tlb_async();
11297 } else {
11298 flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
11299 }
11300 return;
11301 }
11302 #if __ARM_RANGE_TLBI__
11303 if (npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
11304 va = generate_rtlbi_param(npages, asid, va);
11305 if (pmap->nested == TRUE) {
11306 flush_mmu_tlb_allrange_async(va);
11307 } else {
11308 flush_mmu_tlb_range_async(va);
11309 }
11310 return;
11311 }
11312 #endif
11313 vm_offset_t end = tlbi_asid(asid) | tlbi_addr(va + length);
11314 va = tlbi_asid(asid) | tlbi_addr(va);
11315 if (pmap->nested == TRUE) {
11316 flush_mmu_tlb_allentries_async(va, end);
11317 } else {
11318 flush_mmu_tlb_entries_async(va, end);
11319 }
11320
11321 #endif
11322 }
11323
11324 MARK_AS_PMAP_TEXT static void
11325 flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap)
11326 {
11327 #if (__ARM_VMSA__ == 7)
11328 flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
11329 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
11330 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
11331 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
11332 #else
11333 flush_mmu_tlb_entry_async(tlbi_addr(va & ~pt_attr_twig_offmask(pmap_get_pt_attr(pmap))) | tlbi_asid(pmap->hw_asid));
11334 #endif
11335 }
11336
11337 MARK_AS_PMAP_TEXT static void
11338 flush_mmu_tlb_full_asid_async(pmap_t pmap)
11339 {
11340 #if (__ARM_VMSA__ == 7)
11341 flush_mmu_tlb_asid_async(pmap->hw_asid);
11342 #else /* (__ARM_VMSA__ == 7) */
11343 flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
11344 #endif /* (__ARM_VMSA__ == 7) */
11345 }
11346
11347 void
11348 flush_mmu_tlb_region(
11349 vm_offset_t va,
11350 unsigned length)
11351 {
11352 flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
11353 sync_tlb_flush();
11354 }
11355
11356 static pmap_io_range_t*
11357 pmap_find_io_attr(pmap_paddr_t paddr)
11358 {
11359 pmap_io_range_t find_range = {.addr = paddr & ~PAGE_MASK, .len = PAGE_SIZE};
11360 unsigned int begin = 0, end = num_io_rgns - 1;
11361 if ((num_io_rgns == 0) || (paddr < io_attr_table[begin].addr) ||
11362 (paddr >= (io_attr_table[end].addr + io_attr_table[end].len))) {
11363 return NULL;
11364 }
11365
11366 for (;;) {
11367 unsigned int middle = (begin + end) / 2;
11368 int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
11369 if (cmp == 0) {
11370 return &io_attr_table[middle];
11371 } else if (begin == end) {
11372 break;
11373 } else if (cmp > 0) {
11374 begin = middle + 1;
11375 } else {
11376 end = middle;
11377 }
11378 }
11379
11380 return NULL;
11381 }
11382
11383 unsigned int
11384 pmap_cache_attributes(
11385 ppnum_t pn)
11386 {
11387 pmap_paddr_t paddr;
11388 int pai;
11389 unsigned int result;
11390 pp_attr_t pp_attr_current;
11391
11392 paddr = ptoa(pn);
11393
11394 assert(vm_last_phys > vm_first_phys); // Check that pmap has been bootstrapped
11395
11396 if (!pa_valid(paddr)) {
11397 pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
11398 return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
11399 }
11400
11401 result = VM_WIMG_DEFAULT;
11402
11403 pai = (int)pa_index(paddr);
11404
11405 pp_attr_current = pp_attr_table[pai];
11406 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
11407 result = pp_attr_current & PP_ATTR_WIMG_MASK;
11408 }
11409 return result;
11410 }
11411
11412 MARK_AS_PMAP_TEXT static void
11413 pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
11414 {
11415 if ((wimg_bits_prev != wimg_bits_new)
11416 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
11417 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
11418 && (wimg_bits_new != VM_WIMG_COPYBACK))
11419 || ((wimg_bits_prev == VM_WIMG_WTHRU)
11420 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
11421 pmap_sync_page_attributes_phys(pn);
11422 }
11423
11424 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
11425 pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
11426 }
11427 }
11428
11429 MARK_AS_PMAP_TEXT static __unused void
11430 pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
11431 {
11432 pmap_paddr_t paddr = ptoa(pn);
11433 int pai = (int)pa_index(paddr);
11434
11435 if (__improbable(!pa_valid(paddr))) {
11436 panic("%s called on non-managed page 0x%08x", __func__, pn);
11437 }
11438
11439 LOCK_PVH(pai);
11440
11441 #if XNU_MONITOR
11442 if (__improbable(pa_test_monitor(paddr))) {
11443 panic("%s invoked on PPL page 0x%08x", __func__, pn);
11444 }
11445 #endif
11446
11447 pmap_update_cache_attributes_locked(pn, new_cacheattr);
11448
11449 UNLOCK_PVH(pai);
11450
11451 pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
11452 }
11453
11454 void *
11455 pmap_map_compressor_page(ppnum_t pn)
11456 {
11457 #if __ARM_PTE_PHYSMAP__
11458 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
11459 if (cacheattr != VM_WIMG_DEFAULT) {
11460 #if XNU_MONITOR
11461 pmap_update_compressor_page_ppl(pn, cacheattr, VM_WIMG_DEFAULT);
11462 #else
11463 pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
11464 #endif
11465 }
11466 #endif
11467 return (void*)phystokv(ptoa(pn));
11468 }
11469
11470 void
11471 pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
11472 {
11473 #if __ARM_PTE_PHYSMAP__
11474 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
11475 if (cacheattr != VM_WIMG_DEFAULT) {
11476 #if XNU_MONITOR
11477 pmap_update_compressor_page_ppl(pn, VM_WIMG_DEFAULT, cacheattr);
11478 #else
11479 pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
11480 #endif
11481 }
11482 #endif
11483 }
11484
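/*
 * Helper for pmap_batch_set_cache_attributes().  The 'doit' flag selects one
 * of two passes over the batch: with doit == FALSE this is a dry run that
 * only decides (via *res and platform_cache_batch_wimg()) whether a single
 * batched cache flush can replace per-page flushes; with doit == TRUE the new
 * WIMG bits are committed under the PVH lock and, on the last page of the
 * batch, the batched flush may be issued.
 *
 * A hypothetical caller (sketch only, not taken from this file) would make
 * both passes itself:
 *
 *     unsigned int res = 0;
 *     for (i = 0; i < cnt; i++)   // dry run: is batching worthwhile?
 *         batch_ok &= pmap_batch_set_cache_attributes(pn[i], attr, cnt, i, FALSE, &res);
 *     for (i = 0; i < cnt; i++)   // commit the new attributes
 *         pmap_batch_set_cache_attributes(pn[i], attr, cnt, i, TRUE, &res);
 */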
11485 MARK_AS_PMAP_TEXT static boolean_t
11486 pmap_batch_set_cache_attributes_internal(
11487 ppnum_t pn,
11488 unsigned int cacheattr,
11489 unsigned int page_cnt,
11490 unsigned int page_index,
11491 boolean_t doit,
11492 unsigned int *res)
11493 {
11494 pmap_paddr_t paddr;
11495 int pai;
11496 pp_attr_t pp_attr_current;
11497 pp_attr_t pp_attr_template;
11498 unsigned int wimg_bits_prev, wimg_bits_new;
11499
11500 if (cacheattr & VM_WIMG_USE_DEFAULT) {
11501 cacheattr = VM_WIMG_DEFAULT;
11502 }
11503
11504 if ((doit == FALSE) && (*res == 0)) {
11505 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
11506 *res = page_cnt;
11507 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
11508 if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt << PAGE_SHIFT) == FALSE) {
11509 return FALSE;
11510 }
11511 }
11512
11513 paddr = ptoa(pn);
11514
11515 if (!pa_valid(paddr)) {
11516 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn);
11517 }
11518
11519 pai = (int)pa_index(paddr);
11520
11521 if (doit) {
11522 LOCK_PVH(pai);
11523 #if XNU_MONITOR
11524 if (pa_test_monitor(paddr)) {
11525 panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
11526 }
11527 #endif
11528 }
11529
11530 do {
11531 pp_attr_current = pp_attr_table[pai];
11532 wimg_bits_prev = VM_WIMG_DEFAULT;
11533 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
11534 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
11535 }
11536
11537 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
11538
11539 if (!doit) {
11540 break;
11541 }
11542
11543 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
11544 * to avoid losing simultaneous updates to other bits like refmod. */
11545 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
11546
11547 wimg_bits_new = VM_WIMG_DEFAULT;
11548 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
11549 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
11550 }
11551
11552 if (doit) {
11553 if (wimg_bits_new != wimg_bits_prev) {
11554 pmap_update_cache_attributes_locked(pn, cacheattr);
11555 }
11556 UNLOCK_PVH(pai);
11557 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
11558 pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
11559 }
11560 } else {
11561 if (wimg_bits_new == VM_WIMG_COPYBACK) {
11562 return FALSE;
11563 }
11564 if (wimg_bits_prev == wimg_bits_new) {
11565 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
11566 *res = *res - 1;
11567 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
11568 if (!platform_cache_batch_wimg(wimg_bits_new, (*res) << PAGE_SHIFT)) {
11569 return FALSE;
11570 }
11571 }
11572 return TRUE;
11573 }
11574
11575 if (page_cnt == (page_index + 1)) {
11576 wimg_bits_prev = VM_WIMG_COPYBACK;
11577 if (((wimg_bits_prev != wimg_bits_new))
11578 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
11579 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
11580 && (wimg_bits_new != VM_WIMG_COPYBACK))
11581 || ((wimg_bits_prev == VM_WIMG_WTHRU)
11582 && ((wimg_bits_new != VM_WIMG_COPYBACK) && (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
11583 platform_cache_flush_wimg(wimg_bits_new);
11584 }
11585 }
11586
11587 return TRUE;
11588 }
11589
11590 boolean_t
11591 pmap_batch_set_cache_attributes(
11592 ppnum_t pn,
11593 unsigned int cacheattr,
11594 unsigned int page_cnt,
11595 unsigned int page_index,
11596 boolean_t doit,
11597 unsigned int *res)
11598 {
11599 #if XNU_MONITOR
11600 return pmap_batch_set_cache_attributes_ppl(pn, cacheattr, page_cnt, page_index, doit, res);
11601 #else
11602 return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
11603 #endif
11604 }
11605
11606 MARK_AS_PMAP_TEXT static void
11607 pmap_set_cache_attributes_priv(
11608 ppnum_t pn,
11609 unsigned int cacheattr,
11610 boolean_t external __unused)
11611 {
11612 pmap_paddr_t paddr;
11613 int pai;
11614 pp_attr_t pp_attr_current;
11615 pp_attr_t pp_attr_template;
11616 unsigned int wimg_bits_prev, wimg_bits_new;
11617
11618 paddr = ptoa(pn);
11619
11620 if (!pa_valid(paddr)) {
11621 return; /* Not a managed page. */
11622 }
11623
11624 if (cacheattr & VM_WIMG_USE_DEFAULT) {
11625 cacheattr = VM_WIMG_DEFAULT;
11626 }
11627
11628 pai = (int)pa_index(paddr);
11629
11630 LOCK_PVH(pai);
11631
11632 #if XNU_MONITOR
11633 if (external && pa_test_monitor(paddr)) {
11634 panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
11635 } else if (!external && !pa_test_monitor(paddr)) {
11636 panic("%s invoked on non-PPL page 0x%llx", __func__, (uint64_t)paddr);
11637 }
11638 #endif
11639
11640 do {
11641 pp_attr_current = pp_attr_table[pai];
11642 wimg_bits_prev = VM_WIMG_DEFAULT;
11643 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
11644 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
11645 }
11646
11647 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
11648
11649 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
11650 * to avoid losing simultaneous updates to other bits like refmod. */
11651 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
11652
11653 wimg_bits_new = VM_WIMG_DEFAULT;
11654 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
11655 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
11656 }
11657
11658 if (wimg_bits_new != wimg_bits_prev) {
11659 pmap_update_cache_attributes_locked(pn, cacheattr);
11660 }
11661
11662 UNLOCK_PVH(pai);
11663
11664 pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
11665 }
11666
11667 MARK_AS_PMAP_TEXT static void
11668 pmap_set_cache_attributes_internal(
11669 ppnum_t pn,
11670 unsigned int cacheattr)
11671 {
11672 pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
11673 }
11674
11675 void
11676 pmap_set_cache_attributes(
11677 ppnum_t pn,
11678 unsigned int cacheattr)
11679 {
11680 #if XNU_MONITOR
11681 pmap_set_cache_attributes_ppl(pn, cacheattr);
11682 #else
11683 pmap_set_cache_attributes_internal(pn, cacheattr);
11684 #endif
11685 }
11686
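/*
 * Rewrite every mapping of the given physical page with the new WIMG
 * attributes: the kernel's physical-aperture PTE (when __ARM_PTE_PHYSMAP__
 * is defined) plus every PTE recorded in the page's PV list, flushing the
 * TLB for each rewritten mapping.  The caller must hold the PVH lock for
 * the page, hence "_locked".
 */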
11687 MARK_AS_PMAP_TEXT void
11688 pmap_update_cache_attributes_locked(
11689 ppnum_t ppnum,
11690 unsigned attributes)
11691 {
11692 pmap_paddr_t phys = ptoa(ppnum);
11693 pv_entry_t *pve_p;
11694 pt_entry_t *pte_p;
11695 pv_entry_t **pv_h;
11696 pt_entry_t tmplate;
11697 unsigned int pai;
11698 boolean_t tlb_flush_needed = FALSE;
11699
11700 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);
11701
11702 #if __ARM_PTE_PHYSMAP__
11703 vm_offset_t kva = phystokv(phys);
11704 pte_p = pmap_pte(kernel_pmap, kva);
11705
11706 tmplate = *pte_p;
11707 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
11708 #if XNU_MONITOR
11709 tmplate |= (wimg_to_pte(attributes) & ~ARM_PTE_XPRR_MASK);
11710 #else
11711 tmplate |= wimg_to_pte(attributes);
11712 #endif
11713 #if (__ARM_VMSA__ > 7)
11714 if (tmplate & ARM_PTE_HINT_MASK) {
11715 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
11716 __FUNCTION__, pte_p, (void *)kva, tmplate);
11717 }
11718 #endif
11719 WRITE_PTE_STRONG(pte_p, tmplate);
11720 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
11721 tlb_flush_needed = TRUE;
11722 #endif
11723
11724 pai = (unsigned int)pa_index(phys);
11725
11726 pv_h = pai_to_pvh(pai);
11727
11728 pte_p = PT_ENTRY_NULL;
11729 pve_p = PV_ENTRY_NULL;
11730 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
11731 pte_p = pvh_ptep(pv_h);
11732 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
11733 pve_p = pvh_list(pv_h);
11734 pte_p = PT_ENTRY_NULL;
11735 }
11736
11737 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
11738 vm_map_address_t va;
11739 pmap_t pmap;
11740
11741 if (pve_p != PV_ENTRY_NULL) {
11742 pte_p = pve_get_ptep(pve_p);
11743 }
11744 #ifdef PVH_FLAG_IOMMU
11745 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
11746 goto cache_skip_pve;
11747 }
11748 #endif
11749 pmap = ptep_get_pmap(pte_p);
11750 va = ptep_get_va(pte_p);
11751
11752 tmplate = *pte_p;
11753 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
11754 tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes);
11755
11756 WRITE_PTE_STRONG(pte_p, tmplate);
11757 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
11758 tlb_flush_needed = TRUE;
11759
11760 #ifdef PVH_FLAG_IOMMU
11761 cache_skip_pve:
11762 #endif
11763 pte_p = PT_ENTRY_NULL;
11764 if (pve_p != PV_ENTRY_NULL) {
11765 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
11766 }
11767 }
11768 if (tlb_flush_needed) {
11769 sync_tlb_flush();
11770 }
11771
11772 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);
11773 }
11774
11775 #if (__ARM_VMSA__ == 7)
11776 vm_map_address_t
11777 pmap_create_sharedpage(
11778 void)
11779 {
11780 pmap_paddr_t pa;
11781 kern_return_t kr;
11782
11783 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
11784 memset((char *) phystokv(pa), 0, PAGE_SIZE);
11785
11786 kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
11787 assert(kr == KERN_SUCCESS);
11788
11789 return (vm_map_address_t)phystokv(pa);
11790 }
11791 #else
11792 static void
11793 pmap_update_tt3e(
11794 pmap_t pmap,
11795 vm_address_t address,
11796 tt_entry_t template)
11797 {
11798 tt_entry_t *ptep, pte;
11799
11800 ptep = pmap_tt3e(pmap, address);
11801 if (ptep == NULL) {
11802 panic("%s: no ptep?\n", __FUNCTION__);
11803 }
11804
11805 pte = *ptep;
11806 pte = tte_to_pa(pte) | template;
11807 WRITE_PTE_STRONG(ptep, pte);
11808 }
11809
11810 /* Note absence of non-global bit */
11811 #define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
11812 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
11813 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
11814 | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
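/*
 * The template deliberately leaves ARM_PTE_NG clear, so the commpage TLB
 * entry is global and shared across ASIDs; the mapping is read-only
 * (AP_RORO) and never executable (NX/PNX).  The CONFIG_XNUPOST test mapping
 * below ORs in ARM_PTE_NG explicitly, since that mapping is not meant to be
 * global.
 */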
11815
11816 vm_map_address_t
11817 pmap_create_sharedpage(
11818 void
11819 )
11820 {
11821 kern_return_t kr;
11822 pmap_paddr_t pa = 0;
11823
11824 #if XNU_MONITOR
11825 pa = pmap_alloc_page_for_kern();
11826 assert(pa);
11827 #else
11828
11829 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
11830 #endif
11831
11832 memset((char *) phystokv(pa), 0, PAGE_SIZE);
11833
11834 #ifdef CONFIG_XNUPOST
11835 /*
11836 * The kernel pmap maintains a user accessible mapping of the commpage
11837 * to test PAN.
11838 */
11839 kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
11840 assert(kr == KERN_SUCCESS);
11841
11842 /*
11843 * This mapping should not be global (as we only expect to reference it
11844 * during testing).
11845 */
11846 pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);
11847
11848 #if KASAN
11849 kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
11850 #endif
11851 #endif /* CONFIG_XNUPOST */
11852
11853 /*
11854 * In order to avoid burning extra pages on mapping the shared page, we
11855 * create a dedicated pmap for the shared page. We forcibly nest the
11856 * translation tables from this pmap into other pmaps. The level we
11857 * will nest at depends on the MMU configuration (page size, TTBR range,
11858 * etc).
11859 *
11860 * Note that this is NOT "the nested pmap" (which is used to nest the
11861 * shared cache).
11862 *
11863 * Note that we update parameters of the entry for our unique needs (NG
11864 * entry, etc.).
11865 */
11866 sharedpage_pmap = pmap_create_options(NULL, 0x0, 0);
11867 assert(sharedpage_pmap != NULL);
11868
11869 /* The user 64-bit mapping... */
11870 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
11871 assert(kr == KERN_SUCCESS);
11872 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
11873
11874 /* ...and the user 32-bit mapping. */
11875 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
11876 assert(kr == KERN_SUCCESS);
11877 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
11878
11879 /* For manipulation in kernel, go straight to physical page */
11880 return (vm_map_address_t)phystokv(pa);
11881 }
11882
11883 /*
11884 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
11885 * with user controlled TTEs.
11886 */
11887 #if (ARM_PGSHIFT == 14)
11888 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= MACH_VM_MAX_ADDRESS);
11889 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
11890 #elif (ARM_PGSHIFT == 12)
11891 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
11892 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
11893 #else
11894 #error Nested shared page mapping is unsupported on this config
11895 #endif
11896
11897 MARK_AS_PMAP_TEXT static kern_return_t
11898 pmap_insert_sharedpage_internal(
11899 pmap_t pmap)
11900 {
11901 kern_return_t kr = KERN_SUCCESS;
11902 vm_offset_t sharedpage_vaddr;
11903 pt_entry_t *ttep, *src_ttep;
11904 int options = 0;
11905
11906 VALIDATE_PMAP(pmap);
11907 #if XNU_MONITOR
11908 options |= PMAP_OPTIONS_NOWAIT;
11909 #endif /* XNU_MONITOR */
11910
11911 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
11912 #error We assume a single page.
11913 #endif
11914
11915 if (pmap_is_64bit(pmap)) {
11916 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
11917 } else {
11918 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
11919 }
11920
11921 PMAP_LOCK(pmap);
11922
11923 /*
11924 * For 4KB pages, we can force the commpage to nest at the level one
11925 * page table, as each entry is 1GB (i.e, there will be no overlap
11926 * with regular userspace mappings). For 16KB pages, each level one
11927 * entry is 64GB, so we must go to the second level entry (32MB) in
11928 * order to nest.
11929 */
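/*
 * Worked out for the two supported granules (8-byte table entries assumed):
 *   4KB pages  (ARM_PGSHIFT == 12): 512 entries per table, so an L3 entry
 *   maps 4KB, an L2 entry 512 * 4KB = 2MB, and an L1 entry 512 * 2MB = 1GB.
 *   16KB pages (ARM_PGSHIFT == 14): 2048 entries per table, so an L3 entry
 *   maps 16KB, an L2 entry 2048 * 16KB = 32MB, and an L1 entry 2048 * 32MB = 64GB.
 * This is why the 4KB configuration nests the commpage at L1 and the 16KB
 * configuration nests it at L2 below.
 */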
11930 #if (ARM_PGSHIFT == 12)
11931 (void)options;
11932
11933 /* Just slam in the L1 entry. */
11934 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
11935
11936 if (*ttep != ARM_PTE_EMPTY) {
11937 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
11938 }
11939
11940 src_ttep = pmap_tt1e(sharedpage_pmap, sharedpage_vaddr);
11941 #elif (ARM_PGSHIFT == 14)
11942 /* Allocate for the L2 entry if necessary, and slam it into place. */
11943 /*
11944 * As long as we are using a three level page table, the first level
11945 * should always exist, so we don't need to check for it.
11946 */
11947 while (*pmap_tt1e(pmap, sharedpage_vaddr) == ARM_PTE_EMPTY) {
11948 PMAP_UNLOCK(pmap);
11949
11950 kr = pmap_expand(pmap, sharedpage_vaddr, options, PMAP_TT_L2_LEVEL);
11951
11952 if (kr != KERN_SUCCESS) {
11953 #if XNU_MONITOR
11954 if (kr == KERN_RESOURCE_SHORTAGE) {
11955 return kr;
11956 } else
11957 #endif
11958 {
11959 panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
11960 }
11961 }
11962
11963 PMAP_LOCK(pmap);
11964 }
11965
11966 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
11967
11968 if (*ttep != ARM_PTE_EMPTY) {
11969 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
11970 }
11971
11972 src_ttep = pmap_tt2e(sharedpage_pmap, sharedpage_vaddr);
11973 #endif
11974
11975 *ttep = *src_ttep;
11976 FLUSH_PTE_STRONG(ttep);
11977
11978 /* TODO: Should we flush in the 64-bit case? */
11979 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
11980
11981 #if (ARM_PGSHIFT == 12)
11982 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
11983 #elif (ARM_PGSHIFT == 14)
11984 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
11985 #endif
11986 sync_tlb_flush();
11987
11988 PMAP_UNLOCK(pmap);
11989
11990 return kr;
11991 }
11992
11993 static void
11994 pmap_unmap_sharedpage(
11995 pmap_t pmap)
11996 {
11997 pt_entry_t *ttep;
11998 vm_offset_t sharedpage_vaddr;
11999
12000 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
12001 #error We assume a single page.
12002 #endif
12003
12004 if (pmap_is_64bit(pmap)) {
12005 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
12006 } else {
12007 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
12008 }
12009
12010 #if (ARM_PGSHIFT == 12)
12011 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
12012
12013 if (ttep == NULL) {
12014 return;
12015 }
12016
12017 /* It had better be mapped to the shared page */
12018 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt1e(sharedpage_pmap, sharedpage_vaddr)) {
12019 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
12020 }
12021 #elif (ARM_PGSHIFT == 14)
12022 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
12023
12024 if (ttep == NULL) {
12025 return;
12026 }
12027
12028 /* It had better be mapped to the shared page */
12029 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt2e(sharedpage_pmap, sharedpage_vaddr)) {
12030 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
12031 }
12032 #endif
12033
12034 *ttep = ARM_TTE_EMPTY;
12035 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
12036
12037 #if (ARM_PGSHIFT == 12)
12038 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
12039 #elif (ARM_PGSHIFT == 14)
12040 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
12041 #endif
12042 sync_tlb_flush();
12043 }
12044
12045 void
12046 pmap_insert_sharedpage(
12047 pmap_t pmap)
12048 {
12049 #if XNU_MONITOR
12050 kern_return_t kr = KERN_FAILURE;
12051
12052 while ((kr = pmap_insert_sharedpage_ppl(pmap)) == KERN_RESOURCE_SHORTAGE) {
12053 pmap_alloc_page_for_ppl();
12054 }
12055
12056 pmap_ledger_check_balance(pmap);
12057
12058 if (kr != KERN_SUCCESS) {
12059 panic("%s: failed to insert the shared page, kr=%d, "
12060 "pmap=%p",
12061 __FUNCTION__, kr,
12062 pmap);
12063 }
12064 #else
12065 pmap_insert_sharedpage_internal(pmap);
12066 #endif
12067 }
12068
12069 static boolean_t
12070 pmap_is_64bit(
12071 pmap_t pmap)
12072 {
12073 return pmap->is_64bit;
12074 }
12075
12076 #endif
12077
12078 /* ARMTODO -- an implementation that accounts for
12079 * holes in the physical map, if any.
12080 */
12081 boolean_t
12082 pmap_valid_page(
12083 ppnum_t pn)
12084 {
12085 return pa_valid(ptoa(pn));
12086 }
12087
12088 boolean_t
12089 pmap_bootloader_page(
12090 ppnum_t pn)
12091 {
12092 pmap_paddr_t paddr = ptoa(pn);
12093
12094 if (pa_valid(paddr)) {
12095 return FALSE;
12096 }
12097 pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
12098 return (io_rgn != NULL) && (io_rgn->wimg & PMAP_IO_RANGE_CARVEOUT);
12099 }
12100
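/*
 * Return TRUE if no valid leaf PTE exists anywhere in [va_start, va_end).
 * The range is walked one twig (leaf page table) at a time; user pmaps are
 * locked around the walk unless we are running in the kernel debugger.
 */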
12101 MARK_AS_PMAP_TEXT static boolean_t
12102 pmap_is_empty_internal(
12103 pmap_t pmap,
12104 vm_map_offset_t va_start,
12105 vm_map_offset_t va_end)
12106 {
12107 vm_map_offset_t block_start, block_end;
12108 tt_entry_t *tte_p;
12109
12110 if (pmap == NULL) {
12111 return TRUE;
12112 }
12113
12114 VALIDATE_PMAP(pmap);
12115
12116 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
12117 unsigned int initial_not_in_kdp = not_in_kdp;
12118
12119 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
12120 PMAP_LOCK(pmap);
12121 }
12122
12123 #if (__ARM_VMSA__ == 7)
12124 if (tte_index(pmap, pt_attr, va_end) >= pmap->tte_index_max) {
12125 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
12126 PMAP_UNLOCK(pmap);
12127 }
12128 return TRUE;
12129 }
12130 #endif
12131
12132 /* TODO: This will be faster if we increment ttep at each level. */
12133 block_start = va_start;
12134
12135 while (block_start < va_end) {
12136 pt_entry_t *bpte_p, *epte_p;
12137 pt_entry_t *pte_p;
12138
12139 block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
12140 if (block_end > va_end) {
12141 block_end = va_end;
12142 }
12143
12144 tte_p = pmap_tte(pmap, block_start);
12145 if ((tte_p != PT_ENTRY_NULL)
12146 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
12147 pte_p = (pt_entry_t *) ttetokv(*tte_p);
12148 bpte_p = &pte_p[pte_index(pmap, pt_attr, block_start)];
12149 epte_p = &pte_p[pte_index(pmap, pt_attr, block_end)];
12150
12151 for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
12152 if (*pte_p != ARM_PTE_EMPTY) {
12153 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
12154 PMAP_UNLOCK(pmap);
12155 }
12156 return FALSE;
12157 }
12158 }
12159 }
12160 block_start = block_end;
12161 }
12162
12163 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
12164 PMAP_UNLOCK(pmap);
12165 }
12166
12167 return TRUE;
12168 }
12169
12170 boolean_t
12171 pmap_is_empty(
12172 pmap_t pmap,
12173 vm_map_offset_t va_start,
12174 vm_map_offset_t va_end)
12175 {
12176 #if XNU_MONITOR
12177 return pmap_is_empty_ppl(pmap, va_start, va_end);
12178 #else
12179 return pmap_is_empty_internal(pmap, va_start, va_end);
12180 #endif
12181 }
12182
12183 vm_map_offset_t
12184 pmap_max_offset(
12185 boolean_t is64,
12186 unsigned int option)
12187 {
12188 return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
12189 }
12190
12191 vm_map_offset_t
12192 pmap_max_64bit_offset(
12193 __unused unsigned int option)
12194 {
12195 vm_map_offset_t max_offset_ret = 0;
12196
12197 #if defined(__arm64__)
12198 const vm_map_offset_t min_max_offset = SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000; // end of shared region + 512MB for various purposes
12199 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
12200 max_offset_ret = arm64_pmap_max_offset_default;
12201 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
12202 max_offset_ret = min_max_offset;
12203 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
12204 max_offset_ret = MACH_VM_MAX_ADDRESS;
12205 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
12206 if (arm64_pmap_max_offset_default) {
12207 max_offset_ret = arm64_pmap_max_offset_default;
12208 } else if (max_mem > 0xC0000000) {
12209 max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
12210 } else if (max_mem > 0x40000000) {
12211 max_offset_ret = min_max_offset + 0x38000000; // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
12212 } else {
12213 max_offset_ret = min_max_offset;
12214 }
12215 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
12216 if (arm64_pmap_max_offset_default) {
12217 // Allow the boot-arg to override jumbo size
12218 max_offset_ret = arm64_pmap_max_offset_default;
12219 } else {
12220 max_offset_ret = MACH_VM_MAX_ADDRESS; // Max offset is 64GB for pmaps with special "jumbo" blessing
12221 }
12222 } else {
12223 panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
12224 }
12225
12226 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
12227 assert(max_offset_ret >= min_max_offset);
12228 #else
12229 panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
12230 #endif
12231
12232 return max_offset_ret;
12233 }
12234
12235 vm_map_offset_t
12236 pmap_max_32bit_offset(
12237 unsigned int option)
12238 {
12239 vm_map_offset_t max_offset_ret = 0;
12240
12241 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
12242 max_offset_ret = arm_pmap_max_offset_default;
12243 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
12244 max_offset_ret = 0x80000000;
12245 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
12246 max_offset_ret = VM_MAX_ADDRESS;
12247 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
12248 if (arm_pmap_max_offset_default) {
12249 max_offset_ret = arm_pmap_max_offset_default;
12250 } else if (max_mem > 0x20000000) {
12251 max_offset_ret = 0x80000000;
12252 } else {
12253 max_offset_ret = 0x80000000;
12254 }
12255 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
12256 max_offset_ret = 0x80000000;
12257 } else {
12258 panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
12259 }
12260
12261 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
12262 return max_offset_ret;
12263 }
12264
12265 #if CONFIG_DTRACE
12266 /*
12267 * Constrain DTrace copyin/copyout actions
12268 */
12269 extern kern_return_t dtrace_copyio_preflight(addr64_t);
12270 extern kern_return_t dtrace_copyio_postflight(addr64_t);
12271
12272 kern_return_t
12273 dtrace_copyio_preflight(
12274 __unused addr64_t va)
12275 {
12276 if (current_map() == kernel_map) {
12277 return KERN_FAILURE;
12278 } else {
12279 return KERN_SUCCESS;
12280 }
12281 }
12282
12283 kern_return_t
12284 dtrace_copyio_postflight(
12285 __unused addr64_t va)
12286 {
12287 return KERN_SUCCESS;
12288 }
12289 #endif /* CONFIG_DTRACE */
12290
12291
12292 void
12293 pmap_flush_context_init(__unused pmap_flush_context *pfc)
12294 {
12295 }
12296
12297
12298 void
12299 pmap_flush(
12300 __unused pmap_flush_context *cpus_to_flush)
12301 {
12302 /* not implemented yet */
12303 return;
12304 }
12305
12306 #if XNU_MONITOR
12307
12308 /*
12309 * Enforce that the address range described by kva and nbytes is not currently
12310 * PPL-owned, and won't become PPL-owned while pinned. This is to prevent
12311 * unintentionally writing to PPL-owned memory.
12312 */
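/*
 * Pinning works by setting PP_ATTR_NO_MONITOR on every physical page backing
 * the range, after checking (in a CAS loop, so concurrent attribute updates
 * are not lost) that PP_ATTR_MONITOR is not already set.  Pinning the static
 * physical-aperture mapping of a page is refused outright.
 */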
12313 static void
12314 pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes)
12315 {
12316 vm_offset_t end;
12317 if (os_add_overflow(kva, nbytes, &end)) {
12318 panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
12319 }
12320 for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
12321 pmap_paddr_t pa = kvtophys(ckva);
12322 if (!pa_valid(pa)) {
12323 panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
12324 }
12325 pp_attr_t attr;
12326 unsigned int pai = (unsigned int)pa_index(pa);
12327 if (ckva == phystokv(pa)) {
12328 panic("%s(%p): attempt to pin static mapping for page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
12329 }
12330 do {
12331 attr = pp_attr_table[pai] & ~PP_ATTR_NO_MONITOR;
12332 if (attr & PP_ATTR_MONITOR) {
12333 panic("%s(%p): physical page 0x%llx belongs to PPL", __func__, (void*)kva, (uint64_t)pa);
12334 }
12335 } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_NO_MONITOR, &pp_attr_table[pai]));
12336 }
12337 }
12338
12339 static void
12340 pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes)
12341 {
12342 vm_offset_t end;
12343 if (os_add_overflow(kva, nbytes, &end)) {
12344 panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
12345 }
12346 for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
12347 pmap_paddr_t pa = kvtophys(ckva);
12348 if (!pa_valid(pa)) {
12349 panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
12350 }
12351 if (!(pp_attr_table[pa_index(pa)] & PP_ATTR_NO_MONITOR)) {
12352 panic("%s(%p): physical page 0x%llx not pinned", __func__, (void*)kva, (uint64_t)pa);
12353 }
12354 assert(!(pp_attr_table[pa_index(pa)] & PP_ATTR_MONITOR));
12355 pa_clear_no_monitor(pa);
12356 }
12357 }
12358
12359 /*
12360 * Lock down a page, making all mappings read-only, and preventing
12361 * further mappings or removal of this particular kva's mapping.
12362 * Effectively, it makes the page at kva immutable.
12363 */
12364 MARK_AS_PMAP_TEXT static void
12365 pmap_ppl_lockdown_page(vm_address_t kva)
12366 {
12367 pmap_paddr_t pa = kvtophys(kva);
12368 unsigned int pai = (unsigned int)pa_index(pa);
12369 LOCK_PVH(pai);
12370 pv_entry_t **pv_h = pai_to_pvh(pai);
12371
12372 if (pa_test_monitor(pa)) {
12373 panic("%#lx: page %llx belongs to PPL", kva, pa);
12374 }
12375
12376 if (pvh_get_flags(pv_h) & (PVH_FLAG_LOCKDOWN | PVH_FLAG_EXEC)) {
12377 panic("%#lx: already locked down/executable (%#llx)", kva, pvh_get_flags(pv_h));
12378 }
12379
12380 pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
12381
12382 if (pte_p == PT_ENTRY_NULL) {
12383 panic("%#lx: NULL pte", kva);
12384 }
12385
12386 pt_entry_t tmplate = *pte_p;
12387 if ((tmplate & ARM_PTE_APMASK) != ARM_PTE_AP(AP_RWNA)) {
12388 panic("%#lx: not a kernel r/w page (%#llx)", kva, tmplate & ARM_PTE_APMASK);
12389 }
12390
12391 pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_LOCKDOWN);
12392
12393 pmap_set_ptov_ap(pai, AP_RONA, FALSE);
12394
12395 UNLOCK_PVH(pai);
12396
12397 pmap_page_protect_options_internal((ppnum_t)atop(pa), VM_PROT_READ, 0);
12398 }
12399
12400 /*
12401 * Release a page from being locked down to the PPL, making it writable
12402 * to the kernel once again.
12403 */
12404 MARK_AS_PMAP_TEXT static void
12405 pmap_ppl_unlockdown_page(vm_address_t kva)
12406 {
12407 pmap_paddr_t pa = kvtophys(kva);
12408 unsigned int pai = (unsigned int)pa_index(pa);
12409 LOCK_PVH(pai);
12410 pv_entry_t **pv_h = pai_to_pvh(pai);
12411
12412 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
12413
12414 if (!(pvh_flags & PVH_FLAG_LOCKDOWN)) {
12415 panic("unlockdown attempt on not locked down virtual %#lx/pai %d", kva, pai);
12416 }
12417
12418 pvh_set_flags(pv_h, pvh_flags & ~PVH_FLAG_LOCKDOWN);
12419 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
12420 UNLOCK_PVH(pai);
12421 }
12422
12423 #else /* XNU_MONITOR */
12424
12425 static void __unused
12426 pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
12427 {
12428 }
12429
12430 static void __unused
12431 pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
12432 {
12433 }
12434
12435 #endif /* !XNU_MONITOR */
12436
12437
12438 #define PMAP_RESIDENT_INVALID ((mach_vm_size_t)-1)
12439
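/*
 * Count resident and compressed bytes for a range that must lie within a
 * single twig (one leaf table's worth) of address space; pmap_query_resident()
 * below walks larger ranges one twig at a time.  PMAP_RESIDENT_INVALID is
 * returned when the pmap is gone or the twig has no table, which the caller
 * treats as "stop walking".
 */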
12440 MARK_AS_PMAP_TEXT static mach_vm_size_t
12441 pmap_query_resident_internal(
12442 pmap_t pmap,
12443 vm_map_address_t start,
12444 vm_map_address_t end,
12445 mach_vm_size_t *compressed_bytes_p)
12446 {
12447 mach_vm_size_t resident_bytes = 0;
12448 mach_vm_size_t compressed_bytes = 0;
12449
12450 pt_entry_t *bpte, *epte;
12451 pt_entry_t *pte_p;
12452 tt_entry_t *tte_p;
12453
12454 if (pmap == NULL) {
12455 return PMAP_RESIDENT_INVALID;
12456 }
12457
12458 VALIDATE_PMAP(pmap);
12459
12460 /* Ensure that this request is valid, and addresses exactly one TTE. */
12461 if (__improbable((start % ARM_PGBYTES) || (end % ARM_PGBYTES))) {
12462 panic("%s: address range %p, %p not page-aligned", __func__, (void*)start, (void*)end);
12463 }
12464
12465 if (__improbable((end < start) || ((end - start) > (PTE_PGENTRIES * ARM_PGBYTES)))) {
12466 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
12467 }
12468
12469 PMAP_LOCK(pmap);
12470 tte_p = pmap_tte(pmap, start);
12471 if (tte_p == (tt_entry_t *) NULL) {
12472 PMAP_UNLOCK(pmap);
12473 return PMAP_RESIDENT_INVALID;
12474 }
12475 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
12476 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
12477 pte_p = (pt_entry_t *) ttetokv(*tte_p);
12478 bpte = &pte_p[pte_index(pmap, pt_attr, start)];
12479 epte = &pte_p[pte_index(pmap, pt_attr, end)];
12480
12481 for (; bpte < epte; bpte++) {
12482 if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
12483 compressed_bytes += ARM_PGBYTES;
12484 } else if (pa_valid(pte_to_pa(*bpte))) {
12485 resident_bytes += ARM_PGBYTES;
12486 }
12487 }
12488 }
12489 PMAP_UNLOCK(pmap);
12490
12491 if (compressed_bytes_p) {
12492 pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
12493 *compressed_bytes_p += compressed_bytes;
12494 pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
12495 }
12496
12497 return resident_bytes;
12498 }
12499
12500 mach_vm_size_t
12501 pmap_query_resident(
12502 pmap_t pmap,
12503 vm_map_address_t start,
12504 vm_map_address_t end,
12505 mach_vm_size_t *compressed_bytes_p)
12506 {
12507 mach_vm_size_t total_resident_bytes;
12508 mach_vm_size_t compressed_bytes;
12509 vm_map_address_t va;
12510
12511
12512 if (pmap == PMAP_NULL) {
12513 if (compressed_bytes_p) {
12514 *compressed_bytes_p = 0;
12515 }
12516 return 0;
12517 }
12518
12519 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
12520
12521 total_resident_bytes = 0;
12522 compressed_bytes = 0;
12523
12524 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
12525 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
12526 VM_KERNEL_ADDRHIDE(end));
12527
12528 va = start;
12529 while (va < end) {
12530 vm_map_address_t l;
12531 mach_vm_size_t resident_bytes;
12532
12533 l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
12534
12535 if (l > end) {
12536 l = end;
12537 }
12538 #if XNU_MONITOR
12539 resident_bytes = pmap_query_resident_ppl(pmap, va, l, compressed_bytes_p);
12540 #else
12541 resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
12542 #endif
12543 if (resident_bytes == PMAP_RESIDENT_INVALID) {
12544 break;
12545 }
12546
12547 total_resident_bytes += resident_bytes;
12548
12549 va = l;
12550 }
12551
12552 if (compressed_bytes_p) {
12553 *compressed_bytes_p = compressed_bytes;
12554 }
12555
12556 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
12557 total_resident_bytes);
12558
12559 return total_resident_bytes;
12560 }
12561
12562 #if MACH_ASSERT
12563 static void
12564 pmap_check_ledgers(
12565 pmap_t pmap)
12566 {
12567 int pid;
12568 char *procname;
12569
12570 if (pmap->pmap_pid == 0) {
12571 /*
12572 * This pmap was not or is no longer fully associated
12573 * with a task (e.g. the old pmap after a fork()/exec() or
12574 * spawn()). Its "ledger" still points at a task that is
12575 * now using a different (and active) address space, so
12576 * we can't check that all the pmap ledgers are balanced here.
12577 *
12578 * If the "pid" is set, that means that we went through
12579 * pmap_set_process() in task_terminate_internal(), so
12580 * this task's ledger should not have been re-used and
12581 * all the pmap ledgers should be back to 0.
12582 */
12583 return;
12584 }
12585
12586 pid = pmap->pmap_pid;
12587 procname = pmap->pmap_procname;
12588
12589 vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);
12590
12591 PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
12592 #if 00
12593 PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
12594 #endif
12595 PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
12596 PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
12597 PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
12598 PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
12599 PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
12600 }
12601 #endif /* MACH_ASSERT */
12602
12603 void
12604 pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
12605 {
12606 }
12607
12608
12609 #if CONFIG_PGTRACE
12610 #define PROF_START uint64_t t, nanot;\
12611 t = mach_absolute_time();
12612
12613 #define PROF_END absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
12614 kprintf("%s: took %llu ns\n", __func__, nanot);
12615
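/*
 * The pgtrace lock is taken with interrupts disabled.  PMAP_PGTRACE_LOCK
 * saves the current interrupt state into *(p), then spins on
 * simple_lock_try(); if the try fails it restores interrupts before trying
 * again, so interrupts are not held off while waiting.  PMAP_PGTRACE_UNLOCK
 * drops the lock and restores the saved state.  Typical use, as in the
 * functions below:
 *
 *     bool ints;
 *     PMAP_PGTRACE_LOCK(&ints);
 *     ... touch pmap_pgtrace.pages ...
 *     PMAP_PGTRACE_UNLOCK(&ints);
 */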
12616 #define PMAP_PGTRACE_LOCK(p) \
12617 do { \
12618 *(p) = ml_set_interrupts_enabled(false); \
12619 if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
12620 ml_set_interrupts_enabled(*(p)); \
12621 } while (true)
12622
12623 #define PMAP_PGTRACE_UNLOCK(p) \
12624 do { \
12625 simple_unlock(&(pmap_pgtrace.lock)); \
12626 ml_set_interrupts_enabled(*(p)); \
12627 } while (0)
12628
12629 #define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
12630 do { \
12631 *(pte_p) = (pte_entry); \
12632 FLUSH_PTE(pte_p); \
12633 } while (0)
12634
12635 #define PGTRACE_MAX_MAP 16 // maximum number of VAs that can map the same pa
12636
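/*
 * Tracing state of a page, as used below: UNDEFINED marks an unused entry,
 * PA_UNDEFINED means the traced VA is known but its physical page is not yet
 * resolved, VA_UNDEFINED means the physical page is known but no VA mapping
 * has been seen yet, and DEFINED means both are known and a clone mapping
 * can be (or has been) installed.
 */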
12637 typedef enum {
12638 UNDEFINED,
12639 PA_UNDEFINED,
12640 VA_UNDEFINED,
12641 DEFINED
12642 } pmap_pgtrace_page_state_t;
12643
12644 typedef struct {
12645 queue_chain_t chain;
12646
12647 /*
12648 * pa - physical address being traced
12649 * maps - list of VA mappings currently cloned for this pa
12650 * map_pool - pool of preallocated, unused map entries
12651 * map_waste - maps retired during cleanup, recycled back into map_pool
12652 * state - which of pa/va are known yet (see pmap_pgtrace_page_state_t)
12653 */
12654 pmap_paddr_t pa;
12655 queue_head_t maps;
12656 queue_head_t map_pool;
12657 queue_head_t map_waste;
12658 pmap_pgtrace_page_state_t state;
12659 } pmap_pgtrace_page_t;
12660
12661 static struct {
12662 /*
12663 * pages - list of tracing page info
12664 */
12665 queue_head_t pages;
12666 decl_simple_lock_data(, lock);
12667 } pmap_pgtrace = {};
12668
12669 static void
12670 pmap_pgtrace_init(void)
12671 {
12672 queue_init(&(pmap_pgtrace.pages));
12673 simple_lock_init(&(pmap_pgtrace.lock), 0);
12674
12675 boolean_t enabled;
12676
12677 if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
12678 pgtrace_enabled = enabled;
12679 }
12680 }
12681
12682 // find a page with given pa - pmap_pgtrace should be locked
12683 inline static pmap_pgtrace_page_t *
12684 pmap_pgtrace_find_page(pmap_paddr_t pa)
12685 {
12686 queue_head_t *q = &(pmap_pgtrace.pages);
12687 pmap_pgtrace_page_t *p;
12688
12689 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
12690 if (p->state == UNDEFINED) {
12691 continue;
12692 }
12693 if (p->state == PA_UNDEFINED) {
12694 continue;
12695 }
12696 if (p->pa == pa) {
12697 return p;
12698 }
12699 }
12700
12701 return NULL;
12702 }
12703
12704 // enter clone of given pmap, va page and range - pmap should be locked
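/*
 * A "clone" consists of three consecutive kernel VAs (map->cva[0..2]) that
 * mirror the PTEs of the page before, the traced page itself, and the page
 * after.  Once the clone is installed, the original PTE is invalidated and
 * tagged ARM_PTE_PGTRACE so accesses fault into pmap_pgtrace_fault(), while
 * the kernel-side clone presumably remains available to the trace decoder
 * for reading and emulation.
 */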
12705 static bool
12706 pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
12707 {
12708 bool ints;
12709 queue_head_t *q = &(pmap_pgtrace.pages);
12710 pmap_paddr_t pa_page;
12711 pt_entry_t *ptep, *cptep;
12712 pmap_pgtrace_page_t *p;
12713 bool found = false;
12714
12715 PMAP_ASSERT_LOCKED(pmap);
12716 assert(va_page == arm_trunc_page(va_page));
12717
12718 PMAP_PGTRACE_LOCK(&ints);
12719
12720 ptep = pmap_pte(pmap, va_page);
12721
12722 // target pte should exist
12723 if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
12724 PMAP_PGTRACE_UNLOCK(&ints);
12725 return false;
12726 }
12727
12728 queue_head_t *mapq;
12729 queue_head_t *mappool;
12730 pmap_pgtrace_map_t *map = NULL;
12731
12732 pa_page = pte_to_pa(*ptep);
12733
12734 // find if we have a page info defined for this
12735 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
12736 mapq = &(p->maps);
12737 mappool = &(p->map_pool);
12738
12739 switch (p->state) {
12740 case PA_UNDEFINED:
12741 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
12742 if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
12743 p->pa = pa_page;
12744 map->range.start = start;
12745 map->range.end = end;
12746 found = true;
12747 break;
12748 }
12749 }
12750 break;
12751
12752 case VA_UNDEFINED:
12753 if (p->pa != pa_page) {
12754 break;
12755 }
12756 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
12757 if (map->cloned == false) {
12758 map->pmap = pmap;
12759 map->ova = va_page;
12760 map->range.start = start;
12761 map->range.end = end;
12762 found = true;
12763 break;
12764 }
12765 }
12766 break;
12767
12768 case DEFINED:
12769 if (p->pa != pa_page) {
12770 break;
12771 }
12772 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
12773 if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
12774 kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
12775 break;
12776 } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
12777 kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
12778 break;
12779 } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
12780 // range should be already defined as well
12781 found = true;
12782 break;
12783 }
12784 }
12785 break;
12786
12787 default:
12788 panic("invalid state p->state=%x\n", p->state);
12789 }
12790
12791 if (found == true) {
12792 break;
12793 }
12794 }
12795
12796 // do not clone if no page info found
12797 if (found == false) {
12798 PMAP_PGTRACE_UNLOCK(&ints);
12799 return false;
12800 }
12801
12802 // copy pre, target and post ptes to clone ptes
12803 for (int i = 0; i < 3; i++) {
12804 ptep = pmap_pte(pmap, va_page + (i - 1) * ARM_PGBYTES);
12805 cptep = pmap_pte(kernel_pmap, map->cva[i]);
12806 assert(cptep != NULL);
12807 if (ptep == NULL) {
12808 PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
12809 } else {
12810 PGTRACE_WRITE_PTE(cptep, *ptep);
12811 }
12812 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
12813 }
12814
12815 // get ptes for original and clone
12816 ptep = pmap_pte(pmap, va_page);
12817 cptep = pmap_pte(kernel_pmap, map->cva[1]);
12818
12819 // invalidate original pte and mark it as a pgtrace page
12820 PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
12821 PMAP_UPDATE_TLBS(pmap, map->ova, map->ova + ARM_PGBYTES, false);
12822
12823 map->cloned = true;
12824 p->state = DEFINED;
12825
12826 kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);
12827
12828 PMAP_PGTRACE_UNLOCK(&ints);
12829
12830 return true;
12831 }
12832
12833 // This function removes the trace bit and re-validates the pte if applicable. The pmap must be locked.
12834 static void
12835 pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
12836 {
12837 bool ints, found = false;
12838 pmap_pgtrace_page_t *p;
12839 pt_entry_t *ptep;
12840
12841 PMAP_PGTRACE_LOCK(&ints);
12842
12843 // we must have this page info
12844 p = pmap_pgtrace_find_page(pa);
12845 if (p == NULL) {
12846 goto unlock_exit;
12847 }
12848
12849 // find matching map
12850 queue_head_t *mapq = &(p->maps);
12851 queue_head_t *mappool = &(p->map_pool);
12852 pmap_pgtrace_map_t *map;
12853
12854 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
12855 if (map->pmap == pmap && map->ova == va) {
12856 found = true;
12857 break;
12858 }
12859 }
12860
12861 if (!found) {
12862 goto unlock_exit;
12863 }
12864
12865 if (map->cloned == true) {
12866 // Restore the pte to its original state
12867 ptep = pmap_pte(pmap, map->ova);
12868 assert(ptep);
12869 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
12870 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);
12871
12872 // revert clone pages
12873 for (int i = 0; i < 3; i++) {
12874 ptep = pmap_pte(kernel_pmap, map->cva[i]);
12875 assert(ptep != NULL);
12876 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
12877 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
12878 }
12879 }
12880
12881 queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
12882 map->pmap = NULL;
12883 map->ova = (vm_map_offset_t)NULL;
12884 map->cloned = false;
12885 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
12886
12887 kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);
12888
12889 unlock_exit:
12890 PMAP_PGTRACE_UNLOCK(&ints);
12891 }
12892
12893 // remove all clones of given pa - pmap must be locked
12894 static void
12895 pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
12896 {
12897 bool ints;
12898 pmap_pgtrace_page_t *p;
12899 pt_entry_t *ptep;
12900
12901 PMAP_PGTRACE_LOCK(&ints);
12902
12903 // we must have this page info
12904 p = pmap_pgtrace_find_page(pa);
12905 if (p == NULL) {
12906 PMAP_PGTRACE_UNLOCK(&ints);
12907 return;
12908 }
12909
12910 queue_head_t *mapq = &(p->maps);
12911 queue_head_t *mappool = &(p->map_pool);
12912 queue_head_t *mapwaste = &(p->map_waste);
12913 pmap_pgtrace_map_t *map;
12914
12915 // move maps to waste
12916 while (!queue_empty(mapq)) {
12917 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
12918 queue_enter_first(mapwaste, map, pmap_pgtrace_map_t*, chain);
12919 }
12920
12921 PMAP_PGTRACE_UNLOCK(&ints);
12922
12923 // sanitize maps in waste
12924 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
12925 if (map->cloned == true) {
12926 PMAP_LOCK(map->pmap);
12927
12928 // restore the original pte
12929 ptep = pmap_pte(map->pmap, map->ova);
12930 assert(ptep);
12931 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
12932 PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova + ARM_PGBYTES, false);
12933
12934 // revert clone ptes
12935 for (int i = 0; i < 3; i++) {
12936 ptep = pmap_pte(kernel_pmap, map->cva[i]);
12937 assert(ptep != NULL);
12938 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
12939 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
12940 }
12941
12942 PMAP_UNLOCK(map->pmap);
12943 }
12944
12945 map->pmap = NULL;
12946 map->ova = (vm_map_offset_t)NULL;
12947 map->cloned = false;
12948 }
12949
12950 PMAP_PGTRACE_LOCK(&ints);
12951
12952 // recycle maps back to map_pool
12953 while (!queue_empty(mapwaste)) {
12954 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
12955 queue_enter_first(mappool, map, pmap_pgtrace_map_t*, chain);
12956 }
12957
12958 PMAP_PGTRACE_UNLOCK(&ints);
12959 }
12960
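/*
 * Derive the VA range to scan from TCR.  TxSZ is the number of top address
 * bits excluded from translation, so the translated region spans
 * 2^(64 - TxSZ) bytes.  For user pmaps (T0SZ) that is [0, 2^(64 - T0SZ) - 1],
 * e.g. T0SZ == 25 gives a 2^39 byte (512GB) space; for the kernel (T1SZ) it
 * is the top 2^(64 - T1SZ) bytes of the address space, hence the
 * (UINT64_MAX >> (64 - tsz)) << (64 - tsz) lower bound below.
 */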
12961 inline static void
12962 pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
12963 {
12964 uint64_t tsz;
12965 vm_map_offset_t end;
12966
12967 if (pmap == kernel_pmap) {
12968 tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
12969 *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64 - tsz)) << (64 - tsz));
12970 *endp = VM_MAX_KERNEL_ADDRESS;
12971 } else {
12972 tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
12973 if (tsz == 64) {
12974 end = 0;
12975 } else {
12976 end = ((uint64_t)1 << (64 - tsz)) - 1;
12977 }
12978
12979 *startp = 0;
12980 *endp = end;
12981 }
12982
12983 assert(*endp > *startp);
12984
12985 return;
12986 }
12987
12988 // has pa mapped in given pmap? then clone it
12989 static uint64_t
12990 pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
12991 {
12992 uint64_t ret = 0;
12993 vm_map_offset_t min, max;
12994 vm_map_offset_t cur_page, end_page;
12995 pt_entry_t *ptep;
12996 tt_entry_t *ttep;
12997 tt_entry_t tte;
12998 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
12999
13000 pmap_pgtrace_get_search_space(pmap, &min, &max);
13001
13002 cur_page = arm_trunc_page(min);
13003 end_page = arm_trunc_page(max);
13004 while (cur_page <= end_page) {
13005 vm_map_offset_t add = 0;
13006
13007 PMAP_LOCK(pmap);
13008
13009 // skip ranges we are not interested in: the kernel and kext images
13010 if (pmap == kernel_pmap &&
13011 ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
13012 (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
13013 add = ARM_PGBYTES;
13014 goto unlock_continue;
13015 }
13016
13017 // check whether we can skip l1
13018 ttep = pmap_tt1e(pmap, cur_page);
13019 assert(ttep);
13020 tte = *ttep;
13021 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
13022 add = ARM_TT_L1_SIZE;
13023 goto unlock_continue;
13024 }
13025
13026 // how about l2
13027 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, cur_page)];
13028
13029 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
13030 add = ARM_TT_L2_SIZE;
13031 goto unlock_continue;
13032 }
13033
13034 // ptep finally
13035 ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, cur_page)]);
13036 if (ptep == PT_ENTRY_NULL) {
13037 add = ARM_TT_L3_SIZE;
13038 goto unlock_continue;
13039 }
13040
13041 if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
13042 if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
13043 ret++;
13044 }
13045 }
13046
13047 add = ARM_PGBYTES;
13048
13049 unlock_continue:
13050 PMAP_UNLOCK(pmap);
13051
13052 // overflow
13053 if (cur_page + add < cur_page) {
13054 break;
13055 }
13056
13057 cur_page += add;
13058 }
13059
13060
13061 return ret;
13062 }
13063
13064 // search pv table and clone vas of given pa
13065 static uint64_t
13066 pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
13067 {
13068 uint64_t ret = 0;
13069 unsigned long pai;
13070 pv_entry_t **pvh;
13071 pt_entry_t *ptep;
13072 pmap_t pmap;
13073
13074 typedef struct {
13075 queue_chain_t chain;
13076 pmap_t pmap;
13077 vm_map_offset_t va;
13078 } pmap_va_t;
13079
13080 queue_head_t pmapvaq;
13081 pmap_va_t *pmapva;
13082
13083 queue_init(&pmapvaq);
13084
13085 pai = pa_index(pa);
13086 LOCK_PVH(pai);
13087 pvh = pai_to_pvh(pai);
13088
13089 // collect pmap/va pair from pvh
13090 if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
13091 ptep = pvh_ptep(pvh);
13092 pmap = ptep_get_pmap(ptep);
13093
13094 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
13095 pmapva->pmap = pmap;
13096 pmapva->va = ptep_get_va(ptep);
13097
13098 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
13099 } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
13100 pv_entry_t *pvep;
13101
13102 pvep = pvh_list(pvh);
13103 while (pvep) {
13104 ptep = pve_get_ptep(pvep);
13105 pmap = ptep_get_pmap(ptep);
13106
13107 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
13108 pmapva->pmap = pmap;
13109 pmapva->va = ptep_get_va(ptep);
13110
13111 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
13112
13113 pvep = PVE_NEXT_PTR(pve_next(pvep));
13114 }
13115 }
13116
13117 UNLOCK_PVH(pai);
13118
13119 // clone them while making sure mapping still exists
13120 queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
13121 PMAP_LOCK(pmapva->pmap);
13122 ptep = pmap_pte(pmapva->pmap, pmapva->va);
13123 if (pte_to_pa(*ptep) == pa) {
13124 if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
13125 ret++;
13126 }
13127 }
13128 PMAP_UNLOCK(pmapva->pmap);
13129
13130 kfree(pmapva, sizeof(pmap_va_t));
13131 }
13132
13133 return ret;
13134 }
13135
13136 // allocate a page info
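/*
 * Each page info is created with PGTRACE_MAX_MAP map slots.  Every slot
 * reserves three pages of kernel VA from kernel_map up front (expanding the
 * kernel pmap while preemption is still enabled, so pmap_pte() on the clone
 * VAs is guaranteed to succeed later) and remembers the original PTE
 * contents in cva_spte so they can be restored when the clone is torn down.
 */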
13137 static pmap_pgtrace_page_t *
13138 pmap_pgtrace_alloc_page(void)
13139 {
13140 pmap_pgtrace_page_t *p;
13141 queue_head_t *mapq;
13142 queue_head_t *mappool;
13143 queue_head_t *mapwaste;
13144 pmap_pgtrace_map_t *map;
13145
13146 p = kalloc(sizeof(pmap_pgtrace_page_t));
13147 assert(p);
13148
13149 p->state = UNDEFINED;
13150
13151 mapq = &(p->maps);
13152 mappool = &(p->map_pool);
13153 mapwaste = &(p->map_waste);
13154 queue_init(mapq);
13155 queue_init(mappool);
13156 queue_init(mapwaste);
13157
13158 for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
13159 vm_map_offset_t newcva;
13160 pt_entry_t *cptep;
13161 kern_return_t kr;
13162 vm_map_entry_t entry;
13163
13164 // get a clone va
13165 vm_object_reference(kernel_object);
13166 kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3 * ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
13167 if (kr != KERN_SUCCESS) {
13168 panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
13169 }
13170 VME_OBJECT_SET(entry, kernel_object);
13171 VME_OFFSET_SET(entry, newcva);
13172 vm_map_unlock(kernel_map);
13173
13174 // fill default clone page info and add to pool
13175 map = kalloc(sizeof(pmap_pgtrace_map_t));
13176 for (int j = 0; j < 3; j++) {
13177 vm_map_offset_t addr = newcva + j * ARM_PGBYTES;
13178
13179 // pre-expand pmap while preemption enabled
13180 kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
13181 if (kr != KERN_SUCCESS) {
13182 panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
13183 }
13184
13185 cptep = pmap_pte(kernel_pmap, addr);
13186 assert(cptep != NULL);
13187
13188 map->cva[j] = addr;
13189 map->cva_spte[j] = *cptep;
13190 }
13191 map->range.start = map->range.end = 0;
13192 map->cloned = false;
13193 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
13194 }
13195
13196 return p;
13197 }
13198
13199 // free a page info
13200 static void
13201 pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
13202 {
13203 queue_head_t *mapq;
13204 queue_head_t *mappool;
13205 queue_head_t *mapwaste;
13206 pmap_pgtrace_map_t *map;
13207
13208 assert(p);
13209
13210 mapq = &(p->maps);
13211 mappool = &(p->map_pool);
13212 mapwaste = &(p->map_waste);
13213
13214 while (!queue_empty(mapq)) {
13215 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
13216 kfree(map, sizeof(pmap_pgtrace_map_t));
13217 }
13218
13219 while (!queue_empty(mappool)) {
13220 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
13221 kfree(map, sizeof(pmap_pgtrace_map_t));
13222 }
13223
13224 while (!queue_empty(mapwaste)) {
13225 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
13226 kfree(map, sizeof(pmap_pgtrace_map_t));
13227 }
13228
13229 kfree(p, sizeof(pmap_pgtrace_page_t));
13230 }
13231
13232 // construct page infos with the given address range
13233 int
13234 pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
13235 {
13236 int ret = 0;
13237 pt_entry_t *ptep;
13238 queue_head_t *q = &(pmap_pgtrace.pages);
13239 bool ints;
13240 vm_map_offset_t cur_page, end_page;
13241
13242 if (start > end) {
13243 kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
13244 return -1;
13245 }
13246
13247 PROF_START
13248
13249 // add each page in given range
13250 cur_page = arm_trunc_page(start);
13251 end_page = arm_trunc_page(end);
13252 while (cur_page <= end_page) {
13253 pmap_paddr_t pa_page = 0;
13254 uint64_t num_cloned = 0;
13255 pmap_pgtrace_page_t *p = NULL, *newp;
13256 bool free_newp = true;
13257 pmap_pgtrace_page_state_t state;
13258
13259 // do all allocations outside of spinlocks
13260 newp = pmap_pgtrace_alloc_page();
13261
13262 // keep lock order: pmap, then kernel_pmap, then the pgtrace lock
13263 if (pmap != NULL) {
13264 PMAP_LOCK(pmap);
13265 }
13266 if (pmap != kernel_pmap) {
13267 PMAP_LOCK(kernel_pmap);
13268 }
13269
13270 // addresses are physical if pmap is null
13271 if (pmap == NULL) {
13272 ptep = NULL;
13273 pa_page = cur_page;
13274 state = VA_UNDEFINED;
13275 } else {
13276 ptep = pmap_pte(pmap, cur_page);
13277 if (ptep != NULL) {
13278 pa_page = pte_to_pa(*ptep);
13279 state = DEFINED;
13280 } else {
13281 state = PA_UNDEFINED;
13282 }
13283 }
13284
13285 // search if we have a page info already
13286 PMAP_PGTRACE_LOCK(&ints);
13287 if (state != PA_UNDEFINED) {
13288 p = pmap_pgtrace_find_page(pa_page);
13289 }
13290
13291 // add pre-allocated page info if nothing found
13292 if (p == NULL) {
13293 queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
13294 p = newp;
13295 free_newp = false;
13296 }
13297
13298 // now p points to the page info we want
13299 p->state = state;
13300
13301 queue_head_t *mapq = &(p->maps);
13302 queue_head_t *mappool = &(p->map_pool);
13303 pmap_pgtrace_map_t *map;
13304 vm_map_offset_t start_offset, end_offset;
13305
13306 // calculate trace offsets in the page
13307 if (cur_page > start) {
13308 start_offset = 0;
13309 } else {
13310 start_offset = start - cur_page;
13311 }
13312 if (cur_page == end_page) {
13313 end_offset = end - end_page;
13314 } else {
13315 end_offset = ARM_PGBYTES - 1;
13316 }
13317
13318 kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);
13319
13320 // fill map info
13321 assert(!queue_empty(mappool));
13322 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
13323 if (p->state == PA_UNDEFINED) {
13324 map->pmap = pmap;
13325 map->ova = cur_page;
13326 map->range.start = start_offset;
13327 map->range.end = end_offset;
13328 } else if (p->state == VA_UNDEFINED) {
13329 p->pa = pa_page;
13330 map->range.start = start_offset;
13331 map->range.end = end_offset;
13332 } else if (p->state == DEFINED) {
13333 p->pa = pa_page;
13334 map->pmap = pmap;
13335 map->ova = cur_page;
13336 map->range.start = start_offset;
13337 map->range.end = end_offset;
13338 } else {
13339 panic("invalid p->state=%d\n", p->state);
13340 }
13341
13342 // not cloned yet
13343 map->cloned = false;
13344 queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);
13345
13346 // release locks in reverse acquisition order
13347 PMAP_PGTRACE_UNLOCK(&ints);
13348 if (pmap != kernel_pmap) {
13349 PMAP_UNLOCK(kernel_pmap);
13350 }
13351 if (pmap != NULL) {
13352 PMAP_UNLOCK(pmap);
13353 }
13354
13355 // now clone it
13356 if (pa_valid(pa_page)) {
13357 num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
13358 }
13359 if (pmap == NULL) {
13360 num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
13361 } else {
13362 num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
13363 }
13364
13365 // free the pre-allocated page info if we didn't add it to the queue
13366 if (free_newp) {
13367 pmap_pgtrace_free_page(newp);
13368 }
13369
13370 if (num_cloned == 0) {
13371 kprintf("%s: no mapping found for pa_page=%llx but it will be added when a page is entered\n", __func__, pa_page);
13372 }
13373
13374 ret += num_cloned;
13375
13376 // overflow
13377 if (cur_page + ARM_PGBYTES < cur_page) {
13378 break;
13379 } else {
13380 cur_page += ARM_PGBYTES;
13381 }
13382 }
13383
13384 PROF_END
13385
13386 return ret;
13387 }
13388
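/*
 * Illustrative usage sketch (hypothetical caller, for documentation only):
 *
 *     // start tracing one page of a task's address space
 *     int cloned = pmap_pgtrace_add_page(user_pmap, va, va + ARM_PGBYTES);
 *     ...
 *     // stop tracing and release the page info
 *     pmap_pgtrace_delete_page(user_pmap, va, va + ARM_PGBYTES);
 *
 * Passing pmap == NULL makes start and end be interpreted as physical
 * addresses instead of virtual ones.
 */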
13389 // delete page infos for the given address range
13390 int
13391 pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
13392 {
13393 int ret = 0;
13394 bool ints;
13395 queue_head_t *q = &(pmap_pgtrace.pages);
13396 pmap_pgtrace_page_t *p;
13397 vm_map_offset_t cur_page, end_page;
13398
13399 kprintf("%s start=%llx end=%llx\n", __func__, start, end);
13400
13401 PROF_START
13402
13403 pt_entry_t *ptep;
13404 pmap_paddr_t pa_page;
13405
13406 // remove page info from start to end
13407 cur_page = arm_trunc_page(start);
13408 end_page = arm_trunc_page(end);
13409 while (cur_page <= end_page) {
13410 p = NULL;
13411
13412 if (pmap == NULL) {
13413 pa_page = cur_page;
13414 } else {
13415 PMAP_LOCK(pmap);
13416 ptep = pmap_pte(pmap, cur_page);
13417 if (ptep == NULL) {
13418 PMAP_UNLOCK(pmap);
13419 goto cont;
13420 }
13421 pa_page = pte_to_pa(*ptep);
13422 PMAP_UNLOCK(pmap);
13423 }
13424
13425 // remove all clones and validate
13426 pmap_pgtrace_remove_all_clone(pa_page);
13427
13428 // find page info and delete
13429 PMAP_PGTRACE_LOCK(&ints);
13430 p = pmap_pgtrace_find_page(pa_page);
13431 if (p != NULL) {
13432 queue_remove(q, p, pmap_pgtrace_page_t *, chain);
13433 ret++;
13434 }
13435 PMAP_PGTRACE_UNLOCK(&ints);
13436
13437 // free outside of locks
13438 if (p != NULL) {
13439 pmap_pgtrace_free_page(p);
13440 }
13441
13442 cont:
13443 // overflow
13444 if (cur_page + ARM_PGBYTES < cur_page) {
13445 break;
13446 } else {
13447 cur_page += ARM_PGBYTES;
13448 }
13449 }
13450
13451 PROF_END
13452
13453 return ret;
13454 }
13455
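/*
 * pmap_pgtrace_fault:
 *   Handles a fault on a traced page.  Returns KERN_FAILURE when the
 *   faulting VA's PTE is not marked ARM_PTE_PGTRACE (the fault is not
 *   ours).  Otherwise the faulting instruction is decoded and emulated
 *   against the clone mapping, a log record is written if the access falls
 *   within the traced range, and the saved PC is advanced past the
 *   instruction before returning KERN_SUCCESS.
 */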
13456 kern_return_t
13457 pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
13458 {
13459 pt_entry_t *ptep;
13460 pgtrace_run_result_t res;
13461 pmap_pgtrace_page_t *p;
13462 bool ints, found = false;
13463 pmap_paddr_t pa;
13464
13465 // Quick check whether this fault is one we are tracing
13466 ptep = pmap_pte(pmap, va);
13467 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
13468 return KERN_FAILURE;
13469 }
13470
13471 PMAP_PGTRACE_LOCK(&ints);
13472
13473 // Check again now that access is serialized by the pgtrace lock
13474 ptep = pmap_pte(pmap, va);
13475 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
13476 PMAP_PGTRACE_UNLOCK(&ints);
13477 return KERN_FAILURE;
13478 } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
13479 // This CPU's TLB has not picked up the update yet
13480 kprintf("%s: this CPU's TLB has not been updated yet\n", __func__);
13481 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);
13482
13483 PMAP_PGTRACE_UNLOCK(&ints);
13484 return KERN_SUCCESS;
13485 }
13486
13487 // Check whether this pa is one we are tracing
13488 pa = pte_to_pa(*ptep);
13489
13490 p = pmap_pgtrace_find_page(arm_trunc_page(pa));
13491 if (p == NULL) {
13492 panic("%s Can't find va=%llx pa=%llx in tracing pages\n", __func__, va, pa);
13493 }
13494
13495 // check whether the pmap and va also match
13496 queue_head_t *mapq = &(p->maps);
13497 queue_head_t *mapwaste = &(p->map_waste);
13498 pmap_pgtrace_map_t *map;
13499
13500 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
13501 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
13502 found = true;
13503 break;
13504 }
13505 }
13506
13507 // if not found, search the map waste list, since its entries are still valid
13508 if (!found) {
13509 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
13510 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
13511 found = true;
13512 break;
13513 }
13514 }
13515 }
13516
13517 if (!found) {
13518 panic("%s Can't find a matching map for va=%llx pa=%llx in tracing pages\n", __func__, va, pa);
13519 }
13520
13521 // Decode and run it on the clone map
13522 bzero(&res, sizeof(res));
13523 pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss), // instruction
13524 va, map->cva, // fault va and clone page vas
13525 ss, &res);
13526
13527 // write a log entry if the access offset is within the traced range
13528 vm_map_offset_t offset = va - map->ova;
13529 if (map->range.start <= offset && offset <= map->range.end) {
13530 pgtrace_write_log(res);
13531 }
13532
13533 PMAP_PGTRACE_UNLOCK(&ints);
13534
13535 // Advance the saved PC to the next instruction
13536 add_saved_state_pc(ss, sizeof(uint32_t));
13537
13538 return KERN_SUCCESS;
13539 }
13540 #endif
13541
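/*
 * pmap_enforces_execute_only:
 *   Reports whether execute-only mappings can be enforced for this pmap.
 *   On ARMv8 (__ARM_VMSA__ > 7) this holds for any non-kernel pmap; the
 *   kernel pmap and all ARMv7 pmaps report FALSE.
 */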
13542 boolean_t
13543 pmap_enforces_execute_only(
13544 #if (__ARM_VMSA__ == 7)
13545 __unused
13546 #endif
13547 pmap_t pmap)
13548 {
13549 #if (__ARM_VMSA__ > 7)
13550 return pmap != kernel_pmap;
13551 #else
13552 return FALSE;
13553 #endif
13554 }
13555
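/*
 * pmap_set_jit_entitled:
 *   Intended to mark the pmap as JIT-entitled.  In this configuration the
 *   internal implementation is a no-op; under XNU_MONITOR the request is
 *   forwarded to the PPL via pmap_set_jit_entitled_ppl().
 */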
13556 MARK_AS_PMAP_TEXT void
13557 pmap_set_jit_entitled_internal(
13558 __unused pmap_t pmap)
13559 {
13560 return;
13561 }
13562
13563 void
13564 pmap_set_jit_entitled(
13565 pmap_t pmap)
13566 {
13567 #if XNU_MONITOR
13568 pmap_set_jit_entitled_ppl(pmap);
13569 #else
13570 pmap_set_jit_entitled_internal(pmap);
13571 #endif
13572 }
13573
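/*
 * pmap_query_page_info_internal:
 *   Computes the disposition of the page mapped at va and returns it via
 *   *disp_p as a bitmask of PMAP_QUERY_PAGE_PRESENT, _COMPRESSED,
 *   _COMPRESSED_ALTACCT, _ALTACCT, _REUSABLE and _INTERNAL.  For a NULL
 *   pmap or the kernel pmap the disposition is zero and
 *   KERN_INVALID_ARGUMENT is returned.
 */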
13574 MARK_AS_PMAP_TEXT static kern_return_t
13575 pmap_query_page_info_internal(
13576 pmap_t pmap,
13577 vm_map_offset_t va,
13578 int *disp_p)
13579 {
13580 pmap_paddr_t pa;
13581 int disp;
13582 int pai;
13583 pt_entry_t *pte;
13584 pv_entry_t **pv_h, *pve_p;
13585
13586 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
13587 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
13588 *disp_p = 0;
13589 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
13590 return KERN_INVALID_ARGUMENT;
13591 }
13592
13593 disp = 0;
13594
13595 VALIDATE_PMAP(pmap);
13596 PMAP_LOCK(pmap);
13597
13598 pte = pmap_pte(pmap, va);
13599 if (pte == PT_ENTRY_NULL) {
13600 goto done;
13601 }
13602
13603 pa = pte_to_pa(*pte);
13604 if (pa == 0) {
13605 if (ARM_PTE_IS_COMPRESSED(*pte, pte)) {
13606 disp |= PMAP_QUERY_PAGE_COMPRESSED;
13607 if (*pte & ARM_PTE_COMPRESSED_ALT) {
13608 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
13609 }
13610 }
13611 } else {
13612 disp |= PMAP_QUERY_PAGE_PRESENT;
13613 pai = (int) pa_index(pa);
13614 if (!pa_valid(pa)) {
13615 goto done;
13616 }
13617 LOCK_PVH(pai);
13618 pv_h = pai_to_pvh(pai);
13619 pve_p = PV_ENTRY_NULL;
13620 if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
13621 pve_p = pvh_list(pv_h);
13622 while (pve_p != PV_ENTRY_NULL &&
13623 pve_get_ptep(pve_p) != pte) {
13624 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
13625 }
13626 }
13627 if (IS_ALTACCT_PAGE(pai, pve_p)) {
13628 disp |= PMAP_QUERY_PAGE_ALTACCT;
13629 } else if (IS_REUSABLE_PAGE(pai)) {
13630 disp |= PMAP_QUERY_PAGE_REUSABLE;
13631 } else if (IS_INTERNAL_PAGE(pai)) {
13632 disp |= PMAP_QUERY_PAGE_INTERNAL;
13633 }
13634 UNLOCK_PVH(pai);
13635 }
13636
13637 done:
13638 PMAP_UNLOCK(pmap);
13639 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
13640 *disp_p = disp;
13641 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
13642 return KERN_SUCCESS;
13643 }
13644
13645 kern_return_t
13646 pmap_query_page_info(
13647 pmap_t pmap,
13648 vm_map_offset_t va,
13649 int *disp_p)
13650 {
13651 #if XNU_MONITOR
13652 return pmap_query_page_info_ppl(pmap, va, disp_p);
13653 #else
13654 return pmap_query_page_info_internal(pmap, va, disp_p);
13655 #endif
13656 }
13657
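/*
 * Illustrative usage sketch (hypothetical caller, for documentation only):
 *
 *     int disp = 0;
 *     if (pmap_query_page_info(pmap, va, &disp) == KERN_SUCCESS &&
 *         (disp & PMAP_QUERY_PAGE_PRESENT)) {
 *         // page is resident; disp may also carry accounting flags
 *     }
 */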
13658 MARK_AS_PMAP_TEXT kern_return_t
13659 pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
13660 {
13661
13662 return KERN_SUCCESS;
13663 }
13664
13665 kern_return_t
13666 pmap_return(boolean_t do_panic, boolean_t do_recurse)
13667 {
13668 #if XNU_MONITOR
13669 return pmap_return_ppl(do_panic, do_recurse);
13670 #else
13671 return pmap_return_internal(do_panic, do_recurse);
13672 #endif
13673 }
13674
13675
13676
13677
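/*
 * pmap_footprint_suspend_internal:
 *   On DEVELOPMENT || DEBUG kernels, suspending sets the current thread's
 *   pmap_footprint_suspended flag and records footprint_was_suspended on
 *   the map's pmap; resuming clears only the thread flag.  On other builds
 *   this is a no-op.
 */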
13678 MARK_AS_PMAP_TEXT static void
13679 pmap_footprint_suspend_internal(
13680 vm_map_t map,
13681 boolean_t suspend)
13682 {
13683 #if DEVELOPMENT || DEBUG
13684 if (suspend) {
13685 current_thread()->pmap_footprint_suspended = TRUE;
13686 map->pmap->footprint_was_suspended = TRUE;
13687 } else {
13688 current_thread()->pmap_footprint_suspended = FALSE;
13689 }
13690 #else /* DEVELOPMENT || DEBUG */
13691 (void) map;
13692 (void) suspend;
13693 #endif /* DEVELOPMENT || DEBUG */
13694 }
13695
13696 void
13697 pmap_footprint_suspend(
13698 vm_map_t map,
13699 boolean_t suspend)
13700 {
13701 #if XNU_MONITOR
13702 pmap_footprint_suspend_ppl(map, suspend);
13703 #else
13704 pmap_footprint_suspend_internal(map, suspend);
13705 #endif
13706 }
13707
13708 #if defined(__arm64__) && (DEVELOPMENT || DEBUG)
13709
13710 struct page_table_dump_header {
13711 uint64_t pa;
13712 uint64_t num_entries;
13713 uint64_t start_va;
13714 uint64_t end_va;
13715 };
13716
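/*
 * pmap_dump_page_tables_recurse:
 *   Serializes one translation table into the dump buffer as a
 *   page_table_dump_header followed by a verbatim copy of the table's
 *   entries, then recurses into every valid, non-block entry.  Returns the
 *   number of bytes written, or 0 if the remaining buffer is too small.
 */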
13717 static size_t
13718 pmap_dump_page_tables_recurse(pmap_t pmap,
13719 const tt_entry_t *ttp,
13720 unsigned int cur_level,
13721 uint64_t start_va,
13722 void *bufp,
13723 void *buf_end)
13724 {
13725 size_t bytes_used = 0;
13726 uint64_t num_entries = ARM_PGBYTES / sizeof(*ttp);
13727 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
13728
13729 uint64_t size = pt_attr->pta_level_info[cur_level].size;
13730 uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
13731 uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
13732 uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
13733
13734 if (cur_level == arm64_root_pgtable_level) {
13735 num_entries = arm64_root_pgtable_num_ttes;
13736 }
13737
13738 uint64_t tt_size = num_entries * sizeof(tt_entry_t);
13739 const tt_entry_t *tt_end = &ttp[num_entries];
13740
13741 if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
13742 return 0;
13743 }
13744
13745 struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
13746 header->pa = ml_static_vtop((vm_offset_t)ttp);
13747 header->num_entries = num_entries;
13748 header->start_va = start_va;
13749 header->end_va = start_va + (num_entries * size);
13750
13751 bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
13752 bytes_used += (sizeof(*header) + tt_size);
13753 uint64_t current_va = start_va;
13754
13755 for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
13756 tt_entry_t tte = *ttep;
13757
13758 if (!(tte & valid_mask)) {
13759 continue;
13760 }
13761
13762 if ((tte & type_mask) == type_block) {
13763 continue;
13764 } else {
13765 if (cur_level >= PMAP_TT_MAX_LEVEL) {
13766 panic("%s: corrupt entry %#llx at %p, "
13767 "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
13768 __FUNCTION__, tte, ttep,
13769 ttp, cur_level, bufp, buf_end);
13770 }
13771
13772 const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
13773
13774 size_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1, current_va, (uint8_t*)bufp + bytes_used, buf_end);
13775
13776 if (recurse_result == 0) {
13777 return 0;
13778 }
13779
13780 bytes_used += recurse_result;
13781 }
13782 }
13783
13784 return bytes_used;
13785 }
13786
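/*
 * pmap_dump_page_tables:
 *   Dumps a pmap's page tables into [bufp, buf_end) starting at the root
 *   table.  Must only be called from kernel debugger context; panics
 *   otherwise.  Returns the number of bytes used, 0 if the buffer was too
 *   small, or (size_t)-1 on configurations without support.
 */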
13787 size_t
13788 pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end)
13789 {
13790 if (not_in_kdp) {
13791 panic("pmap_dump_page_tables must only be called from kernel debugger context");
13792 }
13793 return pmap_dump_page_tables_recurse(pmap, pmap->tte, arm64_root_pgtable_level, pmap->min, bufp, buf_end);
13794 }
13795
13796 #else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
13797
13798 size_t
13799 pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)
13800 {
13801 return (size_t)-1;
13802 }
13803
13804 #endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */