apple/xnu: osfmk/arm/pmap.c (commit 92337e64c3dbc685bd4692d11d8b59c230c1fcba)
1 /*
2 * Copyright (c) 2011-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <string.h>
29 #include <mach_assert.h>
30 #include <mach_ldebug.h>
31
32 #include <mach/shared_region.h>
33 #include <mach/vm_param.h>
34 #include <mach/vm_prot.h>
35 #include <mach/vm_map.h>
36 #include <mach/machine/vm_param.h>
37 #include <mach/machine/vm_types.h>
38
39 #include <mach/boolean.h>
40 #include <kern/bits.h>
41 #include <kern/thread.h>
42 #include <kern/sched.h>
43 #include <kern/zalloc.h>
44 #include <kern/kalloc.h>
45 #include <kern/ledger.h>
46 #include <kern/spl.h>
47 #include <kern/trustcache.h>
48
49 #include <os/overflow.h>
50
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_protos.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_pageout.h>
58 #include <vm/cpm.h>
59
60 #include <libkern/img4/interface.h>
61 #include <libkern/section_keywords.h>
62 #include <sys/errno.h>
63
64 #include <machine/atomic.h>
65 #include <machine/thread.h>
66 #include <machine/lowglobals.h>
67
68 #include <arm/caches_internal.h>
69 #include <arm/cpu_data.h>
70 #include <arm/cpu_data_internal.h>
71 #include <arm/cpu_capabilities.h>
72 #include <arm/cpu_number.h>
73 #include <arm/machine_cpu.h>
74 #include <arm/misc_protos.h>
75 #include <arm/trap.h>
76
77 #if (__ARM_VMSA__ > 7)
78 #include <arm64/proc_reg.h>
79 #include <pexpert/arm64/boot.h>
80 #if CONFIG_PGTRACE
81 #include <stdint.h>
82 #include <arm64/pgtrace.h>
83 #if CONFIG_PGTRACE_NONKEXT
84 #include <arm64/pgtrace_decoder.h>
85 #endif // CONFIG_PGTRACE_NONKEXT
86 #endif // CONFIG_PGTRACE
87 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
88 #include <arm64/amcc_rorgn.h>
89 #endif // defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
90 #endif
91
92 #include <pexpert/device_tree.h>
93
94 #include <san/kasan.h>
95 #include <sys/cdefs.h>
96
97 #if defined(HAS_APPLE_PAC)
98 #include <ptrauth.h>
99 #endif
100
101 #ifdef CONFIG_XNUPOST
102 #include <tests/xnupost.h>
103 #endif
104
105
106 #if HIBERNATION
107 #include <IOKit/IOHibernatePrivate.h>
108 #endif /* HIBERNATION */
109
110 #define PMAP_TT_L0_LEVEL 0x0
111 #define PMAP_TT_L1_LEVEL 0x1
112 #define PMAP_TT_L2_LEVEL 0x2
113 #define PMAP_TT_L3_LEVEL 0x3
114
115 #ifdef __ARM64_PMAP_SUBPAGE_L1__
116 #if (__ARM_VMSA__ <= 7)
117 #error This is not supported for old-style page tables
118 #endif
119 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
120 #else
121 #if (__ARM_VMSA__ <= 7)
122 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
123 #else
124 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
125 #endif
126 #endif
127
128 extern u_int32_t random(void); /* from <libkern/libkern.h> */
129
130 static bool alloc_asid(pmap_t pmap);
131 static void free_asid(pmap_t pmap);
132 static void flush_mmu_tlb_region_asid_async(vm_offset_t va, size_t length, pmap_t pmap);
133 static void flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap);
134 static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
135 static pt_entry_t wimg_to_pte(unsigned int wimg);
136
137 struct page_table_ops {
138 bool (*alloc_id)(pmap_t pmap);
139 void (*free_id)(pmap_t pmap);
140 void (*flush_tlb_region_async)(vm_offset_t va, size_t length, pmap_t pmap);
141 void (*flush_tlb_tte_async)(vm_offset_t va, pmap_t pmap);
142 void (*flush_tlb_async)(pmap_t pmap);
143 pt_entry_t (*wimg_to_pte)(unsigned int wimg);
144 };
145
146 static const struct page_table_ops native_pt_ops =
147 {
148 .alloc_id = alloc_asid,
149 .free_id = free_asid,
150 .flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
151 .flush_tlb_tte_async = flush_mmu_tlb_tte_asid_async,
152 .flush_tlb_async = flush_mmu_tlb_full_asid_async,
153 .wimg_to_pte = wimg_to_pte,
154 };
155
156 #if (__ARM_VMSA__ > 7)
157 const struct page_table_level_info pmap_table_level_info_16k[] =
158 {
159 [0] = {
160 .size = ARM_16K_TT_L0_SIZE,
161 .offmask = ARM_16K_TT_L0_OFFMASK,
162 .shift = ARM_16K_TT_L0_SHIFT,
163 .index_mask = ARM_16K_TT_L0_INDEX_MASK,
164 .valid_mask = ARM_TTE_VALID,
165 .type_mask = ARM_TTE_TYPE_MASK,
166 .type_block = ARM_TTE_TYPE_BLOCK
167 },
168 [1] = {
169 .size = ARM_16K_TT_L1_SIZE,
170 .offmask = ARM_16K_TT_L1_OFFMASK,
171 .shift = ARM_16K_TT_L1_SHIFT,
172 .index_mask = ARM_16K_TT_L1_INDEX_MASK,
173 .valid_mask = ARM_TTE_VALID,
174 .type_mask = ARM_TTE_TYPE_MASK,
175 .type_block = ARM_TTE_TYPE_BLOCK
176 },
177 [2] = {
178 .size = ARM_16K_TT_L2_SIZE,
179 .offmask = ARM_16K_TT_L2_OFFMASK,
180 .shift = ARM_16K_TT_L2_SHIFT,
181 .index_mask = ARM_16K_TT_L2_INDEX_MASK,
182 .valid_mask = ARM_TTE_VALID,
183 .type_mask = ARM_TTE_TYPE_MASK,
184 .type_block = ARM_TTE_TYPE_BLOCK
185 },
186 [3] = {
187 .size = ARM_16K_TT_L3_SIZE,
188 .offmask = ARM_16K_TT_L3_OFFMASK,
189 .shift = ARM_16K_TT_L3_SHIFT,
190 .index_mask = ARM_16K_TT_L3_INDEX_MASK,
191 .valid_mask = ARM_PTE_TYPE_VALID,
192 .type_mask = ARM_PTE_TYPE_MASK,
193 .type_block = ARM_TTE_TYPE_L3BLOCK
194 }
195 };
196
197 const struct page_table_level_info pmap_table_level_info_4k[] =
198 {
199 [0] = {
200 .size = ARM_4K_TT_L0_SIZE,
201 .offmask = ARM_4K_TT_L0_OFFMASK,
202 .shift = ARM_4K_TT_L0_SHIFT,
203 .index_mask = ARM_4K_TT_L0_INDEX_MASK,
204 .valid_mask = ARM_TTE_VALID,
205 .type_mask = ARM_TTE_TYPE_MASK,
206 .type_block = ARM_TTE_TYPE_BLOCK
207 },
208 [1] = {
209 .size = ARM_4K_TT_L1_SIZE,
210 .offmask = ARM_4K_TT_L1_OFFMASK,
211 .shift = ARM_4K_TT_L1_SHIFT,
212 .index_mask = ARM_4K_TT_L1_INDEX_MASK,
213 .valid_mask = ARM_TTE_VALID,
214 .type_mask = ARM_TTE_TYPE_MASK,
215 .type_block = ARM_TTE_TYPE_BLOCK
216 },
217 [2] = {
218 .size = ARM_4K_TT_L2_SIZE,
219 .offmask = ARM_4K_TT_L2_OFFMASK,
220 .shift = ARM_4K_TT_L2_SHIFT,
221 .index_mask = ARM_4K_TT_L2_INDEX_MASK,
222 .valid_mask = ARM_TTE_VALID,
223 .type_mask = ARM_TTE_TYPE_MASK,
224 .type_block = ARM_TTE_TYPE_BLOCK
225 },
226 [3] = {
227 .size = ARM_4K_TT_L3_SIZE,
228 .offmask = ARM_4K_TT_L3_OFFMASK,
229 .shift = ARM_4K_TT_L3_SHIFT,
230 .index_mask = ARM_4K_TT_L3_INDEX_MASK,
231 .valid_mask = ARM_PTE_TYPE_VALID,
232 .type_mask = ARM_PTE_TYPE_MASK,
233 .type_block = ARM_TTE_TYPE_L3BLOCK
234 }
235 };
236
237 struct page_table_attr {
238 const struct page_table_level_info * const pta_level_info;
239 const struct page_table_ops * const pta_ops;
240 const uintptr_t ap_ro;
241 const uintptr_t ap_rw;
242 const uintptr_t ap_rona;
243 const uintptr_t ap_rwna;
244 const uintptr_t ap_xn;
245 const uintptr_t ap_x;
246 const unsigned int pta_root_level;
247 const unsigned int pta_sharedpage_level;
248 const unsigned int pta_max_level;
249 #if __ARM_MIXED_PAGE_SIZE__
250 const uint64_t pta_tcr_value;
251 #endif /* __ARM_MIXED_PAGE_SIZE__ */
252 const uint64_t pta_page_size;
253 const uint64_t pta_page_shift;
254 };
255
256 const struct page_table_attr pmap_pt_attr_4k = {
257 .pta_level_info = pmap_table_level_info_4k,
258 .pta_root_level = (T0SZ_BOOT - 16) / 9,
259 #if __ARM_MIXED_PAGE_SIZE__
260 .pta_sharedpage_level = PMAP_TT_L2_LEVEL,
261 #else /* __ARM_MIXED_PAGE_SIZE__ */
262 #if __ARM_16K_PG__
263 .pta_sharedpage_level = PMAP_TT_L2_LEVEL,
264 #else /* __ARM_16K_PG__ */
265 .pta_sharedpage_level = PMAP_TT_L1_LEVEL,
266 #endif /* __ARM_16K_PG__ */
267 #endif /* __ARM_MIXED_PAGE_SIZE__ */
268 .pta_max_level = PMAP_TT_L3_LEVEL,
269 .pta_ops = &native_pt_ops,
270 .ap_ro = ARM_PTE_AP(AP_RORO),
271 .ap_rw = ARM_PTE_AP(AP_RWRW),
272 .ap_rona = ARM_PTE_AP(AP_RONA),
273 .ap_rwna = ARM_PTE_AP(AP_RWNA),
274 .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
275 .ap_x = ARM_PTE_PNX,
276 #if __ARM_MIXED_PAGE_SIZE__
277 .pta_tcr_value = TCR_EL1_4KB,
278 #endif /* __ARM_MIXED_PAGE_SIZE__ */
279 .pta_page_size = 4096,
280 .pta_page_shift = 12,
281 };
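/*
 * Worked example (illustrative; assumes T0SZ_BOOT == 25, a common arm64
 * configuration giving a 39-bit VA space with the 4K granule): each 4K
 * translation level resolves 9 VA bits and a full 48-bit (T0SZ == 16) walk
 * starts at L0, so
 *     .pta_root_level = (25 - 16) / 9 = 1,
 * i.e. translation starts at L1, whose 512 entries each cover 1GB of the
 * 39-bit address space.
 */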
282
283 const struct page_table_attr pmap_pt_attr_16k = {
284 .pta_level_info = pmap_table_level_info_16k,
285 .pta_root_level = PMAP_TT_L1_LEVEL,
286 .pta_sharedpage_level = PMAP_TT_L2_LEVEL,
287 .pta_max_level = PMAP_TT_L3_LEVEL,
288 .pta_ops = &native_pt_ops,
289 .ap_ro = ARM_PTE_AP(AP_RORO),
290 .ap_rw = ARM_PTE_AP(AP_RWRW),
291 .ap_rona = ARM_PTE_AP(AP_RONA),
292 .ap_rwna = ARM_PTE_AP(AP_RWNA),
293 .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
294 .ap_x = ARM_PTE_PNX,
295 #if __ARM_MIXED_PAGE_SIZE__
296 .pta_tcr_value = TCR_EL1_16KB,
297 #endif /* __ARM_MIXED_PAGE_SIZE__ */
298 .pta_page_size = 16384,
299 .pta_page_shift = 14,
300 };
301
302 #if __ARM_16K_PG__
303 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
304 #else /* !__ARM_16K_PG__ */
305 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
306 #endif /* !__ARM_16K_PG__ */
307
308
309 #else /* (__ARM_VMSA__ > 7) */
310 /*
311 * We don't support pmap parameterization for VMSA7, so use an opaque
312 * page_table_attr structure.
313 */
314 const struct page_table_attr * const native_pt_attr = NULL;
315 #endif /* (__ARM_VMSA__ > 7) */
316
317 typedef struct page_table_attr pt_attr_t;
318
319 /* Macro for getting pmap attributes; not a function for const propagation. */
320 #if ARM_PARAMETERIZED_PMAP
321 /* The page table attributes are linked to the pmap */
322 #define pmap_get_pt_attr(pmap) ((pmap)->pmap_pt_attr)
323 #define pmap_get_pt_ops(pmap) ((pmap)->pmap_pt_attr->pta_ops)
324 #else /* !ARM_PARAMETERIZED_PMAP */
325 /* The page table attributes are fixed (to allow for const propagation) */
326 #define pmap_get_pt_attr(pmap) (native_pt_attr)
327 #define pmap_get_pt_ops(pmap) (&native_pt_ops)
328 #endif /* !ARM_PARAMETERIZED_PMAP */
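#if 0
/*
 * Illustrative sketch (not part of the original source; the example_* helper
 * below is hypothetical): how this indirection is typically consumed.  On
 * parameterized-pmap configurations the lookup goes through the pmap's
 * pmap_pt_attr; otherwise the macros collapse to the native constants so the
 * compiler can const-propagate and devirtualize the ops calls.
 */
static inline void
example_pmap_flush_full(pmap_t pmap)
{
	/* Dispatch through the per-pmap ops table, then synchronize. */
	pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
	sync_tlb_flush();
}
#endif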
329
330 #if (__ARM_VMSA__ > 7)
331 static inline uint64_t
332 pt_attr_page_size(const pt_attr_t * const pt_attr)
333 {
334 return pt_attr->pta_page_size;
335 }
336
337 __unused static inline uint64_t
338 pt_attr_ln_size(const pt_attr_t * const pt_attr, unsigned int level)
339 {
340 return pt_attr->pta_level_info[level].size;
341 }
342
343 __unused static inline uint64_t
344 pt_attr_ln_shift(const pt_attr_t * const pt_attr, unsigned int level)
345 {
346 return pt_attr->pta_level_info[level].shift;
347 }
348
349 static inline uint64_t
350 pt_attr_ln_offmask(const pt_attr_t * const pt_attr, unsigned int level)
351 {
352 return pt_attr->pta_level_info[level].offmask;
353 }
354
355 __unused static inline uint64_t
356 pt_attr_ln_pt_offmask(const pt_attr_t * const pt_attr, unsigned int level)
357 {
358 return pt_attr_ln_offmask(pt_attr, level);
359 }
360
361 __unused static inline uint64_t
362 pt_attr_ln_index_mask(const pt_attr_t * const pt_attr, unsigned int level)
363 {
364 return pt_attr->pta_level_info[level].index_mask;
365 }
366
367 static inline unsigned int
368 pt_attr_twig_level(const pt_attr_t * const pt_attr)
369 {
370 return pt_attr->pta_max_level - 1;
371 }
372
373 static inline unsigned int
374 pt_attr_root_level(const pt_attr_t * const pt_attr)
375 {
376 return pt_attr->pta_root_level;
377 }
378
379 /**
380 * This is the level at which to copy a pt_entry from the sharedpage_pmap into
381 * the user pmap. Typically L1 for 4K pages, and L2 for 16K pages. In this way,
382 * the sharedpage's L2/L3 page tables are reused in every 4K task, whereas only
383 * the L3 page table is reused in 16K tasks.
384 */
385 static inline unsigned int
386 pt_attr_sharedpage_level(const pt_attr_t * const pt_attr)
387 {
388 return pt_attr->pta_sharedpage_level;
389 }
390
391 static __unused inline uint64_t
392 pt_attr_leaf_size(const pt_attr_t * const pt_attr)
393 {
394 return pt_attr->pta_level_info[pt_attr->pta_max_level].size;
395 }
396
397 static __unused inline uint64_t
398 pt_attr_leaf_offmask(const pt_attr_t * const pt_attr)
399 {
400 return pt_attr->pta_level_info[pt_attr->pta_max_level].offmask;
401 }
402
403 static inline uint64_t
404 pt_attr_leaf_shift(const pt_attr_t * const pt_attr)
405 {
406 return pt_attr->pta_level_info[pt_attr->pta_max_level].shift;
407 }
408
409 static __unused inline uint64_t
410 pt_attr_leaf_index_mask(const pt_attr_t * const pt_attr)
411 {
412 return pt_attr->pta_level_info[pt_attr->pta_max_level].index_mask;
413 }
414
415 static inline uint64_t
416 pt_attr_twig_size(const pt_attr_t * const pt_attr)
417 {
418 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].size;
419 }
420
421 static inline uint64_t
422 pt_attr_twig_offmask(const pt_attr_t * const pt_attr)
423 {
424 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].offmask;
425 }
426
427 static inline uint64_t
428 pt_attr_twig_shift(const pt_attr_t * const pt_attr)
429 {
430 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].shift;
431 }
432
433 static __unused inline uint64_t
434 pt_attr_twig_index_mask(const pt_attr_t * const pt_attr)
435 {
436 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].index_mask;
437 }
438
439 static inline uint64_t
440 pt_attr_leaf_table_size(const pt_attr_t * const pt_attr)
441 {
442 return pt_attr_twig_size(pt_attr);
443 }
444
445 static inline uint64_t
446 pt_attr_leaf_table_offmask(const pt_attr_t * const pt_attr)
447 {
448 return pt_attr_twig_offmask(pt_attr);
449 }
450
451 static inline uintptr_t
452 pt_attr_leaf_rw(const pt_attr_t * const pt_attr)
453 {
454 return pt_attr->ap_rw;
455 }
456
457 static inline uintptr_t
458 pt_attr_leaf_ro(const pt_attr_t * const pt_attr)
459 {
460 return pt_attr->ap_ro;
461 }
462
463 static inline uintptr_t
464 pt_attr_leaf_rona(const pt_attr_t * const pt_attr)
465 {
466 return pt_attr->ap_rona;
467 }
468
469 static inline uintptr_t
470 pt_attr_leaf_rwna(const pt_attr_t * const pt_attr)
471 {
472 return pt_attr->ap_rwna;
473 }
474
475 static inline uintptr_t
476 pt_attr_leaf_xn(const pt_attr_t * const pt_attr)
477 {
478 return pt_attr->ap_xn;
479 }
480
481 static inline uintptr_t
482 pt_attr_leaf_x(const pt_attr_t * const pt_attr)
483 {
484 return pt_attr->ap_x;
485 }
486
487 #else /* (__ARM_VMSA__ > 7) */
488 static inline uint64_t
489 pt_attr_page_size(__unused const pt_attr_t * const pt_attr)
490 {
491 return PAGE_SIZE;
492 }
493
494 __unused static inline unsigned int
495 pt_attr_root_level(__unused const pt_attr_t * const pt_attr)
496 {
497 return PMAP_TT_L1_LEVEL;
498 }
499
500 __unused static inline unsigned int
501 pt_attr_sharedpage_level(__unused const pt_attr_t * const pt_attr)
502 {
503 return PMAP_TT_L1_LEVEL;
504 }
505
506 static inline unsigned int
507 pt_attr_twig_level(__unused const pt_attr_t * const pt_attr)
508 {
509 return PMAP_TT_L1_LEVEL;
510 }
511
512 static inline uint64_t
513 pt_attr_twig_size(__unused const pt_attr_t * const pt_attr)
514 {
515 return ARM_TT_TWIG_SIZE;
516 }
517
518 static inline uint64_t
519 pt_attr_twig_offmask(__unused const pt_attr_t * const pt_attr)
520 {
521 return ARM_TT_TWIG_OFFMASK;
522 }
523
524 static inline uint64_t
525 pt_attr_twig_shift(__unused const pt_attr_t * const pt_attr)
526 {
527 return ARM_TT_TWIG_SHIFT;
528 }
529
530 static __unused inline uint64_t
531 pt_attr_twig_index_mask(__unused const pt_attr_t * const pt_attr)
532 {
533 return ARM_TT_TWIG_INDEX_MASK;
534 }
535
536 __unused static inline uint64_t
537 pt_attr_leaf_size(__unused const pt_attr_t * const pt_attr)
538 {
539 return ARM_TT_LEAF_SIZE;
540 }
541
542 __unused static inline uint64_t
543 pt_attr_leaf_offmask(__unused const pt_attr_t * const pt_attr)
544 {
545 return ARM_TT_LEAF_OFFMASK;
546 }
547
548 static inline uint64_t
549 pt_attr_leaf_shift(__unused const pt_attr_t * const pt_attr)
550 {
551 return ARM_TT_LEAF_SHIFT;
552 }
553
554 static __unused inline uint64_t
555 pt_attr_leaf_index_mask(__unused const pt_attr_t * const pt_attr)
556 {
557 return ARM_TT_LEAF_INDEX_MASK;
558 }
559
560 static inline uint64_t
561 pt_attr_leaf_table_size(__unused const pt_attr_t * const pt_attr)
562 {
563 return ARM_TT_L1_PT_SIZE;
564 }
565
566 static inline uint64_t
567 pt_attr_leaf_table_offmask(__unused const pt_attr_t * const pt_attr)
568 {
569 return ARM_TT_L1_PT_OFFMASK;
570 }
571
572 static inline uintptr_t
573 pt_attr_leaf_rw(__unused const pt_attr_t * const pt_attr)
574 {
575 return ARM_PTE_AP(AP_RWRW);
576 }
577
578 static inline uintptr_t
579 pt_attr_leaf_ro(__unused const pt_attr_t * const pt_attr)
580 {
581 return ARM_PTE_AP(AP_RORO);
582 }
583
584 static inline uintptr_t
585 pt_attr_leaf_rona(__unused const pt_attr_t * const pt_attr)
586 {
587 return ARM_PTE_AP(AP_RONA);
588 }
589
590 static inline uintptr_t
591 pt_attr_leaf_rwna(__unused const pt_attr_t * const pt_attr)
592 {
593 return ARM_PTE_AP(AP_RWNA);
594 }
595
596 static inline uintptr_t
597 pt_attr_leaf_xn(__unused const pt_attr_t * const pt_attr)
598 {
599 return ARM_PTE_NX;
600 }
601
602 __unused static inline uintptr_t
603 pt_attr_ln_offmask(__unused const pt_attr_t * const pt_attr, unsigned int level)
604 {
605 if (level == PMAP_TT_L1_LEVEL) {
606 return ARM_TT_L1_OFFMASK;
607 } else if (level == PMAP_TT_L2_LEVEL) {
608 return ARM_TT_L2_OFFMASK;
609 }
610
611 return 0;
612 }
613
614 static inline uintptr_t
615 pt_attr_ln_pt_offmask(__unused const pt_attr_t * const pt_attr, unsigned int level)
616 {
617 if (level == PMAP_TT_L1_LEVEL) {
618 return ARM_TT_L1_PT_OFFMASK;
619 } else if (level == PMAP_TT_L2_LEVEL) {
620 return ARM_TT_L2_OFFMASK;
621 }
622
623 return 0;
624 }
625
626 #endif /* (__ARM_VMSA__ > 7) */
627
628 static inline unsigned int
629 pt_attr_leaf_level(const pt_attr_t * const pt_attr)
630 {
631 return pt_attr_twig_level(pt_attr) + 1;
632 }
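/*
 * Terminology note (illustrative): the "leaf" level holds the final PTEs and
 * the "twig" level is its immediate parent.  For example, on arm64 with 4K
 * pages and a 39-bit VA space the root is L1, the twig is L2 and the leaf is
 * L3 (so pt_attr_leaf_level() == pt_attr_twig_level() + 1 == 3); on 32-bit
 * VMSA7 the twig is L1 and the leaf is L2.
 */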
633
634
635 static inline void
636 pmap_sync_tlb(bool strong __unused)
637 {
638 sync_tlb_flush();
639 }
640
641 #if MACH_ASSERT
642 int vm_footprint_suspend_allowed = 1;
643
644 extern int pmap_ledgers_panic;
645 extern int pmap_ledgers_panic_leeway;
646
647 int pmap_stats_assert = 1;
648 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...) \
649 MACRO_BEGIN \
650 if (pmap_stats_assert && (pmap)->pmap_stats_assert) \
651 assertf(cond, fmt, ##__VA_ARGS__); \
652 MACRO_END
653 #else /* MACH_ASSERT */
654 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
655 #endif /* MACH_ASSERT */
656
657 #if DEVELOPMENT || DEBUG
658 #define PMAP_FOOTPRINT_SUSPENDED(pmap) \
659 (current_thread()->pmap_footprint_suspended)
660 #else /* DEVELOPMENT || DEBUG */
661 #define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
662 #endif /* DEVELOPMENT || DEBUG */
663
664
665 /*
666 * Represents a TLB range that will be flushed before exiting
667 * the PPL.
668 * Used by phys_attribute_clear_range to defer flushing pages in
669 * this range until the end of the operation.
670 */
671 typedef struct pmap_tlb_flush_range {
672 pmap_t ptfr_pmap;
673 vm_map_address_t ptfr_start;
674 vm_map_address_t ptfr_end;
675 bool ptfr_flush_needed;
676 } pmap_tlb_flush_range_t;
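#if 0
/*
 * Illustrative usage sketch (not part of the original source; the example_*
 * helper is hypothetical): callers accumulate the affected VA span while
 * walking mappings, then issue a single ranged flush on the way out of the
 * PPL instead of one flush per page.
 */
static void
example_deferred_flush(pmap_tlb_flush_range_t *range)
{
	if (range->ptfr_flush_needed) {
		pmap_get_pt_ops(range->ptfr_pmap)->flush_tlb_region_async(
			range->ptfr_start,
			(size_t)(range->ptfr_end - range->ptfr_start),
			range->ptfr_pmap);
		sync_tlb_flush();
	}
}
#endif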
677
678 #if XNU_MONITOR
679 /*
680 * PPL External References.
681 */
682 extern vm_offset_t segPPLDATAB;
683 extern unsigned long segSizePPLDATA;
684 extern vm_offset_t segPPLTEXTB;
685 extern unsigned long segSizePPLTEXT;
686 #if __APRR_SUPPORTED__
687 extern vm_offset_t segPPLTRAMPB;
688 extern unsigned long segSizePPLTRAMP;
689 extern void ppl_trampoline_start;
690 extern void ppl_trampoline_end;
691 #endif
692 extern vm_offset_t segPPLDATACONSTB;
693 extern unsigned long segSizePPLDATACONST;
694
695
696 /*
697 * PPL Global Variables
698 */
699
700 #if (DEVELOPMENT || DEBUG) || CONFIG_CSR_FROM_DT
701 /* Indicates if the PPL will enforce mapping policies; set by -unsafe_kernel_text */
702 SECURITY_READ_ONLY_LATE(boolean_t) pmap_ppl_disable = FALSE;
703 #else
704 const boolean_t pmap_ppl_disable = FALSE;
705 #endif
706
707 /* Indicates if the PPL has started applying APRR. */
708 boolean_t pmap_ppl_locked_down MARK_AS_PMAP_DATA = FALSE;
709
710 /*
711 * The PPL cannot invoke the kernel in order to allocate memory, so we must
712 * maintain a list of free pages that the PPL owns. The kernel can give the PPL
713 * additional pages.
714 */
715 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_ppl_free_page_lock, 0);
716 void ** pmap_ppl_free_page_list MARK_AS_PMAP_DATA = NULL;
717 uint64_t pmap_ppl_free_page_count MARK_AS_PMAP_DATA = 0;
718 uint64_t pmap_ppl_pages_returned_to_kernel_count_total = 0;
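#if 0
/*
 * Minimal sketch (not part of the original source; the example_* helper is
 * hypothetical and assumes the free list is threaded through the first word
 * of each free page's PPL-visible mapping): the list is a simple LIFO stack
 * protected by pmap_ppl_free_page_lock.
 */
static void
example_ppl_free_page_push(void **page_va)
{
	/* Caller is assumed to hold pmap_ppl_free_page_lock. */
	*page_va = pmap_ppl_free_page_list;     /* link the old head into the page */
	pmap_ppl_free_page_list = page_va;      /* the page becomes the new head */
	pmap_ppl_free_page_count++;
}
#endif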
719
720 struct pmap_cpu_data_array_entry pmap_cpu_data_array[MAX_CPUS] MARK_AS_PMAP_DATA = {0};
721
722 extern void *pmap_stacks_start;
723 extern void *pmap_stacks_end;
724 SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_start_pa = 0;
725 SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_end_pa = 0;
726 SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_start = 0;
727 SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_end = 0;
728
729 /* Allocation data/locks for pmap structures. */
730 #if XNU_MONITOR
731 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_free_list_lock, 0);
732 #endif
733 SECURITY_READ_ONLY_LATE(unsigned long) pmap_array_count = 0;
734 SECURITY_READ_ONLY_LATE(void *) pmap_array_begin = NULL;
735 SECURITY_READ_ONLY_LATE(void *) pmap_array_end = NULL;
736 SECURITY_READ_ONLY_LATE(pmap_t) pmap_array = NULL;
737 pmap_t pmap_free_list MARK_AS_PMAP_DATA = NULL;
738
739 /* Allocation data/locks/structs for task ledger structures. */
740 #define PMAP_LEDGER_DATA_BYTES \
741 (((sizeof(task_ledgers) / sizeof(int)) * sizeof(struct ledger_entry)) + sizeof(struct ledger))
742
743 /*
744 * The maximum number of ledgers allowed is the maximum number of tasks
745 * allowed on the system plus some extra, i.e. ~10% of total tasks = 200.
746 */
747 #define MAX_PMAP_LEDGERS (pmap_max_asids + 200)
748 #define PMAP_ARRAY_SIZE (pmap_max_asids)
749
750 typedef struct pmap_ledger_data {
751 char pld_data[PMAP_LEDGER_DATA_BYTES];
752 } pmap_ledger_data_t;
753
754 typedef struct pmap_ledger {
755 union {
756 struct pmap_ledger_data ple_data;
757 struct pmap_ledger * next;
758 };
759
760 struct pmap_ledger ** back_ptr;
761 } pmap_ledger_t;
762
763 SECURITY_READ_ONLY_LATE(bool) pmap_ledger_alloc_initialized = false;
764 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_ledger_lock, 0);
765 SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_begin = NULL;
766 SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_end = NULL;
767 SECURITY_READ_ONLY_LATE(os_refcnt_t *) pmap_ledger_refcnt = NULL;
768 SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_begin = NULL;
769 SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_end = NULL;
770 SECURITY_READ_ONLY_LATE(pmap_ledger_t * *) pmap_ledger_ptr_array = NULL;
771 uint64_t pmap_ledger_ptr_array_free_index MARK_AS_PMAP_DATA = 0;
772 pmap_ledger_t * pmap_ledger_free_list MARK_AS_PMAP_DATA = NULL;
773
774 #define pmap_ledger_debit(p, e, a) ledger_debit_nocheck((p)->ledger, e, a)
775 #define pmap_ledger_credit(p, e, a) ledger_credit_nocheck((p)->ledger, e, a)
776
777 static inline void
778 pmap_check_ledger_fields(ledger_t ledger)
779 {
780 if (ledger == NULL) {
781 return;
782 }
783
784 thread_t cur_thread = current_thread();
785 ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting);
786 ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting_compressed);
787 ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal);
788 ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal_compressed);
789 ledger_check_new_balance(cur_thread, ledger, task_ledgers.page_table);
790 ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_footprint);
791 ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_mem);
792 ledger_check_new_balance(cur_thread, ledger, task_ledgers.tkm_private);
793 ledger_check_new_balance(cur_thread, ledger, task_ledgers.wired_mem);
794 }
795
796 #define pmap_ledger_check_balance(p) pmap_check_ledger_fields((p)->ledger)
797
798 #else /* XNU_MONITOR */
799
800 #define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
801 #define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
802
803 #endif /* !XNU_MONITOR */
804
805
806 /* Virtual memory region for early allocation */
807 #if (__ARM_VMSA__ == 7)
808 #define VREGION1_HIGH_WINDOW (0)
809 #else
810 #define VREGION1_HIGH_WINDOW (PE_EARLY_BOOT_VA)
811 #endif
812 #define VREGION1_START ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
813 #define VREGION1_SIZE (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
814
815 extern uint8_t bootstrap_pagetables[];
816
817 extern unsigned int not_in_kdp;
818
819 extern vm_offset_t first_avail;
820
821 extern pmap_paddr_t avail_start;
822 extern pmap_paddr_t avail_end;
823
824 extern vm_offset_t virtual_space_start; /* Next available kernel VA */
825 extern vm_offset_t virtual_space_end; /* End of kernel address space */
826 extern vm_offset_t static_memory_end;
827
828 extern const vm_map_address_t physmap_base;
829 extern const vm_map_address_t physmap_end;
830
831 extern int maxproc, hard_maxproc;
832
833 vm_address_t MARK_AS_PMAP_DATA image4_slab = 0;
834
835 #if (__ARM_VMSA__ > 7)
836 /* The number of address bits one TTBR can cover. */
837 #define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
838
839 /*
840 * The bounds on our TTBRs. These are for sanity checking that
841 * an address is accessible by a TTBR before we attempt to map it.
842 */
843 #define ARM64_TTBR0_MIN_ADDR (0ULL)
844 #define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
845 #define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
846 #define ARM64_TTBR1_MAX_ADDR (~0ULL)
847
848 /* The level of the root of a page table. */
849 const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));
850
851 /* The number of entries in the root TT of a page table. */
852 const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
853 #else
854 const uint64_t arm64_root_pgtable_level = 0;
855 const uint64_t arm64_root_pgtable_num_ttes = 0;
856 #endif
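/*
 * Worked example (illustrative; assumes T0SZ_BOOT == 25 and the 4K granule,
 * i.e. ARM_PGSHIFT == 12 with 8-byte TTEs so TTE_SHIFT == 3):
 *     PGTABLE_ADDR_BITS           = 64 - 25 = 39
 *     arm64_root_pgtable_level    = 3 - ((39 - 1 - 12) / (12 - 3)) = 3 - 2 = 1
 *     arm64_root_pgtable_num_ttes = 2 << ((39 - 1 - 12) % (12 - 3)) = 2 << 8 = 512
 * which matches an L1 root table of 512 entries covering a 39-bit VA space.
 */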
857
858 struct pmap kernel_pmap_store MARK_AS_PMAP_DATA;
859 SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;
860
861 struct vm_object pmap_object_store VM_PAGE_PACKED_ALIGNED; /* store pt pages */
862 vm_object_t pmap_object = &pmap_object_store;
863
864 static SECURITY_READ_ONLY_LATE(zone_t) pmap_zone; /* zone of pmap structures */
865
866 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmaps_lock, 0);
867 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(tt1_lock, 0);
868 unsigned int pmap_stamp MARK_AS_PMAP_DATA;
869 queue_head_t map_pmap_list MARK_AS_PMAP_DATA;
870
871 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pt_pages_lock, 0);
872 queue_head_t pt_page_list MARK_AS_PMAP_DATA; /* pt page ptd entries list */
873
874 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_pages_lock, 0);
875
876 typedef struct page_free_entry {
877 struct page_free_entry *next;
878 } page_free_entry_t;
879
880 #define PAGE_FREE_ENTRY_NULL ((page_free_entry_t *) 0)
881
882 page_free_entry_t *pmap_pages_reclaim_list MARK_AS_PMAP_DATA; /* Reclaimed pt page list */
883 unsigned int pmap_pages_request_count MARK_AS_PMAP_DATA; /* Pending requests to reclaim pt page */
884 unsigned long long pmap_pages_request_acum MARK_AS_PMAP_DATA;
885
886
887 typedef struct tt_free_entry {
888 struct tt_free_entry *next;
889 } tt_free_entry_t;
890
891 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
892
893 tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
894 unsigned int free_page_size_tt_count MARK_AS_PMAP_DATA;
895 unsigned int free_page_size_tt_max MARK_AS_PMAP_DATA;
896 #define FREE_PAGE_SIZE_TT_MAX 4
897 tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
898 unsigned int free_two_page_size_tt_count MARK_AS_PMAP_DATA;
899 unsigned int free_two_page_size_tt_max MARK_AS_PMAP_DATA;
900 #define FREE_TWO_PAGE_SIZE_TT_MAX 4
901 tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
902 unsigned int free_tt_count MARK_AS_PMAP_DATA;
903 unsigned int free_tt_max MARK_AS_PMAP_DATA;
904
905 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
906
907 boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
908 boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
909 boolean_t pmap_gc_allowed_by_time_throttle = TRUE;
910
911 unsigned int inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
912 unsigned int inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf user pagetable pages, in units of PAGE_SIZE */
913 unsigned int inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0; /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
914 unsigned int inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
915 unsigned int inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
916 unsigned int inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0; /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
917 unsigned int inuse_pmap_pages_count = 0; /* debugging */
918
919 SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte = 0;
920 SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;
921
922 SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte = 0; /* set by arm_vm_init() - keep out of bss */
923 SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0; /* set by arm_vm_init() - phys tte addr */
924
925 #if DEVELOPMENT || DEBUG
926 int nx_enabled = 1; /* enable no-execute protection */
927 int allow_data_exec = 0; /* No apps may execute data */
928 int allow_stack_exec = 0; /* No apps may execute from the stack */
929 unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
930 unsigned long pmap_asid_hits MARK_AS_PMAP_DATA = 0;
931 unsigned long pmap_asid_misses MARK_AS_PMAP_DATA = 0;
932 #else /* DEVELOPMENT || DEBUG */
933 const int nx_enabled = 1; /* enable no-execute protection */
934 const int allow_data_exec = 0; /* No apps may execute data */
935 const int allow_stack_exec = 0; /* No apps may execute from the stack */
936 #endif /* DEVELOPMENT || DEBUG */
937
938 /**
939 * This variable is set true during hibernation entry to protect pmap data structures
940 * during image copying, and reset false on hibernation exit.
941 */
942 bool hib_entry_pmap_lockdown MARK_AS_PMAP_DATA = false;
943
944 /* Macro used to ensure that pmap data structures aren't modified during hibernation image copying. */
945 #if HIBERNATION
946 #define ASSERT_NOT_HIBERNATING() (assertf(!hib_entry_pmap_lockdown, \
947 "Attempted to modify PMAP data structures after hibernation image copying has begun."))
948 #else
949 #define ASSERT_NOT_HIBERNATING()
950 #endif /* HIBERNATION */
951
952 #define PV_ENTRY_NULL ((pv_entry_t *) 0)
953
954 /*
955 * PMAP LEDGERS:
956 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
957 * as a marker for pages mapped through an "alternate accounting" mapping.
958 * These macros set, clear and test for this marker and extract the actual
959 * value of the "pve_next" pointer.
960 */
961 #define PVE_NEXT_ALTACCT ((uintptr_t) 0x1)
962 #define PVE_NEXT_SET_ALTACCT(pve_next_p) \
963 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
964 PVE_NEXT_ALTACCT)
965 #define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
966 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
967 ~PVE_NEXT_ALTACCT)
968 #define PVE_NEXT_IS_ALTACCT(pve_next) \
969 ((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
970 #define PVE_NEXT_PTR(pve_next) \
971 ((struct pv_entry *)(((uintptr_t) (pve_next)) & \
972 ~PVE_NEXT_ALTACCT))
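#if 0
/*
 * Illustrative sketch (not part of the original source; the example_* helper
 * is hypothetical): pv_entry pointers are at least 4-byte aligned, so bit 0 of
 * "pve_next" is otherwise always zero and can carry the "alternate accounting"
 * marker.  The tag must be stripped with PVE_NEXT_PTR() before dereferencing.
 */
static void
example_walk_pve_chain(pv_entry_t *pve)
{
	while (pve != PV_ENTRY_NULL) {
		boolean_t is_altacct = PVE_NEXT_IS_ALTACCT(pve->pve_next);
		(void)is_altacct;                      /* e.g. feed ledger accounting */
		pve = PVE_NEXT_PTR(pve->pve_next);     /* strip the tag before following */
	}
}
#endif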
973 #if MACH_ASSERT
974 static void pmap_check_ledgers(pmap_t pmap);
975 #else
976 static inline void
977 pmap_check_ledgers(__unused pmap_t pmap)
978 {
979 }
980 #endif /* MACH_ASSERT */
981
982 SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table; /* array of pv entry pointers */
983
984 pv_free_list_t pv_free MARK_AS_PMAP_DATA = {0};
985 pv_free_list_t pv_kern_free MARK_AS_PMAP_DATA = {0};
986 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pv_free_list_lock, 0);
987 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pv_kern_free_list_lock, 0);
988
989 SIMPLE_LOCK_DECLARE(phys_backup_lock, 0);
990
991 /*
992 * pt_desc - structure to keep info on page assigned to page tables
993 */
994 #if (__ARM_VMSA__ == 7)
995 #define PT_INDEX_MAX 1
996 #else /* (__ARM_VMSA__ != 7) */
997
998 #if __ARM_MIXED_PAGE_SIZE__
999 #define PT_INDEX_MAX (ARM_PGBYTES / 4096)
1000 #elif (ARM_PGSHIFT == 14)
1001 #define PT_INDEX_MAX 1
1002 #elif (ARM_PGSHIFT == 12)
1003 #define PT_INDEX_MAX 4
1004 #else
1005 #error Unsupported ARM_PGSHIFT
1006 #endif /* (ARM_PGSHIFT != 14) */
1007
1008 #endif /* (__ARM_VMSA__ != 7) */
1009
1010 #define PT_DESC_REFCOUNT 0x4000U
1011 #define PT_DESC_IOMMU_REFCOUNT 0x8000U
1012
1013 typedef struct {
1014 /*
1015 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
1016 * For leaf pagetables, should reflect the number of non-empty PTEs
1017 * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
1018 */
1019 unsigned short refcnt;
1020 /*
1021 * For non-leaf pagetables, should be 0
1022 * For leaf pagetables, should reflect the number of wired entries
1023 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
1024 */
1025 unsigned short wiredcnt;
1026 vm_offset_t va;
1027 } ptd_info_t;
1028
1029 typedef struct pt_desc {
1030 queue_chain_t pt_page;
1031 union {
1032 struct pmap *pmap;
1033 };
1034 ptd_info_t ptd_info[PT_INDEX_MAX];
1035 } pt_desc_t;
1036
1037
1038 #define PTD_ENTRY_NULL ((pt_desc_t *) 0)
1039
1040 SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;
1041
1042 pt_desc_t *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
1043 SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
1044 unsigned int ptd_free_count MARK_AS_PMAP_DATA = 0;
1045 decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA);
1046
1047 /*
1048 * physical page attribute
1049 */
1050 typedef u_int16_t pp_attr_t;
1051
1052 #define PP_ATTR_WIMG_MASK 0x003F
1053 #define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
1054
1055 #define PP_ATTR_REFERENCED 0x0040
1056 #define PP_ATTR_MODIFIED 0x0080
1057
1058 #define PP_ATTR_INTERNAL 0x0100
1059 #define PP_ATTR_REUSABLE 0x0200
1060 #define PP_ATTR_ALTACCT 0x0400
1061 #define PP_ATTR_NOENCRYPT 0x0800
1062
1063 #define PP_ATTR_REFFAULT 0x1000
1064 #define PP_ATTR_MODFAULT 0x2000
1065
1066 #if XNU_MONITOR
1067 /*
1068 * Denotes that a page is owned by the PPL. This is modified/checked with the
1069 * PVH lock held, to avoid ownership-related races. This does not need to be a
1070 * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
1071 * put the bit.
1072 */
1073 #define PP_ATTR_MONITOR 0x4000
1074
1075 /*
1076 * Denotes that a page *cannot* be owned by the PPL. This is required in order
1077 * to temporarily 'pin' kernel pages that are used to store PPL output parameters.
1078 * Otherwise a malicious or buggy caller could pass PPL-owned memory for these
1079 * parameters and in so doing stage a write gadget against the PPL.
1080 */
1081 #define PP_ATTR_NO_MONITOR 0x8000
1082
1083 /*
1084 * All of the bits owned by the PPL; kernel requests to set or clear these bits
1085 * are illegal.
1086 */
1087 #define PP_ATTR_PPL_OWNED_BITS (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
1088 #endif
1089
1090 SECURITY_READ_ONLY_LATE(volatile pp_attr_t*) pp_attr_table;
1091
1092 /**
1093 * The layout of this structure needs to map 1-to-1 with the pmap-io-range device
1094 * tree nodes. Astris (through the LowGlobals) also depends on the consistency
1095 * of this structure.
1096 */
1097 typedef struct pmap_io_range {
1098 uint64_t addr;
1099 uint64_t len;
1100 #define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31) // Strong DSB required for pages in this range
1101 #define PMAP_IO_RANGE_CARVEOUT (1UL << 30) // Corresponds to memory carved out by bootloader
1102 #define PMAP_IO_RANGE_NEEDS_HIBERNATING (1UL << 29) // Pages in this range need to be included in the hibernation image
1103 uint32_t wimg; // lower 16 bits treated as pp_attr_t, upper 16 bits contain additional mapping flags
1104 uint32_t signature; // 4CC
1105 } __attribute__((packed)) pmap_io_range_t;
1106
1107 SECURITY_READ_ONLY_LATE(pmap_io_range_t*) io_attr_table = (pmap_io_range_t*)0;
1108
1109 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_first_phys = (pmap_paddr_t) 0;
1110 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_last_phys = (pmap_paddr_t) 0;
1111
1112 SECURITY_READ_ONLY_LATE(unsigned int) num_io_rgns = 0;
1113
1114 SECURITY_READ_ONLY_LATE(boolean_t) pmap_initialized = FALSE; /* Has pmap_init completed? */
1115
1116 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default = 0x0;
1117 #if defined(__arm64__)
1118 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
1119 #endif
1120
1121 #if PMAP_PANIC_DEV_WIMG_ON_MANAGED && (DEVELOPMENT || DEBUG)
1122 SECURITY_READ_ONLY_LATE(boolean_t) pmap_panic_dev_wimg_on_managed = TRUE;
1123 #else
1124 SECURITY_READ_ONLY_LATE(boolean_t) pmap_panic_dev_wimg_on_managed = FALSE;
1125 #endif
1126
1127 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(asid_lock, 0);
1128 SECURITY_READ_ONLY_LATE(static uint32_t) pmap_max_asids = 0;
1129 SECURITY_READ_ONLY_LATE(int) pmap_asid_plru = 1;
1130 SECURITY_READ_ONLY_LATE(uint16_t) asid_chunk_size = 0;
1131 SECURITY_READ_ONLY_LATE(static bitmap_t*) asid_bitmap;
1132 static bitmap_t asid_plru_bitmap[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA;
1133 static uint64_t asid_plru_generation[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA = {0};
1134 static uint64_t asid_plru_gencount MARK_AS_PMAP_DATA = 0;
1135
1136
1137 #if (__ARM_VMSA__ > 7)
1138 #if __ARM_MIXED_PAGE_SIZE__
1139 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_4k;
1140 #endif
1141 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_default;
1142 #endif
1143
1144 #if XNU_MONITOR
1145 /*
1146 * We define our target as 8 pages; enough for 2 page table pages, a PTD page,
1147 * and a PV page; in essence, twice as many pages as may be necessary to satisfy
1148 * a single pmap_enter request.
1149 */
1150 #define PMAP_MIN_FREE_PPL_PAGES 8
1151 #endif
1152
1153 #define pa_index(pa) \
1154 (atop((pa) - vm_first_phys))
1155
1156 #define pai_to_pvh(pai) \
1157 (&pv_head_table[pai])
1158
1159 #define pa_valid(x) \
1160 ((x) >= vm_first_phys && (x) < vm_last_phys)
1161
1162 /* PTE Define Macros */
1163
1164 #define pte_is_wired(pte) \
1165 (((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)
1166
1167 #define pte_was_writeable(pte) \
1168 (((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)
1169
1170 #define pte_set_was_writeable(pte, was_writeable) \
1171 do { \
1172 if ((was_writeable)) { \
1173 (pte) |= ARM_PTE_WRITEABLE; \
1174 } else { \
1175 (pte) &= ~ARM_PTE_WRITEABLE; \
1176 } \
1177 } while(0)
1178
1179 /* PVE Define Macros */
1180
1181 #define pve_next(pve) \
1182 ((pve)->pve_next)
1183
1184 #define pve_link_field(pve) \
1185 (&pve_next(pve))
1186
1187 #define pve_link(pp, e) \
1188 ((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))
1189
1190 #define pve_unlink(pp, e) \
1191 (pve_next(pp) = pve_next(e))
1192
1193 /* bits held in the ptep pointer field */
1194
1195 #define pve_get_ptep(pve) \
1196 ((pve)->pve_ptep)
1197
1198 #define pve_set_ptep(pve, ptep_new) \
1199 do { \
1200 (pve)->pve_ptep = (ptep_new); \
1201 } while (0)
1202
1203 /* PTEP Define Macros */
1204
1205 /* mask for page descriptor index */
1206 #define ARM_TT_PT_INDEX_MASK ARM_PGMASK
1207
1208 #if (__ARM_VMSA__ == 7)
1209
1210 /*
1211 * Shift value used for reconstructing the virtual address for a PTE.
1212 */
1213 #define ARM_TT_PT_ADDR_SHIFT (10U)
1214
1215 #define ptep_get_pmap(ptep) \
1216 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
1217
1218 #else
1219
1220 #if (ARM_PGSHIFT == 12)
1221 /*
1222 * Shift value used for reconstructing the virtual address for a PTE.
1223 */
1224 #define ARM_TT_PT_ADDR_SHIFT (9ULL)
1225 #else
1226
1227 /*
1228 * Shift value used for reconstructing the virtual address for a PTE.
1229 */
1230 #define ARM_TT_PT_ADDR_SHIFT (11ULL)
1231 #endif
1232
1233 #define ptep_get_pmap(ptep) \
1234 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
1235
1236 #endif
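/*
 * Illustrative note (assumes 8-byte PTEs on arm64): the shift is
 * log2(number of PTEs per hardware page).  With ARM_PGSHIFT == 12 a 4K page
 * holds 4096 / 8 = 512 = 2^9 entries (shift 9); with ARM_PGSHIFT == 14 a 16K
 * page holds 16384 / 8 = 2048 = 2^11 entries (shift 11).
 */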
1237
1238 #define ptep_get_ptd(ptep) \
1239 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
1240
1241
1242 /* PVH Define Macros */
1243
1244 /* pvhead type */
1245 #define PVH_TYPE_NULL 0x0UL
1246 #define PVH_TYPE_PVEP 0x1UL
1247 #define PVH_TYPE_PTEP 0x2UL
1248 #define PVH_TYPE_PTDP 0x3UL
1249
1250 #define PVH_TYPE_MASK (0x3UL)
1251
1252 #ifdef __arm64__
1253
1254 /* All flags listed below are stored in the PV head pointer unless otherwise noted */
1255 #define PVH_FLAG_IOMMU 0x4UL /* Stored in each PTE, or in PV head for single-PTE PV heads */
1256 #define PVH_FLAG_IOMMU_TABLE (1ULL << 63) /* Stored in each PTE, or in PV head for single-PTE PV heads */
1257 #define PVH_FLAG_CPU (1ULL << 62)
1258 #define PVH_LOCK_BIT 61
1259 #define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
1260 #define PVH_FLAG_EXEC (1ULL << 60)
1261 #define PVH_FLAG_LOCKDOWN (1ULL << 59)
1262 #define PVH_FLAG_HASHED (1ULL << 58) /* Used to mark that a page has been hashed into the hibernation image. */
1263 #define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN | PVH_FLAG_HASHED)
1264
1265 #else /* !__arm64__ */
1266
1267 #define PVH_LOCK_BIT 31
1268 #define PVH_FLAG_LOCK (1UL << PVH_LOCK_BIT)
1269 #define PVH_HIGH_FLAGS PVH_FLAG_LOCK
1270
1271 #endif
1272
1273 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
1274
1275 #define pvh_test_type(h, b) \
1276 ((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))
1277
1278 #define pvh_ptep(h) \
1279 ((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
1280
1281 #define pvh_list(h) \
1282 ((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
1283
1284 #define pvh_get_flags(h) \
1285 (*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)
1286
1287 #define pvh_set_flags(h, f) \
1288 do { \
1289 os_atomic_store((vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
1290 relaxed); \
1291 } while (0)
1292
1293 #define pvh_update_head(h, e, t) \
1294 do { \
1295 assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK); \
1296 os_atomic_store((vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
1297 relaxed); \
1298 } while (0)
1299
1300 #define pvh_update_head_unlocked(h, e, t) \
1301 do { \
1302 assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK)); \
1303 *(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK; \
1304 } while (0)
1305
1306 #define pvh_add(h, e) \
1307 do { \
1308 assert(!pvh_test_type((h), PVH_TYPE_PTEP)); \
1309 pve_next(e) = pvh_list(h); \
1310 pvh_update_head((h), (e), PVH_TYPE_PVEP); \
1311 } while (0)
1312
1313 #define pvh_remove(h, p, e) \
1314 do { \
1315 assert(!PVE_NEXT_IS_ALTACCT(pve_next((e)))); \
1316 if ((p) == (h)) { \
1317 if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
1318 pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
1319 } else { \
1320 pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
1321 } \
1322 } else { \
1323 /* \
1324 * PMAP LEDGERS: \
1325 * preserve the "alternate accounting" bit \
1326 * when updating "p" (the previous entry's \
1327 * "pve_next"). \
1328 */ \
1329 boolean_t __is_altacct; \
1330 __is_altacct = PVE_NEXT_IS_ALTACCT(*(p)); \
1331 *(p) = PVE_NEXT_PTR(pve_next((e))); \
1332 if (__is_altacct) { \
1333 PVE_NEXT_SET_ALTACCT((p)); \
1334 } else { \
1335 PVE_NEXT_CLR_ALTACCT((p)); \
1336 } \
1337 } \
1338 } while (0)
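#if 0
/*
 * Illustrative sketch (not part of the original source; the example_* helper
 * is hypothetical and assumes the PVH lock for the page is held): a physical
 * page's PV head encodes its mapping state in the low type bits and assorted
 * flags in the high bits, so a reverse-mapping walk first dispatches on type.
 */
static void
example_walk_mappings(unsigned int pai)
{
	pv_entry_t **pv_h = pai_to_pvh(pai);

	if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
		pt_entry_t *ptep = pvh_ptep(pv_h);          /* exactly one mapping */
		(void)ptep;
	} else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
		for (pv_entry_t *pve = pvh_list(pv_h);      /* one pv_entry per mapping */
		    pve != PV_ENTRY_NULL;
		    pve = PVE_NEXT_PTR(pve_next(pve))) {
			(void)pve_get_ptep(pve);
		}
	} /* PVH_TYPE_PTDP: page-table page; PVH_TYPE_NULL: no mappings */
}
#endif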
1339
1340
1341 /* PPATTR Define Macros */
1342
1343 #define ppattr_set_bits(h, b) os_atomic_or((h), (pp_attr_t)(b), acq_rel)
1344 #define ppattr_clear_bits(h, b) os_atomic_andnot((h), (pp_attr_t)(b), acq_rel)
1345
1346 #define ppattr_test_bits(h, b) \
1347 ((*(h) & (pp_attr_t)(b)) == (pp_attr_t)(b))
1348
1349 #define pa_set_bits(x, b) \
1350 do { \
1351 if (pa_valid(x)) \
1352 ppattr_set_bits(&pp_attr_table[pa_index(x)], \
1353 (b)); \
1354 } while (0)
1355
1356 #define pa_test_bits(x, b) \
1357 (pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
1358 (b)) : FALSE)
1359
1360 #define pa_clear_bits(x, b) \
1361 do { \
1362 if (pa_valid(x)) \
1363 ppattr_clear_bits(&pp_attr_table[pa_index(x)], \
1364 (b)); \
1365 } while (0)
1366
1367 #define pa_set_modify(x) \
1368 pa_set_bits(x, PP_ATTR_MODIFIED)
1369
1370 #define pa_clear_modify(x) \
1371 pa_clear_bits(x, PP_ATTR_MODIFIED)
1372
1373 #define pa_set_reference(x) \
1374 pa_set_bits(x, PP_ATTR_REFERENCED)
1375
1376 #define pa_clear_reference(x) \
1377 pa_clear_bits(x, PP_ATTR_REFERENCED)
1378
1379 #if XNU_MONITOR
1380 #define pa_set_monitor(x) \
1381 pa_set_bits((x), PP_ATTR_MONITOR)
1382
1383 #define pa_clear_monitor(x) \
1384 pa_clear_bits((x), PP_ATTR_MONITOR)
1385
1386 #define pa_test_monitor(x) \
1387 pa_test_bits((x), PP_ATTR_MONITOR)
1388
1389 #define pa_set_no_monitor(x) \
1390 pa_set_bits((x), PP_ATTR_NO_MONITOR)
1391
1392 #define pa_clear_no_monitor(x) \
1393 pa_clear_bits((x), PP_ATTR_NO_MONITOR)
1394
1395 #define pa_test_no_monitor(x) \
1396 pa_test_bits((x), PP_ATTR_NO_MONITOR)
1397 #endif
1398
1399 #define IS_INTERNAL_PAGE(pai) \
1400 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1401 #define SET_INTERNAL_PAGE(pai) \
1402 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1403 #define CLR_INTERNAL_PAGE(pai) \
1404 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1405
1406 #define IS_REUSABLE_PAGE(pai) \
1407 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1408 #define SET_REUSABLE_PAGE(pai) \
1409 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1410 #define CLR_REUSABLE_PAGE(pai) \
1411 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1412
1413 #define IS_ALTACCT_PAGE(pai, pve_p) \
1414 (((pve_p) == NULL) \
1415 ? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
1416 : PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
1417 #define SET_ALTACCT_PAGE(pai, pve_p) \
1418 if ((pve_p) == NULL) { \
1419 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1420 } else { \
1421 PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
1422 }
1423 #define CLR_ALTACCT_PAGE(pai, pve_p) \
1424 if ((pve_p) == NULL) { \
1425 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1426 } else { \
1427 PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
1428 }
1429
1430 #define IS_REFFAULT_PAGE(pai) \
1431 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1432 #define SET_REFFAULT_PAGE(pai) \
1433 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1434 #define CLR_REFFAULT_PAGE(pai) \
1435 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1436
1437 #define IS_MODFAULT_PAGE(pai) \
1438 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1439 #define SET_MODFAULT_PAGE(pai) \
1440 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1441 #define CLR_MODFAULT_PAGE(pai) \
1442 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1443
1444 #define tte_get_ptd(tte) \
1445 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))
1446
1447
1448 #if (__ARM_VMSA__ == 7)
1449
1450 #define tte_index(pmap, pt_attr, addr) \
1451 ttenum((addr))
1452
1453 #define pte_index(pmap, pt_attr, addr) \
1454 ptenum((addr))
1455
1456 #else
1457
1458 #define ttn_index(pmap, pt_attr, addr, pt_level) \
1459 (((addr) & (pt_attr)->pta_level_info[(pt_level)].index_mask) >> (pt_attr)->pta_level_info[(pt_level)].shift)
1460
1461 #define tt0_index(pmap, pt_attr, addr) \
1462 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L0_LEVEL)
1463
1464 #define tt1_index(pmap, pt_attr, addr) \
1465 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L1_LEVEL)
1466
1467 #define tt2_index(pmap, pt_attr, addr) \
1468 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L2_LEVEL)
1469
1470 #define tt3_index(pmap, pt_attr, addr) \
1471 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L3_LEVEL)
1472
1473 #define tte_index(pmap, pt_attr, addr) \
1474 tt2_index((pmap), (pt_attr), (addr))
1475
1476 #define pte_index(pmap, pt_attr, addr) \
1477 tt3_index((pmap), (pt_attr), (addr))
1478
1479 #endif
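/*
 * Illustrative note: with the standard 4K-granule layout, tte_index() extracts
 * the twig (L2) index from VA bits [29:21] and pte_index() extracts the leaf
 * (L3) index from VA bits [20:12], each 9 bits wide (512 entries per table).
 * The 16K-granule masks and shifts differ but follow the same pattern.
 */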
1480
1481
1482 static inline ptd_info_t *
1483 ptd_get_info(pt_desc_t *ptd, const tt_entry_t *ttep)
1484 {
1485 assert(ptd->ptd_info[0].refcnt != PT_DESC_IOMMU_REFCOUNT);
1486 #if PT_INDEX_MAX == 1
1487 #pragma unused(ttep)
1488 return &ptd->ptd_info[0];
1489 #else
1490 uint64_t pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(ptd->pmap));
1491 vm_offset_t ttep_page = (vm_offset_t)ttep >> pmap_page_shift;
1492 unsigned int ttep_index = ttep_page & ((1U << (PAGE_SHIFT - pmap_page_shift)) - 1);
1493 assert(ttep_index < PT_INDEX_MAX);
1494 return &ptd->ptd_info[ttep_index];
1495 #endif
1496 }
1497
1498 static inline ptd_info_t *
1499 ptep_get_info(const pt_entry_t *ptep)
1500 {
1501 return ptd_get_info(ptep_get_ptd(ptep), ptep);
1502 }
1503
1504 static inline vm_map_address_t
1505 ptep_get_va(const pt_entry_t *ptep)
1506 {
1507 pv_entry_t **pv_h;
1508 const pt_attr_t * const pt_attr = pmap_get_pt_attr(ptep_get_pmap(ptep));
1509 pv_h = pai_to_pvh(pa_index(ml_static_vtop(((vm_offset_t)ptep))));;
1510
1511 assert(pvh_test_type(pv_h, PVH_TYPE_PTDP));
1512 pt_desc_t *ptdp = (pt_desc_t *)(pvh_list(pv_h));
1513
1514 vm_map_address_t va = ptd_get_info(ptdp, ptep)->va;
1515 vm_offset_t ptep_index = ((vm_offset_t)ptep & pt_attr_leaf_offmask(pt_attr)) / sizeof(*ptep);
1516
1517 va += (ptep_index << pt_attr_leaf_shift(pt_attr));
1518
1519 return va;
1520 }
1521
1522 static inline void
1523 pte_set_wired(pmap_t pmap, pt_entry_t *ptep, boolean_t wired)
1524 {
1525 if (wired) {
1526 *ptep |= ARM_PTE_WIRED;
1527 } else {
1528 *ptep &= ~ARM_PTE_WIRED;
1529 }
1530 /*
1531 * Do not track wired page count for kernel pagetable pages. Kernel mappings are
1532 * not guaranteed to have PTDs in the first place, and kernel pagetable pages are
1533 * never reclaimed.
1534 */
1535 if (pmap == kernel_pmap) {
1536 return;
1537 }
1538 unsigned short *ptd_wiredcnt_ptr;
1539 ptd_wiredcnt_ptr = &(ptep_get_info(ptep)->wiredcnt);
1540 if (wired) {
1541 os_atomic_add(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
1542 } else {
1543 unsigned short prev_wired = os_atomic_sub_orig(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
1544 if (__improbable(prev_wired == 0)) {
1545 panic("pmap %p (pte %p): wired count underflow", pmap, ptep);
1546 }
1547 }
1548 }
1549
1550 /*
1551 * Lock on pmap system
1552 */
1553
1554 lck_grp_t pmap_lck_grp MARK_AS_PMAP_DATA;
1555
1556 static inline void
1557 pmap_lock_init(pmap_t pmap)
1558 {
1559 lck_rw_init(&pmap->rwlock, &pmap_lck_grp, 0);
1560 pmap->rwlock.lck_rw_can_sleep = FALSE;
1561 }
1562
1563 static inline void
1564 pmap_lock_destroy(pmap_t pmap)
1565 {
1566 lck_rw_destroy(&pmap->rwlock, &pmap_lck_grp);
1567 }
1568
1569 static inline void
1570 pmap_lock(pmap_t pmap)
1571 {
1572 #if !XNU_MONITOR
1573 mp_disable_preemption();
1574 #endif
1575 lck_rw_lock_exclusive(&pmap->rwlock);
1576 }
1577
1578 static inline void
1579 pmap_lock_ro(pmap_t pmap)
1580 {
1581 #if !XNU_MONITOR
1582 mp_disable_preemption();
1583 #endif
1584 lck_rw_lock_shared(&pmap->rwlock);
1585 }
1586
1587 static inline void
1588 pmap_unlock(pmap_t pmap)
1589 {
1590 lck_rw_unlock_exclusive(&pmap->rwlock);
1591 #if !XNU_MONITOR
1592 mp_enable_preemption();
1593 #endif
1594 }
1595
1596 static inline void
1597 pmap_unlock_ro(pmap_t pmap)
1598 {
1599 lck_rw_unlock_shared(&pmap->rwlock);
1600 #if !XNU_MONITOR
1601 mp_enable_preemption();
1602 #endif
1603 }
1604
1605 static inline bool
1606 pmap_try_lock(pmap_t pmap)
1607 {
1608 bool ret;
1609
1610 #if !XNU_MONITOR
1611 mp_disable_preemption();
1612 #endif
1613 ret = lck_rw_try_lock_exclusive(&pmap->rwlock);
1614 if (!ret) {
1615 #if !XNU_MONITOR
1616 mp_enable_preemption();
1617 #endif
1618 }
1619
1620 return ret;
1621 }
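#if 0
/*
 * Illustrative usage sketch (not part of the original source; the example_*
 * helper is hypothetical): shared vs. exclusive acquisition and the try-lock
 * pattern for paths that must not block.  Note that preemption stays disabled
 * for the duration of the hold on non-PPL configurations.
 */
static void
example_pmap_locking(pmap_t pmap)
{
	pmap_lock_ro(pmap);             /* shared: reading mappings */
	/* ... walk page tables ... */
	pmap_unlock_ro(pmap);

	if (pmap_try_lock(pmap)) {      /* exclusive, without blocking */
		/* ... modify mappings ... */
		pmap_unlock(pmap);
	}
}
#endif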
1622
1623 //assert that ONLY READ lock is held
1624 __unused static inline void
1625 pmap_assert_locked_r(__unused pmap_t pmap)
1626 {
1627 #if MACH_ASSERT
1628 lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_SHARED);
1629 #else
1630 (void)pmap;
1631 #endif
1632 }
1633 //assert that ONLY WRITE lock is held
1634 __unused static inline void
1635 pmap_assert_locked_w(__unused pmap_t pmap)
1636 {
1637 #if MACH_ASSERT
1638 lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_EXCLUSIVE);
1639 #else
1640 (void)pmap;
1641 #endif
1642 }
1643
1644 //assert that either READ or WRITE lock is held
1645 __unused static inline void
1646 pmap_assert_locked_any(__unused pmap_t pmap)
1647 {
1648 #if MACH_ASSERT
1649 lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_HELD);
1650 #endif
1651 }
1652
1653
1654 #if defined(__arm64__)
1655 #define PVH_LOCK_WORD 1 /* Assumes little-endian */
1656 #else
1657 #define PVH_LOCK_WORD 0
1658 #endif
1659
1660 #define ASSERT_PVH_LOCKED(index) \
1661 do { \
1662 assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK); \
1663 } while (0)
1664
1665 #define LOCK_PVH(index) \
1666 do { \
1667 pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
1668 } while (0)
1669
1670 #define UNLOCK_PVH(index) \
1671 do { \
1672 ASSERT_PVH_LOCKED(index); \
1673 pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
1674 } while (0)
1675
1676 #define PMAP_UPDATE_TLBS(pmap, s, e, strong) { \
1677 pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (size_t)((e) - (s)), pmap); \
1678 pmap_sync_tlb(strong); \
1679 }
1680
1681 #define FLUSH_PTE_RANGE(spte, epte) \
1682 __builtin_arm_dmb(DMB_ISH);
1683
1684 #define FLUSH_PTE(pte_p) \
1685 __builtin_arm_dmb(DMB_ISH);
1686
1687 #define FLUSH_PTE_STRONG(pte_p) \
1688 __builtin_arm_dsb(DSB_ISH);
1689
1690 #define FLUSH_PTE_RANGE_STRONG(spte, epte) \
1691 __builtin_arm_dsb(DSB_ISH);
1692
1693 #define WRITE_PTE_FAST(pte_p, pte_entry) \
1694 __unreachable_ok_push \
1695 if (TEST_PAGE_RATIO_4) { \
1696 if (((unsigned)(pte_p)) & 0x1f) { \
1697 panic("%s: WRITE_PTE_FAST is unaligned, " \
1698 "pte_p=%p, pte_entry=%p", \
1699 __FUNCTION__, \
1700 pte_p, (void*)pte_entry); \
1701 } \
1702 if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
1703 *(pte_p) = (pte_entry); \
1704 *((pte_p)+1) = (pte_entry); \
1705 *((pte_p)+2) = (pte_entry); \
1706 *((pte_p)+3) = (pte_entry); \
1707 } else { \
1708 *(pte_p) = (pte_entry); \
1709 *((pte_p)+1) = (pte_entry) | 0x1000; \
1710 *((pte_p)+2) = (pte_entry) | 0x2000; \
1711 *((pte_p)+3) = (pte_entry) | 0x3000; \
1712 } \
1713 } else { \
1714 *(pte_p) = (pte_entry); \
1715 } \
1716 __unreachable_ok_pop
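/*
 * Illustrative note on WRITE_PTE_FAST above: when TEST_PAGE_RATIO_4 is set,
 * the VM page size is four times the hardware page size (e.g. 16K software
 * pages on 4K hardware), so one software mapping spans four consecutive
 * hardware PTEs.  Valid entries get their output address bumped by 0x1000 per
 * slot, while empty/compressed markers are replicated unchanged.
 */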
1717
1718 #define WRITE_PTE(pte_p, pte_entry) \
1719 WRITE_PTE_FAST(pte_p, pte_entry); \
1720 FLUSH_PTE(pte_p);
1721
1722 #define WRITE_PTE_STRONG(pte_p, pte_entry) \
1723 WRITE_PTE_FAST(pte_p, pte_entry); \
1724 FLUSH_PTE_STRONG(pte_p);
1725
1726 /*
1727 * Other useful macros.
1728 */
1729 #define current_pmap() \
1730 (vm_map_pmap(current_thread()->map))
1731
1732 #if XNU_MONITOR
1733 /*
1734 * PPL-related macros.
1735 */
1736 #define ARRAY_ELEM_PTR_IS_VALID(_ptr_, _elem_size_, _array_begin_, _array_end_) \
1737 (((_ptr_) >= (typeof(_ptr_))_array_begin_) && \
1738 ((_ptr_) < (typeof(_ptr_))_array_end_) && \
1739 !((((void *)(_ptr_)) - ((void *)_array_begin_)) % (_elem_size_)))
1740
1741 #define PMAP_PTR_IS_VALID(x) ARRAY_ELEM_PTR_IS_VALID(x, sizeof(struct pmap), pmap_array_begin, pmap_array_end)
1742
1743 #define USER_PMAP_IS_VALID(x) (PMAP_PTR_IS_VALID(x) && (os_atomic_load(&(x)->ref_count, relaxed) > 0))
1744
1745 #define VALIDATE_PMAP(x) \
1746 if (__improbable(((x) != kernel_pmap) && !USER_PMAP_IS_VALID(x))) \
1747 panic("%s: invalid pmap %p", __func__, (x));
1748
1749 #define VALIDATE_LEDGER_PTR(x) \
1750 if (__improbable(!ARRAY_ELEM_PTR_IS_VALID(x, sizeof(void *), pmap_ledger_ptr_array_begin, pmap_ledger_ptr_array_end))) \
1751 panic("%s: invalid ledger ptr %p", __func__, (x));
1752
1753 #define ARRAY_ELEM_INDEX(x, _elem_size_, _array_begin_) ((uint64_t)((((void *)(x)) - (_array_begin_)) / (_elem_size_)))
1754
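/*
 * Validate a ledger pointer handed in from outside the PPL: its back_ptr must
 * point into the PPL's ledger-pointer array, and that array slot must point
 * back at the ledger itself. Returns the slot index, panicking on any
 * inconsistency.
 */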
1755 static uint64_t
1756 pmap_ledger_validate(void * ledger)
1757 {
1758 uint64_t array_index;
1759 pmap_ledger_t ** ledger_ptr_array_ptr = ((pmap_ledger_t*)ledger)->back_ptr;
1760 VALIDATE_LEDGER_PTR(ledger_ptr_array_ptr);
1761 array_index = ARRAY_ELEM_INDEX(ledger_ptr_array_ptr, sizeof(pmap_ledger_t *), pmap_ledger_ptr_array_begin);
1762
1763 if (array_index >= MAX_PMAP_LEDGERS) {
1764 panic("%s: ledger %p array index invalid, index was %#llx", __func__, ledger, array_index);
1765 }
1766
1767 pmap_ledger_t *ledger_ptr = *ledger_ptr_array_ptr;
1768
1769 if (__improbable(ledger_ptr != ledger)) {
1770 panic("%s: ledger pointer mismatch, %p != %p", __func__, ledger, ledger_ptr);
1771 }
1772
1773 return array_index;
1774 }
1775
1776 #else /* XNU_MONITOR */
1777
1778 #define VALIDATE_PMAP(x) assert((x) != NULL);
1779
1780 #endif /* XNU_MONITOR */
1781
1782 #if DEVELOPMENT || DEBUG
1783
1784 /*
1785 * Trace levels are controlled by a bitmask in which each
1786 * level can be enabled/disabled by the (1<<level) position
1787 * in the boot-arg:
1788 * Level 1: pmap lifecycle (create/destroy/switch)
1789 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
1790 * Level 3: internal state management (attributes/fast-fault)
1791 * Levels 4-7: TTE traces for paging levels 0-3. TTBs are traced at level 4.
1792 */
1793
1794 SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;
1795
1796 #define PMAP_TRACE(level, ...) \
1797 if (__improbable((1 << (level)) & pmap_trace_mask)) { \
1798 KDBG_RELEASE(__VA_ARGS__); \
1799 }
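/*
 * Illustrative example (hypothetical values): with a trace mask of
 * ((1 << 1) | (1 << 2)) == 0x6, level-1 (pmap lifecycle) and level-2
 * (mapping lifecycle) events are emitted, so a call such as
 *     PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START, ...);
 * produces a kdebug record, while level-3 traces remain disabled.
 */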
1800 #else /* DEVELOPMENT || DEBUG */
1801
1802 #define PMAP_TRACE(level, ...)
1803
1804 #endif /* DEVELOPMENT || DEBUG */
1805
1806
1807 /*
1808 * Internal function prototypes (forward declarations).
1809 */
1810
1811 typedef enum {
1812 PV_ALLOC_SUCCESS,
1813 PV_ALLOC_RETRY,
1814 PV_ALLOC_FAIL
1815 } pv_alloc_return_t;
1816
1817 static pv_alloc_return_t pv_alloc(
1818 pmap_t pmap,
1819 unsigned int pai,
1820 pv_entry_t **pvepp);
1821
1822 static void ptd_bootstrap(
1823 pt_desc_t *ptdp, unsigned int ptd_cnt);
1824
1825 static inline pt_desc_t *ptd_alloc_unlinked(void);
1826
1827 static pt_desc_t *ptd_alloc(pmap_t pmap);
1828
1829 static void ptd_deallocate(pt_desc_t *ptdp);
1830
1831 static void ptd_init(
1832 pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);
1833
1834 static void pmap_set_reference(
1835 ppnum_t pn);
1836
1837 pmap_paddr_t pmap_vtophys(
1838 pmap_t pmap, addr64_t va);
1839
1840 void pmap_switch_user_ttb(
1841 pmap_t pmap);
1842
1843 static kern_return_t pmap_expand(
1844 pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
1845
1846 static int pmap_remove_range(
1847 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);
1848
1849 static int pmap_remove_range_options(
1850 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, bool *, int);
1851
1852 static tt_entry_t *pmap_tt1_allocate(
1853 pmap_t, vm_size_t, unsigned int);
1854
1855 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1856
1857 static void pmap_tt1_deallocate(
1858 pmap_t, tt_entry_t *, vm_size_t, unsigned int);
1859
1860 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1861
1862 static kern_return_t pmap_tt_allocate(
1863 pmap_t, tt_entry_t **, unsigned int, unsigned int);
1864
1865 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1866
1867 static void pmap_tte_deallocate(
1868 pmap_t, tt_entry_t *, unsigned int);
1869
1870 const unsigned int arm_hardware_page_size = ARM_PGBYTES;
1871 const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
1872 const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;
1873
1874 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1875
1876 #if (__ARM_VMSA__ > 7)
1877
1878 static inline tt_entry_t *pmap_tt1e(
1879 pmap_t, vm_map_address_t);
1880
1881 static inline tt_entry_t *pmap_tt2e(
1882 pmap_t, vm_map_address_t);
1883
1884 static inline pt_entry_t *pmap_tt3e(
1885 pmap_t, vm_map_address_t);
1886
1887 static inline pt_entry_t *pmap_ttne(
1888 pmap_t, unsigned int, vm_map_address_t);
1889
1890 static void pmap_unmap_sharedpage(
1891 pmap_t pmap);
1892
1893 static boolean_t
1894 pmap_is_64bit(pmap_t);
1895
1896
1897 #endif /* (__ARM_VMSA__ > 7) */
1898
1899 static inline tt_entry_t *pmap_tte(
1900 pmap_t, vm_map_address_t);
1901
1902 static inline pt_entry_t *pmap_pte(
1903 pmap_t, vm_map_address_t);
1904
1905 static void pmap_update_cache_attributes_locked(
1906 ppnum_t, unsigned);
1907
1908 static boolean_t arm_clear_fast_fault(
1909 ppnum_t ppnum,
1910 vm_prot_t fault_type);
1911
1912 static pmap_paddr_t pmap_pages_reclaim(
1913 void);
1914
1915 static kern_return_t pmap_pages_alloc_zeroed(
1916 pmap_paddr_t *pa,
1917 unsigned size,
1918 unsigned option);
1919
1920 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1921 #define PMAP_PAGES_RECLAIM_NOWAIT 0x2
1922
1923 static void pmap_pages_free(
1924 pmap_paddr_t pa,
1925 unsigned size);
1926
1927 static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);
1928
1929 static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);
1930
1931 static void pmap_trim_self(pmap_t pmap);
1932 static void pmap_trim_subord(pmap_t subord);
1933
1934 #if __APRR_SUPPORTED__
1935 static uint64_t pte_to_xprr_perm(pt_entry_t pte);
1936 static pt_entry_t xprr_perm_to_pte(uint64_t perm);
1937 #endif /* __APRR_SUPPORTED__*/
1938
1939 /*
1940 * Temporary prototypes, while we wait for pmap_enter to move to taking an
1941 * address instead of a page number.
1942 */
1943 static kern_return_t
1944 pmap_enter_addr(
1945 pmap_t pmap,
1946 vm_map_address_t v,
1947 pmap_paddr_t pa,
1948 vm_prot_t prot,
1949 vm_prot_t fault_type,
1950 unsigned int flags,
1951 boolean_t wired);
1952
1953 kern_return_t
1954 pmap_enter_options_addr(
1955 pmap_t pmap,
1956 vm_map_address_t v,
1957 pmap_paddr_t pa,
1958 vm_prot_t prot,
1959 vm_prot_t fault_type,
1960 unsigned int flags,
1961 boolean_t wired,
1962 unsigned int options,
1963 __unused void *arg);
1964
1965 #ifdef CONFIG_XNUPOST
1966 kern_return_t pmap_test(void);
1967 #endif /* CONFIG_XNUPOST */
1968
1969 #if XNU_MONITOR
1970 static pmap_paddr_t pmap_alloc_page_for_kern(unsigned int options);
1971 static void pmap_alloc_page_for_ppl(unsigned int options);
1972
1973
1974 /*
1975 * This macro generates prototypes for the *_internal functions, which
1976 * represent the PPL interface. When the PPL is enabled, this will also
1977 * generate prototypes for the PPL entrypoints (*_ppl), as well as
1978 * the entrypoints themselves.
1979 */
1980 #define GEN_ASM_NAME(__function_name) _##__function_name##_ppl
1981
1982 #define PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
1983 static __return_type __function_name##_internal __function_args; \
1984 extern __return_type __function_name##_ppl __function_args; \
1985 __asm__ (".text \n" \
1986 ".align 2 \n" \
1987 ".globl " #__assembly_function_name "\n" \
1988 #__assembly_function_name ":\n" \
1989 "mov x15, " #__function_index "\n" \
1990 "b _aprr_ppl_enter\n")
1991
1992 #define PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
1993 PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name)
1994
1995 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
1996 PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, GEN_ASM_NAME(__function_name))
1997 #else /* XNU_MONITOR */
1998 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
1999 static __return_type __function_name##_internal __function_args
2000 #endif /* XNU_MONITOR */
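/*
 * Illustrative expansion (sketch): with XNU_MONITOR enabled,
 *     PMAP_SUPPORT_PROTOTYPES(kern_return_t, mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);
 * declares
 *     static kern_return_t mapping_free_prime_internal(void);
 *     extern kern_return_t mapping_free_prime_ppl(void);
 * and emits a _mapping_free_prime_ppl text stub that moves the numeric value
 * of MAPPING_FREE_PRIME_INDEX into x15 and branches to _aprr_ppl_enter, which
 * dispatches through ppl_handler_table. Without XNU_MONITOR it reduces to the
 * *_internal prototype alone.
 */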
2001
2002 PMAP_SUPPORT_PROTOTYPES(
2003 kern_return_t,
2004 arm_fast_fault, (pmap_t pmap,
2005 vm_map_address_t va,
2006 vm_prot_t fault_type,
2007 bool was_af_fault,
2008 bool from_user), ARM_FAST_FAULT_INDEX);
2009
2010
2011 PMAP_SUPPORT_PROTOTYPES(
2012 boolean_t,
2013 arm_force_fast_fault, (ppnum_t ppnum,
2014 vm_prot_t allow_mode,
2015 int options), ARM_FORCE_FAST_FAULT_INDEX);
2016
2017 MARK_AS_PMAP_TEXT static boolean_t
2018 arm_force_fast_fault_with_flush_range(
2019 ppnum_t ppnum,
2020 vm_prot_t allow_mode,
2021 int options,
2022 pmap_tlb_flush_range_t *flush_range);
2023
2024 PMAP_SUPPORT_PROTOTYPES(
2025 kern_return_t,
2026 mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);
2027
2028 PMAP_SUPPORT_PROTOTYPES(
2029 boolean_t,
2030 pmap_batch_set_cache_attributes, (ppnum_t pn,
2031 unsigned int cacheattr,
2032 unsigned int page_cnt,
2033 unsigned int page_index,
2034 boolean_t doit,
2035 unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);
2036
2037 PMAP_SUPPORT_PROTOTYPES(
2038 void,
2039 pmap_change_wiring, (pmap_t pmap,
2040 vm_map_address_t v,
2041 boolean_t wired), PMAP_CHANGE_WIRING_INDEX);
2042
2043 PMAP_SUPPORT_PROTOTYPES(
2044 pmap_t,
2045 pmap_create_options, (ledger_t ledger,
2046 vm_map_size_t size,
2047 unsigned int flags,
2048 kern_return_t * kr), PMAP_CREATE_INDEX);
2049
2050 PMAP_SUPPORT_PROTOTYPES(
2051 void,
2052 pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
2053
2054 PMAP_SUPPORT_PROTOTYPES(
2055 kern_return_t,
2056 pmap_enter_options, (pmap_t pmap,
2057 vm_map_address_t v,
2058 pmap_paddr_t pa,
2059 vm_prot_t prot,
2060 vm_prot_t fault_type,
2061 unsigned int flags,
2062 boolean_t wired,
2063 unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
2064
2065 PMAP_SUPPORT_PROTOTYPES(
2066 pmap_paddr_t,
2067 pmap_find_pa, (pmap_t pmap,
2068 addr64_t va), PMAP_FIND_PA_INDEX);
2069
2070 #if (__ARM_VMSA__ > 7)
2071 PMAP_SUPPORT_PROTOTYPES(
2072 kern_return_t,
2073 pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
2074 #endif
2075
2076
2077 PMAP_SUPPORT_PROTOTYPES(
2078 boolean_t,
2079 pmap_is_empty, (pmap_t pmap,
2080 vm_map_offset_t va_start,
2081 vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
2082
2083
2084 PMAP_SUPPORT_PROTOTYPES(
2085 unsigned int,
2086 pmap_map_cpu_windows_copy, (ppnum_t pn,
2087 vm_prot_t prot,
2088 unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);
2089
2090 PMAP_SUPPORT_PROTOTYPES(
2091 kern_return_t,
2092 pmap_nest, (pmap_t grand,
2093 pmap_t subord,
2094 addr64_t vstart,
2095 uint64_t size), PMAP_NEST_INDEX);
2096
2097 PMAP_SUPPORT_PROTOTYPES(
2098 void,
2099 pmap_page_protect_options, (ppnum_t ppnum,
2100 vm_prot_t prot,
2101 unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
2102
2103 PMAP_SUPPORT_PROTOTYPES(
2104 void,
2105 pmap_protect_options, (pmap_t pmap,
2106 vm_map_address_t start,
2107 vm_map_address_t end,
2108 vm_prot_t prot,
2109 unsigned int options,
2110 void *args), PMAP_PROTECT_OPTIONS_INDEX);
2111
2112 PMAP_SUPPORT_PROTOTYPES(
2113 kern_return_t,
2114 pmap_query_page_info, (pmap_t pmap,
2115 vm_map_offset_t va,
2116 int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);
2117
2118 PMAP_SUPPORT_PROTOTYPES(
2119 mach_vm_size_t,
2120 pmap_query_resident, (pmap_t pmap,
2121 vm_map_address_t start,
2122 vm_map_address_t end,
2123 mach_vm_size_t * compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);
2124
2125 PMAP_SUPPORT_PROTOTYPES(
2126 void,
2127 pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
2128
2129 PMAP_SUPPORT_PROTOTYPES(
2130 int,
2131 pmap_remove_options, (pmap_t pmap,
2132 vm_map_address_t start,
2133 vm_map_address_t end,
2134 int options), PMAP_REMOVE_OPTIONS_INDEX);
2135
2136 PMAP_SUPPORT_PROTOTYPES(
2137 kern_return_t,
2138 pmap_return, (boolean_t do_panic,
2139 boolean_t do_recurse), PMAP_RETURN_INDEX);
2140
2141 PMAP_SUPPORT_PROTOTYPES(
2142 void,
2143 pmap_set_cache_attributes, (ppnum_t pn,
2144 unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);
2145
2146 PMAP_SUPPORT_PROTOTYPES(
2147 void,
2148 pmap_update_compressor_page, (ppnum_t pn,
2149 unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);
2150
2151 PMAP_SUPPORT_PROTOTYPES(
2152 void,
2153 pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
2154
2155 #if MACH_ASSERT || XNU_MONITOR
2156 PMAP_SUPPORT_PROTOTYPES(
2157 void,
2158 pmap_set_process, (pmap_t pmap,
2159 int pid,
2160 char *procname), PMAP_SET_PROCESS_INDEX);
2161 #endif
2162
2163 PMAP_SUPPORT_PROTOTYPES(
2164 void,
2165 pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);
2166
2167 PMAP_SUPPORT_PROTOTYPES(
2168 kern_return_t,
2169 pmap_unnest_options, (pmap_t grand,
2170 addr64_t vaddr,
2171 uint64_t size,
2172 unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
2173
2174 #if XNU_MONITOR
2175 PMAP_SUPPORT_PROTOTYPES(
2176 void,
2177 pmap_cpu_data_init, (unsigned int cpu_number), PMAP_CPU_DATA_INIT_INDEX);
2178 #endif
2179
2180 PMAP_SUPPORT_PROTOTYPES(
2181 void,
2182 phys_attribute_set, (ppnum_t pn,
2183 unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
2184
2185 #if XNU_MONITOR
2186 PMAP_SUPPORT_PROTOTYPES(
2187 void,
2188 pmap_mark_page_as_ppl_page, (pmap_paddr_t pa, bool initially_free), PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX);
2189 #endif
2190
2191 PMAP_SUPPORT_PROTOTYPES(
2192 void,
2193 phys_attribute_clear, (ppnum_t pn,
2194 unsigned int bits,
2195 int options,
2196 void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);
2197
2198 #if __ARM_RANGE_TLBI__
2199 PMAP_SUPPORT_PROTOTYPES(
2200 void,
2201 phys_attribute_clear_range, (pmap_t pmap,
2202 vm_map_address_t start,
2203 vm_map_address_t end,
2204 unsigned int bits,
2205 unsigned int options), PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX);
2206 #endif /* __ARM_RANGE_TLBI__ */
2207
2208
2209 PMAP_SUPPORT_PROTOTYPES(
2210 void,
2211 pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);
2212
2213 PMAP_SUPPORT_PROTOTYPES(
2214 void,
2215 pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);
2216
2217 PMAP_SUPPORT_PROTOTYPES(
2218 void,
2219 pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);
2220
2221 #if XNU_MONITOR
2222 PMAP_SUPPORT_PROTOTYPES(
2223 uint64_t,
2224 pmap_release_ppl_pages_to_kernel, (void), PMAP_RELEASE_PAGES_TO_KERNEL_INDEX);
2225 #endif
2226
2227 PMAP_SUPPORT_PROTOTYPES(
2228 void,
2229 pmap_set_vm_map_cs_enforced, (pmap_t pmap, bool new_value), PMAP_SET_VM_MAP_CS_ENFORCED_INDEX);
2230
2231 PMAP_SUPPORT_PROTOTYPES(
2232 void,
2233 pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);
2234
2235 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
2236 PMAP_SUPPORT_PROTOTYPES(
2237 void,
2238 pmap_disable_user_jop, (pmap_t pmap), PMAP_DISABLE_USER_JOP_INDEX);
2239 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
2240
2241 PMAP_SUPPORT_PROTOTYPES(
2242 void,
2243 pmap_trim, (pmap_t grand,
2244 pmap_t subord,
2245 addr64_t vstart,
2246 uint64_t size), PMAP_TRIM_INDEX);
2247
2248 #if HAS_APPLE_PAC && XNU_MONITOR
2249 PMAP_SUPPORT_PROTOTYPES(
2250 void *,
2251 pmap_sign_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key), PMAP_SIGN_USER_PTR);
2252 PMAP_SUPPORT_PROTOTYPES(
2253 void *,
2254 pmap_auth_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key), PMAP_AUTH_USER_PTR);
2255 #endif /* HAS_APPLE_PAC && XNU_MONITOR */
2256
2257
2258
2259
2260 #if XNU_MONITOR
2261 static void pmap_mark_page_as_ppl_page(pmap_paddr_t pa);
2262 #endif
2263
2264 void pmap_footprint_suspend(vm_map_t map,
2265 boolean_t suspend);
2266 PMAP_SUPPORT_PROTOTYPES(
2267 void,
2268 pmap_footprint_suspend, (vm_map_t map,
2269 boolean_t suspend),
2270 PMAP_FOOTPRINT_SUSPEND_INDEX);
2271
2272 #if XNU_MONITOR
2273 PMAP_SUPPORT_PROTOTYPES(
2274 void,
2275 pmap_ledger_alloc_init, (size_t),
2276 PMAP_LEDGER_ALLOC_INIT_INDEX);
2277
2278 PMAP_SUPPORT_PROTOTYPES(
2279 ledger_t,
2280 pmap_ledger_alloc, (void),
2281 PMAP_LEDGER_ALLOC_INDEX);
2282
2283 PMAP_SUPPORT_PROTOTYPES(
2284 void,
2285 pmap_ledger_free, (ledger_t),
2286 PMAP_LEDGER_FREE_INDEX);
2287 #endif
2288
2289
2290
2291
2292 #if CONFIG_PGTRACE
2293 boolean_t pgtrace_enabled = 0;
2294
2295 typedef struct {
2296 queue_chain_t chain;
2297
2298 /*
2299 * pmap - pmap for below addresses
2300 * ova - original va page address
2301 * cva - clone va addresses for pre, target and post pages
2302 * cva_spte - clone saved ptes
2303 * range - trace range in this map
2304 * cloned - has been cloned or not
2305 */
2306 pmap_t pmap;
2307 vm_map_offset_t ova;
2308 vm_map_offset_t cva[3];
2309 pt_entry_t cva_spte[3];
2310 struct {
2311 pmap_paddr_t start;
2312 pmap_paddr_t end;
2313 } range;
2314 bool cloned;
2315 } pmap_pgtrace_map_t;
2316
2317 static void pmap_pgtrace_init(void);
2318 static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
2319 static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
2320 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
2321 #endif
2322
2323 #if (__ARM_VMSA__ > 7)
2324 /*
2325 * The low global vector page is mapped at a fixed alias.
2326 * Since the page size is 16k for H8 and newer, we map the globals to a
2327 * 16k-aligned address. Readers of the globals (e.g. lldb, panic server) need
2328 * to check both addresses anyway for backward compatibility, so for now
2329 * we leave H6 and H7 where they were.
2330 */
2331 #if (ARM_PGSHIFT == 14)
2332 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
2333 #else
2334 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
2335 #endif
2336
2337 #else
2338 #define LOWGLOBAL_ALIAS (0xFFFF1000)
2339 #endif
2340
2341 long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
2342 long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
2343 long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
2344 long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;
2345
2346 #if XNU_MONITOR
2347
2348 #if __has_feature(ptrauth_calls)
2349 #define __ptrauth_ppl_handler __ptrauth(ptrauth_key_function_pointer, true, 0)
2350 #else
2351 #define __ptrauth_ppl_handler
2352 #endif
2353
2354 /*
2355 * Table of function pointers used for PPL dispatch.
2356 */
2357 const void * __ptrauth_ppl_handler const ppl_handler_table[PMAP_COUNT] = {
2358 [ARM_FAST_FAULT_INDEX] = arm_fast_fault_internal,
2359 [ARM_FORCE_FAST_FAULT_INDEX] = arm_force_fast_fault_internal,
2360 [MAPPING_FREE_PRIME_INDEX] = mapping_free_prime_internal,
2361 [PHYS_ATTRIBUTE_CLEAR_INDEX] = phys_attribute_clear_internal,
2362 [PHYS_ATTRIBUTE_SET_INDEX] = phys_attribute_set_internal,
2363 [PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX] = pmap_batch_set_cache_attributes_internal,
2364 [PMAP_CHANGE_WIRING_INDEX] = pmap_change_wiring_internal,
2365 [PMAP_CREATE_INDEX] = pmap_create_options_internal,
2366 [PMAP_DESTROY_INDEX] = pmap_destroy_internal,
2367 [PMAP_ENTER_OPTIONS_INDEX] = pmap_enter_options_internal,
2368 [PMAP_FIND_PA_INDEX] = pmap_find_pa_internal,
2369 [PMAP_INSERT_SHAREDPAGE_INDEX] = pmap_insert_sharedpage_internal,
2370 [PMAP_IS_EMPTY_INDEX] = pmap_is_empty_internal,
2371 [PMAP_MAP_CPU_WINDOWS_COPY_INDEX] = pmap_map_cpu_windows_copy_internal,
2372 [PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX] = pmap_mark_page_as_ppl_page_internal,
2373 [PMAP_NEST_INDEX] = pmap_nest_internal,
2374 [PMAP_PAGE_PROTECT_OPTIONS_INDEX] = pmap_page_protect_options_internal,
2375 [PMAP_PROTECT_OPTIONS_INDEX] = pmap_protect_options_internal,
2376 [PMAP_QUERY_PAGE_INFO_INDEX] = pmap_query_page_info_internal,
2377 [PMAP_QUERY_RESIDENT_INDEX] = pmap_query_resident_internal,
2378 [PMAP_REFERENCE_INDEX] = pmap_reference_internal,
2379 [PMAP_REMOVE_OPTIONS_INDEX] = pmap_remove_options_internal,
2380 [PMAP_RETURN_INDEX] = pmap_return_internal,
2381 [PMAP_SET_CACHE_ATTRIBUTES_INDEX] = pmap_set_cache_attributes_internal,
2382 [PMAP_UPDATE_COMPRESSOR_PAGE_INDEX] = pmap_update_compressor_page_internal,
2383 [PMAP_SET_NESTED_INDEX] = pmap_set_nested_internal,
2384 [PMAP_SET_PROCESS_INDEX] = pmap_set_process_internal,
2385 [PMAP_SWITCH_INDEX] = pmap_switch_internal,
2386 [PMAP_SWITCH_USER_TTB_INDEX] = pmap_switch_user_ttb_internal,
2387 [PMAP_CLEAR_USER_TTB_INDEX] = pmap_clear_user_ttb_internal,
2388 [PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX] = pmap_unmap_cpu_windows_copy_internal,
2389 [PMAP_UNNEST_OPTIONS_INDEX] = pmap_unnest_options_internal,
2390 [PMAP_FOOTPRINT_SUSPEND_INDEX] = pmap_footprint_suspend_internal,
2391 [PMAP_CPU_DATA_INIT_INDEX] = pmap_cpu_data_init_internal,
2392 [PMAP_RELEASE_PAGES_TO_KERNEL_INDEX] = pmap_release_ppl_pages_to_kernel_internal,
2393 [PMAP_SET_VM_MAP_CS_ENFORCED_INDEX] = pmap_set_vm_map_cs_enforced_internal,
2394 [PMAP_SET_JIT_ENTITLED_INDEX] = pmap_set_jit_entitled_internal,
2395 [PMAP_TRIM_INDEX] = pmap_trim_internal,
2396 [PMAP_LEDGER_ALLOC_INIT_INDEX] = pmap_ledger_alloc_init_internal,
2397 [PMAP_LEDGER_ALLOC_INDEX] = pmap_ledger_alloc_internal,
2398 [PMAP_LEDGER_FREE_INDEX] = pmap_ledger_free_internal,
2399 #if HAS_APPLE_PAC && XNU_MONITOR
2400 [PMAP_SIGN_USER_PTR] = pmap_sign_user_ptr_internal,
2401 [PMAP_AUTH_USER_PTR] = pmap_auth_user_ptr_internal,
2402 #endif /* HAS_APPLE_PAC && XNU_MONITOR */
2403 #if __ARM_RANGE_TLBI__
2404 [PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX] = phys_attribute_clear_range_internal,
2405 #endif /* __ARM_RANGE_TLBI__ */
2406 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
2407 [PMAP_DISABLE_USER_JOP_INDEX] = pmap_disable_user_jop_internal,
2408 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
2409 };
2410 #endif
2411
2412
2413 /*
2414 * Initializes the pmap per-CPU data structure for the given CPU number.
2415 */
2416 MARK_AS_PMAP_TEXT static void
2417 pmap_cpu_data_init_internal(unsigned int cpu_number)
2418 {
2419 pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();
2420
2421 #if XNU_MONITOR
2422 /* Verify cacheline-aligned */
2423 assert(((vm_offset_t)pmap_cpu_data & ((1 << MAX_L2_CLINE) - 1)) == 0);
2424 if (pmap_cpu_data->cpu_number != PMAP_INVALID_CPU_NUM) {
2425 panic("%s: pmap_cpu_data->cpu_number=%u, "
2426 "cpu_number=%u",
2427 __FUNCTION__, pmap_cpu_data->cpu_number,
2428 cpu_number);
2429 }
2430 #endif
2431 pmap_cpu_data->cpu_number = cpu_number;
2432 }
2433
2434 void
2435 pmap_cpu_data_init(void)
2436 {
2437 #if XNU_MONITOR
2438 pmap_cpu_data_init_ppl(cpu_number());
2439 #else
2440 pmap_cpu_data_init_internal(cpu_number());
2441 #endif
2442 }
2443
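/*
 * For XNU_MONITOR, this maps a PPL stack for each of MAX_CPUS out of physical
 * pages taken from avail_start, leaving an unmapped guard page below each
 * stack (stack_va starts one page above pmap_stacks_start and advances by
 * PPL_STACK_SIZE + ARM_PGBYTES), and then carves out a per-CPU register save
 * area. The non-monitor case only initializes the calling CPU's pmap per-CPU
 * data.
 */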
2444 static void
2445 pmap_cpu_data_array_init(void)
2446 {
2447 #if XNU_MONITOR
2448 unsigned int i = 0;
2449 pmap_paddr_t ppl_cpu_save_area_cur = 0;
2450 pt_entry_t template, *pte_p;
2451 vm_offset_t stack_va = (vm_offset_t)pmap_stacks_start + ARM_PGBYTES;
2452 assert((pmap_stacks_start != NULL) && (pmap_stacks_end != NULL));
2453 pmap_stacks_start_pa = avail_start;
2454
2455 for (i = 0; i < MAX_CPUS; i++) {
2456 for (vm_offset_t cur_va = stack_va; cur_va < (stack_va + PPL_STACK_SIZE); cur_va += ARM_PGBYTES) {
2457 assert(cur_va < (vm_offset_t)pmap_stacks_end);
2458 pte_p = pmap_pte(kernel_pmap, cur_va);
2459 assert(*pte_p == ARM_PTE_EMPTY);
2460 template = pa_to_pte(avail_start) | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE |
2461 ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) | xprr_perm_to_pte(XPRR_PPL_RW_PERM);
2462 #if __ARM_KERNEL_PROTECT__
2463 template |= ARM_PTE_NG;
2464 #endif /* __ARM_KERNEL_PROTECT__ */
2465 WRITE_PTE(pte_p, template);
2466 __builtin_arm_isb(ISB_SY);
2467 avail_start += ARM_PGBYTES;
2468 }
2469 #if KASAN
2470 kasan_map_shadow(stack_va, PPL_STACK_SIZE, false);
2471 #endif
2472 pmap_cpu_data_array[i].cpu_data.cpu_number = PMAP_INVALID_CPU_NUM;
2473 pmap_cpu_data_array[i].cpu_data.ppl_state = PPL_STATE_KERNEL;
2474 pmap_cpu_data_array[i].cpu_data.ppl_stack = (void*)(stack_va + PPL_STACK_SIZE);
2475 stack_va += (PPL_STACK_SIZE + ARM_PGBYTES);
2476 }
2477 sync_tlb_flush();
2478 pmap_stacks_end_pa = avail_start;
2479
2480 ppl_cpu_save_area_start = avail_start;
2481 ppl_cpu_save_area_end = ppl_cpu_save_area_start;
2482 ppl_cpu_save_area_cur = ppl_cpu_save_area_start;
2483
2484 for (i = 0; i < MAX_CPUS; i++) {
2485 while ((ppl_cpu_save_area_end - ppl_cpu_save_area_cur) < sizeof(arm_context_t)) {
2486 avail_start += PAGE_SIZE;
2487 ppl_cpu_save_area_end = avail_start;
2488 }
2489
2490 pmap_cpu_data_array[i].cpu_data.save_area = (arm_context_t *)phystokv(ppl_cpu_save_area_cur);
2491 ppl_cpu_save_area_cur += sizeof(arm_context_t);
2492 }
2493 #endif
2494
2495 pmap_cpu_data_init();
2496 }
2497
2498 pmap_cpu_data_t *
2499 pmap_get_cpu_data(void)
2500 {
2501 pmap_cpu_data_t * pmap_cpu_data = NULL;
2502
2503 #if XNU_MONITOR
2504 extern pmap_cpu_data_t* ml_get_ppl_cpu_data(void);
2505 pmap_cpu_data = ml_get_ppl_cpu_data();
2506 #else
2507 pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
2508 #endif
2509
2510 return pmap_cpu_data;
2511 }
2512
2513 #if XNU_MONITOR
2514 /*
2515 * pmap_set_range_xprr_perm takes a range (specified using start and end) that
2516 * falls within the physical aperture. All mappings within this range have
2517 * their protections changed from those specified by the expected_perm to those
2518 * specified by the new_perm.
2519 */
2520 static void
2521 pmap_set_range_xprr_perm(vm_address_t start,
2522 vm_address_t end,
2523 unsigned int expected_perm,
2524 unsigned int new_perm)
2525 {
2526 #if (__ARM_VMSA__ == 7)
2527 #error This function is not supported on older ARM hardware
2528 #else
2529 pmap_t pmap = NULL;
2530
2531 vm_address_t va = 0;
2532 vm_address_t tte_start = 0;
2533 vm_address_t tte_end = 0;
2534
2535 tt_entry_t *tte_p = NULL;
2536 pt_entry_t *pte_p = NULL;
2537 pt_entry_t *cpte_p = NULL;
2538 pt_entry_t *bpte_p = NULL;
2539 pt_entry_t *epte_p = NULL;
2540
2541 tt_entry_t tte = 0;
2542 pt_entry_t cpte = 0;
2543 pt_entry_t template = 0;
2544
2545 pmap = kernel_pmap;
2546
2547 va = start;
2548
2549 /*
2550 * Validate our arguments; any invalid argument will be grounds for a
2551 * panic.
2552 */
2553 if ((start | end) % ARM_PGBYTES) {
2554 panic("%s: start or end not page aligned, "
2555 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2556 __FUNCTION__,
2557 (void *)start, (void *)end, new_perm, expected_perm);
2558 }
2559
2560 if (start > end) {
2561 panic("%s: start > end, "
2562 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2563 __FUNCTION__,
2564 (void *)start, (void *)end, new_perm, expected_perm);
2565 }
2566
2567 bool in_physmap = (start >= physmap_base) && (end < physmap_end);
2568 bool in_static = (start >= gVirtBase) && (end < static_memory_end);
2569
2570 if (!(in_physmap || in_static)) {
2571 panic("%s: address not in static region or physical aperture, "
2572 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2573 __FUNCTION__,
2574 (void *)start, (void *)end, new_perm, expected_perm);
2575 }
2576
2577 if ((new_perm > XPRR_MAX_PERM) || (expected_perm > XPRR_MAX_PERM)) {
2578 panic("%s: invalid XPRR index, "
2579 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2580 __FUNCTION__,
2581 (void *)start, (void *)end, new_perm, expected_perm);
2582 }
2583
2584 /*
2585 * Walk over the PTEs for the given range, and set the protections on
2586 * those PTEs.
2587 */
2588 while (va < end) {
2589 tte_start = va;
2590 tte_end = ((va + pt_attr_twig_size(native_pt_attr)) & ~pt_attr_twig_offmask(native_pt_attr));
2591
2592 if (tte_end > end) {
2593 tte_end = end;
2594 }
2595
2596 tte_p = pmap_tte(pmap, va);
2597
2598 /*
2599 * The physical aperture should not have holes.
2600 * The physical aperture should be contiguous.
2601 * Do not make eye contact with the physical aperture.
2602 */
2603 if (tte_p == NULL) {
2604 panic("%s: physical aperture tte is NULL, "
2605 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2606 __FUNCTION__,
2607 (void *)start, (void *)end, new_perm, expected_perm);
2608 }
2609
2610 tte = *tte_p;
2611
2612 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
2613 /*
2614 * Walk over the given L3 page table page and update the
2615 * PTEs.
2616 */
2617 pte_p = (pt_entry_t *)ttetokv(tte);
2618 bpte_p = &pte_p[pte_index(pmap, native_pt_attr, va)];
2619 epte_p = bpte_p + ((tte_end - va) >> pt_attr_leaf_shift(native_pt_attr));
2620
2621 for (cpte_p = bpte_p; cpte_p < epte_p;
2622 cpte_p += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
2623 int pai = (int)pa_index(pte_to_pa(*cpte_p));
2624 LOCK_PVH(pai);
2625 cpte = *cpte_p;
2626
2627 /*
2628 * Every PTE involved should be valid, should
2629 * not have the hint bit set, and should have
2630 * the expected APRR index.
2633 */
2634 if ((cpte & ARM_PTE_TYPE_MASK) ==
2635 ARM_PTE_TYPE_FAULT) {
2636 panic("%s: physical aperture PTE is invalid, va=%p, "
2637 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2638 __FUNCTION__,
2639 (void *)va,
2640 (void *)start, (void *)end, new_perm, expected_perm);
2641 UNLOCK_PVH(pai);
2642 continue;
2643 }
2644
2645 if (cpte & ARM_PTE_HINT_MASK) {
2646 panic("%s: physical aperture PTE has hint bit set, va=%p, cpte=0x%llx, "
2647 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2648 __FUNCTION__,
2649 (void *)va, cpte,
2650 (void *)start, (void *)end, new_perm, expected_perm);
2651 }
2652
2653 if (pte_to_xprr_perm(cpte) != expected_perm) {
2654 panic("%s: perm=%llu does not match expected_perm, cpte=0x%llx, "
2655 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2656 __FUNCTION__,
2657 pte_to_xprr_perm(cpte), cpte,
2658 (void *)start, (void *)end, new_perm, expected_perm);
2659 }
2660
2661 template = cpte;
2662 template &= ~ARM_PTE_XPRR_MASK;
2663 template |= xprr_perm_to_pte(new_perm);
2664
2665 WRITE_PTE_STRONG(cpte_p, template);
2666 UNLOCK_PVH(pai);
2667 }
2668 } else {
2669 panic("%s: tte=0x%llx is not a table type entry, "
2670 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2671 __FUNCTION__,
2672 tte,
2673 (void *)start, (void *)end, new_perm, expected_perm);
2674 }
2675
2676 va = tte_end;
2677 }
2678
2679 PMAP_UPDATE_TLBS(pmap, start, end, false);
2680 #endif /* (__ARM_VMSA__ == 7) */
2681 }
2682
2683 /*
2684 * A convenience function for setting protections on a single page.
2685 */
2686 static inline void
2687 pmap_set_xprr_perm(vm_address_t page_kva,
2688 unsigned int expected_perm,
2689 unsigned int new_perm)
2690 {
2691 pmap_set_range_xprr_perm(page_kva, page_kva + PAGE_SIZE, expected_perm, new_perm);
2692 }
2693 #endif /* XNU_MONITOR */
2694
2695
2696 /*
2697 * pmap_pages_reclaim(): return a page by freeing an active pagetable page.
2698 * To be eligible, a pt page must be assigned to a non-kernel pmap.
2699 * It must not have any wired PTEs and must contain at least one valid PTE.
2700 * If no eligible page is found in the pt page list, return 0.
2701 */
2702 pmap_paddr_t
2703 pmap_pages_reclaim(
2704 void)
2705 {
2706 boolean_t found_page;
2707 unsigned i;
2708 pt_desc_t *ptdp;
2709
2710 /*
2711 * In a loop, check for a page in the reclaimed pt page list.
2712 * If one is present, unlink that page and return its physical address.
2713 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
2714 * If found, invoke pmap_remove_range() on its pmap and address range, then
2715 * deallocate that pt page. This will end up adding the pt page to the
2716 * reclaimed pt page list.
2717 */
2718
2719 pmap_simple_lock(&pmap_pages_lock);
2720 pmap_pages_request_count++;
2721 pmap_pages_request_acum++;
2722
2723 while (1) {
2724 if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
2725 page_free_entry_t *page_entry;
2726
2727 page_entry = pmap_pages_reclaim_list;
2728 pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
2729 pmap_simple_unlock(&pmap_pages_lock);
2730
2731 return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
2732 }
2733
2734 pmap_simple_unlock(&pmap_pages_lock);
2735
2736 pmap_simple_lock(&pt_pages_lock);
2737 ptdp = (pt_desc_t *)queue_first(&pt_page_list);
2738 found_page = FALSE;
2739
2740 while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
2741 if ((ptdp->pmap->nested == FALSE)
2742 && (pmap_try_lock(ptdp->pmap))) {
2743 assert(ptdp->pmap != kernel_pmap);
2744 unsigned refcnt_acc = 0;
2745 unsigned wiredcnt_acc = 0;
2746
2747 for (i = 0; i < PT_INDEX_MAX; i++) {
2748 if (ptdp->ptd_info[i].refcnt == PT_DESC_REFCOUNT) {
2749 /* Do not attempt to free a page that contains an L2 table */
2750 refcnt_acc = 0;
2751 break;
2752 }
2753 refcnt_acc += ptdp->ptd_info[i].refcnt;
2754 wiredcnt_acc += ptdp->ptd_info[i].wiredcnt;
2755 }
2756 if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
2757 found_page = TRUE;
2758 /* Leave ptdp->pmap locked here. We're about to reclaim
2759 * a tt page from it, so we don't want anyone else messing
2760 * with it while we do that. */
2761 break;
2762 }
2763 pmap_unlock(ptdp->pmap);
2764 }
2765 ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
2766 }
2767 if (!found_page) {
2768 pmap_simple_unlock(&pt_pages_lock);
2769 return (pmap_paddr_t)0;
2770 } else {
2771 int remove_count = 0;
2772 bool need_strong_sync = false;
2773 vm_map_address_t va;
2774 pmap_t pmap;
2775 pt_entry_t *bpte, *epte;
2776 pt_entry_t *pte_p;
2777 tt_entry_t *tte_p;
2778 uint32_t rmv_spte = 0;
2779
2780 pmap_simple_unlock(&pt_pages_lock);
2781 pmap = ptdp->pmap;
2782 pmap_assert_locked_w(pmap); // pmap write lock should be held from loop above
2783
2784 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
2785
2786 for (i = 0; i < (PAGE_SIZE / pt_attr_page_size(pt_attr)); i++) {
2787 va = ptdp->ptd_info[i].va;
2788
2789 /* If the VA is bogus, this may represent an unallocated region
2790 * or one which is in transition (already being freed or expanded).
2791 * Don't try to remove mappings here. */
2792 if (va == (vm_offset_t)-1) {
2793 continue;
2794 }
2795
2796 tte_p = pmap_tte(pmap, va);
2797 if ((tte_p != (tt_entry_t *) NULL)
2798 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
2799 pte_p = (pt_entry_t *) ttetokv(*tte_p);
2800 bpte = &pte_p[pte_index(pmap, pt_attr, va)];
2801 epte = bpte + pt_attr_page_size(pt_attr) / sizeof(pt_entry_t);
2802 /*
2803 * Use PMAP_OPTIONS_REMOVE to clear any
2804 * "compressed" markers and update the
2805 * "compressed" counter in pmap->stats.
2806 * This means that we lose accounting for
2807 * any compressed pages in this range
2808 * but the alternative is to not be able
2809 * to account for their future decompression,
2810 * which could cause the counter to drift
2811 * more and more.
2812 */
2813 remove_count += pmap_remove_range_options(
2814 pmap, va, bpte, epte,
2815 &rmv_spte, &need_strong_sync, PMAP_OPTIONS_REMOVE);
2816 if (ptd_get_info(ptdp, pte_p)->refcnt != 0) {
2817 panic("%s: ptdp %p, count %d", __FUNCTION__, ptdp, ptd_get_info(ptdp, pte_p)->refcnt);
2818 }
2819
2820 pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
2821
2822 if (remove_count > 0) {
2823 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, (size_t)pt_attr_leaf_table_size(pt_attr), pmap);
2824 } else {
2825 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(va, pmap);
2826 }
2827 }
2828 }
2829 // Undo the lock we grabbed when we found ptdp above
2830 pmap_unlock(pmap);
2831 pmap_sync_tlb(need_strong_sync);
2832 }
2833 pmap_simple_lock(&pmap_pages_lock);
2834 }
2835 }
2836
2837 #if XNU_MONITOR
2838 /*
2839 * Return a PPL page to the free list.
2840 */
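/*
 * The PPL page free list is threaded through the free pages themselves: the
 * first pointer-sized word of each free page (via its physical-aperture
 * mapping) holds the previous list head.
 */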
2841 MARK_AS_PMAP_TEXT static void
2842 pmap_give_free_ppl_page(pmap_paddr_t paddr)
2843 {
2844 assert((paddr & ARM_PGMASK) == 0);
2845 void ** new_head = (void **)phystokv(paddr);
2846 pmap_simple_lock(&pmap_ppl_free_page_lock);
2847
2848 void * cur_head = pmap_ppl_free_page_list;
2849 *new_head = cur_head;
2850 pmap_ppl_free_page_list = new_head;
2851 pmap_ppl_free_page_count++;
2852
2853 pmap_simple_unlock(&pmap_ppl_free_page_lock);
2854 }
2855
2856 /*
2857 * Get a PPL page from the free list.
2858 */
2859 MARK_AS_PMAP_TEXT static pmap_paddr_t
2860 pmap_get_free_ppl_page(void)
2861 {
2862 pmap_paddr_t result = 0;
2863
2864 pmap_simple_lock(&pmap_ppl_free_page_lock);
2865
2866 if (pmap_ppl_free_page_list != NULL) {
2867 void ** new_head = NULL;
2868 new_head = *((void**)pmap_ppl_free_page_list);
2869 result = kvtophys((vm_offset_t)pmap_ppl_free_page_list);
2870 pmap_ppl_free_page_list = new_head;
2871 pmap_ppl_free_page_count--;
2872 } else {
2873 result = 0L;
2874 }
2875
2876 pmap_simple_unlock(&pmap_ppl_free_page_lock);
2877 assert((result & ARM_PGMASK) == 0);
2878
2879 return result;
2880 }
2881
2882 /*
2883 * pmap_mark_page_as_ppl_page claims a page on behalf of the PPL by marking it
2884 * as PPL-owned and only allowing the PPL to write to it.
2885 */
2886 MARK_AS_PMAP_TEXT static void
2887 pmap_mark_page_as_ppl_page_internal(pmap_paddr_t pa, bool initially_free)
2888 {
2889 vm_offset_t kva = 0;
2890 unsigned int pai = 0;
2891 pp_attr_t attr;
2892
2893 /*
2894 * Mark each page that we allocate as belonging to the monitor, as we
2895 * intend to use it for monitor-y stuff (page tables, table pages, that
2896 * sort of thing).
2897 */
2898 if (!pa_valid(pa)) {
2899 panic("%s: bad address, "
2900 "pa=%p",
2901 __func__,
2902 (void *)pa);
2903 }
2904
2905 pai = (unsigned int)pa_index(pa);
2906 LOCK_PVH(pai);
2907
2908 /* A page that the PPL already owns can't be given to the PPL. */
2909 if (pa_test_monitor(pa)) {
2910 panic("%s: page already belongs to PPL, "
2911 "pa=0x%llx",
2912 __FUNCTION__,
2913 pa);
2914 }
2915 /* The page cannot be mapped outside of the physical aperture. */
2916 if (!pmap_verify_free((ppnum_t)atop(pa))) {
2917 panic("%s: page is not free, "
2918 "pa=0x%llx",
2919 __FUNCTION__,
2920 pa);
2921 }
2922
2923 do {
2924 attr = pp_attr_table[pai];
2925 if (attr & PP_ATTR_NO_MONITOR) {
2926 panic("%s: page excluded from PPL, "
2927 "pa=0x%llx",
2928 __FUNCTION__,
2929 pa);
2930 }
2931 } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_MONITOR, &pp_attr_table[pai]));
2932
2933 UNLOCK_PVH(pai);
2934
2935 kva = phystokv(pa);
2936 pmap_set_xprr_perm(kva, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
2937
2938 if (initially_free) {
2939 pmap_give_free_ppl_page(pa);
2940 }
2941 }
2942
2943 static void
2944 pmap_mark_page_as_ppl_page(pmap_paddr_t pa)
2945 {
2946 pmap_mark_page_as_ppl_page_ppl(pa, true);
2947 }
2948
2949 MARK_AS_PMAP_TEXT static void
2950 pmap_mark_page_as_kernel_page(pmap_paddr_t pa)
2951 {
2952 vm_offset_t kva = 0;
2953 unsigned int pai = 0;
2954
2955 pai = (unsigned int)pa_index(pa);
2956 LOCK_PVH(pai);
2957
2958 if (!pa_test_monitor(pa)) {
2959 panic("%s: page is not a PPL page, "
2960 "pa=%p",
2961 __FUNCTION__,
2962 (void *)pa);
2963 }
2964
2965 pa_clear_monitor(pa);
2966 UNLOCK_PVH(pai);
2967
2968 kva = phystokv(pa);
2969 pmap_set_xprr_perm(kva, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
2970 }
2971
2972 MARK_AS_PMAP_TEXT static pmap_paddr_t
2973 pmap_release_ppl_pages_to_kernel_internal(void)
2974 {
2975 pmap_paddr_t pa = 0;
2976
2977 if (pmap_ppl_free_page_count <= PMAP_MIN_FREE_PPL_PAGES) {
2978 goto done;
2979 }
2980
2981 pa = pmap_get_free_ppl_page();
2982
2983 if (!pa) {
2984 goto done;
2985 }
2986
2987 pmap_mark_page_as_kernel_page(pa);
2988
2989 done:
2990 return pa;
2991 }
2992
2993 static uint64_t
2994 pmap_release_ppl_pages_to_kernel(void)
2995 {
2996 pmap_paddr_t pa = 0;
2997 vm_page_t m = VM_PAGE_NULL;
2998 vm_page_t local_freeq = VM_PAGE_NULL;
2999 uint64_t pmap_ppl_pages_returned_to_kernel_count = 0;
3000
3001 while (pmap_ppl_free_page_count > PMAP_MIN_FREE_PPL_PAGES) {
3002 pa = pmap_release_ppl_pages_to_kernel_ppl();
3003
3004 if (!pa) {
3005 break;
3006 }
3007
3008 /* If we retrieved a page, add it to the free queue. */
3009 vm_object_lock(pmap_object);
3010 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
3011 assert(m != VM_PAGE_NULL);
3012 assert(VM_PAGE_WIRED(m));
3013
3014 m->vmp_busy = TRUE;
3015 m->vmp_snext = local_freeq;
3016 local_freeq = m;
3017 pmap_ppl_pages_returned_to_kernel_count++;
3018 pmap_ppl_pages_returned_to_kernel_count_total++;
3019
3020 vm_object_unlock(pmap_object);
3021 }
3022
3023 if (local_freeq) {
3024 /* We need to hold the object lock for freeing pages. */
3025 vm_object_lock(pmap_object);
3026 vm_page_free_list(local_freeq, TRUE);
3027 vm_object_unlock(pmap_object);
3028 }
3029
3030 return pmap_ppl_pages_returned_to_kernel_count;
3031 }
3032 #endif
3033
3034 static inline void
3035 pmap_enqueue_pages(vm_page_t m)
3036 {
3037 vm_page_t m_prev;
3038 vm_object_lock(pmap_object);
3039 while (m != VM_PAGE_NULL) {
3040 vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
3041 m_prev = m;
3042 m = NEXT_PAGE(m_prev);
3043 *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
3044 }
3045 vm_object_unlock(pmap_object);
3046 }
3047
3048 static kern_return_t
3049 pmap_pages_alloc_zeroed(
3050 pmap_paddr_t *pa,
3051 unsigned size,
3052 unsigned option)
3053 {
3054 #if XNU_MONITOR
3055 ASSERT_NOT_HIBERNATING();
3056
3057 if (size != PAGE_SIZE) {
3058 panic("%s: size != PAGE_SIZE, "
3059 "pa=%p, size=%u, option=%u",
3060 __FUNCTION__,
3061 pa, size, option);
3062 }
3063
3064
3065 assert(option & PMAP_PAGES_ALLOCATE_NOWAIT);
3066
3067 *pa = pmap_get_free_ppl_page();
3068
3069 if ((*pa == 0) && (option & PMAP_PAGES_RECLAIM_NOWAIT)) {
3070 *pa = pmap_pages_reclaim();
3071 }
3072
3073 if (*pa == 0) {
3074 return KERN_RESOURCE_SHORTAGE;
3075 } else {
3076 bzero((void*)phystokv(*pa), size);
3077 return KERN_SUCCESS;
3078 }
3079 #else
3080 vm_page_t m = VM_PAGE_NULL;
3081
3082 thread_t self = current_thread();
3083 // We qualify to allocate reserved memory
3084 uint16_t thread_options = self->options;
3085 self->options |= TH_OPT_VMPRIV;
3086 if (__probable(size == PAGE_SIZE)) {
3087 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
3088 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
3089 break;
3090 }
3091
3092 VM_PAGE_WAIT();
3093 }
3094 if (m != VM_PAGE_NULL) {
3095 vm_page_lock_queues();
3096 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
3097 vm_page_unlock_queues();
3098 }
3099 } else if (size == 2 * PAGE_SIZE) {
3100 while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
3101 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
3102 break;
3103 }
3104
3105 VM_PAGE_WAIT();
3106 }
3107 } else {
3108 panic("%s: invalid size %u", __func__, size);
3109 }
3110
3111 self->options = thread_options;
3112
3113 if ((m == VM_PAGE_NULL) && (option & PMAP_PAGES_RECLAIM_NOWAIT)) {
3114 assert(size == PAGE_SIZE);
3115 *pa = pmap_pages_reclaim();
3116 if (*pa != 0) {
3117 bzero((void*)phystokv(*pa), size);
3118 return KERN_SUCCESS;
3119 }
3120 }
3121
3122 if (m == VM_PAGE_NULL) {
3123 return KERN_RESOURCE_SHORTAGE;
3124 }
3125
3126 *pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
3127
3128 pmap_enqueue_pages(m);
3129
3130 OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
3131 OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);
3132
3133 bzero((void*)phystokv(*pa), size);
3134 return KERN_SUCCESS;
3135 #endif
3136 }
3137
3138 #if XNU_MONITOR
3139 static pmap_paddr_t
3140 pmap_alloc_page_for_kern(unsigned int options)
3141 {
3142 pmap_paddr_t paddr;
3143 vm_page_t m;
3144
3145 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
3146 if (options & PMAP_PAGES_ALLOCATE_NOWAIT) {
3147 return 0;
3148 }
3149 VM_PAGE_WAIT();
3150 }
3151
3152 vm_page_lock_queues();
3153 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
3154 vm_page_unlock_queues();
3155
3156 paddr = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
3157
3158 if (__improbable(paddr == 0)) {
3159 panic("%s: paddr is 0", __func__);
3160 }
3161
3162 pmap_enqueue_pages(m);
3163
3164 OSAddAtomic(1, &inuse_pmap_pages_count);
3165 OSAddAtomic64(1, &alloc_pmap_pages_count);
3166
3167 return paddr;
3168 }
3169
3170 static void
3171 pmap_alloc_page_for_ppl(unsigned int options)
3172 {
3173 thread_t self = current_thread();
3174 // We qualify to allocate reserved memory
3175 uint16_t thread_options = self->options;
3176 self->options |= TH_OPT_VMPRIV;
3177 pmap_paddr_t paddr = pmap_alloc_page_for_kern(options);
3178 self->options = thread_options;
3179 if (paddr != 0) {
3180 pmap_mark_page_as_ppl_page(paddr);
3181 }
3182 }
3183
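/*
 * The PPL's pmap structures come from a fixed pmap_array; the free list is
 * threaded through the array entries themselves (the first pointer-sized
 * word of a free struct pmap holds the next free entry). See
 * pmap_bootstrap_pmap_free_list() below.
 */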
3184 static pmap_t
3185 pmap_alloc_pmap(void)
3186 {
3187 pmap_t pmap = PMAP_NULL;
3188
3189 pmap_simple_lock(&pmap_free_list_lock);
3190
3191 if (pmap_free_list != PMAP_NULL) {
3192 pmap = pmap_free_list;
3193 pmap_free_list = *((pmap_t *)pmap);
3194
3195 if (!PMAP_PTR_IS_VALID(pmap)) {
3196 panic("%s: allocated pmap is not valid, pmap=%p",
3197 __FUNCTION__, pmap);
3198 }
3199 }
3200
3201 pmap_simple_unlock(&pmap_free_list_lock);
3202
3203 return pmap;
3204 }
3205
3206 static void
3207 pmap_free_pmap(pmap_t pmap)
3208 {
3209 if (!PMAP_PTR_IS_VALID(pmap)) {
3210 panic("%s: pmap is not valid, "
3211 "pmap=%p",
3212 __FUNCTION__,
3213 pmap);
3214 }
3215
3216 pmap_simple_lock(&pmap_free_list_lock);
3217 *((pmap_t *)pmap) = pmap_free_list;
3218 pmap_free_list = pmap;
3219 pmap_simple_unlock(&pmap_free_list_lock);
3220 }
3221
3222 static void
3223 pmap_bootstrap_pmap_free_list(void)
3224 {
3225 pmap_t cur_head = PMAP_NULL;
3226 unsigned long i = 0;
3227
3228 simple_lock_init(&pmap_free_list_lock, 0);
3229
3230 for (i = 0; i < pmap_array_count; i++) {
3231 *((pmap_t *)(&pmap_array[i])) = cur_head;
3232 cur_head = &pmap_array[i];
3233 }
3234
3235 pmap_free_list = cur_head;
3236 }
3237 #endif
3238
3239 static void
3240 pmap_pages_free(
3241 pmap_paddr_t pa,
3242 unsigned size)
3243 {
3244 pmap_simple_lock(&pmap_pages_lock);
3245
3246 if (pmap_pages_request_count != 0) {
3247 page_free_entry_t *page_entry;
3248
3249 pmap_pages_request_count--;
3250 page_entry = (page_free_entry_t *)phystokv(pa);
3251 page_entry->next = pmap_pages_reclaim_list;
3252 pmap_pages_reclaim_list = page_entry;
3253 pmap_simple_unlock(&pmap_pages_lock);
3254
3255 return;
3256 }
3257
3258 pmap_simple_unlock(&pmap_pages_lock);
3259
3260 #if XNU_MONITOR
3261 (void)size;
3262
3263 pmap_give_free_ppl_page(pa);
3264 #else
3265 vm_page_t m;
3266 pmap_paddr_t pa_max;
3267
3268 OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);
3269
3270 for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
3271 vm_object_lock(pmap_object);
3272 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
3273 assert(m != VM_PAGE_NULL);
3274 assert(VM_PAGE_WIRED(m));
3275 vm_page_lock_queues();
3276 vm_page_free(m);
3277 vm_page_unlock_queues();
3278 vm_object_unlock(pmap_object);
3279 }
3280 #endif
3281 }
3282
3283 static inline void
3284 PMAP_ZINFO_PALLOC(
3285 pmap_t pmap, int bytes)
3286 {
3287 pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
3288 }
3289
3290 static inline void
3291 PMAP_ZINFO_PFREE(
3292 pmap_t pmap,
3293 int bytes)
3294 {
3295 pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
3296 }
3297
3298 static inline void
3299 pmap_tt_ledger_credit(
3300 pmap_t pmap,
3301 vm_size_t size)
3302 {
3303 if (pmap != kernel_pmap) {
3304 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
3305 pmap_ledger_credit(pmap, task_ledgers.page_table, size);
3306 }
3307 }
3308
3309 static inline void
3310 pmap_tt_ledger_debit(
3311 pmap_t pmap,
3312 vm_size_t size)
3313 {
3314 if (pmap != kernel_pmap) {
3315 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
3316 pmap_ledger_debit(pmap, task_ledgers.page_table, size);
3317 }
3318 }
3319
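/*
 * Pseudo-LRU accounting for hardware ASIDs: asid_plru_bitmap tracks, in
 * 64-ASID groups, which ASIDs have not been used recently. Clearing a bit
 * marks that ASID as recently used; when a group's bitmap empties it is
 * refilled and its generation count bumped, so the allocator below can
 * prefer the group with the oldest generation.
 */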
3320 static inline void
3321 pmap_update_plru(uint16_t asid_index)
3322 {
3323 if (__probable(pmap_asid_plru)) {
3324 unsigned plru_index = asid_index >> 6;
3325 if (__improbable(os_atomic_andnot(&asid_plru_bitmap[plru_index], (1ULL << (asid_index & 63)), relaxed) == 0)) {
3326 asid_plru_generation[plru_index] = ++asid_plru_gencount;
3327 asid_plru_bitmap[plru_index] = ((plru_index == (MAX_HW_ASIDS >> 6)) ? ~(1ULL << 63) : UINT64_MAX);
3328 }
3329 }
3330 }
3331
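/*
 * Virtual ASID allocation (as implied by the arithmetic below): the vasid
 * space of pmap_max_asids entries is divided into chunks of asid_chunk_size,
 * with hw_asid = vasid % asid_chunk_size and sw_asid = vasid / asid_chunk_size.
 * A vasid whose chunk offset equals MAX_HW_ASIDS cannot be backed by a real
 * hardware ASID and is remapped to a reserved sw_asid of UINT8_MAX.
 */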
3332 static bool
3333 alloc_asid(pmap_t pmap)
3334 {
3335 int vasid = -1;
3336 uint16_t hw_asid;
3337
3338 pmap_simple_lock(&asid_lock);
3339
3340 if (__probable(pmap_asid_plru)) {
3341 unsigned plru_index = 0;
3342 uint64_t lowest_gen = asid_plru_generation[0];
3343 uint64_t lowest_gen_bitmap = asid_plru_bitmap[0];
3344 for (unsigned i = 1; i < (sizeof(asid_plru_generation) / sizeof(asid_plru_generation[0])); ++i) {
3345 if (asid_plru_generation[i] < lowest_gen) {
3346 plru_index = i;
3347 lowest_gen = asid_plru_generation[i];
3348 lowest_gen_bitmap = asid_plru_bitmap[i];
3349 }
3350 }
3351
3352 for (; plru_index < BITMAP_LEN(pmap_max_asids); plru_index += ((MAX_HW_ASIDS + 1) >> 6)) {
3353 uint64_t temp_plru = lowest_gen_bitmap & asid_bitmap[plru_index];
3354 if (temp_plru) {
3355 vasid = (plru_index << 6) + lsb_first(temp_plru);
3356 #if DEVELOPMENT || DEBUG
3357 ++pmap_asid_hits;
3358 #endif
3359 break;
3360 }
3361 }
3362 }
3363 if (__improbable(vasid < 0)) {
3364 // bitmap_first() returns highest-order bits first, but a 0-based scheme works
3365 // slightly better with the collision detection scheme used by pmap_switch_internal().
3366 vasid = bitmap_lsb_first(&asid_bitmap[0], pmap_max_asids);
3367 #if DEVELOPMENT || DEBUG
3368 ++pmap_asid_misses;
3369 #endif
3370 }
3371 if (__improbable(vasid < 0)) {
3372 pmap_simple_unlock(&asid_lock);
3373 return false;
3374 }
3375 assert((uint32_t)vasid < pmap_max_asids);
3376 assert(bitmap_test(&asid_bitmap[0], (unsigned int)vasid));
3377 bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
3378 pmap_simple_unlock(&asid_lock);
3379 hw_asid = vasid % asid_chunk_size;
3380 pmap->sw_asid = (uint8_t)(vasid / asid_chunk_size);
3381 if (__improbable(hw_asid == MAX_HW_ASIDS)) {
3382 /* If we took a PLRU "miss" and ended up with a hardware ASID we can't actually support,
3383 * reassign to a reserved VASID. */
3384 assert(pmap->sw_asid < UINT8_MAX);
3385 pmap->sw_asid = UINT8_MAX;
3386 /* Allocate from the high end of the hardware ASID range to reduce the likelihood of
3387 * aliasing with vital system processes, which are likely to have lower ASIDs. */
3388 hw_asid = MAX_HW_ASIDS - 1 - (uint16_t)(vasid / asid_chunk_size);
3389 assert(hw_asid < MAX_HW_ASIDS);
3390 }
3391 pmap_update_plru(hw_asid);
3392 hw_asid += 1; // Account for ASID 0, which is reserved for the kernel
3393 #if __ARM_KERNEL_PROTECT__
3394 hw_asid <<= 1; // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
3395 #endif
3396 pmap->hw_asid = hw_asid;
3397 return true;
3398 }
3399
3400 static void
3401 free_asid(pmap_t pmap)
3402 {
3403 unsigned int vasid;
3404 uint16_t hw_asid = os_atomic_xchg(&pmap->hw_asid, 0, relaxed);
3405 if (__improbable(hw_asid == 0)) {
3406 return;
3407 }
3408
3409 #if __ARM_KERNEL_PROTECT__
3410 hw_asid >>= 1;
3411 #endif
3412 hw_asid -= 1;
3413
3414 if (__improbable(pmap->sw_asid == UINT8_MAX)) {
3415 vasid = ((MAX_HW_ASIDS - 1 - hw_asid) * asid_chunk_size) + MAX_HW_ASIDS;
3416 } else {
3417 vasid = ((unsigned int)pmap->sw_asid * asid_chunk_size) + hw_asid;
3418 }
3419
3420 if (__probable(pmap_asid_plru)) {
3421 os_atomic_or(&asid_plru_bitmap[hw_asid >> 6], (1ULL << (hw_asid & 63)), relaxed);
3422 }
3423 pmap_simple_lock(&asid_lock);
3424 assert(!bitmap_test(&asid_bitmap[0], vasid));
3425 bitmap_set(&asid_bitmap[0], vasid);
3426 pmap_simple_unlock(&asid_lock);
3427 }
3428
3429
3430 #if XNU_MONITOR
3431
3432 /*
3433 * Increase the padding for PPL devices to accommodate increased
3434 * mapping pressure from IOMMUs. This isn't strictly necessary, but
3435 * will reduce the need to retry mappings due to PV allocation failure.
3436 */
3437
3438 #define PV_LOW_WATER_MARK_DEFAULT (0x400)
3439 #define PV_KERN_LOW_WATER_MARK_DEFAULT (0x400)
3440 #define PV_ALLOC_CHUNK_INITIAL (0x400)
3441 #define PV_KERN_ALLOC_CHUNK_INITIAL (0x400)
3442 #define PV_CPU_MIN (0x80)
3443 #define PV_CPU_MAX (0x400)
3444
3445 #else
3446
3447 #define PV_LOW_WATER_MARK_DEFAULT (0x200)
3448 #define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200)
3449 #define PV_ALLOC_CHUNK_INITIAL (0x200)
3450 #define PV_KERN_ALLOC_CHUNK_INITIAL (0x200)
3451 #define PV_CPU_MIN (0x40)
3452 #define PV_CPU_MAX (0x200)
3453
3454 #endif
3455
3456 #define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
3457 #define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
3458
3459 uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
3460
3461 uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA = PV_KERN_LOW_WATER_MARK_DEFAULT;
3462 uint32_t pv_alloc_initial_target MARK_AS_PMAP_DATA = PV_ALLOC_INITIAL_TARGET;
3463 uint32_t pv_kern_alloc_initial_target MARK_AS_PMAP_DATA = PV_KERN_ALLOC_INITIAL_TARGET;
3464
3465 unsigned pmap_reserve_replenish_stat MARK_AS_PMAP_DATA;
3466 unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
3467
3468 static inline void pv_list_alloc(pv_entry_t **pv_ep);
3469 static inline void pv_list_kern_alloc(pv_entry_t **pv_e);
3470 static inline void pv_list_free(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target);
3471
3472 static pv_alloc_return_t
3473 pv_alloc(
3474 pmap_t pmap,
3475 unsigned int pai,
3476 pv_entry_t **pvepp)
3477 {
3478 if (pmap != NULL) {
3479 pmap_assert_locked_w(pmap);
3480 }
3481 ASSERT_PVH_LOCKED(pai);
3482 pv_list_alloc(pvepp);
3483 if (PV_ENTRY_NULL != *pvepp) {
3484 return PV_ALLOC_SUCCESS;
3485 }
3486 #if XNU_MONITOR
3487 unsigned alloc_flags = PMAP_PAGES_ALLOCATE_NOWAIT;
3488 #else
3489 unsigned alloc_flags = 0;
3490 #endif
3491 if ((pmap == NULL) || (kernel_pmap == pmap)) {
3492 pv_list_kern_alloc(pvepp);
3493
3494 if (PV_ENTRY_NULL != *pvepp) {
3495 return PV_ALLOC_SUCCESS;
3496 }
3497 alloc_flags = PMAP_PAGES_ALLOCATE_NOWAIT | PMAP_PAGES_RECLAIM_NOWAIT;
3498 }
3499 pv_entry_t *pv_e;
3500 pv_entry_t *pv_eh;
3501 pv_entry_t *pv_et;
3502 int pv_cnt;
3503 pmap_paddr_t pa;
3504 kern_return_t ret;
3505 pv_alloc_return_t pv_status = PV_ALLOC_RETRY;
3506
3507 UNLOCK_PVH(pai);
3508 if (pmap != NULL) {
3509 pmap_unlock(pmap);
3510 }
3511
3512 ret = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, alloc_flags);
3513
3514 if (ret != KERN_SUCCESS) {
3515 pv_status = PV_ALLOC_FAIL;
3516 goto pv_alloc_cleanup;
3517 }
3518
3519 pv_page_count++;
3520
3521 pv_e = (pv_entry_t *)phystokv(pa);
3522 *pvepp = pv_e;
3523 pv_cnt = (PAGE_SIZE / sizeof(pv_entry_t)) - 1;
3524 pv_eh = pv_e + 1;
3525 pv_et = &pv_e[pv_cnt];
3526
3527 pv_list_free(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);
3528 pv_alloc_cleanup:
3529 if (pmap != NULL) {
3530 pmap_lock(pmap);
3531 }
3532 LOCK_PVH(pai);
3533 return pv_status;
3534 }
3535
3536 static inline void
3537 pv_free_entry(
3538 pv_entry_t *pvep)
3539 {
3540 pv_list_free(pvep, pvep, 1, pv_kern_low_water_mark);
3541 }
3542
3543 static inline void
3544 pv_free_list_alloc(pv_free_list_t *free_list, pv_entry_t **pv_ep)
3545 {
3546 assert(((free_list->list != NULL) && (free_list->count > 0)) ||
3547 ((free_list->list == NULL) && (free_list->count == 0)));
3548
3549 if ((*pv_ep = free_list->list) != NULL) {
3550 pv_entry_t *pv_e = *pv_ep;
3551 if ((pv_e->pve_next == NULL) && (free_list->count > 1)) {
3552 free_list->list = pv_e + 1;
3553 } else {
3554 free_list->list = pv_e->pve_next;
3555 pv_e->pve_next = PV_ENTRY_NULL;
3556 }
3557 free_list->count--;
3558 }
3559 }
3560
3561 static inline void
3562 pv_list_alloc(pv_entry_t **pv_ep)
3563 {
3564 assert(*pv_ep == PV_ENTRY_NULL);
3565 #if !XNU_MONITOR
3566 mp_disable_preemption();
3567 #endif
3568 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
3569 pv_free_list_alloc(&pmap_cpu_data->pv_free, pv_ep);
3570 #if !XNU_MONITOR
3571 mp_enable_preemption();
3572 #endif
3573 if (*pv_ep != PV_ENTRY_NULL) {
3574 return;
3575 }
3576 #if !XNU_MONITOR
3577 if (pv_kern_free.count < pv_kern_low_water_mark) {
3578 /*
3579 * If the kernel reserved pool is low, let non-kernel mappings wait for a page
3580 * from the VM.
3581 */
3582 return;
3583 }
3584 #endif
3585 pmap_simple_lock(&pv_free_list_lock);
3586 pv_free_list_alloc(&pv_free, pv_ep);
3587 pmap_simple_unlock(&pv_free_list_lock);
3588 }
3589
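/*
 * Free-list policy: single entries are returned to the current CPU's cache
 * first; if that cache grows past PV_CPU_MAX, it is trimmed back to
 * PV_CPU_MIN and the surplus is spilled. Spilled or bulk frees go to the
 * kernel reserve list until it reaches kern_target, and to the global free
 * list otherwise.
 */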
3590 static inline void
3591 pv_list_free(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target)
3592 {
3593 if (pv_cnt == 1) {
3594 bool limit_exceeded = false;
3595 #if !XNU_MONITOR
3596 mp_disable_preemption();
3597 #endif
3598 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
3599 pv_et->pve_next = pmap_cpu_data->pv_free.list;
3600 pmap_cpu_data->pv_free.list = pv_eh;
3601 if (pmap_cpu_data->pv_free.count == PV_CPU_MIN) {
3602 pmap_cpu_data->pv_free_tail = pv_et;
3603 }
3604 pmap_cpu_data->pv_free.count += pv_cnt;
3605 if (__improbable(pmap_cpu_data->pv_free.count > PV_CPU_MAX)) {
3606 pv_et = pmap_cpu_data->pv_free_tail;
3607 pv_cnt = pmap_cpu_data->pv_free.count - PV_CPU_MIN;
3608 pmap_cpu_data->pv_free.list = pmap_cpu_data->pv_free_tail->pve_next;
3609 pmap_cpu_data->pv_free.count = PV_CPU_MIN;
3610 limit_exceeded = true;
3611 }
3612 #if !XNU_MONITOR
3613 mp_enable_preemption();
3614 #endif
3615 if (__probable(!limit_exceeded)) {
3616 return;
3617 }
3618 }
3619 if (__improbable(pv_kern_free.count < kern_target)) {
3620 pmap_simple_lock(&pv_kern_free_list_lock);
3621 pv_et->pve_next = pv_kern_free.list;
3622 pv_kern_free.list = pv_eh;
3623 pv_kern_free.count += pv_cnt;
3624 pmap_simple_unlock(&pv_kern_free_list_lock);
3625 } else {
3626 pmap_simple_lock(&pv_free_list_lock);
3627 pv_et->pve_next = pv_free.list;
3628 pv_free.list = pv_eh;
3629 pv_free.count += pv_cnt;
3630 pmap_simple_unlock(&pv_free_list_lock);
3631 }
3632 }
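/*
 * Sketch of the spill behaviour in pv_list_free(), using hypothetical values for
 * the tuning constants (PV_CPU_MIN and PV_CPU_MAX are defined elsewhere; assume 10
 * and 240 here purely for illustration). Single entries are pushed LIFO onto the
 * current CPU's cache. When that cache grows past PV_CPU_MAX, the chain from the
 * new head down to pv_free_tail (nominally count - PV_CPU_MIN entries) is detached
 * in one batch, leaving PV_CPU_MIN entries cached locally. That batch, like any
 * multi-entry free, is appended to the kernel-reserved list when
 * pv_kern_free.count is below kern_target, or to the global list otherwise.
 */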
3633
3634 static inline void
3635 pv_list_kern_alloc(pv_entry_t **pv_ep)
3636 {
3637 assert(*pv_ep == PV_ENTRY_NULL);
3638 pmap_simple_lock(&pv_kern_free_list_lock);
3639 if (pv_kern_free.count > 0) {
3640 pmap_kern_reserve_alloc_stat++;
3641 }
3642 pv_free_list_alloc(&pv_kern_free, pv_ep);
3643 pmap_simple_unlock(&pv_kern_free_list_lock);
3644 }
3645
3646 void
3647 mapping_adjust(void)
3648 {
3649 // Not implemented for arm/arm64
3650 }
3651
3652 /*
3653 * Fills the kernel and general PV free lists back up to their low watermarks.
3654 */
3655 MARK_AS_PMAP_TEXT static kern_return_t
3656 mapping_replenish_internal(uint32_t kern_target_count, uint32_t user_target_count)
3657 {
3658 pv_entry_t *pv_eh;
3659 pv_entry_t *pv_et;
3660 int pv_cnt;
3661 pmap_paddr_t pa;
3662 kern_return_t ret = KERN_SUCCESS;
3663
3664 while ((pv_free.count < user_target_count) || (pv_kern_free.count < kern_target_count)) {
3665 #if XNU_MONITOR
3666 if ((ret = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT)) != KERN_SUCCESS) {
3667 return ret;
3668 }
3669 #else
3670 ret = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, 0);
3671 assert(ret == KERN_SUCCESS);
3672 #endif
3673
3674 pv_page_count++;
3675
3676 pv_eh = (pv_entry_t *)phystokv(pa);
3677 pv_cnt = PAGE_SIZE / sizeof(pv_entry_t);
3678 pv_et = &pv_eh[pv_cnt - 1];
3679
3680 pmap_reserve_replenish_stat += pv_cnt;
3681 pv_list_free(pv_eh, pv_et, pv_cnt, kern_target_count);
3682 }
3683
3684 return ret;
3685 }
3686
3687 /*
3688 * Creates a target number of free pv_entry_t objects for the kernel free list
3689 * and the general free list.
3690 */
3691 MARK_AS_PMAP_TEXT static kern_return_t
3692 mapping_free_prime_internal(void)
3693 {
3694 return mapping_replenish_internal(pv_kern_alloc_initial_target, pv_alloc_initial_target);
3695 }
3696
3697 void
3698 mapping_free_prime(void)
3699 {
3700 kern_return_t kr = KERN_FAILURE;
3701
3702 #if XNU_MONITOR
3703 unsigned int i = 0;
3704
3705 /*
3706 * Allocate the needed PPL pages up front, to minimize the chance that
3707 * we will need to call into the PPL multiple times.
3708 */
3709 for (i = 0; i < pv_alloc_initial_target; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
3710 pmap_alloc_page_for_ppl(0);
3711 }
3712
3713 for (i = 0; i < pv_kern_alloc_initial_target; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
3714 pmap_alloc_page_for_ppl(0);
3715 }
3716
3717 while ((kr = mapping_free_prime_ppl()) == KERN_RESOURCE_SHORTAGE) {
3718 pmap_alloc_page_for_ppl(0);
3719 }
3720 #else
3721 kr = mapping_free_prime_internal();
3722 #endif
3723
3724 if (kr != KERN_SUCCESS) {
3725 panic("%s: failed, kr=%d",
3726 __FUNCTION__, kr);
3727 }
3728 }
3729
3730 static void
3731 ptd_bootstrap(
3732 pt_desc_t *ptdp,
3733 unsigned int ptd_cnt)
3734 {
3735 simple_lock_init(&ptd_free_list_lock, 0);
3736 // Region represented by ptdp should be cleared by pmap_bootstrap()
3737 *((void**)(&ptdp[ptd_cnt - 1])) = (void*)ptd_free_list;
3738 ptd_free_list = ptdp;
3739 ptd_free_count += ptd_cnt;
3740 ptd_preboot = FALSE;
3741 }
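/*
 * Free-list encoding for page table descriptors: a page-sized chunk of pt_desc_t
 * is linked into the free list by storing the previous list head in the first
 * word of the chunk's *last* descriptor (as done above and again in
 * ptd_alloc_unlinked()). Zeroed descriptors therefore read back a NULL link, and
 * the allocator walks the chunk with plain pointer arithmetic (ptdp + 1) until it
 * pops that last descriptor, whose embedded link leads to the next chunk.
 * Individually freed descriptors from ptd_deallocate() carry their link in their
 * own first word, like a conventional singly linked list.
 */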
3742
3743 static pt_desc_t*
3744 ptd_alloc_unlinked(void)
3745 {
3746 pt_desc_t *ptdp;
3747 unsigned i;
3748
3749 if (!ptd_preboot) {
3750 pmap_simple_lock(&ptd_free_list_lock);
3751 }
3752
3753 assert(((ptd_free_list != NULL) && (ptd_free_count > 0)) ||
3754 ((ptd_free_list == NULL) && (ptd_free_count == 0)));
3755
3756 if (ptd_free_count == 0) {
3757 unsigned int ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
3758
3759 if (ptd_preboot) {
3760 ptdp = (pt_desc_t *)avail_start;
3761 avail_start += PAGE_SIZE;
3762 bzero(ptdp, PAGE_SIZE);
3763 } else {
3764 pmap_paddr_t pa;
3765
3766 pmap_simple_unlock(&ptd_free_list_lock);
3767
3768 if (pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
3769 return NULL;
3770 }
3771 ptdp = (pt_desc_t *)phystokv(pa);
3772
3773 pmap_simple_lock(&ptd_free_list_lock);
3774 }
3775
3776 *((void**)(&ptdp[ptd_cnt - 1])) = (void*)ptd_free_list;
3777 ptd_free_list = ptdp;
3778 ptd_free_count += ptd_cnt;
3779 }
3780
3781 if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
3782 ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
3783 if ((ptd_free_list == NULL) && (ptd_free_count > 1)) {
3784 ptd_free_list = ptdp + 1;
3785 }
3786 ptd_free_count--;
3787 } else {
3788 panic("%s: out of ptd entries",
3789 __FUNCTION__);
3790 }
3791
3792 if (!ptd_preboot) {
3793 pmap_simple_unlock(&ptd_free_list_lock);
3794 }
3795
3796 ptdp->pt_page.next = NULL;
3797 ptdp->pt_page.prev = NULL;
3798 ptdp->pmap = NULL;
3799
3800 for (i = 0; i < PT_INDEX_MAX; i++) {
3801 ptdp->ptd_info[i].va = (vm_offset_t)-1;
3802 ptdp->ptd_info[i].refcnt = 0;
3803 ptdp->ptd_info[i].wiredcnt = 0;
3804 }
3805
3806 return ptdp;
3807 }
3808
3809 static inline pt_desc_t*
3810 ptd_alloc(pmap_t pmap)
3811 {
3812 pt_desc_t *ptdp = ptd_alloc_unlinked();
3813
3814 if (ptdp == NULL) {
3815 return NULL;
3816 }
3817
3818 ptdp->pmap = pmap;
3819 if (pmap != kernel_pmap) {
3820 /* We should never try to reclaim kernel pagetable pages in
3821 * pmap_pages_reclaim(), so don't enter them into the list. */
3822 pmap_simple_lock(&pt_pages_lock);
3823 queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
3824 pmap_simple_unlock(&pt_pages_lock);
3825 }
3826
3827 pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
3828 return ptdp;
3829 }
3830
3831 static void
3832 ptd_deallocate(pt_desc_t *ptdp)
3833 {
3834 pmap_t pmap = ptdp->pmap;
3835
3836 if (ptd_preboot) {
3837 panic("%s: early boot, "
3838 "ptdp=%p",
3839 __FUNCTION__,
3840 ptdp);
3841 }
3842
3843 if (ptdp->pt_page.next != NULL) {
3844 pmap_simple_lock(&pt_pages_lock);
3845 queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
3846 pmap_simple_unlock(&pt_pages_lock);
3847 }
3848 pmap_simple_lock(&ptd_free_list_lock);
3849 (*(void **)ptdp) = (void *)ptd_free_list;
3850 ptd_free_list = (pt_desc_t *)ptdp;
3851 ptd_free_count++;
3852 pmap_simple_unlock(&ptd_free_list_lock);
3853 if (pmap != NULL) {
3854 pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
3855 }
3856 }
3857
3858 static void
3859 ptd_init(
3860 pt_desc_t *ptdp,
3861 pmap_t pmap,
3862 vm_map_address_t va,
3863 unsigned int level,
3864 pt_entry_t *pte_p)
3865 {
3866 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3867
3868 if (ptdp->pmap != pmap) {
3869 panic("%s: pmap mismatch, "
3870 "ptdp=%p, pmap=%p, va=%p, level=%u, pte_p=%p",
3871 __FUNCTION__,
3872 ptdp, pmap, (void*)va, level, pte_p);
3873 }
3874
3875 assert(level > pt_attr_root_level(pt_attr));
3876 ptd_info_t *ptd_info = ptd_get_info(ptdp, pte_p);
3877 ptd_info->va = (vm_offset_t) va & ~pt_attr_ln_pt_offmask(pt_attr, level - 1);
3878
3879 if (level < pt_attr_leaf_level(pt_attr)) {
3880 ptd_info->refcnt = PT_DESC_REFCOUNT;
3881 }
3882 }
3883
3884
3885 boolean_t
3886 pmap_valid_address(
3887 pmap_paddr_t addr)
3888 {
3889 return pa_valid(addr);
3890 }
3891
3892 #if (__ARM_VMSA__ == 7)
3893
3894 /*
3895 * Given an offset and a map, compute the address of the
3896 * corresponding translation table entry.
3897 */
3898 static inline tt_entry_t *
3899 pmap_tte(pmap_t pmap,
3900 vm_map_address_t addr)
3901 {
3902 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3903
3904 if (!(tte_index(pmap, pt_attr, addr) < pmap->tte_index_max)) {
3905 return (tt_entry_t *)NULL;
3906 }
3907 return &pmap->tte[tte_index(pmap, pt_attr, addr)];
3908 }
3909
3910
3911 /*
3912 * Given an offset and a map, compute the address of the
3913 * pte. If the address is invalid with respect to the map
3914 * then PT_ENTRY_NULL is returned (and the map may need to grow).
3915 *
3916 * This is only used internally.
3917 */
3918 static inline pt_entry_t *
3919 pmap_pte(
3920 pmap_t pmap,
3921 vm_map_address_t addr)
3922 {
3923 pt_entry_t *ptp;
3924 tt_entry_t *ttp;
3925 tt_entry_t tte;
3926
3927 ttp = pmap_tte(pmap, addr);
3928 if (ttp == (tt_entry_t *)NULL) {
3929 return PT_ENTRY_NULL;
3930 }
3931 tte = *ttp;
3932 #if MACH_ASSERT
3933 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
3934 panic("%s: Attempt to demote L1 block, tte=0x%lx, "
3935 "pmap=%p, addr=%p",
3936 __FUNCTION__, (unsigned long)tte,
3937 pmap, (void*)addr);
3938 }
3939 #endif
3940 if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
3941 return PT_ENTRY_NULL;
3942 }
3943 ptp = (pt_entry_t *) ttetokv(tte) + pte_index(pmap, pt_attr, addr);
3944 return ptp;
3945 }
3946
3947 __unused static inline tt_entry_t *
3948 pmap_ttne(pmap_t pmap,
3949 unsigned int target_level,
3950 vm_map_address_t addr)
3951 {
3952 tt_entry_t * ret_ttep = NULL;
3953
3954 switch (target_level) {
3955 case 1:
3956 ret_ttep = pmap_tte(pmap, addr);
3957 break;
3958 case 2:
3959 ret_ttep = (tt_entry_t *)pmap_pte(pmap, addr);
3960 break;
3961 default:
3962 panic("%s: bad level, "
3963 "pmap=%p, target_level=%u, addr=%p",
3964 __FUNCTION__,
3965 pmap, target_level, (void *)addr);
3966 }
3967
3968 return ret_ttep;
3969 }
3970
3971 #else
3972
3973 static inline tt_entry_t *
3974 pmap_ttne(pmap_t pmap,
3975 unsigned int target_level,
3976 vm_map_address_t addr)
3977 {
3978 tt_entry_t * ttp = NULL;
3979 tt_entry_t * ttep = NULL;
3980 tt_entry_t tte = ARM_TTE_EMPTY;
3981 unsigned int cur_level;
3982
3983 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3984
3985 ttp = pmap->tte;
3986
3987 assert(target_level <= pt_attr->pta_max_level);
3988
3989 for (cur_level = pt_attr->pta_root_level; cur_level <= target_level; cur_level++) {
3990 ttep = &ttp[ttn_index(pmap, pt_attr, addr, cur_level)];
3991
3992 if (cur_level == target_level) {
3993 break;
3994 }
3995
3996 tte = *ttep;
3997
3998 #if MACH_ASSERT
3999 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
4000 panic("%s: Attempt to demote L%u block, tte=0x%llx, "
4001 "pmap=%p, target_level=%u, addr=%p",
4002 __FUNCTION__, cur_level, tte,
4003 pmap, target_level, (void*)addr);
4004 }
4005 #endif
4006 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
4007 return TT_ENTRY_NULL;
4008 }
4009
4010 ttp = (tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
4011 }
4012
4013 return ttep;
4014 }
4015
4016 /*
4017 * Given an offset and a map, compute the address of level 1 translation table entry.
4018 * If the translation is invalid then TT_ENTRY_NULL is returned.
4019 */
4020 static inline tt_entry_t *
4021 pmap_tt1e(pmap_t pmap,
4022 vm_map_address_t addr)
4023 {
4024 return pmap_ttne(pmap, PMAP_TT_L1_LEVEL, addr);
4025 }
4026
4027 /*
4028 * Given an offset and a map, compute the address of level 2 translation table entry.
4029 * If the translation is invalid then TT_ENTRY_NULL is returned.
4030 */
4031 static inline tt_entry_t *
4032 pmap_tt2e(pmap_t pmap,
4033 vm_map_address_t addr)
4034 {
4035 return pmap_ttne(pmap, PMAP_TT_L2_LEVEL, addr);
4036 }
4037
4038
4039 /*
4040 * Given an offset and a map, compute the address of level 3 translation table entry.
4041 * If the translation is invalid then PT_ENTRY_NULL is returned.
4042 */
4043 static inline pt_entry_t *
4044 pmap_tt3e(
4045 pmap_t pmap,
4046 vm_map_address_t addr)
4047 {
4048 return (pt_entry_t*)pmap_ttne(pmap, PMAP_TT_L3_LEVEL, addr);
4049 }
4050
4051 static inline tt_entry_t *
4052 pmap_tte(
4053 pmap_t pmap,
4054 vm_map_address_t addr)
4055 {
4056 return pmap_tt2e(pmap, addr);
4057 }
4058
4059 static inline pt_entry_t *
4060 pmap_pte(
4061 pmap_t pmap,
4062 vm_map_address_t addr)
4063 {
4064 return pmap_tt3e(pmap, addr);
4065 }
4066
4067 #endif
4068
4069 #if __APRR_SUPPORTED__
4070 /*
4071 * Indicates whether the given PTE has special restrictions due to the current
4072 * APRR settings.
4073 */
4074 static boolean_t
4075 is_pte_aprr_protected(pt_entry_t pte)
4076 {
4077 uint64_t aprr_el0_value;
4078 uint64_t aprr_el1_value;
4079 uint64_t aprr_index;
4080
4081 MRS(aprr_el0_value, APRR_EL0);
4082 MRS(aprr_el1_value, APRR_EL1);
4083 aprr_index = PTE_TO_APRR_INDEX(pte);
4084
4085 /* Check to see if this mapping had APRR restrictions. */
4086 if ((APRR_EXTRACT_IDX_ATTR(aprr_el0_value, aprr_index) != APRR_EXTRACT_IDX_ATTR(APRR_EL0_RESET, aprr_index)) ||
4087 (APRR_EXTRACT_IDX_ATTR(aprr_el1_value, aprr_index) != APRR_EXTRACT_IDX_ATTR(APRR_EL1_RESET, aprr_index))
4088 ) {
4089 return TRUE;
4090 }
4091
4092 return FALSE;
4093 }
4094 #endif /* __APRR_SUPPORTED__ */
4095
4096
4097 #if __APRR_SUPPORTED__
4098 static boolean_t
4099 is_pte_xprr_protected(pmap_t pmap __unused, pt_entry_t pte)
4100 {
4101 #if __APRR_SUPPORTED__
4102 return is_pte_aprr_protected(pte);
4103 #else /* __APRR_SUPPORTED__ */
4104 #error "XPRR configuration error"
4105 #endif /* __APRR_SUPPORTED__ */
4106 }
4107 #endif /* __APRR_SUPPORTED__*/
4108
4109 #if __APRR_SUPPORTED__
4110 static uint64_t
4111 __unused pte_to_xprr_perm(pt_entry_t pte)
4112 {
4113 #if __APRR_SUPPORTED__
4114 switch (PTE_TO_APRR_INDEX(pte)) {
4115 case APRR_FIRM_RX_INDEX: return XPRR_FIRM_RX_PERM;
4116 case APRR_FIRM_RO_INDEX: return XPRR_FIRM_RO_PERM;
4117 case APRR_PPL_RW_INDEX: return XPRR_PPL_RW_PERM;
4118 case APRR_KERN_RW_INDEX: return XPRR_KERN_RW_PERM;
4119 case APRR_FIRM_RW_INDEX: return XPRR_FIRM_RW_PERM;
4120 case APRR_KERN0_RW_INDEX: return XPRR_KERN0_RW_PERM;
4121 case APRR_USER_JIT_INDEX: return XPRR_USER_JIT_PERM;
4122 case APRR_USER_RW_INDEX: return XPRR_USER_RW_PERM;
4123 case APRR_PPL_RX_INDEX: return XPRR_PPL_RX_PERM;
4124 case APRR_KERN_RX_INDEX: return XPRR_KERN_RX_PERM;
4125 case APRR_USER_XO_INDEX: return XPRR_USER_XO_PERM;
4126 case APRR_KERN_RO_INDEX: return XPRR_KERN_RO_PERM;
4127 case APRR_KERN0_RX_INDEX: return XPRR_KERN0_RO_PERM;
4128 case APRR_KERN0_RO_INDEX: return XPRR_KERN0_RO_PERM;
4129 case APRR_USER_RX_INDEX: return XPRR_USER_RX_PERM;
4130 case APRR_USER_RO_INDEX: return XPRR_USER_RO_PERM;
4131 default: return XPRR_MAX_PERM;
4132 }
4133 #else
4134 #error "XPRR configuration error"
4135 #endif /* __APRR_SUPPORTED__ */
4136 }
4137
4138 #if __APRR_SUPPORTED__
4139 static uint64_t
4140 xprr_perm_to_aprr_index(uint64_t perm)
4141 {
4142 switch (perm) {
4143 case XPRR_FIRM_RX_PERM: return APRR_FIRM_RX_INDEX;
4144 case XPRR_FIRM_RO_PERM: return APRR_FIRM_RO_INDEX;
4145 case XPRR_PPL_RW_PERM: return APRR_PPL_RW_INDEX;
4146 case XPRR_KERN_RW_PERM: return APRR_KERN_RW_INDEX;
4147 case XPRR_FIRM_RW_PERM: return APRR_FIRM_RW_INDEX;
4148 case XPRR_KERN0_RW_PERM: return APRR_KERN0_RW_INDEX;
4149 case XPRR_USER_JIT_PERM: return APRR_USER_JIT_INDEX;
4150 case XPRR_USER_RW_PERM: return APRR_USER_RW_INDEX;
4151 case XPRR_PPL_RX_PERM: return APRR_PPL_RX_INDEX;
4152 case XPRR_KERN_RX_PERM: return APRR_KERN_RX_INDEX;
4153 case XPRR_USER_XO_PERM: return APRR_USER_XO_INDEX;
4154 case XPRR_KERN_RO_PERM: return APRR_KERN_RO_INDEX;
4155 case XPRR_KERN0_RX_PERM: return APRR_KERN0_RO_INDEX;
4156 case XPRR_KERN0_RO_PERM: return APRR_KERN0_RO_INDEX;
4157 case XPRR_USER_RX_PERM: return APRR_USER_RX_INDEX;
4158 case XPRR_USER_RO_PERM: return APRR_USER_RO_INDEX;
4159 default: return APRR_MAX_INDEX;
4160 }
4161 }
4162 #endif /* __APRR_SUPPORTED__ */
4163
4164 static pt_entry_t
4165 __unused xprr_perm_to_pte(uint64_t perm)
4166 {
4167 #if __APRR_SUPPORTED__
4168 return APRR_INDEX_TO_PTE(xprr_perm_to_aprr_index(perm));
4169 #else
4170 #error "XPRR configuration error"
4171 #endif /* __APRR_SUPPORTED__ */
4172 }
4173 #endif /* __APRR_SUPPORTED__*/
4174
4175
4176 /*
4177 * Map memory at initialization. The physical addresses being
4178 * mapped are not managed and are never unmapped.
4179 *
4180 * For now, VM is already on, we only need to map the
4181 * specified memory.
4182 */
4183 vm_map_address_t
4184 pmap_map(
4185 vm_map_address_t virt,
4186 vm_offset_t start,
4187 vm_offset_t end,
4188 vm_prot_t prot,
4189 unsigned int flags)
4190 {
4191 kern_return_t kr;
4192 vm_size_t ps;
4193
4194 ps = PAGE_SIZE;
4195 while (start < end) {
4196 kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
4197 prot, VM_PROT_NONE, flags, FALSE);
4198
4199 if (kr != KERN_SUCCESS) {
4200 panic("%s: failed pmap_enter, "
4201 "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
4202 __FUNCTION__,
4203 (void *) virt, (void *) start, (void *) end, prot, flags);
4204 }
4205
4206 virt += ps;
4207 start += ps;
4208 }
4209 return virt;
4210 }
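/*
 * Usage sketch (illustrative only; the actual call sites and symbols live
 * elsewhere in the boot path):
 *
 *     next_va = pmap_map(cur_va, pa_start, pa_end,
 *                        VM_PROT_READ | VM_PROT_WRITE, 0);
 *
 * enters [pa_start, pa_end) into the kernel pmap one PAGE_SIZE step at a time,
 * panics if any pmap_enter() fails, and returns the first virtual address past
 * the new mappings, which a caller can feed straight into its next pmap_map()
 * call.
 */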
4211
4212 vm_map_address_t
4213 pmap_map_bd_with_options(
4214 vm_map_address_t virt,
4215 vm_offset_t start,
4216 vm_offset_t end,
4217 vm_prot_t prot,
4218 int32_t options)
4219 {
4220 pt_entry_t tmplate;
4221 pt_entry_t *ptep;
4222 vm_map_address_t vaddr;
4223 vm_offset_t paddr;
4224 pt_entry_t mem_attr;
4225
4226 switch (options & PMAP_MAP_BD_MASK) {
4227 case PMAP_MAP_BD_WCOMB:
4228 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
4229 #if (__ARM_VMSA__ > 7)
4230 mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
4231 #else
4232 mem_attr |= ARM_PTE_SH;
4233 #endif
4234 break;
4235 case PMAP_MAP_BD_POSTED:
4236 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
4237 break;
4238 case PMAP_MAP_BD_POSTED_REORDERED:
4239 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
4240 break;
4241 case PMAP_MAP_BD_POSTED_COMBINED_REORDERED:
4242 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
4243 break;
4244 default:
4245 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
4246 break;
4247 }
4248
4249 tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
4250 mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
4251 #if __ARM_KERNEL_PROTECT__
4252 tmplate |= ARM_PTE_NG;
4253 #endif /* __ARM_KERNEL_PROTECT__ */
4254
4255 vaddr = virt;
4256 paddr = start;
4257 while (paddr < end) {
4258 ptep = pmap_pte(kernel_pmap, vaddr);
4259 if (ptep == PT_ENTRY_NULL) {
4260 panic("%s: no PTE for vaddr=%p, "
4261 "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
4262 __FUNCTION__, (void*)vaddr,
4263 (void*)virt, (void*)start, (void*)end, prot, options);
4264 }
4265
4266 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
4267 WRITE_PTE_STRONG(ptep, tmplate);
4268
4269 pte_increment_pa(tmplate);
4270 vaddr += PAGE_SIZE;
4271 paddr += PAGE_SIZE;
4272 }
4273
4274 if (end >= start) {
4275 flush_mmu_tlb_region(virt, (unsigned)(end - start));
4276 }
4277
4278 return vaddr;
4279 }
4280
4281 /*
4282 * Back-door routine for mapping kernel VM at initialization.
4283 * Useful for mapping memory outside the range
4284 * [vm_first_phys, vm_last_phys] (i.e., devices).
4285 * Otherwise like pmap_map.
4286 */
4287 vm_map_address_t
4288 pmap_map_bd(
4289 vm_map_address_t virt,
4290 vm_offset_t start,
4291 vm_offset_t end,
4292 vm_prot_t prot)
4293 {
4294 pt_entry_t tmplate;
4295 pt_entry_t *ptep;
4296 vm_map_address_t vaddr;
4297 vm_offset_t paddr;
4298
4299 /* not cacheable and not buffered */
4300 tmplate = pa_to_pte(start)
4301 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
4302 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
4303 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
4304 #if __ARM_KERNEL_PROTECT__
4305 tmplate |= ARM_PTE_NG;
4306 #endif /* __ARM_KERNEL_PROTECT__ */
4307
4308 vaddr = virt;
4309 paddr = start;
4310 while (paddr < end) {
4311 ptep = pmap_pte(kernel_pmap, vaddr);
4312 if (ptep == PT_ENTRY_NULL) {
4313 panic("pmap_map_bd");
4314 }
4315 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
4316 WRITE_PTE_STRONG(ptep, tmplate);
4317
4318 pte_increment_pa(tmplate);
4319 vaddr += PAGE_SIZE;
4320 paddr += PAGE_SIZE;
4321 }
4322
4323 if (end >= start) {
4324 flush_mmu_tlb_region(virt, (unsigned)(end - start));
4325 }
4326
4327 return vaddr;
4328 }
4329
4330 /*
4331 * Back-door routine for mapping kernel VM at initialization.
4332 * Useful for mapping memory specific physical addresses in early
4333 * boot (i.e., before kernel_map is initialized).
4334 *
4335 * Maps are in the VM_HIGH_KERNEL_WINDOW area.
4336 */
4337
4338 vm_map_address_t
4339 pmap_map_high_window_bd(
4340 vm_offset_t pa_start,
4341 vm_size_t len,
4342 vm_prot_t prot)
4343 {
4344 pt_entry_t *ptep, pte;
4345 #if (__ARM_VMSA__ == 7)
4346 vm_map_address_t va_start = VM_HIGH_KERNEL_WINDOW;
4347 vm_map_address_t va_max = VM_MAX_KERNEL_ADDRESS;
4348 #else
4349 vm_map_address_t va_start = VREGION1_START;
4350 vm_map_address_t va_max = VREGION1_START + VREGION1_SIZE;
4351 #endif
4352 vm_map_address_t va_end;
4353 vm_map_address_t va;
4354 vm_size_t offset;
4355
4356 offset = pa_start & PAGE_MASK;
4357 pa_start -= offset;
4358 len += offset;
4359
4360 if (len > (va_max - va_start)) {
4361 panic("%s: area too large, "
4362 "pa_start=%p, len=%p, prot=0x%x",
4363 __FUNCTION__,
4364 (void*)pa_start, (void*)len, prot);
4365 }
4366
4367 scan:
4368 for (; va_start < va_max; va_start += PAGE_SIZE) {
4369 ptep = pmap_pte(kernel_pmap, va_start);
4370 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
4371 if (*ptep == ARM_PTE_TYPE_FAULT) {
4372 break;
4373 }
4374 }
4375 if (va_start > va_max) {
4376 panic("%s: insufficient pages, "
4377 "pa_start=%p, len=%p, prot=0x%x",
4378 __FUNCTION__,
4379 (void*)pa_start, (void*)len, prot);
4380 }
4381
4382 for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
4383 ptep = pmap_pte(kernel_pmap, va_end);
4384 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
4385 if (*ptep != ARM_PTE_TYPE_FAULT) {
4386 va_start = va_end + PAGE_SIZE;
4387 goto scan;
4388 }
4389 }
4390
4391 for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
4392 ptep = pmap_pte(kernel_pmap, va);
4393 pte = pa_to_pte(pa_start)
4394 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
4395 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
4396 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
4397 #if (__ARM_VMSA__ > 7)
4398 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
4399 #else
4400 pte |= ARM_PTE_SH;
4401 #endif
4402 #if __ARM_KERNEL_PROTECT__
4403 pte |= ARM_PTE_NG;
4404 #endif /* __ARM_KERNEL_PROTECT__ */
4405 WRITE_PTE_STRONG(ptep, pte);
4406 }
4407 PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len, false);
4408 #if KASAN
4409 kasan_notify_address(va_start, len);
4410 #endif
4411 return va_start;
4412 }
4413
4414 #define PMAP_ALIGN(addr, align) (((addr) + ((align) - 1)) & ~((align) - 1))
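/*
 * PMAP_ALIGN rounds addr up to the next multiple of align, which must be a power
 * of two. Worked example: PMAP_ALIGN(0x1234, 0x40) = (0x1234 + 0x3f) & ~0x3f
 * = 0x1273 & ~0x3f = 0x1240; an already aligned address comes back unchanged.
 */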
4415
4416 static void
4417 pmap_compute_pv_targets(void)
4418 {
4419 DTEntry entry;
4420 void const *prop = NULL;
4421 int err;
4422 unsigned int prop_size;
4423
4424 err = SecureDTLookupEntry(NULL, "/defaults", &entry);
4425 assert(err == kSuccess);
4426
4427 if (kSuccess == SecureDTGetProperty(entry, "pmap-pv-count", &prop, &prop_size)) {
4428 if (prop_size != sizeof(pv_alloc_initial_target)) {
4429 panic("pmap-pv-count property is not a 32-bit integer");
4430 }
4431 pv_alloc_initial_target = *((uint32_t const *)prop);
4432 }
4433
4434 if (kSuccess == SecureDTGetProperty(entry, "pmap-kern-pv-count", &prop, &prop_size)) {
4435 if (prop_size != sizeof(pv_kern_alloc_initial_target)) {
4436 panic("pmap-kern-pv-count property is not a 32-bit integer");
4437 }
4438 pv_kern_alloc_initial_target = *((uint32_t const *)prop);
4439 }
4440
4441 if (kSuccess == SecureDTGetProperty(entry, "pmap-kern-pv-min", &prop, &prop_size)) {
4442 if (prop_size != sizeof(pv_kern_low_water_mark)) {
4443 panic("pmap-kern-pv-min property is not a 32-bit integer");
4444 }
4445 pv_kern_low_water_mark = *((uint32_t const *)prop);
4446 }
4447 }
4448
4449
4450 static uint32_t
4451 pmap_compute_max_asids(void)
4452 {
4453 DTEntry entry;
4454 void const *prop = NULL;
4455 uint32_t max_asids;
4456 int err;
4457 unsigned int prop_size;
4458
4459 err = SecureDTLookupEntry(NULL, "/defaults", &entry);
4460 assert(err == kSuccess);
4461
4462 if (kSuccess != SecureDTGetProperty(entry, "pmap-max-asids", &prop, &prop_size)) {
4463 /* TODO: consider allowing maxproc limits to be scaled earlier so that
4464 * we can choose a more flexible default value here. */
4465 return MAX_ASIDS;
4466 }
4467
4468 if (prop_size != sizeof(max_asids)) {
4469 panic("pmap-max-asids property is not a 32-bit integer");
4470 }
4471
4472 max_asids = *((uint32_t const *)prop);
4473 /* Round up to the nearest 64 to make things a bit easier for the Pseudo-LRU allocator. */
4474 max_asids = (max_asids + 63) & ~63UL;
4475
4476 if (((max_asids + MAX_HW_ASIDS) / (MAX_HW_ASIDS + 1)) > MIN(MAX_HW_ASIDS, UINT8_MAX)) {
4477 /* currently capped by size of pmap->sw_asid */
4478 panic("pmap-max-asids too large");
4479 }
4480 if (max_asids == 0) {
4481 panic("pmap-max-asids cannot be zero");
4482 }
4483 return max_asids;
4484 }
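/*
 * Worked example of the sizing logic above: a device tree that sets
 * pmap-max-asids = 1000 yields (1000 + 63) & ~63UL = 1024, keeping the bitmap
 * used by the Pseudo-LRU allocator a whole number of 64-bit words. The ceiling
 * division by (MAX_HW_ASIDS + 1) then bounds how many software ASID generations
 * are needed, which must still fit in the 8-bit pmap->sw_asid field, hence the
 * panic check.
 */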
4485
4486
4487 static vm_size_t
4488 pmap_compute_io_rgns(void)
4489 {
4490 DTEntry entry;
4491 pmap_io_range_t const *ranges;
4492 uint64_t rgn_end;
4493 void const *prop = NULL;
4494 int err;
4495 unsigned int prop_size;
4496
4497 err = SecureDTLookupEntry(NULL, "/defaults", &entry);
4498 assert(err == kSuccess);
4499
4500 if (kSuccess != SecureDTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
4501 return 0;
4502 }
4503
4504 ranges = prop;
4505 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
4506 if (ranges[i].addr & PAGE_MASK) {
4507 panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
4508 }
4509 if (ranges[i].len & PAGE_MASK) {
4510 panic("pmap I/O region %u length 0x%llx is not page-aligned", i, ranges[i].len);
4511 }
4512 if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end)) {
4513 panic("pmap I/O region %u addr 0x%llx length 0x%llx wraps around", i, ranges[i].addr, ranges[i].len);
4514 }
4515 if (((ranges[i].addr <= gPhysBase) && (rgn_end > gPhysBase)) ||
4516 ((ranges[i].addr < avail_end) && (rgn_end >= avail_end)) ||
4517 ((ranges[i].addr > gPhysBase) && (rgn_end < avail_end))) {
4518 panic("pmap I/O region %u addr 0x%llx length 0x%llx overlaps physical memory", i, ranges[i].addr, ranges[i].len);
4519 }
4520
4521 ++num_io_rgns;
4522 }
4523
4524 return num_io_rgns * sizeof(*ranges);
4525 }
4526
4527 /*
4528 * return < 0 for a < b
4529 * 0 for a == b
4530 * > 0 for a > b
4531 */
4532 typedef int (*cmpfunc_t)(const void *a, const void *b);
4533
4534 extern void
4535 qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
4536
4537 static int
4538 cmp_io_rgns(const void *a, const void *b)
4539 {
4540 const pmap_io_range_t *range_a = a;
4541 const pmap_io_range_t *range_b = b;
4542 if ((range_b->addr + range_b->len) <= range_a->addr) {
4543 return 1;
4544 } else if ((range_a->addr + range_a->len) <= range_b->addr) {
4545 return -1;
4546 } else {
4547 return 0;
4548 }
4549 }
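/*
 * cmp_io_rgns() orders disjoint ranges by ascending physical address and reports
 * any two overlapping ranges as equal, so the qsort() in pmap_load_io_rgns()
 * below produces an address-sorted io_attr_table; the relative order of
 * overlapping entries (not expected from a well-formed device tree) is left
 * unspecified.
 */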
4550
4551 static void
4552 pmap_load_io_rgns(void)
4553 {
4554 DTEntry entry;
4555 pmap_io_range_t const *ranges;
4556 void const *prop = NULL;
4557 int err;
4558 unsigned int prop_size;
4559
4560 if (num_io_rgns == 0) {
4561 return;
4562 }
4563
4564 err = SecureDTLookupEntry(NULL, "/defaults", &entry);
4565 assert(err == kSuccess);
4566
4567 err = SecureDTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
4568 assert(err == kSuccess);
4569
4570 ranges = prop;
4571 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
4572 io_attr_table[i] = ranges[i];
4573 }
4574
4575 qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
4576 }
4577
4578 #if __arm64__
4579 /*
4580 * pmap_get_arm64_prot
4581 *
4582 * return effective armv8 VMSA block protections including
4583 * table AP/PXN/XN overrides of a pmap entry
4584 *
4585 */
4586
4587 uint64_t
4588 pmap_get_arm64_prot(
4589 pmap_t pmap,
4590 vm_offset_t addr)
4591 {
4592 tt_entry_t tte = 0;
4593 unsigned int level = 0;
4594 uint64_t tte_type = 0;
4595 uint64_t effective_prot_bits = 0;
4596 uint64_t aggregate_tte = 0;
4597 uint64_t table_ap_bits = 0, table_xn = 0, table_pxn = 0;
4598 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4599
4600 for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
4601 tte = *pmap_ttne(pmap, level, addr);
4602
4603 if (!(tte & ARM_TTE_VALID)) {
4604 return 0;
4605 }
4606
4607 tte_type = tte & ARM_TTE_TYPE_MASK;
4608
4609 if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
4610 (level == pt_attr->pta_max_level)) {
4611 /* Block or page mapping; both have the same protection bit layout. */
4612 break;
4613 } else if (tte_type == ARM_TTE_TYPE_TABLE) {
4614 /* All of the table bits we care about are overrides, so just OR them together. */
4615 aggregate_tte |= tte;
4616 }
4617 }
4618
4619 table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
4620 table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
4621 table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);
4622
4623 /* Start with the PTE bits. */
4624 effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);
4625
4626 /* Table AP bits mask out block/page AP bits */
4627 effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));
4628
4629 /* XN/PXN bits can be OR'd in. */
4630 effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
4631 effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);
4632
4633 return effective_prot_bits;
4634 }
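/*
 * Example of the override aggregation above: if any table-level descriptor on the
 * walk sets its XN (or PXN) override bit, the returned value includes ARM_PTE_NX
 * (or ARM_PTE_PNX) even when the leaf block/page entry left it clear, and table AP
 * restrictions likewise mask out more-permissive AP bits from the leaf entry.
 */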
4635 #endif /* __arm64__ */
4636
4637
4638 /*
4639 * Bootstrap the system enough to run with virtual memory.
4640 *
4641 * The early VM initialization code has already allocated
4642 * the first CPU's translation table and made entries for
4643 * all the one-to-one mappings to be found there.
4644 *
4645 * We must set up the kernel pmap structures, the
4646 * physical-to-virtual translation lookup tables for the
4647 * physical memory to be managed (between avail_start and
4648 * avail_end).
4649 *
4650 * Map the kernel's code and data, and allocate the system page table.
4651 * Page_size must already be set.
4652 *
4653 * Parameters:
4654 * first_avail first available physical page -
4655 * after kernel page tables
4656 * avail_start PA of first managed physical page
4657 * avail_end PA of last managed physical page
4658 */
4659
4660 void
4661 pmap_bootstrap(
4662 vm_offset_t vstart)
4663 {
4664 pmap_paddr_t pmap_struct_start;
4665 vm_size_t pv_head_size;
4666 vm_size_t ptd_root_table_size;
4667 vm_size_t pp_attr_table_size;
4668 vm_size_t io_attr_table_size;
4669 vm_size_t asid_table_size;
4670 unsigned int npages;
4671 vm_map_offset_t maxoffset;
4672
4673 lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);
4674
4675 #if XNU_MONITOR
4676
4677 #if DEVELOPMENT || DEBUG
4678 PE_parse_boot_argn("-unsafe_kernel_text", &pmap_ppl_disable, sizeof(pmap_ppl_disable));
4679 #endif
4680
4681 #if CONFIG_CSR_FROM_DT
4682 if (csr_unsafe_kernel_text) {
4683 pmap_ppl_disable = true;
4684 }
4685 #endif /* CONFIG_CSR_FROM_DT */
4686
4687 #if __APRR_SUPPORTED__
4688 if (((uintptr_t)(&ppl_trampoline_start)) % PAGE_SIZE) {
4689 panic("%s: ppl_trampoline_start is not page aligned, "
4690 "vstart=%#lx",
4691 __FUNCTION__,
4692 vstart);
4693 }
4694
4695 if (((uintptr_t)(&ppl_trampoline_end)) % PAGE_SIZE) {
4696 panic("%s: ppl_trampoline_end is not page aligned, "
4697 "vstart=%#lx",
4698 __FUNCTION__,
4699 vstart);
4700 }
4701 #endif /* __APRR_SUPPORTED__ */
4702 #endif /* XNU_MONITOR */
4703
4704 #if DEVELOPMENT || DEBUG
4705 if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
4706 kprintf("Kernel traces for pmap operations enabled\n");
4707 }
4708 #endif
4709
4710 /*
4711 * Initialize the kernel pmap.
4712 */
4713 pmap_stamp = 1;
4714 #if ARM_PARAMETERIZED_PMAP
4715 kernel_pmap->pmap_pt_attr = native_pt_attr;
4716 #endif /* ARM_PARAMETERIZED_PMAP */
4717 #if HAS_APPLE_PAC
4718 kernel_pmap->disable_jop = 0;
4719 #endif /* HAS_APPLE_PAC */
4720 kernel_pmap->tte = cpu_tte;
4721 kernel_pmap->ttep = cpu_ttep;
4722 #if (__ARM_VMSA__ > 7)
4723 kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
4724 #else
4725 kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
4726 #endif
4727 kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
4728 os_atomic_init(&kernel_pmap->ref_count, 1);
4729 kernel_pmap->gc_status = 0;
4730 kernel_pmap->nx_enabled = TRUE;
4731 #ifdef __arm64__
4732 kernel_pmap->is_64bit = TRUE;
4733 #else
4734 kernel_pmap->is_64bit = FALSE;
4735 #endif
4736 kernel_pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);
4737
4738 #if ARM_PARAMETERIZED_PMAP
4739 kernel_pmap->pmap_pt_attr = native_pt_attr;
4740 #endif /* ARM_PARAMETERIZED_PMAP */
4741
4742 kernel_pmap->nested_region_addr = 0x0ULL;
4743 kernel_pmap->nested_region_size = 0x0ULL;
4744 kernel_pmap->nested_region_asid_bitmap = NULL;
4745 kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;
4746
4747 #if (__ARM_VMSA__ == 7)
4748 kernel_pmap->tte_index_max = 4 * NTTES;
4749 #endif
4750 kernel_pmap->hw_asid = 0;
4751 kernel_pmap->sw_asid = 0;
4752
4753 pmap_lock_init(kernel_pmap);
4754 memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));
4755
4756 /* allocate space for and initialize the bookkeeping structures */
4757 io_attr_table_size = pmap_compute_io_rgns();
4758 npages = (unsigned int)atop(mem_size);
4759 pp_attr_table_size = npages * sizeof(pp_attr_t);
4760 pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
4761 // allocate enough initial PTDs to map twice the available physical memory
4762 ptd_root_table_size = sizeof(pt_desc_t) * (mem_size / ((PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES)) * 2;
4763 pmap_max_asids = pmap_compute_max_asids();
4764 pmap_asid_plru = (pmap_max_asids > MAX_HW_ASIDS);
4765 PE_parse_boot_argn("pmap_asid_plru", &pmap_asid_plru, sizeof(pmap_asid_plru));
4766 /* Align the range of available hardware ASIDs to a multiple of 64 to enable the
4767 * masking used by the PLRU scheme. This means we must handle the case in which
4768 * the returned hardware ASID is MAX_HW_ASIDS, which we do in alloc_asid() and free_asid(). */
4769 _Static_assert(sizeof(asid_plru_bitmap[0]) == sizeof(uint64_t), "bitmap_t is not a 64-bit integer");
4770 _Static_assert(((MAX_HW_ASIDS + 1) % 64) == 0, "MAX_HW_ASIDS + 1 is not divisible by 64");
4771 asid_chunk_size = (pmap_asid_plru ? (MAX_HW_ASIDS + 1) : MAX_HW_ASIDS);
4772
4773 asid_table_size = sizeof(*asid_bitmap) * BITMAP_LEN(pmap_max_asids);
4774
4775 pmap_compute_pv_targets();
4776
4777 pmap_struct_start = avail_start;
4778
4779 pp_attr_table = (pp_attr_t *) phystokv(avail_start);
4780 avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
4781 io_attr_table = (pmap_io_range_t *) phystokv(avail_start);
4782 avail_start = PMAP_ALIGN(avail_start + io_attr_table_size, __alignof(pv_entry_t*));
4783 pv_head_table = (pv_entry_t **) phystokv(avail_start);
4784 avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
4785 ptd_root_table = (pt_desc_t *)phystokv(avail_start);
4786 avail_start = PMAP_ALIGN(avail_start + ptd_root_table_size, __alignof(bitmap_t));
4787 asid_bitmap = (bitmap_t*)phystokv(avail_start);
4788 avail_start = round_page(avail_start + asid_table_size);
4789
4790 memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);
4791
4792 pmap_load_io_rgns();
4793 ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));
4794
4795 #if XNU_MONITOR
4796 pmap_array_begin = (void *)phystokv(avail_start);
4797 pmap_array = pmap_array_begin;
4798 avail_start += round_page(PMAP_ARRAY_SIZE * sizeof(struct pmap));
4799 pmap_array_end = (void *)phystokv(avail_start);
4800
4801 pmap_array_count = ((pmap_array_end - pmap_array_begin) / sizeof(struct pmap));
4802
4803 pmap_bootstrap_pmap_free_list();
4804
4805 pmap_ledger_ptr_array_begin = (void *)phystokv(avail_start);
4806 pmap_ledger_ptr_array = pmap_ledger_ptr_array_begin;
4807 avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(void*));
4808 pmap_ledger_ptr_array_end = (void *)phystokv(avail_start);
4809
4810 pmap_ledger_refcnt_begin = (void *)phystokv(avail_start);
4811 pmap_ledger_refcnt = pmap_ledger_refcnt_begin;
4812 avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(os_refcnt_t));
4813 pmap_ledger_refcnt_end = (void *)phystokv(avail_start);
4814 #endif
4815 pmap_cpu_data_array_init();
4816
4817 vm_first_phys = gPhysBase;
4818 vm_last_phys = trunc_page(avail_end);
4819
4820 queue_init(&map_pmap_list);
4821 queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
4822 free_page_size_tt_list = TT_FREE_ENTRY_NULL;
4823 free_page_size_tt_count = 0;
4824 free_page_size_tt_max = 0;
4825 free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
4826 free_two_page_size_tt_count = 0;
4827 free_two_page_size_tt_max = 0;
4828 free_tt_list = TT_FREE_ENTRY_NULL;
4829 free_tt_count = 0;
4830 free_tt_max = 0;
4831
4832 queue_init(&pt_page_list);
4833
4834 pmap_pages_request_count = 0;
4835 pmap_pages_request_acum = 0;
4836 pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;
4837
4838 virtual_space_start = vstart;
4839 virtual_space_end = VM_MAX_KERNEL_ADDRESS;
4840
4841 bitmap_full(&asid_bitmap[0], pmap_max_asids);
4842 bitmap_full(&asid_plru_bitmap[0], MAX_HW_ASIDS);
4843 // Clear the highest-order bit, which corresponds to MAX_HW_ASIDS + 1
4844 asid_plru_bitmap[MAX_HW_ASIDS >> 6] = ~(1ULL << 63);
4845
4846
4847
4848 if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
4849 maxoffset = trunc_page(maxoffset);
4850 if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
4851 && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
4852 arm_pmap_max_offset_default = maxoffset;
4853 }
4854 }
4855 #if defined(__arm64__)
4856 if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
4857 maxoffset = trunc_page(maxoffset);
4858 if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
4859 && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
4860 arm64_pmap_max_offset_default = maxoffset;
4861 }
4862 }
4863 #endif
4864
4865 PE_parse_boot_argn("pmap_panic_dev_wimg_on_managed", &pmap_panic_dev_wimg_on_managed, sizeof(pmap_panic_dev_wimg_on_managed));
4866
4867
4868 #if MACH_ASSERT
4869 PE_parse_boot_argn("pmap_stats_assert",
4870 &pmap_stats_assert,
4871 sizeof(pmap_stats_assert));
4872 PE_parse_boot_argn("vm_footprint_suspend_allowed",
4873 &vm_footprint_suspend_allowed,
4874 sizeof(vm_footprint_suspend_allowed));
4875 #endif /* MACH_ASSERT */
4876
4877 #if KASAN
4878 /* Shadow the CPU copy windows, as they fall outside of the physical aperture */
4879 kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
4880 #endif /* KASAN */
4881 }
4882
4883 #if XNU_MONITOR
4884
4885 static inline void
4886 pa_set_range_monitor(pmap_paddr_t start_pa, pmap_paddr_t end_pa)
4887 {
4888 pmap_paddr_t cur_pa;
4889 for (cur_pa = start_pa; cur_pa < end_pa; cur_pa += ARM_PGBYTES) {
4890 assert(pa_valid(cur_pa));
4891 pa_set_monitor(cur_pa);
4892 }
4893 }
4894
4895 static void
4896 pa_set_range_xprr_perm(pmap_paddr_t start_pa,
4897 pmap_paddr_t end_pa,
4898 unsigned int expected_perm,
4899 unsigned int new_perm)
4900 {
4901 vm_offset_t start_va = phystokv(start_pa);
4902 vm_offset_t end_va = start_va + (end_pa - start_pa);
4903
4904 pa_set_range_monitor(start_pa, end_pa);
4905 pmap_set_range_xprr_perm(start_va, end_va, expected_perm, new_perm);
4906 }
4907
4908 static void
4909 pmap_lockdown_kc(void)
4910 {
4911 extern vm_offset_t vm_kernelcache_base;
4912 extern vm_offset_t vm_kernelcache_top;
4913 pmap_paddr_t start_pa = kvtophys(vm_kernelcache_base);
4914 pmap_paddr_t end_pa = start_pa + (vm_kernelcache_top - vm_kernelcache_base);
4915 pmap_paddr_t cur_pa = start_pa;
4916 vm_offset_t cur_va = vm_kernelcache_base;
4917 while (cur_pa < end_pa) {
4918 vm_size_t range_size = end_pa - cur_pa;
4919 vm_offset_t ptov_va = phystokv_range(cur_pa, &range_size);
4920 if (ptov_va != cur_va) {
4921 /*
4922 * If the physical address maps back to a virtual address that is non-linear
4923 * w.r.t. the kernelcache, that means it corresponds to memory that will be
4924 * reclaimed by the OS and should therefore not be locked down.
4925 */
4926 cur_pa += range_size;
4927 cur_va += range_size;
4928 continue;
4929 }
4930 unsigned int pai = (unsigned int)pa_index(cur_pa);
4931 pv_entry_t **pv_h = pai_to_pvh(pai);
4932
4933 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
4934
4935 if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
4936 panic("pai %d already locked down", pai);
4937 }
4938 pvh_set_flags(pv_h, pvh_flags | PVH_FLAG_LOCKDOWN);
4939 cur_pa += ARM_PGBYTES;
4940 cur_va += ARM_PGBYTES;
4941 }
4942 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
4943 extern uint64_t ctrr_ro_test;
4944 extern uint64_t ctrr_nx_test;
4945 pmap_paddr_t exclude_pages[] = {kvtophys((vm_offset_t)&ctrr_ro_test), kvtophys((vm_offset_t)&ctrr_nx_test)};
4946 for (unsigned i = 0; i < (sizeof(exclude_pages) / sizeof(exclude_pages[0])); ++i) {
4947 pv_entry_t **pv_h = pai_to_pvh(pa_index(exclude_pages[i]));
4948 pvh_set_flags(pv_h, pvh_get_flags(pv_h) & ~PVH_FLAG_LOCKDOWN);
4949 }
4950 #endif
4951 }
4952
4953 void
4954 pmap_static_allocations_done(void)
4955 {
4956 pmap_paddr_t monitor_start_pa;
4957 pmap_paddr_t monitor_end_pa;
4958
4959 /*
4960 * Protect the bootstrap (V=P and V->P) page tables.
4961 *
4962 * These bootstrap allocations will be used primarily for page tables.
4963 * If we wish to secure the page tables, we need to start by marking
4964 * these bootstrap allocations as pages that we want to protect.
4965 */
4966 monitor_start_pa = kvtophys((vm_offset_t)&bootstrap_pagetables);
4967 monitor_end_pa = monitor_start_pa + BOOTSTRAP_TABLE_SIZE;
4968
4969 /* The bootstrap page tables are mapped RW at bootstrap. */
4970 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_KERN_RO_PERM);
4971
4972 /*
4973 * We use avail_start as a pointer to the first address that has not
4974 * been reserved for bootstrap, so we know which pages to give to the
4975 * virtual memory layer.
4976 */
4977 monitor_start_pa = BootArgs->topOfKernelData;
4978 monitor_end_pa = avail_start;
4979
4980 /* The other bootstrap allocations are mapped RW at bootstrap. */
4981 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
4982
4983 /*
4984 * The RO page tables are mapped RW in arm_vm_init() and later restricted
4985 * to RO in arm_vm_prot_finalize(), which is called after this function.
4986 * Here we only need to mark the underlying physical pages as PPL-owned to ensure
4987 * they can't be allocated for other uses. We don't need a special xPRR
4988 * protection index, as there is no PPL_RO index, and these pages are ultimately
4989 * protected by KTRR/CTRR. Furthermore, use of PPL_RW for these pages would
4990 * expose us to a functional issue on H11 devices where CTRR shifts the APRR
4991 * lookup table index to USER_XO before APRR is applied, leading the hardware
4992 * to believe we are dealing with a user XO page upon performing a translation.
4993 */
4994 monitor_start_pa = kvtophys((vm_offset_t)&ropagetable_begin);
4995 monitor_end_pa = monitor_start_pa + ((vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin);
4996 pa_set_range_monitor(monitor_start_pa, monitor_end_pa);
4997
4998 monitor_start_pa = kvtophys(segPPLDATAB);
4999 monitor_end_pa = monitor_start_pa + segSizePPLDATA;
5000
5001 /* PPL data is RW for the PPL, RO for the kernel. */
5002 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
5003
5004 monitor_start_pa = kvtophys(segPPLTEXTB);
5005 monitor_end_pa = monitor_start_pa + segSizePPLTEXT;
5006
5007 /* PPL text is RX for the PPL, RO for the kernel. */
5008 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
5009
5010 #if __APRR_SUPPORTED__
5011 monitor_start_pa = kvtophys(segPPLTRAMPB);
5012 monitor_end_pa = monitor_start_pa + segSizePPLTRAMP;
5013
5014 /*
5015 * The PPLTRAMP pages will be a mix of PPL RX/kernel RO and
5016 * PPL RX/kernel RX. However, all of these pages belong to the PPL.
5017 */
5018 pa_set_range_monitor(monitor_start_pa, monitor_end_pa);
5019 #endif
5020
5021 /*
5022 * In order to support DTrace, the save areas for the PPL must be
5023 * writable, because DTrace will try to update
5024 * register state.
5025 */
5026 if (pmap_ppl_disable) {
5027 vm_offset_t monitor_start_va = phystokv(ppl_cpu_save_area_start);
5028 vm_offset_t monitor_end_va = monitor_start_va + (ppl_cpu_save_area_end - ppl_cpu_save_area_start);
5029
5030 pmap_set_range_xprr_perm(monitor_start_va, monitor_end_va, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
5031 }
5032
5033 #if __APRR_SUPPORTED__
5034 /* The trampoline must also be specially protected. */
5035 pmap_set_range_xprr_perm((vm_offset_t)&ppl_trampoline_start, (vm_offset_t)&ppl_trampoline_end, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
5036 #endif
5037
5038 if (segSizePPLDATACONST > 0) {
5039 monitor_start_pa = kvtophys(segPPLDATACONSTB);
5040 monitor_end_pa = monitor_start_pa + segSizePPLDATACONST;
5041
5042 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
5043 }
5044
5045 /*
5046 * Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
5047 * precaution. The real RW mappings are at a different location with guard pages.
5048 */
5049 pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_KERN_RO_PERM);
5050
5051 /* Prevent remapping of the kernelcache */
5052 pmap_lockdown_kc();
5053 }
5054
5055
5056 void
5057 pmap_lockdown_ppl(void)
5058 {
5059 /* Mark the PPL as being locked down. */
5060
5061 #if __APRR_SUPPORTED__
5062 pmap_ppl_locked_down = TRUE;
5063 /* Force a trap into the PPL to update APRR_EL1. */
5064 pmap_return(FALSE, FALSE);
5065 #else
5066 #error "XPRR configuration error"
5067 #endif /* __APRR_SUPPORTED__ */
5068
5069 }
5070 #endif /* XNU_MONITOR */
5071
5072 void
5073 pmap_virtual_space(
5074 vm_offset_t *startp,
5075 vm_offset_t *endp
5076 )
5077 {
5078 *startp = virtual_space_start;
5079 *endp = virtual_space_end;
5080 }
5081
5082
5083 boolean_t
5084 pmap_virtual_region(
5085 unsigned int region_select,
5086 vm_map_offset_t *startp,
5087 vm_map_size_t *size
5088 )
5089 {
5090 boolean_t ret = FALSE;
5091 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
5092 if (region_select == 0) {
5093 /*
5094 * In this config, the bootstrap mappings should occupy their own L2
5095 * TTs, as they should be immutable after boot. Having the associated
5096 * TTEs and PTEs in their own pages allows us to lock down those pages,
5097 * while allowing the rest of the kernel address range to be remapped.
5098 */
5099 #if (__ARM_VMSA__ > 7)
5100 *startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
5101 #else
5102 #error Unsupported configuration
5103 #endif
5104 #if defined(ARM_LARGE_MEMORY)
5105 *size = ((KERNEL_PMAP_HEAP_RANGE_START - *startp) & ~PAGE_MASK);
5106 #else
5107 *size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
5108 #endif
5109 ret = TRUE;
5110 }
5111 #else
5112 #if (__ARM_VMSA__ > 7)
5113 unsigned long low_global_vr_mask = 0;
5114 vm_map_size_t low_global_vr_size = 0;
5115 #endif
5116
5117 if (region_select == 0) {
5118 #if (__ARM_VMSA__ == 7)
5119 *startp = gVirtBase & 0xFFC00000;
5120 *size = ((virtual_space_start - (gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
5121 #else
5122 /* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
5123 if (!TEST_PAGE_SIZE_4K) {
5124 *startp = gVirtBase & 0xFFFFFFFFFE000000;
5125 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
5126 } else {
5127 *startp = gVirtBase & 0xFFFFFFFFFF800000;
5128 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
5129 }
5130 #endif
5131 ret = TRUE;
5132 }
5133 if (region_select == 1) {
5134 *startp = VREGION1_START;
5135 *size = VREGION1_SIZE;
5136 ret = TRUE;
5137 }
5138 #if (__ARM_VMSA__ > 7)
5139 /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
5140 if (!TEST_PAGE_SIZE_4K) {
5141 low_global_vr_mask = 0xFFFFFFFFFE000000;
5142 low_global_vr_size = 0x2000000;
5143 } else {
5144 low_global_vr_mask = 0xFFFFFFFFFF800000;
5145 low_global_vr_size = 0x800000;
5146 }
5147
5148 if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
5149 *startp = LOW_GLOBAL_BASE_ADDRESS;
5150 *size = low_global_vr_size;
5151 ret = TRUE;
5152 }
5153
5154 if (region_select == 3) {
5155 /* In this config, we allow the bootstrap mappings to occupy the same
5156 * page table pages as the heap.
5157 */
5158 *startp = VM_MIN_KERNEL_ADDRESS;
5159 *size = LOW_GLOBAL_BASE_ADDRESS - *startp;
5160 ret = TRUE;
5161 }
5162 #endif
5163 #endif
5164 return ret;
5165 }
5166
5167 unsigned int
5168 pmap_free_pages(
5169 void)
5170 {
5171 return (unsigned int)atop(avail_end - first_avail);
5172 }
5173
5174
5175 boolean_t
5176 pmap_next_page_hi(
5177 ppnum_t * pnum,
5178 __unused boolean_t might_free)
5179 {
5180 return pmap_next_page(pnum);
5181 }
5182
5183
5184 boolean_t
5185 pmap_next_page(
5186 ppnum_t *pnum)
5187 {
5188 if (first_avail != avail_end) {
5189 *pnum = (ppnum_t)atop(first_avail);
5190 first_avail += PAGE_SIZE;
5191 return TRUE;
5192 }
5193 return FALSE;
5194 }
5195
5196
5197 /*
5198 * Initialize the pmap module.
5199 * Called by vm_init, to initialize any structures that the pmap
5200 * system needs to map virtual memory.
5201 */
5202 void
5203 pmap_init(
5204 void)
5205 {
5206 /*
5207 * Protect page zero in the kernel map.
5208 * (can be overruled by permanent translation
5209 * table entries at page zero - see arm_vm_init).
5210 */
5211 vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);
5212
5213 pmap_initialized = TRUE;
5214
5215 /*
5216 * Create the zone of physical maps
5217 * and the physical-to-virtual entries.
5218 */
5219 pmap_zone = zone_create_ext("pmap", sizeof(struct pmap),
5220 ZC_ZFREE_CLEARMEM, ZONE_ID_PMAP, NULL);
5221
5222
5223 /*
5224 * Initialize the pmap object (for tracking the vm_page_t
5225 * structures for pages we allocate to be page tables in
5226 * pmap_expand()).
5227 */
5228 _vm_object_allocate(mem_size, pmap_object);
5229 pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
5230
5231 /*
5232 * The values of [hard_]maxproc may have been scaled, make sure
5233 * they are still less than the value of pmap_max_asids.
5234 */
5235 if ((uint32_t)maxproc > pmap_max_asids) {
5236 maxproc = pmap_max_asids;
5237 }
5238 if ((uint32_t)hard_maxproc > pmap_max_asids) {
5239 hard_maxproc = pmap_max_asids;
5240 }
5241
5242 #if CONFIG_PGTRACE
5243 pmap_pgtrace_init();
5244 #endif
5245 }
5246
5247 boolean_t
5248 pmap_verify_free(
5249 ppnum_t ppnum)
5250 {
5251 pv_entry_t **pv_h;
5252 int pai;
5253 pmap_paddr_t phys = ptoa(ppnum);
5254
5255 assert(phys != vm_page_fictitious_addr);
5256
5257 if (!pa_valid(phys)) {
5258 return FALSE;
5259 }
5260
5261 pai = (int)pa_index(phys);
5262 pv_h = pai_to_pvh(pai);
5263
5264 return pvh_test_type(pv_h, PVH_TYPE_NULL);
5265 }
5266
5267 #if MACH_ASSERT
5268 void
5269 pmap_assert_free(ppnum_t ppnum)
5270 {
5271 assertf(pmap_verify_free(ppnum), "page = 0x%x", ppnum);
5272 (void)ppnum;
5273 }
5274 #endif
5275
5276
5277 #if XNU_MONITOR
5278 MARK_AS_PMAP_TEXT static void
5279 pmap_ledger_alloc_init_internal(size_t size)
5280 {
5281 pmap_simple_lock(&pmap_ledger_lock);
5282
5283 if (pmap_ledger_alloc_initialized) {
5284 panic("%s: already initialized, "
5285 "size=%lu",
5286 __func__,
5287 size);
5288 }
5289
5290 if ((size > sizeof(pmap_ledger_data_t)) ||
5291 ((sizeof(pmap_ledger_data_t) - size) % sizeof(struct ledger_entry))) {
5292 panic("%s: size mismatch, expected %lu, "
5293 "size=%lu",
5294 __func__, PMAP_LEDGER_DATA_BYTES,
5295 size);
5296 }
5297
5298 pmap_ledger_alloc_initialized = true;
5299
5300 pmap_simple_unlock(&pmap_ledger_lock);
5301 }
5302
5303 MARK_AS_PMAP_TEXT static ledger_t
5304 pmap_ledger_alloc_internal(void)
5305 {
5306 pmap_paddr_t paddr;
5307 uint64_t vaddr, vstart, vend;
5308 uint64_t index;
5309
5310 ledger_t new_ledger;
5311 uint64_t array_index;
5312
5313 pmap_simple_lock(&pmap_ledger_lock);
5314 if (pmap_ledger_free_list == NULL) {
5315 paddr = pmap_get_free_ppl_page();
5316
5317 if (paddr == 0) {
5318 pmap_simple_unlock(&pmap_ledger_lock);
5319 return NULL;
5320 }
5321
5322 vstart = phystokv(paddr);
5323 vend = vstart + PAGE_SIZE;
5324
5325 for (vaddr = vstart; (vaddr < vend) && ((vaddr + sizeof(pmap_ledger_t)) <= vend); vaddr += sizeof(pmap_ledger_t)) {
5326 pmap_ledger_t *free_ledger;
5327
5328 index = pmap_ledger_ptr_array_free_index++;
5329
5330 if (index >= MAX_PMAP_LEDGERS) {
5331 panic("%s: pmap_ledger_ptr_array is full, index=%llu",
5332 __func__, index);
5333 }
5334
5335 free_ledger = (pmap_ledger_t*)vaddr;
5336
5337 pmap_ledger_ptr_array[index] = free_ledger;
5338 free_ledger->back_ptr = &pmap_ledger_ptr_array[index];
5339
5340 free_ledger->next = pmap_ledger_free_list;
5341 pmap_ledger_free_list = free_ledger;
5342 }
5343
5344 pa_set_range_xprr_perm(paddr, paddr + PAGE_SIZE, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
5345 }
5346
5347 new_ledger = (ledger_t)pmap_ledger_free_list;
5348 pmap_ledger_free_list = pmap_ledger_free_list->next;
5349
5350 array_index = pmap_ledger_validate(new_ledger);
5351 os_ref_init(&pmap_ledger_refcnt[array_index], NULL);
5352
5353 pmap_simple_unlock(&pmap_ledger_lock);
5354
5355 return new_ledger;
5356 }
5357
5358 MARK_AS_PMAP_TEXT static void
5359 pmap_ledger_free_internal(ledger_t ledger)
5360 {
5361 pmap_ledger_t* free_ledger;
5362
5363 free_ledger = (pmap_ledger_t*)ledger;
5364
5365 pmap_simple_lock(&pmap_ledger_lock);
5366 uint64_t array_index = pmap_ledger_validate(ledger);
5367
5368 if (os_ref_release(&pmap_ledger_refcnt[array_index]) != 0) {
5369 panic("%s: ledger still referenced, "
5370 "ledger=%p",
5371 __func__,
5372 ledger);
5373 }
5374
5375 free_ledger->next = pmap_ledger_free_list;
5376 pmap_ledger_free_list = free_ledger;
5377 pmap_simple_unlock(&pmap_ledger_lock);
5378 }
5379
5380
5381 static void
5382 pmap_ledger_retain(ledger_t ledger)
5383 {
5384 pmap_simple_lock(&pmap_ledger_lock);
5385 uint64_t array_index = pmap_ledger_validate(ledger);
5386 os_ref_retain(&pmap_ledger_refcnt[array_index]);
5387 pmap_simple_unlock(&pmap_ledger_lock);
5388 }
5389
5390 static void
5391 pmap_ledger_release(ledger_t ledger)
5392 {
5393 pmap_simple_lock(&pmap_ledger_lock);
5394 uint64_t array_index = pmap_ledger_validate(ledger);
5395 os_ref_release_live(&pmap_ledger_refcnt[array_index]);
5396 pmap_simple_unlock(&pmap_ledger_lock);
5397 }
5398
5399 void
5400 pmap_ledger_alloc_init(size_t size)
5401 {
5402 pmap_ledger_alloc_init_ppl(size);
5403 }
5404
5405 ledger_t
5406 pmap_ledger_alloc(void)
5407 {
5408 ledger_t retval = NULL;
5409
5410 while ((retval = pmap_ledger_alloc_ppl()) == NULL) {
5411 pmap_alloc_page_for_ppl(0);
5412 }
5413
5414 return retval;
5415 }
5416
5417 void
5418 pmap_ledger_free(ledger_t ledger)
5419 {
5420 pmap_ledger_free_ppl(ledger);
5421 }
5422 #else /* XNU_MONITOR */
5423 __dead2
5424 void
5425 pmap_ledger_alloc_init(size_t size)
5426 {
5427 panic("%s: unsupported, "
5428 "size=%lu",
5429 __func__, size);
5430 }
5431
5432 __dead2
5433 ledger_t
5434 pmap_ledger_alloc(void)
5435 {
5436 panic("%s: unsupported",
5437 __func__);
5438 }
5439
5440 __dead2
5441 void
5442 pmap_ledger_free(ledger_t ledger)
5443 {
5444 panic("%s: unsupported, "
5445 "ledger=%p",
5446 __func__, ledger);
5447 }
5448 #endif /* XNU_MONITOR */
5449
5450 static vm_size_t
5451 pmap_root_alloc_size(pmap_t pmap)
5452 {
5453 #if (__ARM_VMSA__ > 7)
5454 #pragma unused(pmap)
5455 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5456 unsigned int root_level = pt_attr_root_level(pt_attr);
5457 return ((pt_attr_ln_index_mask(pt_attr, root_level) >> pt_attr_ln_shift(pt_attr, root_level)) + 1) * sizeof(tt_entry_t);
5458 #else
5459 (void)pmap;
5460 return PMAP_ROOT_ALLOC_SIZE;
5461 #endif
5462 }
5463
5464 /*
5465 * Create and return a physical map.
5466 *
5467 * If the size specified for the map
5468 * is zero, the map is an actual physical
5469 * map, and may be referenced by the
5470 * hardware.
5471 *
5472 * If the size specified is non-zero,
5473 * the map will be used in software only, and
5474 * is bounded by that size.
5475 */
5476 MARK_AS_PMAP_TEXT static pmap_t
5477 pmap_create_options_internal(
5478 ledger_t ledger,
5479 vm_map_size_t size,
5480 unsigned int flags,
5481 kern_return_t *kr)
5482 {
5483 unsigned i;
5484 unsigned tte_index_max;
5485 pmap_t p;
5486 bool is_64bit = flags & PMAP_CREATE_64BIT;
5487 #if defined(HAS_APPLE_PAC)
5488 bool disable_jop = flags & PMAP_CREATE_DISABLE_JOP;
5489 #endif /* defined(HAS_APPLE_PAC) */
5490 kern_return_t local_kr = KERN_SUCCESS;
5491
5492 /*
5493 * A software use-only map doesn't even need a pmap.
5494 */
5495 if (size != 0) {
5496 return PMAP_NULL;
5497 }
5498
5499 if (0 != (flags & ~PMAP_CREATE_KNOWN_FLAGS)) {
5500 return PMAP_NULL;
5501 }
5502
5503 #if XNU_MONITOR
5504 if ((p = pmap_alloc_pmap()) == PMAP_NULL) {
5505 local_kr = KERN_NO_SPACE;
5506 goto pmap_create_fail;
5507 }
5508
5509 if (ledger) {
5510 pmap_ledger_validate(ledger);
5511 pmap_ledger_retain(ledger);
5512 }
5513 #else
5514 /*
5515 * Allocate a pmap struct from the pmap_zone. Then allocate
5516 * the translation table of the right size for the pmap.
5517 */
5518 if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
5519 local_kr = KERN_RESOURCE_SHORTAGE;
5520 goto pmap_create_fail;
5521 }
5522 #endif
5523
5524 p->ledger = ledger;
5525
5526
5527 p->pmap_vm_map_cs_enforced = false;
5528
5529 if (flags & PMAP_CREATE_64BIT) {
5530 p->min = MACH_VM_MIN_ADDRESS;
5531 p->max = MACH_VM_MAX_ADDRESS;
5532 } else {
5533 p->min = VM_MIN_ADDRESS;
5534 p->max = VM_MAX_ADDRESS;
5535 }
5536 #if defined(HAS_APPLE_PAC)
5537 p->disable_jop = disable_jop;
5538 #endif /* defined(HAS_APPLE_PAC) */
5539
5540 p->nested_region_true_start = 0;
5541 p->nested_region_true_end = ~0;
5542
5543 os_atomic_init(&p->ref_count, 1);
5544 p->gc_status = 0;
5545 p->stamp = os_atomic_inc(&pmap_stamp, relaxed);
5546 p->nx_enabled = TRUE;
5547 p->is_64bit = is_64bit;
5548 p->nested = FALSE;
5549 p->nested_pmap = PMAP_NULL;
5550
5551 #if ARM_PARAMETERIZED_PMAP
5552 /* Default to the native pt_attr */
5553 p->pmap_pt_attr = native_pt_attr;
5554 #endif /* ARM_PARAMETERIZED_PMAP */
5555 #if __ARM_MIXED_PAGE_SIZE__
5556 if (flags & PMAP_CREATE_FORCE_4K_PAGES) {
5557 p->pmap_pt_attr = &pmap_pt_attr_4k;
5558 }
5559 #endif /* __ARM_MIXED_PAGE_SIZE__ */
5560
5561 if (!pmap_get_pt_ops(p)->alloc_id(p)) {
5562 local_kr = KERN_NO_SPACE;
5563 goto id_alloc_fail;
5564 }
5565
5566 pmap_lock_init(p);
5567 memset((void *) &p->stats, 0, sizeof(p->stats));
5568
5569 p->tt_entry_free = (tt_entry_t *)0;
5570 tte_index_max = ((unsigned)pmap_root_alloc_size(p) / sizeof(tt_entry_t));
5571
5572 #if (__ARM_VMSA__ == 7)
5573 p->tte_index_max = tte_index_max;
5574 #endif
5575
5576 #if XNU_MONITOR
5577 p->tte = pmap_tt1_allocate(p, pmap_root_alloc_size(p), PMAP_TT_ALLOCATE_NOWAIT);
5578 #else
5579 p->tte = pmap_tt1_allocate(p, pmap_root_alloc_size(p), 0);
5580 #endif
5581 if (!(p->tte)) {
5582 local_kr = KERN_RESOURCE_SHORTAGE;
5583 goto tt1_alloc_fail;
5584 }
5585
5586 p->ttep = ml_static_vtop((vm_offset_t)p->tte);
5587 PMAP_TRACE(4, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);
5588
5589 /* nullify the translation table */
5590 for (i = 0; i < tte_index_max; i++) {
5591 p->tte[i] = ARM_TTE_TYPE_FAULT;
5592 }
5593
5594 FLUSH_PTE_RANGE(p->tte, p->tte + tte_index_max);
5595
5596 /*
5597 * initialize the rest of the structure
5598 */
5599 p->nested_region_addr = 0x0ULL;
5600 p->nested_region_size = 0x0ULL;
5601 p->nested_region_asid_bitmap = NULL;
5602 p->nested_region_asid_bitmap_size = 0x0UL;
5603
5604 p->nested_has_no_bounds_ref = false;
5605 p->nested_no_bounds_refcnt = 0;
5606 p->nested_bounds_set = false;
5607
5608
5609 #if MACH_ASSERT
5610 p->pmap_stats_assert = TRUE;
5611 p->pmap_pid = 0;
5612 strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
5613 #endif /* MACH_ASSERT */
5614 #if DEVELOPMENT || DEBUG
5615 p->footprint_was_suspended = FALSE;
5616 #endif /* DEVELOPMENT || DEBUG */
5617
5618 pmap_simple_lock(&pmaps_lock);
5619 queue_enter(&map_pmap_list, p, pmap_t, pmaps);
5620 pmap_simple_unlock(&pmaps_lock);
5621
5622 return p;
5623
5624 tt1_alloc_fail:
5625 pmap_get_pt_ops(p)->free_id(p);
5626 id_alloc_fail:
5627 #if XNU_MONITOR
5628 pmap_free_pmap(p);
5629
5630 if (ledger) {
5631 pmap_ledger_release(ledger);
5632 }
5633 #else
5634 zfree(pmap_zone, p);
5635 #endif
5636 pmap_create_fail:
5637 #if XNU_MONITOR
5638 pmap_pin_kernel_pages((vm_offset_t)kr, sizeof(*kr));
5639 #endif
5640 *kr = local_kr;
5641 #if XNU_MONITOR
5642 pmap_unpin_kernel_pages((vm_offset_t)kr, sizeof(*kr));
5643 #endif
5644 return PMAP_NULL;
5645 }
5646
5647 pmap_t
5648 pmap_create_options(
5649 ledger_t ledger,
5650 vm_map_size_t size,
5651 unsigned int flags)
5652 {
5653 pmap_t pmap;
5654 kern_return_t kr = KERN_SUCCESS;
5655
5656 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);
5657
5658 ledger_reference(ledger);
5659
5660 #if XNU_MONITOR
5661 for (;;) {
5662 pmap = pmap_create_options_ppl(ledger, size, flags, &kr);
5663 if (kr != KERN_RESOURCE_SHORTAGE) {
5664 break;
5665 }
5666 assert(pmap == PMAP_NULL);
5667 pmap_alloc_page_for_ppl(0);
5668 kr = KERN_SUCCESS;
5669 }
5670 #else
5671 pmap = pmap_create_options_internal(ledger, size, flags, &kr);
5672 #endif
5673
5674 if (pmap == PMAP_NULL) {
5675 ledger_dereference(ledger);
5676 }
5677
5678 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
5679
5680 return pmap;
5681 }
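/*
 * Illustrative usage sketch for pmap_create_options() (hypothetical caller,
 * not part of this file), given a ledger `l` already obtained for the
 * owning task:
 *
 *	pmap_t user_pmap = pmap_create_options(l, 0, PMAP_CREATE_64BIT);
 *	if (user_pmap == PMAP_NULL) {
 *		// creation failed (e.g. unknown flags or resource shortage)
 *	}
 *	...
 *	pmap_destroy(user_pmap);
 *
 * Note that a non-zero size requests a software use-only map, for which
 * pmap_create_options_internal() simply returns PMAP_NULL.
 */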
5682
5683 #if XNU_MONITOR
5684 /*
5685 * This symbol remains in place when the PPL is enabled so that the dispatch
5686 * table does not change from development to release configurations.
5687 */
5688 #endif
5689 #if MACH_ASSERT || XNU_MONITOR
5690 MARK_AS_PMAP_TEXT static void
5691 pmap_set_process_internal(
5692 __unused pmap_t pmap,
5693 __unused int pid,
5694 __unused char *procname)
5695 {
5696 #if MACH_ASSERT
5697 if (pmap == NULL) {
5698 return;
5699 }
5700
5701 VALIDATE_PMAP(pmap);
5702
5703 pmap->pmap_pid = pid;
5704 strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
5705 if (pmap_ledgers_panic_leeway) {
5706 /*
5707 * XXX FBDP
5708 * Some processes somehow trigger some issues that make
5709 * the pmap stats and ledgers go off track, causing
5710 * some assertion failures and ledger panics.
5711 * Turn off the sanity checks if we allow some ledger leeway
5712 * because of that. We'll still do a final check in
5713 * pmap_check_ledgers() for discrepancies larger than the
5714 * allowed leeway after the address space has been fully
5715 * cleaned up.
5716 */
5717 pmap->pmap_stats_assert = FALSE;
5718 ledger_disable_panic_on_negative(pmap->ledger,
5719 task_ledgers.phys_footprint);
5720 ledger_disable_panic_on_negative(pmap->ledger,
5721 task_ledgers.internal);
5722 ledger_disable_panic_on_negative(pmap->ledger,
5723 task_ledgers.internal_compressed);
5724 ledger_disable_panic_on_negative(pmap->ledger,
5725 task_ledgers.iokit_mapped);
5726 ledger_disable_panic_on_negative(pmap->ledger,
5727 task_ledgers.alternate_accounting);
5728 ledger_disable_panic_on_negative(pmap->ledger,
5729 task_ledgers.alternate_accounting_compressed);
5730 }
5731 #endif /* MACH_ASSERT */
5732 }
5733 #endif /* MACH_ASSERT || XNU_MONITOR */
5734
5735 #if MACH_ASSERT
5736 void
5737 pmap_set_process(
5738 pmap_t pmap,
5739 int pid,
5740 char *procname)
5741 {
5742 #if XNU_MONITOR
5743 pmap_set_process_ppl(pmap, pid, procname);
5744 #else
5745 pmap_set_process_internal(pmap, pid, procname);
5746 #endif
5747 }
5748 #endif /* MACH_ASSERT */
5749
5750 #if (__ARM_VMSA__ > 7)
5751 /*
5752 * pmap_deallocate_all_leaf_tts:
5753 *
5754 * Recursive function for deallocating all leaf TTEs. Walks the given TT,
5755 * removing and deallocating all TTEs.
5756 */
5757 MARK_AS_PMAP_TEXT static void
5758 pmap_deallocate_all_leaf_tts(pmap_t pmap, tt_entry_t * first_ttep, unsigned level)
5759 {
5760 tt_entry_t tte = ARM_TTE_EMPTY;
5761 tt_entry_t * ttep = NULL;
5762 tt_entry_t * last_ttep = NULL;
5763
5764 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5765
5766 assert(level < pt_attr_leaf_level(pt_attr));
5767
5768 last_ttep = &first_ttep[ttn_index(pmap, pt_attr, ~0, level)];
5769
5770 for (ttep = first_ttep; ttep <= last_ttep; ttep++) {
5771 tte = *ttep;
5772
5773 if (!(tte & ARM_TTE_VALID)) {
5774 continue;
5775 }
5776
5777 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
5778 panic("%s: found block mapping, ttep=%p, tte=%p, "
5779 "pmap=%p, first_ttep=%p, level=%u",
5780 __FUNCTION__, ttep, (void *)tte,
5781 pmap, first_ttep, level);
5782 }
5783
5784 /* Must be valid, type table */
5785 if (level < pt_attr_twig_level(pt_attr)) {
5786 /* If we haven't reached the twig level, recurse to the next level. */
5787 pmap_deallocate_all_leaf_tts(pmap, (tt_entry_t *)phystokv((tte) & ARM_TTE_TABLE_MASK), level + 1);
5788 }
5789
5790 /* Remove the TTE. */
5791 pmap_lock(pmap);
5792 pmap_tte_deallocate(pmap, ttep, level);
5793 pmap_unlock(pmap);
5794 }
5795 }
5796 #endif /* (__ARM_VMSA__ > 7) */
5797
5798 /*
5799 * We maintain stats and ledgers so that a task's physical footprint is:
5800 * phys_footprint = ((internal - alternate_accounting)
5801 * + (internal_compressed - alternate_accounting_compressed)
5802 * + iokit_mapped
5803 * + purgeable_nonvolatile
5804 * + purgeable_nonvolatile_compressed
5805 * + page_table)
5806 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
5807 */
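/*
 * Worked example with illustrative (made-up) page counts: internal=100,
 * alternate_accounting=20, internal_compressed=30,
 * alternate_accounting_compressed=5, iokit_mapped=10,
 * purgeable_nonvolatile=0, purgeable_nonvolatile_compressed=0, page_table=4
 * => phys_footprint = (100 - 20) + (30 - 5) + 10 + 0 + 0 + 4 = 119 pages.
 */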
5808
5809 /*
5810 * Retire the given physical map from service.
5811 * Should only be called if the map contains
5812 * no valid mappings.
5813 */
5814 MARK_AS_PMAP_TEXT static void
5815 pmap_destroy_internal(
5816 pmap_t pmap)
5817 {
5818 if (pmap == PMAP_NULL) {
5819 return;
5820 }
5821
5822 VALIDATE_PMAP(pmap);
5823
5824 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5825
5826 int32_t ref_count = os_atomic_dec(&pmap->ref_count, relaxed);
5827 if (ref_count > 0) {
5828 return;
5829 } else if (ref_count < 0) {
5830 panic("pmap %p: refcount underflow", pmap);
5831 } else if (pmap == kernel_pmap) {
5832 panic("pmap %p: attempt to destroy kernel pmap", pmap);
5833 }
5834
5835 #if (__ARM_VMSA__ > 7)
5836 pmap_unmap_sharedpage(pmap);
5837 #endif /* (__ARM_VMSA__ > 7) */
5838
5839 pmap_simple_lock(&pmaps_lock);
5840 while (pmap->gc_status & PMAP_GC_INFLIGHT) {
5841 pmap->gc_status |= PMAP_GC_WAIT;
5842 assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
5843 pmap_simple_unlock(&pmaps_lock);
5844 (void) thread_block(THREAD_CONTINUE_NULL);
5845 pmap_simple_lock(&pmaps_lock);
5846 }
5847 queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
5848 pmap_simple_unlock(&pmaps_lock);
5849
5850 pmap_trim_self(pmap);
5851
5852 /*
5853 * Free the memory maps, then the
5854 * pmap structure.
5855 */
5856 #if (__ARM_VMSA__ == 7)
5857 unsigned int i = 0;
5858 pt_entry_t *ttep;
5859
5860 pmap_lock(pmap);
5861 for (i = 0; i < pmap->tte_index_max; i++) {
5862 ttep = &pmap->tte[i];
5863 if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5864 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
5865 }
5866 }
5867 pmap_unlock(pmap);
5868 #else /* (__ARM_VMSA__ == 7) */
5869 pmap_deallocate_all_leaf_tts(pmap, pmap->tte, pt_attr_root_level(pt_attr));
5870 #endif /* (__ARM_VMSA__ == 7) */
5871
5872
5873
5874 if (pmap->tte) {
5875 #if (__ARM_VMSA__ == 7)
5876 pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
5877 pmap->tte_index_max = 0;
5878 #else /* (__ARM_VMSA__ == 7) */
5879 pmap_tt1_deallocate(pmap, pmap->tte, pmap_root_alloc_size(pmap), 0);
5880 #endif /* (__ARM_VMSA__ == 7) */
5881 pmap->tte = (tt_entry_t *) NULL;
5882 pmap->ttep = 0;
5883 }
5884
5885 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
5886
5887 if (__improbable(pmap->nested)) {
5888 pmap_get_pt_ops(pmap)->flush_tlb_region_async(pmap->nested_region_addr, pmap->nested_region_size, pmap);
5889 sync_tlb_flush();
5890 } else {
5891 pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
5892 sync_tlb_flush();
5893 /* return its asid to the pool */
5894 pmap_get_pt_ops(pmap)->free_id(pmap);
5895 /* release the reference we hold on the nested pmap */
5896 pmap_destroy_internal(pmap->nested_pmap);
5897 }
5898
5899 pmap_check_ledgers(pmap);
5900
5901 if (pmap->nested_region_asid_bitmap) {
5902 #if XNU_MONITOR
5903 pmap_pages_free(kvtophys((vm_offset_t)(pmap->nested_region_asid_bitmap)), PAGE_SIZE);
5904 #else
5905 kheap_free(KHEAP_DATA_BUFFERS, pmap->nested_region_asid_bitmap,
5906 pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
5907 #endif
5908 }
5909
5910 #if XNU_MONITOR
5911 if (pmap->ledger) {
5912 pmap_ledger_release(pmap->ledger);
5913 }
5914
5915 pmap_lock_destroy(pmap);
5916 pmap_free_pmap(pmap);
5917 #else
5918 pmap_lock_destroy(pmap);
5919 zfree(pmap_zone, pmap);
5920 #endif
5921 }
5922
5923 void
5924 pmap_destroy(
5925 pmap_t pmap)
5926 {
5927 ledger_t ledger;
5928
5929 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
5930
5931 ledger = pmap->ledger;
5932
5933 #if XNU_MONITOR
5934 pmap_destroy_ppl(pmap);
5935
5936 pmap_check_ledger_fields(ledger);
5937 #else
5938 pmap_destroy_internal(pmap);
5939 #endif
5940
5941 ledger_dereference(ledger);
5942
5943 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
5944 }
5945
5946
5947 /*
5948 * Add a reference to the specified pmap.
5949 */
5950 MARK_AS_PMAP_TEXT static void
5951 pmap_reference_internal(
5952 pmap_t pmap)
5953 {
5954 if (pmap != PMAP_NULL) {
5955 VALIDATE_PMAP(pmap);
5956 os_atomic_inc(&pmap->ref_count, relaxed);
5957 }
5958 }
5959
5960 void
5961 pmap_reference(
5962 pmap_t pmap)
5963 {
5964 #if XNU_MONITOR
5965 pmap_reference_ppl(pmap);
5966 #else
5967 pmap_reference_internal(pmap);
5968 #endif
5969 }
5970
5971 static tt_entry_t *
5972 pmap_tt1_allocate(
5973 pmap_t pmap,
5974 vm_size_t size,
5975 unsigned option)
5976 {
5977 tt_entry_t *tt1 = NULL;
5978 tt_free_entry_t *tt1_free;
5979 pmap_paddr_t pa;
5980 vm_address_t va;
5981 vm_address_t va_end;
5982 kern_return_t ret;
5983
5984 if ((size < PAGE_SIZE) && (size != PMAP_ROOT_ALLOC_SIZE)) {
5985 size = PAGE_SIZE;
5986 }
5987
5988 pmap_simple_lock(&tt1_lock);
5989 if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
5990 free_page_size_tt_count--;
5991 tt1 = (tt_entry_t *)free_page_size_tt_list;
5992 free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
5993 } else if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
5994 free_two_page_size_tt_count--;
5995 tt1 = (tt_entry_t *)free_two_page_size_tt_list;
5996 free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
5997 } else if ((size < PAGE_SIZE) && (free_tt_count != 0)) {
5998 free_tt_count--;
5999 tt1 = (tt_entry_t *)free_tt_list;
6000 free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
6001 }
6002
6003 pmap_simple_unlock(&tt1_lock);
6004
6005 if (tt1 != NULL) {
6006 pmap_tt_ledger_credit(pmap, size);
6007 return (tt_entry_t *)tt1;
6008 }
6009
6010 ret = pmap_pages_alloc_zeroed(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));
6011
6012 if (ret == KERN_RESOURCE_SHORTAGE) {
6013 return (tt_entry_t *)0;
6014 }
6015
6016 #if XNU_MONITOR
6017 assert(pa);
6018 #endif
6019
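/*
 * If the requested root table is smaller than a page, carve the rest of the
 * freshly allocated page into same-sized chunks and chain them onto the
 * global free_tt_list so that subsequent allocations can reuse them.
 */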
6020 if (size < PAGE_SIZE) {
6021 va = phystokv(pa) + size;
6022 tt_free_entry_t *local_free_list = (tt_free_entry_t*)va;
6023 tt_free_entry_t *next_free = NULL;
6024 for (va_end = phystokv(pa) + PAGE_SIZE; va < va_end; va = va + size) {
6025 tt1_free = (tt_free_entry_t *)va;
6026 tt1_free->next = next_free;
6027 next_free = tt1_free;
6028 }
6029 pmap_simple_lock(&tt1_lock);
6030 local_free_list->next = free_tt_list;
6031 free_tt_list = next_free;
6032 free_tt_count += ((PAGE_SIZE / size) - 1);
6033 if (free_tt_count > free_tt_max) {
6034 free_tt_max = free_tt_count;
6035 }
6036 pmap_simple_unlock(&tt1_lock);
6037 }
6038
6039 /* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained by sysctl arm_pt_root_size.
6040 * Depending on the device, this can vary between 512 bytes and 16KB. */
6041 OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
6042 OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
6043 pmap_tt_ledger_credit(pmap, size);
6044
6045 return (tt_entry_t *) phystokv(pa);
6046 }
6047
6048 static void
6049 pmap_tt1_deallocate(
6050 pmap_t pmap,
6051 tt_entry_t *tt,
6052 vm_size_t size,
6053 unsigned option)
6054 {
6055 tt_free_entry_t *tt_entry;
6056
6057 if ((size < PAGE_SIZE) && (size != PMAP_ROOT_ALLOC_SIZE)) {
6058 size = PAGE_SIZE;
6059 }
6060
6061 tt_entry = (tt_free_entry_t *)tt;
6062 assert(not_in_kdp);
6063 pmap_simple_lock(&tt1_lock);
6064
6065 if (size < PAGE_SIZE) {
6066 free_tt_count++;
6067 if (free_tt_count > free_tt_max) {
6068 free_tt_max = free_tt_count;
6069 }
6070 tt_entry->next = free_tt_list;
6071 free_tt_list = tt_entry;
6072 }
6073
6074 if (size == PAGE_SIZE) {
6075 free_page_size_tt_count++;
6076 if (free_page_size_tt_count > free_page_size_tt_max) {
6077 free_page_size_tt_max = free_page_size_tt_count;
6078 }
6079 tt_entry->next = free_page_size_tt_list;
6080 free_page_size_tt_list = tt_entry;
6081 }
6082
6083 if (size == 2 * PAGE_SIZE) {
6084 free_two_page_size_tt_count++;
6085 if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
6086 free_two_page_size_tt_max = free_two_page_size_tt_count;
6087 }
6088 tt_entry->next = free_two_page_size_tt_list;
6089 free_two_page_size_tt_list = tt_entry;
6090 }
6091
6092 if (option & PMAP_TT_DEALLOCATE_NOBLOCK) {
6093 pmap_simple_unlock(&tt1_lock);
6094 pmap_tt_ledger_debit(pmap, size);
6095 return;
6096 }
6097
6098 while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
6099 free_page_size_tt_count--;
6100 tt = (tt_entry_t *)free_page_size_tt_list;
6101 free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
6102
6103 pmap_simple_unlock(&tt1_lock);
6104
6105 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);
6106
6107 OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
6108
6109 pmap_simple_lock(&tt1_lock);
6110 }
6111
6112 while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
6113 free_two_page_size_tt_count--;
6114 tt = (tt_entry_t *)free_two_page_size_tt_list;
6115 free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
6116
6117 pmap_simple_unlock(&tt1_lock);
6118
6119 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);
6120
6121 OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
6122
6123 pmap_simple_lock(&tt1_lock);
6124 }
6125 pmap_simple_unlock(&tt1_lock);
6126 pmap_tt_ledger_debit(pmap, size);
6127 }
6128
6129 static kern_return_t
6130 pmap_tt_allocate(
6131 pmap_t pmap,
6132 tt_entry_t **ttp,
6133 unsigned int level,
6134 unsigned int options)
6135 {
6136 pmap_paddr_t pa;
6137 *ttp = NULL;
6138
6139 pmap_lock(pmap);
6140 if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
6141 tt_free_entry_t *tt_free_cur, *tt_free_next;
6142
6143 tt_free_cur = ((tt_free_entry_t *)pmap->tt_entry_free);
6144 tt_free_next = tt_free_cur->next;
6145 tt_free_cur->next = NULL;
6146 *ttp = (tt_entry_t *)tt_free_cur;
6147 pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
6148 }
6149 pmap_unlock(pmap);
6150
6151 if (*ttp == NULL) {
6152 pt_desc_t *ptdp;
6153
6154 /*
6155 * Allocate a VM page for the level x page table entries.
6156 */
6157 while (pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6158 if (options & PMAP_OPTIONS_NOWAIT) {
6159 return KERN_RESOURCE_SHORTAGE;
6160 }
6161 VM_PAGE_WAIT();
6162 }
6163
6164 while ((ptdp = ptd_alloc(pmap)) == NULL) {
6165 if (options & PMAP_OPTIONS_NOWAIT) {
6166 pmap_pages_free(pa, PAGE_SIZE);
6167 return KERN_RESOURCE_SHORTAGE;
6168 }
6169 VM_PAGE_WAIT();
6170 }
6171
6172 if (level < pt_attr_leaf_level(pmap_get_pt_attr(pmap))) {
6173 OSAddAtomic64(1, &alloc_ttepages_count);
6174 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
6175 } else {
6176 OSAddAtomic64(1, &alloc_ptepages_count);
6177 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
6178 }
6179
6180 pmap_tt_ledger_credit(pmap, PAGE_SIZE);
6181
6182 PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);
6183
6184 pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);
6185
6186 uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));
6187 if (PAGE_SIZE > pmap_page_size) {
6188 vm_address_t va;
6189 vm_address_t va_end;
6190
6191 pmap_lock(pmap);
6192
6193 for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + pmap_page_size; va < va_end; va = va + pmap_page_size) {
6194 ((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
6195 pmap->tt_entry_free = (tt_entry_t *)va;
6196 }
6197 pmap_unlock(pmap);
6198 }
6199
6200 *ttp = (tt_entry_t *)phystokv(pa);
6201 }
6202
6203 #if XNU_MONITOR
6204 assert(*ttp);
6205 #endif
6206
6207 return KERN_SUCCESS;
6208 }
6209
6210
6211 static void
6212 pmap_tt_deallocate(
6213 pmap_t pmap,
6214 tt_entry_t *ttp,
6215 unsigned int level)
6216 {
6217 pt_desc_t *ptdp;
6218 ptd_info_t *ptd_info;
6219 unsigned pt_acc_cnt;
6220 unsigned i;
6221 vm_offset_t free_page = 0;
6222 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6223 unsigned max_pt_index = PAGE_SIZE / pt_attr_page_size(pt_attr);
6224
6225 pmap_lock(pmap);
6226
6227 ptdp = ptep_get_ptd((vm_offset_t)ttp);
6228 ptd_info = ptd_get_info(ptdp, ttp);
6229
6230 ptd_info->va = (vm_offset_t)-1;
6231
6232 if ((level < pt_attr_leaf_level(pt_attr)) && (ptd_info->refcnt == PT_DESC_REFCOUNT)) {
6233 ptd_info->refcnt = 0;
6234 }
6235
6236 if (ptd_info->refcnt != 0) {
6237 panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptd_info->refcnt);
6238 }
6239
6240 ptd_info->refcnt = 0;
6241
6242 for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
6243 pt_acc_cnt += ptdp->ptd_info[i].refcnt;
6244 }
6245
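/*
 * If no page table chunk on this physical page holds any mappings, check
 * whether every other chunk of the page is already on the pmap's
 * tt_entry_free list; if so, unlink them and hand the whole page back to
 * the system below, otherwise just push this chunk onto the free list.
 */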
6246 if (pt_acc_cnt == 0) {
6247 tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
6248 unsigned pt_free_entry_cnt = 1;
6249
6250 while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
6251 tt_free_entry_t *tt_free_list_next;
6252
6253 tt_free_list_next = tt_free_list->next;
6254 if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
6255 pt_free_entry_cnt++;
6256 }
6257 tt_free_list = tt_free_list_next;
6258 }
6259 if (pt_free_entry_cnt == max_pt_index) {
6260 tt_free_entry_t *tt_free_list_cur;
6261
6262 free_page = (vm_offset_t)ttp & ~PAGE_MASK;
6263 tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
6264 tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;
6265
6266 while (tt_free_list_cur) {
6267 tt_free_entry_t *tt_free_list_next;
6268
6269 tt_free_list_next = tt_free_list_cur->next;
6270 if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
6271 tt_free_list->next = tt_free_list_next->next;
6272 } else {
6273 tt_free_list = tt_free_list_next;
6274 }
6275 tt_free_list_cur = tt_free_list_next;
6276 }
6277 } else {
6278 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
6279 pmap->tt_entry_free = ttp;
6280 }
6281 } else {
6282 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
6283 pmap->tt_entry_free = ttp;
6284 }
6285
6286 pmap_unlock(pmap);
6287
6288 if (free_page != 0) {
6289 ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
6290 *(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
6291 pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
6292 if (level < pt_attr_leaf_level(pt_attr)) {
6293 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
6294 } else {
6295 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
6296 }
6297 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
6298 pmap_tt_ledger_debit(pmap, PAGE_SIZE);
6299 }
6300 }
6301
6302 /**
6303 * Safely clear out a translation table entry.
6304 *
6305 * @note If the TTE to clear out points to a leaf table, then that leaf table
6306 * must have a refcnt of zero before the TTE can be removed.
6307 *
6308 * @param pmap The pmap containing the page table whose TTE is being removed.
6309 * @param ttep Pointer to the TTE that should be cleared out.
6310 * @param level The level of the page table that contains the TTE to be removed.
6311 */
6312 static void
6313 pmap_tte_remove(
6314 pmap_t pmap,
6315 tt_entry_t *ttep,
6316 unsigned int level)
6317 {
6318 tt_entry_t tte = *ttep;
6319
6320 if (__improbable(tte == 0)) {
6321 panic("%s: null tt_entry ttep==%p", __func__, ttep);
6322 }
6323
6324 if (__improbable((level == pt_attr_twig_level(pmap_get_pt_attr(pmap))) &&
6325 (ptep_get_info((pt_entry_t*)ttetokv(tte))->refcnt != 0))) {
6326 panic("%s: non-zero pagetable refcount: pmap=%p ttep=%p ptd=%p refcnt=0x%x", __func__,
6327 pmap, ttep, tte_get_ptd(tte), ptep_get_info((pt_entry_t*)ttetokv(tte))->refcnt);
6328 }
6329
6330 #if (__ARM_VMSA__ == 7)
6331 {
6332 tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
6333 unsigned i;
6334
6335 for (i = 0; i < 4; i++, ttep_4M++) {
6336 *ttep_4M = (tt_entry_t) 0;
6337 }
6338 FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
6339 }
6340 #else
6341 *ttep = (tt_entry_t) 0;
6342 FLUSH_PTE_STRONG(ttep);
6343 #endif /* (__ARM_VMSA__ == 7) */
6344 }
6345
6346 /**
6347 * Given a pointer to an entry within a `level` page table, delete the
6348 * page table at `level` + 1 that is represented by that entry. For instance,
6349 * to delete an unused L3 table, `ttep` would be a pointer to the L2 entry that
6350 * contains the PA of the L3 table, and `level` would be "2".
6351 *
6352 * @note If the table getting deallocated is a leaf table, then that leaf table
6353 * must have a refcnt of zero before getting deallocated. All other levels
6354 * must have a refcnt of PT_DESC_REFCOUNT in their page table descriptor.
6355 *
6356 * @param pmap The pmap that owns the page table to be deallocated.
6357 * @param ttep Pointer to the `level` TTE to remove.
6358 * @param level The level of the table that contains an entry pointing to the
6359 * table to be removed. The deallocated page table will be a
6360 * `level` + 1 table (so if `level` is 2, then an L3 table will be
6361 * deleted).
6362 */
6363 static void
6364 pmap_tte_deallocate(
6365 pmap_t pmap,
6366 tt_entry_t *ttep,
6367 unsigned int level)
6368 {
6369 pmap_paddr_t pa;
6370 tt_entry_t tte;
6371
6372 pmap_assert_locked_w(pmap);
6373
6374 tte = *ttep;
6375
6376 #if MACH_ASSERT
6377 if (tte_get_ptd(tte)->pmap != pmap) {
6378 panic("%s: Passed in pmap doesn't own the page table to be deleted ptd=%p ptd->pmap=%p pmap=%p",
6379 __func__, tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
6380 }
6381 #endif /* MACH_ASSERT */
6382
6383 pmap_tte_remove(pmap, ttep, level);
6384
6385 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
6386 uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));
6387 #if MACH_ASSERT
6388 pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~(pmap_page_size - 1)));
6389
6390 for (unsigned i = 0; i < (pmap_page_size / sizeof(*pte_p)); i++, pte_p++) {
6391 if (__improbable(ARM_PTE_IS_COMPRESSED(*pte_p, pte_p))) {
6392 panic_plain("%s: Found compressed mapping in soon to be deleted "
6393 "L%d table tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx",
6394 __func__, level + 1, (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
6395 } else if (__improbable(((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT)) {
6396 panic_plain("%s: Found valid mapping in soon to be deleted L%d "
6397 "table tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx",
6398 __func__, level + 1, (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
6399 }
6400 }
6401 #endif /* MACH_ASSERT */
6402 pmap_unlock(pmap);
6403
6404 /* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
6405 * aligned on 1K boundaries. We clear the surrounding "chunk" of 4 TTEs above. */
6406 pa = tte_to_pa(tte) & ~(pmap_page_size - 1);
6407 pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
6408 pmap_lock(pmap);
6409 }
6410 }
6411
6412 /*
6413 * Remove a range of hardware page-table entries.
6414 * The entries given are the first (inclusive)
6415 * and last (exclusive) entries for the VM pages.
6416 * The virtual address is the va for the first pte.
6417 *
6418 * The pmap must be locked.
6419 * If the pmap is not the kernel pmap, the range must lie
6420 * entirely within one pte-page. This is NOT checked.
6421 * Assumes that the pte-page exists.
6422 *
6423 * Returns the number of PTEs changed, and sets *rmv_cnt
6424 * to the number of SPTEs changed.
6425 */
6426 static int
6427 pmap_remove_range(
6428 pmap_t pmap,
6429 vm_map_address_t va,
6430 pt_entry_t *bpte,
6431 pt_entry_t *epte,
6432 uint32_t *rmv_cnt)
6433 {
6434 bool need_strong_sync = false;
6435 int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
6436 &need_strong_sync, PMAP_OPTIONS_REMOVE);
6437 if (num_changed > 0) {
6438 PMAP_UPDATE_TLBS(pmap, va,
6439 va + (pt_attr_page_size(pmap_get_pt_attr(pmap)) * (epte - bpte)), need_strong_sync);
6440 }
6441 return num_changed;
6442 }
6443
6444
6445 #ifdef PVH_FLAG_EXEC
6446
6447 /*
6448 * Update the access protection bits of the physical aperture mapping for a page.
6449 * This is useful, for example, in guaranteeing that a verified executable page
6450 * has no writable mappings anywhere in the system, including the physical
6451 * aperture. flush_tlb_async can be set to true to avoid unnecessary TLB
6452 * synchronization overhead in cases where the call to this function is
6453 * guaranteed to be followed by other TLB operations.
6454 */
6455 static void
6456 pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
6457 {
6458 #if __ARM_PTE_PHYSMAP__
6459 ASSERT_PVH_LOCKED(pai);
6460 vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
6461 pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
6462
6463 pt_entry_t tmplate = *pte_p;
6464 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
6465 return;
6466 }
6467 tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
6468 #if (__ARM_VMSA__ > 7)
6469 if (tmplate & ARM_PTE_HINT_MASK) {
6470 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
6471 __func__, pte_p, (void *)kva, tmplate);
6472 }
6473 #endif
6474 WRITE_PTE_STRONG(pte_p, tmplate);
6475 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
6476 if (!flush_tlb_async) {
6477 sync_tlb_flush();
6478 }
6479 #endif
6480 }
6481
6482 #endif /* defined(PVH_FLAG_EXEC) */
6483
6484 static void
6485 pmap_remove_pv(
6486 pmap_t pmap,
6487 pt_entry_t *cpte,
6488 int pai,
6489 int *num_internal,
6490 int *num_alt_internal,
6491 int *num_reusable,
6492 int *num_external)
6493 {
6494 pv_entry_t **pv_h, **pve_pp;
6495 pv_entry_t *pve_p;
6496
6497 ASSERT_NOT_HIBERNATING();
6498 ASSERT_PVH_LOCKED(pai);
6499 pv_h = pai_to_pvh(pai);
6500 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
6501
6502 #if XNU_MONITOR
6503 if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
6504 panic("%d is locked down (%#lx), cannot remove", pai, pvh_flags);
6505 }
6506 #endif
6507
6508 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
6509 if (__improbable((cpte != pvh_ptep(pv_h)))) {
6510 panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
6511 }
6512 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
6513 assert(IS_INTERNAL_PAGE(pai));
6514 (*num_internal)++;
6515 (*num_alt_internal)++;
6516 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
6517 } else if (IS_INTERNAL_PAGE(pai)) {
6518 if (IS_REUSABLE_PAGE(pai)) {
6519 (*num_reusable)++;
6520 } else {
6521 (*num_internal)++;
6522 }
6523 } else {
6524 (*num_external)++;
6525 }
6526 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
6527 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
6528 pve_pp = pv_h;
6529 pve_p = pvh_list(pv_h);
6530
6531 while (pve_p != PV_ENTRY_NULL &&
6532 (pve_get_ptep(pve_p) != cpte)) {
6533 pve_pp = pve_link_field(pve_p);
6534 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
6535 }
6536
6537 if (__improbable((pve_p == PV_ENTRY_NULL))) {
6538 panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
6539 }
6540
6541 #if MACH_ASSERT
6542 if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
6543 pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
6544 while (check_pve_p != PV_ENTRY_NULL) {
6545 if (pve_get_ptep(check_pve_p) == cpte) {
6546 panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
6547 __func__, cpte, pmap, pv_h, pve_p, pai);
6548 }
6549 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
6550 }
6551 }
6552 #endif
6553
6554 if (IS_ALTACCT_PAGE(pai, pve_p)) {
6555 assert(IS_INTERNAL_PAGE(pai));
6556 (*num_internal)++;
6557 (*num_alt_internal)++;
6558 CLR_ALTACCT_PAGE(pai, pve_p);
6559 } else if (IS_INTERNAL_PAGE(pai)) {
6560 if (IS_REUSABLE_PAGE(pai)) {
6561 (*num_reusable)++;
6562 } else {
6563 (*num_internal)++;
6564 }
6565 } else {
6566 (*num_external)++;
6567 }
6568
6569 pvh_remove(pv_h, pve_pp, pve_p);
6570 pv_free_entry(pve_p);
6571 if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
6572 pvh_set_flags(pv_h, pvh_flags);
6573 }
6574 } else {
6575 panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
6576 __func__, *pv_h, cpte, pmap, pv_h, pai);
6577 }
6578
6579 #ifdef PVH_FLAG_EXEC
6580 if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
6581 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
6582 }
6583 #endif
6584 }
6585
6586 static int
6587 pmap_remove_range_options(
6588 pmap_t pmap,
6589 vm_map_address_t va,
6590 pt_entry_t *bpte,
6591 pt_entry_t *epte,
6592 uint32_t *rmv_cnt,
6593 bool *need_strong_sync __unused,
6594 int options)
6595 {
6596 pt_entry_t *cpte;
6597 int num_removed, num_unwired;
6598 int num_pte_changed;
6599 int pai = 0;
6600 pmap_paddr_t pa;
6601 int num_external, num_internal, num_reusable;
6602 int num_alt_internal;
6603 uint64_t num_compressed, num_alt_compressed;
6604
6605 pmap_assert_locked_w(pmap);
6606
6607 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6608 uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
6609
6610 if (__improbable((uintptr_t)epte > (((uintptr_t)bpte + pmap_page_size) & ~(pmap_page_size - 1)))) {
6611 panic("%s: PTE range [%p, %p) in pmap %p crosses page table boundary", __func__, bpte, epte, pmap);
6612 }
6613
6614 num_removed = 0;
6615 num_unwired = 0;
6616 num_pte_changed = 0;
6617 num_external = 0;
6618 num_internal = 0;
6619 num_reusable = 0;
6620 num_compressed = 0;
6621 num_alt_internal = 0;
6622 num_alt_compressed = 0;
6623
6624 for (cpte = bpte; cpte < epte;
6625 cpte += 1, va += pmap_page_size) {
6626 pt_entry_t spte;
6627 boolean_t managed = FALSE;
6628
6629 spte = *cpte;
6630
6631 #if CONFIG_PGTRACE
6632 if (pgtrace_enabled) {
6633 pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
6634 }
6635 #endif
6636
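/*
 * Determine whether this PTE maps a managed (pa_valid) page, taking the
 * PVH lock for its physical index. The PTE is re-read under the lock and
 * the loop retries if the physical page changed underneath us.
 */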
6637 while (!managed) {
6638 if (pmap != kernel_pmap &&
6639 (options & PMAP_OPTIONS_REMOVE) &&
6640 (ARM_PTE_IS_COMPRESSED(spte, cpte))) {
6641 /*
6642 * "pmap" must be locked at this point,
6643 * so this should not race with another
6644 * pmap_remove_range() or pmap_enter().
6645 */
6646
6647 /* one less "compressed"... */
6648 num_compressed++;
6649 if (spte & ARM_PTE_COMPRESSED_ALT) {
6650 /* ... but it used to be "ALTACCT" */
6651 num_alt_compressed++;
6652 }
6653
6654 /* clear marker */
6655 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
6656 /*
6657 * "refcnt" also accounts for
6658 * our "compressed" markers,
6659 * so let's update it here.
6660 */
6661 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_info(cpte)->refcnt)) <= 0) {
6662 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p", ptep_get_ptd(cpte), cpte);
6663 }
6664 spte = *cpte;
6665 }
6666 /*
6667 * It may be possible for the pte to transition from managed
6668 * to unmanaged in this timeframe; for now, elide the assert.
6669 * We should break out as a consequence of checking pa_valid.
6670 */
6671 //assert(!ARM_PTE_IS_COMPRESSED(spte));
6672 pa = pte_to_pa(spte);
6673 if (!pa_valid(pa)) {
6674 #if XNU_MONITOR
6675 unsigned int cacheattr = pmap_cache_attributes((ppnum_t)atop(pa));
6676 #endif
6677 #if XNU_MONITOR
6678 if (__improbable((cacheattr & PP_ATTR_MONITOR) &&
6679 (pte_to_xprr_perm(spte) != XPRR_KERN_RO_PERM) && !pmap_ppl_disable)) {
6680 panic("%s: attempt to remove mapping of writable PPL-protected I/O address 0x%llx",
6681 __func__, (uint64_t)pa);
6682 }
6683 #endif
6684 break;
6685 }
6686 pai = (int)pa_index(pa);
6687 LOCK_PVH(pai);
6688 spte = *cpte;
6689 pa = pte_to_pa(spte);
6690 if (pai == (int)pa_index(pa)) {
6691 managed = TRUE;
6692 break; // Leave pai locked as we will unlock it after we free the PV entry
6693 }
6694 UNLOCK_PVH(pai);
6695 }
6696
6697 if (ARM_PTE_IS_COMPRESSED(*cpte, cpte)) {
6698 /*
6699 * There used to be a valid mapping here but it
6700 * has already been removed when the page was
6701 * sent to the VM compressor, so nothing left to
6702 * remove now...
6703 */
6704 continue;
6705 }
6706
6707 /* remove the translation, do not flush the TLB */
6708 if (*cpte != ARM_PTE_TYPE_FAULT) {
6709 assertf(!ARM_PTE_IS_COMPRESSED(*cpte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
6710 assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
6711 #if MACH_ASSERT
6712 if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
6713 panic("pmap_remove_range_options(): VA mismatch: cpte=%p ptd=%p pte=0x%llx va=0x%llx, cpte va=0x%llx",
6714 cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va, (uint64_t)ptep_get_va(cpte));
6715 }
6716 #endif
6717 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
6718 num_pte_changed++;
6719 }
6720
6721 if ((spte != ARM_PTE_TYPE_FAULT) &&
6722 (pmap != kernel_pmap)) {
6723 assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
6724 assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
6725 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_info(cpte)->refcnt)) <= 0) {
6726 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p", ptep_get_ptd(cpte), cpte);
6727 }
6728 if (rmv_cnt) {
6729 (*rmv_cnt)++;
6730 }
6731 }
6732
6733 if (pte_is_wired(spte)) {
6734 pte_set_wired(pmap, cpte, 0);
6735 num_unwired++;
6736 }
6737 /*
6738 * if not managed, we're done
6739 */
6740 if (!managed) {
6741 continue;
6742 }
6743 /*
6744 * find and remove the mapping from the chain for this
6745 * physical address.
6746 */
6747
6748 pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);
6749
6750 UNLOCK_PVH(pai);
6751 num_removed++;
6752 }
6753
6754 /*
6755 * Update the counts
6756 */
6757 OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
6758 pmap_ledger_debit(pmap, task_ledgers.phys_mem, num_removed * pmap_page_size * PAGE_RATIO);
6759
6760 if (pmap != kernel_pmap) {
6761 /* update pmap stats... */
6762 OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
6763 if (num_external) {
6764 __assert_only int32_t orig_external = OSAddAtomic(-num_external, &pmap->stats.external);
6765 PMAP_STATS_ASSERTF(orig_external >= num_external,
6766 pmap,
6767 "pmap=%p bpte=%p epte=%p num_external=%d stats.external=%d",
6768 pmap, bpte, epte, num_external, orig_external);
6769 }
6770 if (num_internal) {
6771 __assert_only int32_t orig_internal = OSAddAtomic(-num_internal, &pmap->stats.internal);
6772 PMAP_STATS_ASSERTF(orig_internal >= num_internal,
6773 pmap,
6774 "pmap=%p bpte=%p epte=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
6775 pmap, bpte, epte,
6776 num_internal, orig_internal,
6777 num_reusable, pmap->stats.reusable);
6778 }
6779 if (num_reusable) {
6780 __assert_only int32_t orig_reusable = OSAddAtomic(-num_reusable, &pmap->stats.reusable);
6781 PMAP_STATS_ASSERTF(orig_reusable >= num_reusable,
6782 pmap,
6783 "pmap=%p bpte=%p epte=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
6784 pmap, bpte, epte,
6785 num_internal, pmap->stats.internal,
6786 num_reusable, orig_reusable);
6787 }
6788 if (num_compressed) {
6789 __assert_only uint64_t orig_compressed = OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
6790 PMAP_STATS_ASSERTF(orig_compressed >= num_compressed,
6791 pmap,
6792 "pmap=%p bpte=%p epte=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
6793 pmap, bpte, epte, num_compressed, num_alt_compressed,
6794 orig_compressed);
6795 }
6796 /* ... and ledgers */
6797 pmap_ledger_debit(pmap, task_ledgers.wired_mem, (num_unwired) * pmap_page_size * PAGE_RATIO);
6798 pmap_ledger_debit(pmap, task_ledgers.internal, (num_internal) * pt_attr_page_size(pt_attr) * PAGE_RATIO);
6799 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, (num_alt_internal) * pt_attr_page_size(pt_attr) * PAGE_RATIO);
6800 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, (num_alt_compressed) * pt_attr_page_size(pt_attr) * PAGE_RATIO);
6801 pmap_ledger_debit(pmap, task_ledgers.internal_compressed, (num_compressed) * pt_attr_page_size(pt_attr) * PAGE_RATIO);
6802 /* make needed adjustments to phys_footprint */
6803 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
6804 ((num_internal -
6805 num_alt_internal) +
6806 (num_compressed -
6807 num_alt_compressed)) * pmap_page_size * PAGE_RATIO);
6808 }
6809
6810 /* flush the ptable entries we have written */
6811 if (num_pte_changed > 0) {
6812 FLUSH_PTE_RANGE_STRONG(bpte, epte);
6813 }
6814
6815 return num_pte_changed;
6816 }
6817
6818
6819 /*
6820 * Remove the given range of addresses
6821 * from the specified map.
6822 *
6823 * It is assumed that the start and end are properly
6824 * rounded to the hardware page size.
6825 */
6826 void
6827 pmap_remove(
6828 pmap_t pmap,
6829 vm_map_address_t start,
6830 vm_map_address_t end)
6831 {
6832 pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
6833 }
6834
6835 MARK_AS_PMAP_TEXT static int
6836 pmap_remove_options_internal(
6837 pmap_t pmap,
6838 vm_map_address_t start,
6839 vm_map_address_t end,
6840 int options)
6841 {
6842 int remove_count = 0;
6843 pt_entry_t *bpte, *epte;
6844 pt_entry_t *pte_p;
6845 tt_entry_t *tte_p;
6846 uint32_t rmv_spte = 0;
6847 bool need_strong_sync = false;
6848 bool flush_tte = false;
6849
6850 if (__improbable(end < start)) {
6851 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
6852 }
6853
6854 VALIDATE_PMAP(pmap);
6855
6856 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6857
6858 pmap_lock(pmap);
6859
6860 tte_p = pmap_tte(pmap, start);
6861
6862 if (tte_p == (tt_entry_t *) NULL) {
6863 goto done;
6864 }
6865
6866 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
6867 pte_p = (pt_entry_t *) ttetokv(*tte_p);
6868 bpte = &pte_p[pte_index(pmap, pt_attr, start)];
6869 epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));
6870
6871 remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
6872 &rmv_spte, &need_strong_sync, options);
6873
6874 if (rmv_spte && (ptep_get_info(pte_p)->refcnt == 0) &&
6875 (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
6876 pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
6877 flush_tte = true;
6878 }
6879 }
6880
6881 done:
6882 pmap_unlock(pmap);
6883
6884 if (remove_count > 0) {
6885 PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
6886 } else if (flush_tte) {
6887 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(start, pmap);
6888 sync_tlb_flush();
6889 }
6890 return remove_count;
6891 }
6892
6893 void
6894 pmap_remove_options(
6895 pmap_t pmap,
6896 vm_map_address_t start,
6897 vm_map_address_t end,
6898 int options)
6899 {
6900 int remove_count = 0;
6901 vm_map_address_t va;
6902
6903 if (pmap == PMAP_NULL) {
6904 return;
6905 }
6906
6907 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6908
6909 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
6910 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
6911 VM_KERNEL_ADDRHIDE(end));
6912
6913 #if MACH_ASSERT
6914 if ((start | end) & pt_attr_leaf_offmask(pt_attr)) {
6915 panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
6916 pmap, (uint64_t)start, (uint64_t)end);
6917 }
6918 if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
6919 panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
6920 pmap, (uint64_t)start, (uint64_t)end);
6921 }
6922 #endif
6923
6924 /*
6925 * Invalidate the translation buffer first
6926 */
6927 va = start;
6928 while (va < end) {
6929 vm_map_address_t l;
6930
6931 l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
6932 if (l > end) {
6933 l = end;
6934 }
6935
6936 #if XNU_MONITOR
6937 remove_count += pmap_remove_options_ppl(pmap, va, l, options);
6938
6939 pmap_ledger_check_balance(pmap);
6940 #else
6941 remove_count += pmap_remove_options_internal(pmap, va, l, options);
6942 #endif
6943
6944 va = l;
6945 }
6946
6947 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
6948 }
6949
6950
6951 /*
6952 * Remove phys addr if mapped in specified map
6953 */
6954 void
6955 pmap_remove_some_phys(
6956 __unused pmap_t map,
6957 __unused ppnum_t pn)
6958 {
6959 /* Implement to support working set code */
6960 }
6961
6962 void
6963 pmap_set_pmap(
6964 pmap_t pmap,
6965 #if !__ARM_USER_PROTECT__
6966 __unused
6967 #endif
6968 thread_t thread)
6969 {
6970 pmap_switch(pmap);
6971 #if __ARM_USER_PROTECT__
6972 thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
6973 thread->machine.asid = pmap->hw_asid;
6974 #endif
6975 }
6976
6977 static void
6978 pmap_flush_core_tlb_asid_async(pmap_t pmap)
6979 {
6980 #if (__ARM_VMSA__ == 7)
6981 flush_core_tlb_asid_async(pmap->hw_asid);
6982 #else
6983 flush_core_tlb_asid_async(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
6984 #endif
6985 }
6986
6987 static inline bool
6988 pmap_user_ttb_is_clear(void)
6989 {
6990 #if (__ARM_VMSA__ > 7)
6991 return get_mmu_ttb() == (invalid_ttep & TTBR_BADDR_MASK);
6992 #else
6993 return get_mmu_ttb() == kernel_pmap->ttep;
6994 #endif
6995 }
6996
6997 MARK_AS_PMAP_TEXT static void
6998 pmap_switch_internal(
6999 pmap_t pmap)
7000 {
7001 VALIDATE_PMAP(pmap);
7002 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
7003 uint16_t asid_index = pmap->hw_asid;
7004 bool do_asid_flush = false;
7005
7006 if (__improbable((asid_index == 0) && (pmap != kernel_pmap))) {
7007 panic("%s: attempt to activate pmap with invalid ASID %p", __func__, pmap);
7008 }
7009 #if __ARM_KERNEL_PROTECT__
7010 asid_index >>= 1;
7011 #endif
7012
7013 #if (__ARM_VMSA__ > 7)
7014 pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
7015 __unused const pt_attr_t *last_nested_pmap_attr = cpu_data_ptr->cpu_nested_pmap_attr;
7016 __unused vm_map_address_t last_nested_region_addr = cpu_data_ptr->cpu_nested_region_addr;
7017 __unused vm_map_offset_t last_nested_region_size = cpu_data_ptr->cpu_nested_region_size;
7018 bool do_shared_region_flush = ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap));
7019 bool break_before_make = do_shared_region_flush;
7020 #else
7021 bool do_shared_region_flush = false;
7022 bool break_before_make = false;
7023 #endif
7024
7025 if ((pmap_max_asids > MAX_HW_ASIDS) && (asid_index > 0)) {
7026 asid_index -= 1;
7027 pmap_update_plru(asid_index);
7028
7029 /* Paranoia. */
7030 assert(asid_index < (sizeof(cpu_data_ptr->cpu_sw_asids) / sizeof(*cpu_data_ptr->cpu_sw_asids)));
7031
7032 /* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
7033 uint8_t new_sw_asid = pmap->sw_asid;
7034 uint8_t last_sw_asid = cpu_data_ptr->cpu_sw_asids[asid_index];
7035
7036 if (new_sw_asid != last_sw_asid) {
7037 /*
7038 * If the virtual ASID of the new pmap does not match the virtual ASID
7039 * last seen on this CPU for the physical ASID (that was a mouthful),
7040 * then this switch runs the risk of aliasing. We need to flush the
7041 * TLB for this physical ASID in this case.
7042 */
7043 cpu_data_ptr->cpu_sw_asids[asid_index] = new_sw_asid;
7044 do_asid_flush = true;
7045 break_before_make = true;
7046 }
7047 }
7048
7049 #if __ARM_MIXED_PAGE_SIZE__
7050 if (pmap_get_pt_attr(pmap)->pta_tcr_value != get_tcr()) {
7051 break_before_make = true;
7052 }
7053 #endif
7054 if (__improbable(break_before_make && !pmap_user_ttb_is_clear())) {
7055 PMAP_TRACE(1, PMAP_CODE(PMAP__CLEAR_USER_TTB), VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
7056 pmap_clear_user_ttb_internal();
7057 }
7058
7059 #if (__ARM_VMSA__ > 7)
7060 /* If we're switching to a different nested pmap (i.e. shared region), we'll need
7061 * to flush the userspace mappings for that region. Those mappings are global
7062 * and will not be protected by the ASID. It should also be cheaper to flush the
7063 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
7064 if (__improbable(do_shared_region_flush)) {
7065 #if __ARM_RANGE_TLBI__
7066 uint64_t page_shift_prev = pt_attr_leaf_shift(last_nested_pmap_attr);
7067 vm_map_offset_t npages_prev = last_nested_region_size >> page_shift_prev;
7068
7069 /* NOTE: here we flush the global TLB entries for the previous nested region only.
7070 * There may still be non-global entries that overlap with the incoming pmap's
7071 * nested region. On Apple SoCs at least, this is acceptable. Those non-global entries
7072 * must necessarily belong to a different ASID than the incoming pmap, or they would
7073 * be flushed in the do_asid_flush case below. This will prevent them from conflicting
7074 * with the incoming pmap's nested region. However, the ARMv8 ARM is not crystal clear
7075 * on whether such a global/inactive-nonglobal overlap is acceptable, so we may need
7076 * to consider additional invalidation here in the future. */
7077 if (npages_prev <= ARM64_TLB_RANGE_PAGES) {
7078 flush_core_tlb_allrange_async(generate_rtlbi_param((ppnum_t)npages_prev, 0, last_nested_region_addr, page_shift_prev));
7079 } else {
7080 do_asid_flush = false;
7081 flush_core_tlb_async();
7082 }
7083 #else
7084 do_asid_flush = false;
7085 flush_core_tlb_async();
7086 #endif // __ARM_RANGE_TLBI__
7087 }
7088 #endif // (__ARM_VMSA__ > 7)
7089 if (__improbable(do_asid_flush)) {
7090 pmap_flush_core_tlb_asid_async(pmap);
7091 #if DEVELOPMENT || DEBUG
7092 os_atomic_inc(&pmap_asid_flushes, relaxed);
7093 #endif
7094 }
7095 if (__improbable(do_asid_flush || do_shared_region_flush)) {
7096 sync_tlb_flush();
7097 }
7098
7099 pmap_switch_user_ttb_internal(pmap);
7100 }
7101
7102 void
7103 pmap_switch(
7104 pmap_t pmap)
7105 {
7106 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
7107 #if XNU_MONITOR
7108 pmap_switch_ppl(pmap);
7109 #else
7110 pmap_switch_internal(pmap);
7111 #endif
7112 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
7113 }
7114
7115 void
7116 pmap_require(pmap_t pmap)
7117 {
7118 #if XNU_MONITOR
7119 VALIDATE_PMAP(pmap);
7120 #else
7121 if (pmap != kernel_pmap) {
7122 zone_id_require(ZONE_ID_PMAP, sizeof(struct pmap), pmap);
7123 }
7124 #endif
7125 }
7126
7127 void
7128 pmap_page_protect(
7129 ppnum_t ppnum,
7130 vm_prot_t prot)
7131 {
7132 pmap_page_protect_options(ppnum, prot, 0, NULL);
7133 }
7134
7135 /*
7136 * Routine: pmap_page_protect_options
7137 *
7138 * Function:
7139 * Lower the permission for all mappings to a given
7140 * page.
7141 */
7142 MARK_AS_PMAP_TEXT static void
7143 pmap_page_protect_options_with_flush_range(
7144 ppnum_t ppnum,
7145 vm_prot_t prot,
7146 unsigned int options,
7147 pmap_tlb_flush_range_t *flush_range)
7148 {
7149 pmap_paddr_t phys = ptoa(ppnum);
7150 pv_entry_t **pv_h;
7151 pv_entry_t **pve_pp;
7152 pv_entry_t *pve_p;
7153 pv_entry_t *pveh_p;
7154 pv_entry_t *pvet_p;
7155 pt_entry_t *pte_p;
7156 pv_entry_t *new_pve_p;
7157 pt_entry_t *new_pte_p;
7158 vm_offset_t pvh_flags;
7159 int pai;
7160 boolean_t remove;
7161 boolean_t set_NX;
7162 boolean_t tlb_flush_needed = FALSE;
7163 unsigned int pvh_cnt = 0;
7164
7165 assert(ppnum != vm_page_fictitious_addr);
7166
7167 /* Only work with managed pages. */
7168 if (!pa_valid(phys)) {
7169 return;
7170 }
7171
7172 /*
7173 * Determine the new protection.
7174 */
7175 switch (prot) {
7176 case VM_PROT_ALL:
7177 return; /* nothing to do */
7178 case VM_PROT_READ:
7179 case VM_PROT_READ | VM_PROT_EXECUTE:
7180 remove = FALSE;
7181 break;
7182 default:
7183 remove = TRUE;
7184 break;
7185 }
7186
7187 pai = (int)pa_index(phys);
7188 LOCK_PVH(pai);
7189 pv_h = pai_to_pvh(pai);
7190 pvh_flags = pvh_get_flags(pv_h);
7191
7192 #if XNU_MONITOR
7193 if (__improbable(remove && (pvh_flags & PVH_FLAG_LOCKDOWN))) {
7194 panic("%d is locked down (%#llx), cannot remove", pai, pvh_get_flags(pv_h));
7195 }
7196 #endif
7197
7198 pte_p = PT_ENTRY_NULL;
7199 pve_p = PV_ENTRY_NULL;
7200 pve_pp = pv_h;
7201 pveh_p = PV_ENTRY_NULL;
7202 pvet_p = PV_ENTRY_NULL;
7203 new_pve_p = PV_ENTRY_NULL;
7204 new_pte_p = PT_ENTRY_NULL;
7205 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7206 pte_p = pvh_ptep(pv_h);
7207 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7208 pve_p = pvh_list(pv_h);
7209 pveh_p = pve_p;
7210 }
7211
7212 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
7213 vm_map_address_t va = 0;
7214 pmap_t pmap = NULL;
7215 pt_entry_t tmplate = ARM_PTE_TYPE_FAULT;
7216 boolean_t update = FALSE;
7217
7218 if (pve_p != PV_ENTRY_NULL) {
7219 pte_p = pve_get_ptep(pve_p);
7220 }
7221
7222 #ifdef PVH_FLAG_IOMMU
7223 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
7224 #if XNU_MONITOR
7225 if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
7226 panic("pmap_page_protect: ppnum 0x%x locked down, cannot be owned by iommu 0x%llx, pve_p=%p",
7227 ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
7228 }
7229 #endif
7230 if (remove) {
7231 if (options & PMAP_OPTIONS_COMPRESSOR) {
7232 panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
7233 ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
7234 }
7235 if (pve_p != PV_ENTRY_NULL) {
7236 pv_entry_t *temp_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
7237 pvh_remove(pv_h, pve_pp, pve_p);
7238 pveh_p = pvh_list(pv_h);
7239 pve_next(pve_p) = new_pve_p;
7240 new_pve_p = pve_p;
7241 pve_p = temp_pve_p;
7242 continue;
7243 } else {
7244 new_pte_p = pte_p;
7245 break;
7246 }
7247 }
7248 goto protect_skip_pve;
7249 }
7250 #endif
7251 pmap = ptep_get_pmap(pte_p);
7252 va = ptep_get_va(pte_p);
7253
7254 if (pte_p == PT_ENTRY_NULL) {
7255 panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
7256 pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
7257 } else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
7258 #if MACH_ASSERT
7259 if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
7260 pv_entry_t *check_pve_p = pveh_p;
7261 while (check_pve_p != PV_ENTRY_NULL) {
7262 if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
7263 panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
7264 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
7265 }
7266 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
7267 }
7268 }
7269 #endif
7270 panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
7271 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
7272 }
7273
7274 #if DEVELOPMENT || DEBUG
7275 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
7276 #else
7277 if ((prot & VM_PROT_EXECUTE))
7278 #endif
7279 {
7280 set_NX = FALSE;
7281 } else {
7282 set_NX = TRUE;
7283 }
7284
7285 /* Remove the mapping if new protection is NONE */
7286 if (remove) {
7287 boolean_t is_altacct = FALSE;
7288 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7289 pt_entry_t spte = *pte_p;
7290
7291 if (IS_ALTACCT_PAGE(pai, pve_p)) {
7292 is_altacct = TRUE;
7293 } else {
7294 is_altacct = FALSE;
7295 }
7296
7297 if (pte_is_wired(spte)) {
7298 pte_set_wired(pmap, pte_p, 0);
7299 spte = *pte_p;
7300 if (pmap != kernel_pmap) {
7301 pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7302 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
7303 }
7304 }
7305
7306 if (spte != ARM_PTE_TYPE_FAULT &&
7307 pmap != kernel_pmap &&
7308 (options & PMAP_OPTIONS_COMPRESSOR) &&
7309 IS_INTERNAL_PAGE(pai)) {
7310 assert(!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p));
7311 /* mark this PTE as having been "compressed" */
7312 tmplate = ARM_PTE_COMPRESSED;
7313 if (is_altacct) {
7314 tmplate |= ARM_PTE_COMPRESSED_ALT;
7315 is_altacct = TRUE;
7316 }
7317 } else {
7318 tmplate = ARM_PTE_TYPE_FAULT;
7319 }
7320
7321 /**
7322 * The entry must be written before the refcnt is decremented to
7323 * prevent use-after-free races with code paths that deallocate page
7324 * tables based on a zero refcnt.
7325 */
7326 if (spte != tmplate) {
7327 WRITE_PTE_STRONG(pte_p, tmplate);
7328 update = TRUE;
7329 }
7330
7331 if ((spte != ARM_PTE_TYPE_FAULT) &&
7332 (tmplate == ARM_PTE_TYPE_FAULT) &&
7333 (pmap != kernel_pmap)) {
7334 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_info(pte_p)->refcnt)) <= 0) {
7335 panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
7336 }
7337 }
7338
7339 pvh_cnt++;
7340 pmap_ledger_debit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7341 OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
7342
7343 #if MACH_ASSERT
7344 /*
7345 * We only ever compress internal pages.
7346 */
7347 if (options & PMAP_OPTIONS_COMPRESSOR) {
7348 assert(IS_INTERNAL_PAGE(pai));
7349 }
7350 #endif
7351
7352 if (pmap != kernel_pmap) {
7353 if (IS_REUSABLE_PAGE(pai) &&
7354 IS_INTERNAL_PAGE(pai) &&
7355 !is_altacct) {
7356 __assert_only int32_t orig_reusable = OSAddAtomic(-1, &pmap->stats.reusable);
7357 PMAP_STATS_ASSERTF(orig_reusable > 0, pmap, "stats.reusable %d", orig_reusable);
7358 } else if (IS_INTERNAL_PAGE(pai)) {
7359 __assert_only int32_t orig_internal = OSAddAtomic(-1, &pmap->stats.internal);
7360 PMAP_STATS_ASSERTF(orig_internal > 0, pmap, "stats.internal %d", orig_internal);
7361 } else {
7362 __assert_only int32_t orig_external = OSAddAtomic(-1, &pmap->stats.external);
7363 PMAP_STATS_ASSERTF(orig_external > 0, pmap, "stats.external %d", orig_external);
7364 }
7365 if ((options & PMAP_OPTIONS_COMPRESSOR) &&
7366 IS_INTERNAL_PAGE(pai)) {
7367 /* adjust "compressed" stats */
7368 OSAddAtomic64(+1, &pmap->stats.compressed);
7369 PMAP_STATS_PEAK(pmap->stats.compressed);
7370 pmap->stats.compressed_lifetime++;
7371 }
7372
7373 if (IS_ALTACCT_PAGE(pai, pve_p)) {
7374 assert(IS_INTERNAL_PAGE(pai));
7375 pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7376 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7377 if (options & PMAP_OPTIONS_COMPRESSOR) {
7378 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7379 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7380 }
7381
7382 /*
7383 * Clean up our marker before
7384 * we free this pv_entry.
7385 */
7386 CLR_ALTACCT_PAGE(pai, pve_p);
7387 } else if (IS_REUSABLE_PAGE(pai)) {
7388 assert(IS_INTERNAL_PAGE(pai));
7389 if (options & PMAP_OPTIONS_COMPRESSOR) {
7390 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7391 /* was not in footprint, but is now */
7392 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7393 }
7394 } else if (IS_INTERNAL_PAGE(pai)) {
7395 pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7396
7397 /*
7398 * Update all stats related to physical footprint, which only
7399 * deals with internal pages.
7400 */
7401 if (options & PMAP_OPTIONS_COMPRESSOR) {
7402 /*
7403 * This removal is only being done so we can send this page to
7404 * the compressor; therefore it mustn't affect total task footprint.
7405 */
7406 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7407 } else {
7408 /*
7409 * This internal page isn't going to the compressor, so adjust stats to keep
7410 * phys_footprint up to date.
7411 */
7412 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7413 }
7414 } else {
7415 /* external page: no impact on ledgers */
7416 }
7417 }
7418
7419 if (pve_p != PV_ENTRY_NULL) {
7420 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
7421 }
7422 } else {
7423 pt_entry_t spte;
7424 const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
7425
7426 spte = *pte_p;
7427
7428 if (pmap == kernel_pmap) {
7429 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
7430 } else {
7431 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
7432 }
7433
7434 pte_set_was_writeable(tmplate, false);
7435 /*
7436 * While the naive implementation of this would serve to add execute
7437 * permission, this is not how the VM uses this interface, or how
7438 * x86_64 implements it. So ignore requests to add execute permissions.
7439 */
7440 if (set_NX) {
7441 tmplate |= pt_attr_leaf_xn(pt_attr);
7442 }
7443
7444 #if __APRR_SUPPORTED__
7445 /**
7446 * Enforce the policy that PPL xPRR mappings can't have their permissions changed after the fact.
7447 *
7448 * Certain userspace applications (e.g., CrashReporter and debuggers) have a need to remap JIT mappings to
7449 * RO/RX, so we explicitly allow that. This doesn't compromise the security of the PPL since this only
7450 * affects userspace mappings, so allow reducing permissions on JIT mappings to RO/RX. This is similar for
7451 * user execute-only mappings.
7452 */
7453 if (__improbable(is_pte_xprr_protected(pmap, spte) && (pte_to_xprr_perm(spte) != XPRR_USER_JIT_PERM)
7454 && (pte_to_xprr_perm(spte) != XPRR_USER_XO_PERM))) {
7455 panic("%s: modifying an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx ppnum: 0x%x",
7456 __func__, pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)spte, (uint64_t)tmplate, (uint64_t)va, ppnum);
7457 }
7458
7459 /**
7460 * Enforce the policy that we can't create a new PPL protected mapping here except for user execute-only
7461 * mappings (which doesn't compromise the security of the PPL since it's userspace-specific).
7462 */
7463 if (__improbable(is_pte_xprr_protected(pmap, tmplate) && (pte_to_xprr_perm(tmplate) != XPRR_USER_XO_PERM))) {
7464 panic("%s: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx ppnum: 0x%x",
7465 __func__, pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)spte, (uint64_t)tmplate, (uint64_t)va, ppnum);
7466 }
7467 #endif /* __APRR_SUPPORTED__*/
7468
7469 if (*pte_p != ARM_PTE_TYPE_FAULT &&
7470 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p) &&
7471 *pte_p != tmplate) {
7472 WRITE_PTE_STRONG(pte_p, tmplate);
7473 update = TRUE;
7474 }
7475 }
7476
7477 /* Invalidate TLBs for all CPUs using it */
7478 if (update) {
7479 if (remove || !flush_range ||
7480 ((flush_range->ptfr_pmap != pmap) || va >= flush_range->ptfr_end || va < flush_range->ptfr_start)) {
7481 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
7482 pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap);
7483 }
7484 tlb_flush_needed = TRUE;
7485 }
7486
7487 #ifdef PVH_FLAG_IOMMU
7488 protect_skip_pve:
7489 #endif
7490 pte_p = PT_ENTRY_NULL;
7491 pvet_p = pve_p;
7492 if (pve_p != PV_ENTRY_NULL) {
7493 if (remove) {
7494 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
7495 }
7496 pve_pp = pve_link_field(pve_p);
7497 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
7498 }
7499 }
7500
7501 #ifdef PVH_FLAG_EXEC
7502 if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
7503 pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
7504 }
7505 #endif
7506 /* if we removed a bunch of entries, take care of them now */
7507 if (remove) {
7508 if (new_pve_p != PV_ENTRY_NULL) {
7509 pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
7510 pvh_set_flags(pv_h, pvh_flags);
7511 } else if (new_pte_p != PT_ENTRY_NULL) {
7512 pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
7513 pvh_set_flags(pv_h, pvh_flags);
7514 } else {
7515 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
7516 }
7517 }
7518
7519 UNLOCK_PVH(pai);
7520
7521 if (flush_range && tlb_flush_needed) {
7522 if (!remove) {
7523 flush_range->ptfr_flush_needed = true;
7524 tlb_flush_needed = FALSE;
7525 }
7526 }
7527 if (tlb_flush_needed) {
7528 sync_tlb_flush();
7529 }
7530
7531 if (remove && (pvet_p != PV_ENTRY_NULL)) {
7532 pv_list_free(pveh_p, pvet_p, pvh_cnt, pv_kern_low_water_mark);
7533 }
7534 }
7535
7536 MARK_AS_PMAP_TEXT static void
7537 pmap_page_protect_options_internal(
7538 ppnum_t ppnum,
7539 vm_prot_t prot,
7540 unsigned int options)
7541 {
7542 pmap_page_protect_options_with_flush_range(ppnum, prot, options, NULL);
7543 }
7544
7545 void
7546 pmap_page_protect_options(
7547 ppnum_t ppnum,
7548 vm_prot_t prot,
7549 unsigned int options,
7550 __unused void *arg)
7551 {
7552 pmap_paddr_t phys = ptoa(ppnum);
7553
7554 assert(ppnum != vm_page_fictitious_addr);
7555
7556 /* Only work with managed pages. */
7557 if (!pa_valid(phys)) {
7558 return;
7559 }
7560
7561 /*
7562 * Determine the new protection.
7563 */
7564 if (prot == VM_PROT_ALL) {
7565 return; /* nothing to do */
7566 }
7567
7568 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
7569
7570 #if XNU_MONITOR
7571 pmap_page_protect_options_ppl(ppnum, prot, options);
7572 #else
7573 pmap_page_protect_options_internal(ppnum, prot, options);
7574 #endif
7575
7576 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
7577 }
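/*
 * Illustrative usage (editorial sketch, not part of the implementation;
 * the page number below is hypothetical): a VM-layer caller that wants to
 * revoke write access from every existing mapping of a managed page,
 * without unmapping it, would do something like:
 *
 *	ppnum_t pn = 0x1234;	/* hypothetical managed page */
 *	pmap_page_protect_options(pn, VM_PROT_READ, 0, NULL);
 *
 * Passing VM_PROT_NONE instead takes the "remove" path above and unmaps
 * the page from every pmap that currently references it.
 */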
7578
7579
7580 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
7581 MARK_AS_PMAP_TEXT void
7582 pmap_disable_user_jop_internal(pmap_t pmap)
7583 {
7584 if (pmap == kernel_pmap) {
7585 panic("%s: called with kernel_pmap\n", __func__);
7586 }
7587 pmap->disable_jop = true;
7588 }
7589
7590 void
7591 pmap_disable_user_jop(pmap_t pmap)
7592 {
7593 #if XNU_MONITOR
7594 pmap_disable_user_jop_ppl(pmap);
7595 #else
7596 pmap_disable_user_jop_internal(pmap);
7597 #endif
7598 }
7599 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
7600
7601 /*
7602 * Indicates if the pmap layer enforces some additional restrictions on the
7603 * given set of protections.
7604 */
7605 bool
7606 pmap_has_prot_policy(__unused pmap_t pmap, __unused bool translated_allow_execute, __unused vm_prot_t prot)
7607 {
7608 return false;
7609 }
7610
7611 /*
7612 * Set the physical protection on the
7613 * specified range of this map as requested.
7614 * VERY IMPORTANT: Will not increase permissions.
7615 * VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
7616 */
7617 void
7618 pmap_protect(
7619 pmap_t pmap,
7620 vm_map_address_t b,
7621 vm_map_address_t e,
7622 vm_prot_t prot)
7623 {
7624 pmap_protect_options(pmap, b, e, prot, 0, NULL);
7625 }
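/*
 * Illustrative usage (editorial sketch; 'user_pmap' and the addresses are
 * hypothetical): downgrade an existing writable range to read-only. Both
 * ends must be leaf-page aligned, since pmap_protect_options() panics on
 * unaligned start/end values.
 *
 *	pmap_protect(user_pmap,
 *	    (vm_map_address_t)0x100000000ULL,
 *	    (vm_map_address_t)0x100004000ULL,
 *	    VM_PROT_READ);
 *
 * As noted above, this interface never increases permissions; only
 * pmap_enter() may grant access.
 */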
7626
7627 MARK_AS_PMAP_TEXT static void
7628 pmap_protect_options_internal(
7629 pmap_t pmap,
7630 vm_map_address_t start,
7631 vm_map_address_t end,
7632 vm_prot_t prot,
7633 unsigned int options,
7634 __unused void *args)
7635 {
7636 const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
7637 tt_entry_t *tte_p;
7638 pt_entry_t *bpte_p, *epte_p;
7639 pt_entry_t *pte_p;
7640 boolean_t set_NX = TRUE;
7641 #if (__ARM_VMSA__ > 7)
7642 boolean_t set_XO = FALSE;
7643 #endif
7644 boolean_t should_have_removed = FALSE;
7645 bool need_strong_sync = false;
7646
7647 if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
7648 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
7649 }
7650
7651 #if DEVELOPMENT || DEBUG
7652 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
7653 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
7654 should_have_removed = TRUE;
7655 }
7656 } else
7657 #endif
7658 {
7659 /* Determine the new protection. */
7660 switch (prot) {
7661 #if (__ARM_VMSA__ > 7)
7662 case VM_PROT_EXECUTE:
7663 set_XO = TRUE;
7664 OS_FALLTHROUGH;
7665 #endif
7666 case VM_PROT_READ:
7667 case VM_PROT_READ | VM_PROT_EXECUTE:
7668 break;
7669 case VM_PROT_READ | VM_PROT_WRITE:
7670 case VM_PROT_ALL:
7671 return; /* nothing to do */
7672 default:
7673 should_have_removed = TRUE;
7674 }
7675 }
7676
7677 if (should_have_removed) {
7678 panic("%s: should have been a remove operation, "
7679 "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
7680 __FUNCTION__,
7681 pmap, (void *)start, (void *)end, prot, options, args);
7682 }
7683
7684 #if DEVELOPMENT || DEBUG
7685 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
7686 #else
7687 if ((prot & VM_PROT_EXECUTE))
7688 #endif
7689 {
7690 set_NX = FALSE;
7691 } else {
7692 set_NX = TRUE;
7693 }
7694
7695 VALIDATE_PMAP(pmap);
7696 pmap_lock(pmap);
7697
7698 tte_p = pmap_tte(pmap, start);
7699
7700 if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
7701 bpte_p = (pt_entry_t *) ttetokv(*tte_p);
7702 bpte_p = &bpte_p[pte_index(pmap, pt_attr, start)];
7703 epte_p = bpte_p + ((end - start) >> pt_attr_leaf_shift(pt_attr));
7704 pte_p = bpte_p;
7705
7706 for (pte_p = bpte_p;
7707 pte_p < epte_p;
7708 pte_p += PAGE_RATIO) {
7709 pt_entry_t spte;
7710 #if DEVELOPMENT || DEBUG
7711 boolean_t force_write = FALSE;
7712 #endif
7713
7714 spte = *pte_p;
7715
7716 if ((spte == ARM_PTE_TYPE_FAULT) ||
7717 ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
7718 continue;
7719 }
7720
7721 pmap_paddr_t pa;
7722 int pai = 0;
7723 boolean_t managed = FALSE;
7724
7725 while (!managed) {
7726 /*
7727 * It may be possible for the pte to transition from managed
7728 * to unmanaged in this timeframe; for now, elide the assert.
7729 * We should break out as a consequence of checking pa_valid.
7730 */
7731 // assert(!ARM_PTE_IS_COMPRESSED(spte));
7732 pa = pte_to_pa(spte);
7733 if (!pa_valid(pa)) {
7734 break;
7735 }
7736 pai = (int)pa_index(pa);
7737 LOCK_PVH(pai);
7738 spte = *pte_p;
7739 pa = pte_to_pa(spte);
7740 if (pai == (int)pa_index(pa)) {
7741 managed = TRUE;
7742 break; // Leave the PVH locked as we will unlock it after we free the PTE
7743 }
7744 UNLOCK_PVH(pai);
7745 }
7746
7747 if ((spte == ARM_PTE_TYPE_FAULT) ||
7748 ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
7749 continue;
7750 }
7751
7752 pt_entry_t tmplate;
7753
7754 if (pmap == kernel_pmap) {
7755 #if DEVELOPMENT || DEBUG
7756 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
7757 force_write = TRUE;
7758 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
7759 } else
7760 #endif
7761 {
7762 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
7763 }
7764 } else {
7765 #if DEVELOPMENT || DEBUG
7766 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
7767 force_write = TRUE;
7768 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pt_attr));
7769 } else
7770 #endif
7771 {
7772 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
7773 }
7774 }
7775
7776 /*
7777 * XXX Removing "NX" would
7778 * grant "execute" access
7779 * immediately, bypassing any
7780 * checks VM might want to do
7781 * in its soft fault path.
7782 * pmap_protect() and co. are
7783 * not allowed to increase
7784 * access permissions.
7785 */
7786 if (set_NX) {
7787 tmplate |= pt_attr_leaf_xn(pt_attr);
7788 } else {
7789 #if (__ARM_VMSA__ > 7)
7790 if (pmap == kernel_pmap) {
7791 /* do NOT clear "PNX"! */
7792 tmplate |= ARM_PTE_NX;
7793 } else {
7794 /* do NOT clear "NX"! */
7795 tmplate |= pt_attr_leaf_x(pt_attr);
7796 if (set_XO) {
7797 tmplate &= ~ARM_PTE_APMASK;
7798 tmplate |= pt_attr_leaf_rona(pt_attr);
7799 }
7800 }
7801 #endif
7802 }
7803
7804 #if DEVELOPMENT || DEBUG
7805 if (force_write) {
7806 /*
7807 * TODO: Run CS/Monitor checks here.
7808 */
7809 if (managed) {
7810 /*
7811 * We are marking the page as writable,
7812 * so we consider it to be modified and
7813 * referenced.
7814 */
7815 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
7816 tmplate |= ARM_PTE_AF;
7817
7818 if (IS_REFFAULT_PAGE(pai)) {
7819 CLR_REFFAULT_PAGE(pai);
7820 }
7821
7822 if (IS_MODFAULT_PAGE(pai)) {
7823 CLR_MODFAULT_PAGE(pai);
7824 }
7825 }
7826 } else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
7827 /*
7828 * An immediate request for anything other than
7829 * write should still mark the page as
7830 * referenced if managed.
7831 */
7832 if (managed) {
7833 pa_set_bits(pa, PP_ATTR_REFERENCED);
7834 tmplate |= ARM_PTE_AF;
7835
7836 if (IS_REFFAULT_PAGE(pai)) {
7837 CLR_REFFAULT_PAGE(pai);
7838 }
7839 }
7840 }
7841 #endif
7842
7843 /* We do not expect to write fast fault the entry. */
7844 pte_set_was_writeable(tmplate, false);
7845
7846 #if __APRR_SUPPORTED__
7847 /**
7848 * Enforce the policy that PPL xPRR mappings can't have their permissions changed after the fact.
7849 *
7850 * Certain userspace applications (e.g., CrashReporter and debuggers) have a need to remap JIT mappings to
7851 * RO/RX, so we explicitly allow that. This doesn't compromise the security of the PPL since this only
7852 * affects userspace mappings, so allow reducing permissions on JIT mappings to RO/RX/XO. This is similar
7853 * for user execute-only mappings.
7854 */
7855 if (__improbable(is_pte_xprr_protected(pmap, spte) && (pte_to_xprr_perm(spte) != XPRR_USER_JIT_PERM)
7856 && (pte_to_xprr_perm(spte) != XPRR_USER_XO_PERM))) {
7857 panic("%s: modifying a PPL mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
7858 __func__, pte_p, pmap, prot, options, (uint64_t)spte, (uint64_t)tmplate);
7859 }
7860
7861 /**
7862 * Enforce the policy that we can't create a new PPL protected mapping here except for user execute-only
7863 * mappings (which doesn't compromise the security of the PPL since it's userspace-specific).
7864 */
7865 if (__improbable(is_pte_xprr_protected(pmap, tmplate) && (pte_to_xprr_perm(tmplate) != XPRR_USER_XO_PERM))) {
7866 panic("%s: creating an xPRR mapping pte_p=%p pmap=%p prot=%d options=%u, pte=0x%llx, tmplate=0x%llx",
7867 __func__, pte_p, pmap, prot, options, (uint64_t)spte, (uint64_t)tmplate);
7868 }
7869 #endif /* __APRR_SUPPORTED__*/
7870 WRITE_PTE_FAST(pte_p, tmplate);
7871
7872 if (managed) {
7873 ASSERT_PVH_LOCKED(pai);
7874 UNLOCK_PVH(pai);
7875 }
7876 }
7877 FLUSH_PTE_RANGE_STRONG(bpte_p, epte_p);
7878 PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
7879 }
7880
7881 pmap_unlock(pmap);
7882 }
7883
7884 void
7885 pmap_protect_options(
7886 pmap_t pmap,
7887 vm_map_address_t b,
7888 vm_map_address_t e,
7889 vm_prot_t prot,
7890 unsigned int options,
7891 __unused void *args)
7892 {
7893 vm_map_address_t l, beg;
7894
7895 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7896
7897 if ((b | e) & pt_attr_leaf_offmask(pt_attr)) {
7898 panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
7899 pmap, (uint64_t)b, (uint64_t)e);
7900 }
7901
7902 #if DEVELOPMENT || DEBUG
7903 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
7904 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
7905 pmap_remove_options(pmap, b, e, options);
7906 return;
7907 }
7908 } else
7909 #endif
7910 {
7911 /* Determine the new protection. */
7912 switch (prot) {
7913 case VM_PROT_EXECUTE:
7914 case VM_PROT_READ:
7915 case VM_PROT_READ | VM_PROT_EXECUTE:
7916 break;
7917 case VM_PROT_READ | VM_PROT_WRITE:
7918 case VM_PROT_ALL:
7919 return; /* nothing to do */
7920 default:
7921 pmap_remove_options(pmap, b, e, options);
7922 return;
7923 }
7924 }
7925
7926 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
7927 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
7928 VM_KERNEL_ADDRHIDE(e));
7929
7930 beg = b;
7931
7932 while (beg < e) {
7933 l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
7934
7935 if (l > e) {
7936 l = e;
7937 }
7938
7939 #if XNU_MONITOR
7940 pmap_protect_options_ppl(pmap, beg, l, prot, options, args);
7941 #else
7942 pmap_protect_options_internal(pmap, beg, l, prot, options, args);
7943 #endif
7944
7945 beg = l;
7946 }
7947
7948 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
7949 }
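/*
 * Worked example of the chunking loop above (editorial sketch; assumes a
 * 16KB page geometry whose twig size -- the VA span covered by one leaf
 * page table -- is 32MB, i.e. 0x2000000):
 *
 *	b = 0x100010000, e = 0x102010000
 *	1st call: [0x100010000, 0x102000000)	clipped at the next twig boundary
 *	2nd call: [0x102000000, 0x102010000)	clipped at e
 *
 * Splitting the range this way keeps each pmap_protect_options_internal()
 * call (or, with XNU_MONITOR, each PPL entry) within a single leaf table,
 * which is what the range check at the top of the internal routine
 * enforces.
 */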
7950
7951 /* Map a (possibly) auto-generated block */
7952 kern_return_t
7953 pmap_map_block(
7954 pmap_t pmap,
7955 addr64_t va,
7956 ppnum_t pa,
7957 uint32_t size,
7958 vm_prot_t prot,
7959 int attr,
7960 __unused unsigned int flags)
7961 {
7962 kern_return_t kr;
7963 addr64_t original_va = va;
7964 uint32_t page;
7965
7966 for (page = 0; page < size; page++) {
7967 kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
7968
7969 if (kr != KERN_SUCCESS) {
7970 /*
7971 * This will panic for now, as it is unclear that
7972 * removing the mappings is correct.
7973 */
7974 panic("%s: failed pmap_enter, "
7975 "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
7976 __FUNCTION__,
7977 pmap, va, pa, size, prot, flags);
7978
7979 pmap_remove(pmap, original_va, va - original_va);
7980 return kr;
7981 }
7982
7983 va += PAGE_SIZE;
7984 pa++;
7985 }
7986
7987 return KERN_SUCCESS;
7988 }
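/*
 * Illustrative usage (editorial sketch; 'va', 'first_ppnum' and 'npages'
 * are hypothetical): map a physically contiguous, page-aligned block with
 * read/write access and default cacheability. Note that 'size' is a page
 * count, not a byte count, and every page is entered wired (the TRUE
 * argument passed to pmap_enter() above).
 *
 *	kern_return_t kr;
 *	kr = pmap_map_block(kernel_pmap, va, first_ppnum, npages,
 *	    VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_USE_DEFAULT, 0);
 *	assert(kr == KERN_SUCCESS);	// a failed pmap_enter() currently panics above
 */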
7989
7990 kern_return_t
7991 pmap_enter_addr(
7992 pmap_t pmap,
7993 vm_map_address_t v,
7994 pmap_paddr_t pa,
7995 vm_prot_t prot,
7996 vm_prot_t fault_type,
7997 unsigned int flags,
7998 boolean_t wired)
7999 {
8000 return pmap_enter_options_addr(pmap, v, pa, prot, fault_type, flags, wired, 0, NULL);
8001 }
8002
8003 /*
8004 * Insert the given physical page (p) at
8005 * the specified virtual address (v) in the
8006 * target physical map with the protection requested.
8007 *
8008 * If specified, the page will be wired down, meaning
8009 * that the related pte can not be reclaimed.
8010 *
8011 * NB: This is the only routine which MAY NOT lazy-evaluate
8012 * or lose information. That is, this routine must actually
8013 * insert this page into the given map eventually (must make
8014 * forward progress eventually).
8015 */
8016 kern_return_t
8017 pmap_enter(
8018 pmap_t pmap,
8019 vm_map_address_t v,
8020 ppnum_t pn,
8021 vm_prot_t prot,
8022 vm_prot_t fault_type,
8023 unsigned int flags,
8024 boolean_t wired)
8025 {
8026 return pmap_enter_addr(pmap, v, ((pmap_paddr_t)pn) << PAGE_SHIFT, prot, fault_type, flags, wired);
8027 }
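/*
 * Illustrative usage (editorial sketch; 'user_pmap', 'va' and 'pn' are
 * hypothetical): establish a wired, read/write, non-executable mapping
 * with default cache attributes, treating the triggering fault as a write
 * so the page is immediately marked modified and referenced.
 *
 *	kern_return_t kr;
 *	kr = pmap_enter(user_pmap, va, pn,
 *	    VM_PROT_READ | VM_PROT_WRITE,	// prot
 *	    VM_PROT_WRITE,			// fault_type
 *	    VM_WIMG_USE_DEFAULT,		// flags (cacheability)
 *	    TRUE);				// wired
 *	if (kr != KERN_SUCCESS) {
 *		// e.g. KERN_RESOURCE_SHORTAGE when a table page or PV entry
 *		// could not be allocated
 *	}
 */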
8028
8029 static inline void
8030 pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
8031 {
8032 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
8033
8034 if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
8035 SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_info(pte_p)->wiredcnt);
8036 if (pte & ARM_PTE_WIRED) {
8037 OSAddAtomic16(1, ptd_wiredcnt_ptr);
8038 pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8039 OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
8040 } else {
8041 OSAddAtomic16(-1, ptd_wiredcnt_ptr);
8042 pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8043 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
8044 }
8045 }
8046 if (*pte_p != ARM_PTE_TYPE_FAULT &&
8047 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
8048 WRITE_PTE_STRONG(pte_p, pte);
8049 PMAP_UPDATE_TLBS(pmap, v, v + (pt_attr_page_size(pt_attr) * PAGE_RATIO), false);
8050 } else {
8051 WRITE_PTE(pte_p, pte);
8052 __builtin_arm_isb(ISB_SY);
8053 }
8054
8055 PMAP_TRACE(4 + pt_attr_leaf_level(pt_attr), PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap),
8056 VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + (pt_attr_page_size(pt_attr) * PAGE_RATIO)), pte);
8057 }
8058
8059 MARK_AS_PMAP_TEXT static pt_entry_t
8060 wimg_to_pte(unsigned int wimg)
8061 {
8062 pt_entry_t pte;
8063
8064 switch (wimg & (VM_WIMG_MASK)) {
8065 case VM_WIMG_IO:
8066 case VM_WIMG_RT:
8067 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
8068 pte |= ARM_PTE_NX | ARM_PTE_PNX;
8069 break;
8070 case VM_WIMG_POSTED:
8071 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
8072 pte |= ARM_PTE_NX | ARM_PTE_PNX;
8073 break;
8074 case VM_WIMG_POSTED_REORDERED:
8075 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
8076 pte |= ARM_PTE_NX | ARM_PTE_PNX;
8077 break;
8078 case VM_WIMG_POSTED_COMBINED_REORDERED:
8079 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
8080 pte |= ARM_PTE_NX | ARM_PTE_PNX;
8081 break;
8082 case VM_WIMG_WCOMB:
8083 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
8084 pte |= ARM_PTE_NX | ARM_PTE_PNX;
8085 break;
8086 case VM_WIMG_WTHRU:
8087 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
8088 #if (__ARM_VMSA__ > 7)
8089 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
8090 #else
8091 pte |= ARM_PTE_SH;
8092 #endif
8093 break;
8094 case VM_WIMG_COPYBACK:
8095 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
8096 #if (__ARM_VMSA__ > 7)
8097 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
8098 #else
8099 pte |= ARM_PTE_SH;
8100 #endif
8101 break;
8102 case VM_WIMG_INNERWBACK:
8103 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
8104 #if (__ARM_VMSA__ > 7)
8105 pte |= ARM_PTE_SH(SH_INNER_MEMORY);
8106 #else
8107 pte |= ARM_PTE_SH;
8108 #endif
8109 break;
8110 default:
8111 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
8112 #if (__ARM_VMSA__ > 7)
8113 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
8114 #else
8115 pte |= ARM_PTE_SH;
8116 #endif
8117 }
8118
8119 return pte;
8120 }
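/*
 * Worked example (editorial sketch): a device mapping requested with
 * VM_WIMG_IO gets the "cache disabled" attribute index and is forced
 * non-executable for both user and privileged execution:
 *
 *	pt_entry_t t = wimg_to_pte(VM_WIMG_IO);
 *	// t == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE) | ARM_PTE_NX | ARM_PTE_PNX
 *
 * Callers normally reach this through pmap_get_pt_ops(pmap)->wimg_to_pte()
 * so that alternative page table ops can supply their own encoding.
 */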
8121
8122 static pv_alloc_return_t
8123 pmap_enter_pv(
8124 pmap_t pmap,
8125 pt_entry_t *pte_p,
8126 int pai,
8127 unsigned int options,
8128 pv_entry_t **pve_p,
8129 boolean_t *is_altacct)
8130 {
8131 pv_entry_t **pv_h;
8132 pv_h = pai_to_pvh(pai);
8133 boolean_t first_cpu_mapping;
8134
8135 ASSERT_NOT_HIBERNATING();
8136 ASSERT_PVH_LOCKED(pai);
8137
8138 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
8139
8140 #if XNU_MONITOR
8141 if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
8142 panic("%d is locked down (%#lx), cannot enter", pai, pvh_flags);
8143 }
8144 #endif
8145
8146 #ifdef PVH_FLAG_CPU
8147 /* An IOMMU mapping may already be present for a page that hasn't yet
8148 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
8149 * if this is the first CPU mapping. We base internal/reusable
8150 * accounting on the options specified for the first CPU mapping.
8151 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
8152 * as there are *any* mappings of the page. The accounting for a
8153 * page should not need to change until the page is recycled by the
8154 * VM layer, and we assert that there are no mappings when a page
8155 * is recycled. An IOMMU mapping of a freed/recycled page is
8156 * considered a security violation & potential DMA corruption path.*/
8157 first_cpu_mapping = ((pmap != NULL) && !(pvh_flags & PVH_FLAG_CPU));
8158 if (first_cpu_mapping) {
8159 pvh_flags |= PVH_FLAG_CPU;
8160 }
8161 #else
8162 first_cpu_mapping = pvh_test_type(pv_h, PVH_TYPE_NULL);
8163 #endif
8164
8165 if (first_cpu_mapping) {
8166 if (options & PMAP_OPTIONS_INTERNAL) {
8167 SET_INTERNAL_PAGE(pai);
8168 } else {
8169 CLR_INTERNAL_PAGE(pai);
8170 }
8171 if ((options & PMAP_OPTIONS_INTERNAL) &&
8172 (options & PMAP_OPTIONS_REUSABLE)) {
8173 SET_REUSABLE_PAGE(pai);
8174 } else {
8175 CLR_REUSABLE_PAGE(pai);
8176 }
8177 }
8178 if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
8179 pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
8180 if (pmap != NULL && pmap != kernel_pmap &&
8181 ((options & PMAP_OPTIONS_ALT_ACCT) ||
8182 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
8183 IS_INTERNAL_PAGE(pai)) {
8184 /*
8185 * Make a note to ourselves that this mapping is using alternative
8186 * accounting. We'll need this in order to know which ledger to
8187 * debit when the mapping is removed.
8188 *
8189 * The altacct bit must be set while the pv head is locked. Defer
8190 * the ledger accounting until after we've dropped the lock.
8191 */
8192 SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
8193 *is_altacct = TRUE;
8194 } else {
8195 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
8196 }
8197 } else {
8198 pv_alloc_return_t ret;
8199 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
8200 pt_entry_t *pte1_p;
8201
8202 /*
8203 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
8204 */
8205 pte1_p = pvh_ptep(pv_h);
8206 pvh_set_flags(pv_h, pvh_flags);
8207 if ((*pve_p == PV_ENTRY_NULL) && ((ret = pv_alloc(pmap, pai, pve_p)) != PV_ALLOC_SUCCESS)) {
8208 return ret;
8209 }
8210
8211 pve_set_ptep(*pve_p, pte1_p);
8212 (*pve_p)->pve_next = PV_ENTRY_NULL;
8213
8214 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
8215 /*
8216 * transfer "altacct" from
8217 * pp_attr to this pve
8218 */
8219 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
8220 SET_ALTACCT_PAGE(pai, *pve_p);
8221 }
8222 pvh_update_head(pv_h, *pve_p, PVH_TYPE_PVEP);
8223 *pve_p = PV_ENTRY_NULL;
8224 } else if (!pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
8225 panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
8226 __func__, *pv_h, pte_p, pmap, pv_h);
8227 }
8228 /*
8229 * Set up pv_entry for this new mapping and then
8230 * add it to the list for this physical page.
8231 */
8232 pvh_set_flags(pv_h, pvh_flags);
8233 if ((*pve_p == PV_ENTRY_NULL) && ((ret = pv_alloc(pmap, pai, pve_p)) != PV_ALLOC_SUCCESS)) {
8234 return ret;
8235 }
8236
8237 pve_set_ptep(*pve_p, pte_p);
8238 (*pve_p)->pve_next = PV_ENTRY_NULL;
8239
8240 pvh_add(pv_h, *pve_p);
8241
8242 if (pmap != NULL && pmap != kernel_pmap &&
8243 ((options & PMAP_OPTIONS_ALT_ACCT) ||
8244 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
8245 IS_INTERNAL_PAGE(pai)) {
8246 /*
8247 * Make a note to ourselves that this
8248 * mapping is using alternative
8249 * accounting. We'll need this in order
8250 * to know which ledger to debit when
8251 * the mapping is removed.
8252 *
8253 * The altacct bit must be set while
8254 * the pv head is locked. Defer the
8255 * ledger accounting until after we've
8256 * dropped the lock.
8257 */
8258 SET_ALTACCT_PAGE(pai, *pve_p);
8259 *is_altacct = TRUE;
8260 }
8261
8262 *pve_p = PV_ENTRY_NULL;
8263 }
8264
8265 pvh_set_flags(pv_h, pvh_flags);
8266
8267 return PV_ALLOC_SUCCESS;
8268 }
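/*
 * Editorial illustration of the PV head transitions handled above (a
 * sketch of the states, not an additional code path):
 *
 *	PVH_TYPE_NULL  -- first mapping  -->  PVH_TYPE_PTEP  (head points at the sole PTE)
 *	PVH_TYPE_PTEP  -- second mapping -->  PVH_TYPE_PVEP  (head points at a pv_entry list)
 *	PVH_TYPE_PVEP  -- further mappings: another pv_entry added via pvh_add()
 *
 * Once the head is PVH_TYPE_PVEP, walking the mappings of the page looks
 * roughly like the loops used elsewhere in this file:
 *
 *	for (pv_entry_t *pvep = pvh_list(pv_h); pvep != PV_ENTRY_NULL;
 *	    pvep = PVE_NEXT_PTR(pve_next(pvep))) {
 *		pt_entry_t *ptep = pve_get_ptep(pvep);
 *		...
 *	}
 */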
8269
8270 MARK_AS_PMAP_TEXT static kern_return_t
8271 pmap_enter_options_internal(
8272 pmap_t pmap,
8273 vm_map_address_t v,
8274 pmap_paddr_t pa,
8275 vm_prot_t prot,
8276 vm_prot_t fault_type,
8277 unsigned int flags,
8278 boolean_t wired,
8279 unsigned int options)
8280 {
8281 ppnum_t pn = (ppnum_t)atop(pa);
8282 pt_entry_t pte;
8283 pt_entry_t spte;
8284 pt_entry_t *pte_p;
8285 pv_entry_t *pve_p;
8286 boolean_t set_NX;
8287 boolean_t set_XO = FALSE;
8288 boolean_t refcnt_updated;
8289 boolean_t wiredcnt_updated;
8290 unsigned int wimg_bits;
8291 boolean_t was_compressed, was_alt_compressed;
8292 kern_return_t kr = KERN_SUCCESS;
8293
8294 VALIDATE_PMAP(pmap);
8295
8296 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
8297
8298 if ((v) & pt_attr_leaf_offmask(pt_attr)) {
8299 panic("pmap_enter_options() pmap %p v 0x%llx\n",
8300 pmap, (uint64_t)v);
8301 }
8302
8303 if ((pa) & pt_attr_leaf_offmask(pt_attr)) {
8304 panic("pmap_enter_options() pmap %p pa 0x%llx\n",
8305 pmap, (uint64_t)pa);
8306 }
8307
8308 if ((prot & VM_PROT_EXECUTE) && (pmap == kernel_pmap)) {
8309 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
8310 extern vm_offset_t ctrr_test_page;
8311 if (__probable(v != ctrr_test_page))
8312 #endif
8313 panic("pmap_enter_options(): attempt to add executable mapping to kernel_pmap");
8314 }
8315
8316 #if DEVELOPMENT || DEBUG
8317 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
8318 #else
8319 if ((prot & VM_PROT_EXECUTE))
8320 #endif
8321 {
8322 set_NX = FALSE;
8323 } else {
8324 set_NX = TRUE;
8325 }
8326
8327 #if (__ARM_VMSA__ > 7)
8328 if (prot == VM_PROT_EXECUTE) {
8329 set_XO = TRUE;
8330 }
8331 #endif
8332
8333 assert(pn != vm_page_fictitious_addr);
8334
8335 refcnt_updated = FALSE;
8336 wiredcnt_updated = FALSE;
8337 pve_p = PV_ENTRY_NULL;
8338 was_compressed = FALSE;
8339 was_alt_compressed = FALSE;
8340
8341 pmap_lock(pmap);
8342
8343 /*
8344 * Expand pmap to include this pte. Assume that
8345 * pmap is always expanded to include enough hardware
8346 * pages to map one VM page.
8347 */
8348 while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
8349 /* Must unlock to expand the pmap. */
8350 pmap_unlock(pmap);
8351
8352 kr = pmap_expand(pmap, v, options, pt_attr_leaf_level(pt_attr));
8353
8354 if (kr != KERN_SUCCESS) {
8355 return kr;
8356 }
8357
8358 pmap_lock(pmap);
8359 }
8360
8361 if (options & PMAP_OPTIONS_NOENTER) {
8362 pmap_unlock(pmap);
8363 return KERN_SUCCESS;
8364 }
8365
8366 Pmap_enter_retry:
8367
8368 spte = *pte_p;
8369
8370 if (ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
8371 /*
8372 * "pmap" should be locked at this point, so this should
8373 * not race with another pmap_enter() or pmap_remove_range().
8374 */
8375 assert(pmap != kernel_pmap);
8376
8377 /* one less "compressed" */
8378 OSAddAtomic64(-1, &pmap->stats.compressed);
8379 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
8380 pt_attr_page_size(pt_attr) * PAGE_RATIO);
8381
8382 was_compressed = TRUE;
8383 if (spte & ARM_PTE_COMPRESSED_ALT) {
8384 was_alt_compressed = TRUE;
8385 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8386 } else {
8387 /* was part of the footprint */
8388 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8389 }
8390
8391 /* clear "compressed" marker */
8392 /* XXX is it necessary since we're about to overwrite it ? */
8393 WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
8394 spte = ARM_PTE_TYPE_FAULT;
8395
8396 /*
8397 * We're replacing a "compressed" marker with a valid PTE,
8398 * so no change for "refcnt".
8399 */
8400 refcnt_updated = TRUE;
8401 }
8402
8403 if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
8404 pmap_remove_range(pmap, v, pte_p, pte_p + PAGE_RATIO, 0);
8405 }
8406
8407 pte = pa_to_pte(pa) | ARM_PTE_TYPE;
8408
8409 if (wired) {
8410 pte |= ARM_PTE_WIRED;
8411 }
8412
8413 if (set_NX) {
8414 pte |= pt_attr_leaf_xn(pt_attr);
8415 } else {
8416 #if (__ARM_VMSA__ > 7)
8417 if (pmap == kernel_pmap) {
8418 pte |= ARM_PTE_NX;
8419 } else {
8420 pte |= pt_attr_leaf_x(pt_attr);
8421 }
8422 #endif
8423 }
8424
8425 if (pmap == kernel_pmap) {
8426 #if __ARM_KERNEL_PROTECT__
8427 pte |= ARM_PTE_NG;
8428 #endif /* __ARM_KERNEL_PROTECT__ */
8429 if (prot & VM_PROT_WRITE) {
8430 pte |= ARM_PTE_AP(AP_RWNA);
8431 pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
8432 } else {
8433 pte |= ARM_PTE_AP(AP_RONA);
8434 pa_set_bits(pa, PP_ATTR_REFERENCED);
8435 }
8436 #if (__ARM_VMSA__ == 7)
8437 if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH)) {
8438 pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
8439 }
8440 #endif
8441 } else {
8442 if (!pmap->nested) {
8443 pte |= ARM_PTE_NG;
8444 } else if ((pmap->nested_region_asid_bitmap)
8445 && (v >= pmap->nested_region_addr)
8446 && (v < (pmap->nested_region_addr + pmap->nested_region_size))) {
8447 unsigned int index = (unsigned int)((v - pmap->nested_region_addr) >> pt_attr_twig_shift(pt_attr));
8448
8449 if ((pmap->nested_region_asid_bitmap)
8450 && testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
8451 pte |= ARM_PTE_NG;
8452 }
8453 }
8454 #if MACH_ASSERT
8455 if (pmap->nested_pmap != NULL) {
8456 vm_map_address_t nest_vaddr;
8457 pt_entry_t *nest_pte_p;
8458
8459 nest_vaddr = v - pmap->nested_region_addr + pmap->nested_region_addr;
8460
8461 if ((nest_vaddr >= pmap->nested_region_addr)
8462 && (nest_vaddr < (pmap->nested_region_addr + pmap->nested_region_size))
8463 && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
8464 && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
8465 && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p, nest_pte_p))
8466 && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
8467 unsigned int index = (unsigned int)((v - pmap->nested_region_addr) >> pt_attr_twig_shift(pt_attr));
8468
8469 if ((pmap->nested_pmap->nested_region_asid_bitmap)
8470 && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
8471 panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
8472 nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
8473 }
8474 }
8475 }
8476 #endif
8477 if (prot & VM_PROT_WRITE) {
8478 if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
8479 if (fault_type & VM_PROT_WRITE) {
8480 if (set_XO) {
8481 pte |= pt_attr_leaf_rwna(pt_attr);
8482 } else {
8483 pte |= pt_attr_leaf_rw(pt_attr);
8484 }
8485 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
8486 } else {
8487 if (set_XO) {
8488 pte |= pt_attr_leaf_rona(pt_attr);
8489 } else {
8490 pte |= pt_attr_leaf_ro(pt_attr);
8491 }
8492 pa_set_bits(pa, PP_ATTR_REFERENCED);
8493 pte_set_was_writeable(pte, true);
8494 }
8495 } else {
8496 if (set_XO) {
8497 pte |= pt_attr_leaf_rwna(pt_attr);
8498 } else {
8499 pte |= pt_attr_leaf_rw(pt_attr);
8500 }
8501 pa_set_bits(pa, PP_ATTR_REFERENCED);
8502 }
8503 } else {
8504 if (set_XO) {
8505 pte |= pt_attr_leaf_rona(pt_attr);
8506 } else {
8507 pte |= pt_attr_leaf_ro(pt_attr);
8508 }
8509 pa_set_bits(pa, PP_ATTR_REFERENCED);
8510 }
8511 }
8512
8513 pte |= ARM_PTE_AF;
8514
8515 volatile uint16_t *refcnt = NULL;
8516 volatile uint16_t *wiredcnt = NULL;
8517 if (pmap != kernel_pmap) {
8518 ptd_info_t *ptd_info = ptep_get_info(pte_p);
8519 refcnt = &ptd_info->refcnt;
8520 wiredcnt = &ptd_info->wiredcnt;
8521 /* Bump the wired count to keep the PTE page from being reclaimed. We need this because
8522 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
8523 * a new PV entry. */
8524 if (!wiredcnt_updated) {
8525 OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
8526 wiredcnt_updated = TRUE;
8527 }
8528 if (!refcnt_updated) {
8529 OSAddAtomic16(1, (volatile int16_t*)refcnt);
8530 refcnt_updated = TRUE;
8531 }
8532 }
8533
8534 if (pa_valid(pa)) {
8535 int pai;
8536 boolean_t is_altacct, is_internal;
8537
8538 is_internal = FALSE;
8539 is_altacct = FALSE;
8540
8541 pai = (int)pa_index(pa);
8542
8543 LOCK_PVH(pai);
8544
8545 Pmap_enter_loop:
8546 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
8547 wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
8548 } else {
8549 wimg_bits = pmap_cache_attributes(pn);
8550 }
8551
8552 /* We may be retrying this operation after dropping the PVH lock.
8553 * Cache attributes for the physical page may have changed while the lock
8554 * was dropped, so clear any cache attributes we may have previously set
8555 * in the PTE template. */
8556 pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
8557 pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
8558
8559 #if XNU_MONITOR
8560 /* The regular old kernel is not allowed to remap PPL pages. */
8561 if (__improbable(pa_test_monitor(pa))) {
8562 panic("%s: page belongs to PPL, "
8563 "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
8564 __FUNCTION__,
8565 pmap, v, (void*)pa, prot, fault_type, flags, wired, options);
8566 }
8567
8568 if (__improbable(pvh_get_flags(pai_to_pvh(pai)) & PVH_FLAG_LOCKDOWN)) {
8569 panic("%s: page locked down, "
8570 "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
8571 __FUNCTION__,
8572 pmap, v, (void *)pa, prot, fault_type, flags, wired, options);
8573 }
8574 #endif
8575
8576
8577 if (pte == *pte_p) {
8578 /*
8579 * This pmap_enter operation has been completed by another thread;
8580 * undo the refcnt on the page table and return.
8581 */
8582 UNLOCK_PVH(pai);
8583 goto Pmap_enter_cleanup;
8584 } else if (pte_to_pa(*pte_p) == pa) {
8585 pmap_enter_pte(pmap, pte_p, pte, v);
8586 UNLOCK_PVH(pai);
8587 goto Pmap_enter_cleanup;
8588 } else if (*pte_p != ARM_PTE_TYPE_FAULT) {
8589 /*
8590 * The pte has been modified by another thread;
8591 * hold the refcnt on the page table and retry the pmap_enter operation.
8592 */
8593 UNLOCK_PVH(pai);
8594 goto Pmap_enter_retry;
8595 }
8596 pv_alloc_return_t pv_status = pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct);
8597 if (pv_status == PV_ALLOC_RETRY) {
8598 goto Pmap_enter_loop;
8599 } else if (pv_status == PV_ALLOC_FAIL) {
8600 UNLOCK_PVH(pai);
8601 kr = KERN_RESOURCE_SHORTAGE;
8602 goto Pmap_enter_cleanup;
8603 }
8604
8605 pmap_enter_pte(pmap, pte_p, pte, v);
8606
8607 if (pmap != kernel_pmap) {
8608 if (IS_REUSABLE_PAGE(pai) &&
8609 !is_altacct) {
8610 assert(IS_INTERNAL_PAGE(pai));
8611 OSAddAtomic(+1, &pmap->stats.reusable);
8612 PMAP_STATS_PEAK(pmap->stats.reusable);
8613 } else if (IS_INTERNAL_PAGE(pai)) {
8614 OSAddAtomic(+1, &pmap->stats.internal);
8615 PMAP_STATS_PEAK(pmap->stats.internal);
8616 is_internal = TRUE;
8617 } else {
8618 OSAddAtomic(+1, &pmap->stats.external);
8619 PMAP_STATS_PEAK(pmap->stats.external);
8620 }
8621 }
8622
8623 UNLOCK_PVH(pai);
8624
8625 if (pmap != kernel_pmap) {
8626 pmap_ledger_credit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8627
8628 if (is_internal) {
8629 /*
8630 * Make corresponding adjustments to
8631 * phys_footprint statistics.
8632 */
8633 pmap_ledger_credit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8634 if (is_altacct) {
8635 /*
8636 * If this page is internal and
8637 * in an IOKit region, credit
8638 * the task's total count of
8639 * dirty, internal IOKit pages.
8640 * It should *not* count towards
8641 * the task's total physical
8642 * memory footprint, because
8643 * this entire region was
8644 * already billed to the task
8645 * at the time the mapping was
8646 * created.
8647 *
8648 * Put another way, this is
8649 * internal++ and
8650 * alternate_accounting++, so
8651 * net effect on phys_footprint
8652 * is 0. That means: don't
8653 * touch phys_footprint here.
8654 */
8655 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8656 } else {
8657 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8658 }
8659 }
8660 }
8661
8662 OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
8663 if (pmap->stats.resident_count > pmap->stats.resident_max) {
8664 pmap->stats.resident_max = pmap->stats.resident_count;
8665 }
8666 } else {
8667 if (prot & VM_PROT_EXECUTE) {
8668 kr = KERN_FAILURE;
8669 goto Pmap_enter_cleanup;
8670 }
8671
8672 wimg_bits = pmap_cache_attributes(pn);
8673 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
8674 wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
8675 }
8676
8677 pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
8678
8679 #if XNU_MONITOR
8680 if ((wimg_bits & PP_ATTR_MONITOR) && !pmap_ppl_disable) {
8681 uint64_t xprr_perm = pte_to_xprr_perm(pte);
8682 switch (xprr_perm) {
8683 case XPRR_KERN_RO_PERM:
8684 break;
8685 case XPRR_KERN_RW_PERM:
8686 pte &= ~ARM_PTE_XPRR_MASK;
8687 pte |= xprr_perm_to_pte(XPRR_PPL_RW_PERM);
8688 break;
8689 default:
8690 panic("Unsupported xPRR perm %llu for pte 0x%llx", xprr_perm, (uint64_t)pte);
8691 }
8692 }
8693 #endif
8694 pmap_enter_pte(pmap, pte_p, pte, v);
8695 }
8696
8697 goto Pmap_enter_return;
8698
8699 Pmap_enter_cleanup:
8700
8701 if (refcnt != NULL) {
8702 assert(refcnt_updated);
8703 if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
8704 panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
8705 }
8706 }
8707
8708 Pmap_enter_return:
8709
8710 #if CONFIG_PGTRACE
8711 if (pgtrace_enabled) {
8712 // Clone and invalidate original mapping if eligible
8713 pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES, 0, 0);
8714 }
8715 #endif /* CONFIG_PGTRACE */
8716
8717 if (pve_p != PV_ENTRY_NULL) {
8718 pv_free_entry(pve_p);
8719 }
8720
8721 if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
8722 panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
8723 }
8724
8725 pmap_unlock(pmap);
8726
8727 return kr;
8728 }
8729
8730 kern_return_t
8731 pmap_enter_options_addr(
8732 pmap_t pmap,
8733 vm_map_address_t v,
8734 pmap_paddr_t pa,
8735 vm_prot_t prot,
8736 vm_prot_t fault_type,
8737 unsigned int flags,
8738 boolean_t wired,
8739 unsigned int options,
8740 __unused void *arg)
8741 {
8742 kern_return_t kr = KERN_FAILURE;
8743
8744
8745 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
8746 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pa, prot);
8747
8748
8749 #if XNU_MONITOR
8750 /*
8751 * If NOWAIT was not requested, loop until the enter does not
8752 * fail due to lack of resources.
8753 */
8754 while ((kr = pmap_enter_options_ppl(pmap, v, pa, prot, fault_type, flags, wired, options | PMAP_OPTIONS_NOWAIT)) == KERN_RESOURCE_SHORTAGE) {
8755 pmap_alloc_page_for_ppl((options & PMAP_OPTIONS_NOWAIT) ? PMAP_PAGES_ALLOCATE_NOWAIT : 0);
8756 if (options & PMAP_OPTIONS_NOWAIT) {
8757 break;
8758 }
8759 }
8760
8761 pmap_ledger_check_balance(pmap);
8762 #else
8763 kr = pmap_enter_options_internal(pmap, v, pa, prot, fault_type, flags, wired, options);
8764 #endif
8765
8766 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
8767
8768 return kr;
8769 }
8770
8771 kern_return_t
8772 pmap_enter_options(
8773 pmap_t pmap,
8774 vm_map_address_t v,
8775 ppnum_t pn,
8776 vm_prot_t prot,
8777 vm_prot_t fault_type,
8778 unsigned int flags,
8779 boolean_t wired,
8780 unsigned int options,
8781 __unused void *arg)
8782 {
8783 return pmap_enter_options_addr(pmap, v, ((pmap_paddr_t)pn) << PAGE_SHIFT, prot, fault_type, flags, wired, options, arg);
8784 }
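/*
 * Illustrative usage (editorial sketch; 'user_pmap', 'va' and 'pn' are
 * hypothetical): enter an internal, reusable page without blocking for
 * memory, leaving the retry policy to the caller.
 *
 *	kern_return_t kr;
 *	kr = pmap_enter_options(user_pmap, va, pn,
 *	    VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE,
 *	    PMAP_OPTIONS_INTERNAL | PMAP_OPTIONS_REUSABLE | PMAP_OPTIONS_NOWAIT,
 *	    NULL);
 *	if (kr == KERN_RESOURCE_SHORTAGE) {
 *		// No PV entry or page table page could be grabbed without
 *		// blocking; the caller may replenish memory and retry.
 *	}
 */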
8785
8786 /*
8787 * Routine: pmap_change_wiring
8788 * Function: Change the wiring attribute for a map/virtual-address
8789 * pair.
8790 * In/out conditions:
8791 * The mapping must already exist in the pmap.
8792 */
8793 MARK_AS_PMAP_TEXT static void
8794 pmap_change_wiring_internal(
8795 pmap_t pmap,
8796 vm_map_address_t v,
8797 boolean_t wired)
8798 {
8799 pt_entry_t *pte_p;
8800 pmap_paddr_t pa;
8801
8802 VALIDATE_PMAP(pmap);
8803
8804 pmap_lock(pmap);
8805
8806 const pt_attr_t * pt_attr = pmap_get_pt_attr(pmap);
8807
8808 pte_p = pmap_pte(pmap, v);
8809 assert(pte_p != PT_ENTRY_NULL);
8810 pa = pte_to_pa(*pte_p);
8811
8812 while (pa_valid(pa)) {
8813 pmap_paddr_t new_pa;
8814
8815 LOCK_PVH((int)pa_index(pa));
8816 new_pa = pte_to_pa(*pte_p);
8817
8818 if (pa == new_pa) {
8819 break;
8820 }
8821
8822 UNLOCK_PVH((int)pa_index(pa));
8823 pa = new_pa;
8824 }
8825
8826 if (wired != pte_is_wired(*pte_p)) {
8827 pte_set_wired(pmap, pte_p, wired);
8828 if (pmap != kernel_pmap) {
8829 if (wired) {
8830 OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
8831 pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8832 } else if (!wired) {
8833 __assert_only int32_t orig_wired = OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
8834 PMAP_STATS_ASSERTF(orig_wired > 0, pmap, "stats.wired_count %d", orig_wired);
8835 pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8836 }
8837 }
8838 }
8839
8840 if (pa_valid(pa)) {
8841 UNLOCK_PVH((int)pa_index(pa));
8842 }
8843
8844 pmap_unlock(pmap);
8845 }
8846
8847 void
8848 pmap_change_wiring(
8849 pmap_t pmap,
8850 vm_map_address_t v,
8851 boolean_t wired)
8852 {
8853 #if XNU_MONITOR
8854 pmap_change_wiring_ppl(pmap, v, wired);
8855
8856 pmap_ledger_check_balance(pmap);
8857 #else
8858 pmap_change_wiring_internal(pmap, v, wired);
8859 #endif
8860 }
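/*
 * Illustrative usage (editorial sketch; 'user_pmap' and 'va' are
 * hypothetical). The mapping must already exist, as noted above; the call
 * only toggles the ARM_PTE_WIRED marker and adjusts the wired ledger and
 * statistics.
 *
 *	pmap_change_wiring(user_pmap, va, TRUE);	// wire the mapping
 *	...
 *	pmap_change_wiring(user_pmap, va, FALSE);	// later, unwire it
 */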
8861
8862 MARK_AS_PMAP_TEXT static pmap_paddr_t
8863 pmap_find_pa_internal(
8864 pmap_t pmap,
8865 addr64_t va)
8866 {
8867 pmap_paddr_t pa = 0;
8868
8869 VALIDATE_PMAP(pmap);
8870
8871 if (pmap != kernel_pmap) {
8872 pmap_lock_ro(pmap);
8873 }
8874
8875 pa = pmap_vtophys(pmap, va);
8876
8877 if (pmap != kernel_pmap) {
8878 pmap_unlock_ro(pmap);
8879 }
8880
8881 return pa;
8882 }
8883
8884 pmap_paddr_t
8885 pmap_find_pa_nofault(pmap_t pmap, addr64_t va)
8886 {
8887 pmap_paddr_t pa = 0;
8888
8889 if (pmap == kernel_pmap) {
8890 pa = mmu_kvtop(va);
8891 } else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
8892 /*
8893 * Note that this doesn't account for PAN: mmu_uvtop() may return a valid
8894 * translation even if PAN would prevent kernel access through the translation.
8895 * It's therefore assumed the UVA will be accessed in a PAN-disabled context.
8896 */
8897 pa = mmu_uvtop(va);
8898 }
8899 return pa;
8900 }
8901
8902 pmap_paddr_t
8903 pmap_find_pa(
8904 pmap_t pmap,
8905 addr64_t va)
8906 {
8907 pmap_paddr_t pa = pmap_find_pa_nofault(pmap, va);
8908
8909 if (pa != 0) {
8910 return pa;
8911 }
8912
8913 if (not_in_kdp) {
8914 #if XNU_MONITOR
8915 return pmap_find_pa_ppl(pmap, va);
8916 #else
8917 return pmap_find_pa_internal(pmap, va);
8918 #endif
8919 } else {
8920 return pmap_vtophys(pmap, va);
8921 }
8922 }
8923
8924 ppnum_t
8925 pmap_find_phys_nofault(
8926 pmap_t pmap,
8927 addr64_t va)
8928 {
8929 ppnum_t ppn;
8930 ppn = atop(pmap_find_pa_nofault(pmap, va));
8931 return ppn;
8932 }
8933
8934 ppnum_t
8935 pmap_find_phys(
8936 pmap_t pmap,
8937 addr64_t va)
8938 {
8939 ppnum_t ppn;
8940 ppn = atop(pmap_find_pa(pmap, va));
8941 return ppn;
8942 }
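/*
 * Illustrative usage (editorial sketch; 'user_pmap' and 'va' are
 * hypothetical). pmap_find_phys() first tries the hardware probe in
 * pmap_find_pa_nofault() and only falls back to a software table walk
 * (pmap_find_pa_internal()/pmap_vtophys()) when that misses:
 *
 *	ppnum_t pn = pmap_find_phys(user_pmap, va);
 *	if (pn == 0) {
 *		// no valid translation for 'va' in 'user_pmap'
 *	}
 */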
8943
8944
8945 pmap_paddr_t
8946 kvtophys(
8947 vm_offset_t va)
8948 {
8949 pmap_paddr_t pa;
8950
8951 pa = mmu_kvtop(va);
8952 if (pa) {
8953 return pa;
8954 }
8955 pa = pmap_vtophys(kernel_pmap, va);
8956 if (pa) {
8957 pa |= (va & PAGE_MASK);
8958 }
8959
8960 return (pmap_paddr_t)pa;
8961 }
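/*
 * Illustrative usage (editorial sketch; 'kva' refers to a hypothetical
 * mapped kernel buffer): translate a kernel virtual address to its
 * physical address, as this file itself does when linking newly allocated
 * table pages in pmap_expand().
 *
 *	pmap_paddr_t pa = kvtophys((vm_offset_t)kva);
 *	if (pa == 0) {
 *		// 'kva' has no valid translation in the kernel pmap
 *	}
 *
 * When non-zero, the result includes the byte offset within the page.
 */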
8962
8963 pmap_paddr_t
8964 pmap_vtophys(
8965 pmap_t pmap,
8966 addr64_t va)
8967 {
8968 if ((va < pmap->min) || (va >= pmap->max)) {
8969 return 0;
8970 }
8971
8972 #if (__ARM_VMSA__ == 7)
8973 tt_entry_t *tte_p, tte;
8974 pt_entry_t *pte_p;
8975 pmap_paddr_t pa;
8976
8977 tte_p = pmap_tte(pmap, va);
8978 if (tte_p == (tt_entry_t *) NULL) {
8979 return (pmap_paddr_t) 0;
8980 }
8981
8982 tte = *tte_p;
8983 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
8984 pte_p = (pt_entry_t *) ttetokv(tte) + pte_index(pmap, pt_attr, va);
8985 pa = pte_to_pa(*pte_p) | (va & ARM_PGMASK);
8987 #if DEVELOPMENT || DEBUG
8988 if (atop(pa) != 0 &&
8989 ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
8990 panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
8991 pmap, va, pte_p, (uint64_t) (*pte_p), atop(pa));
8992 }
8993 #endif /* DEVELOPMENT || DEBUG */
8994 } else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
8995 if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER) {
8996 pa = suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK);
8997 } else {
8998 pa = sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK);
8999 }
9000 } else {
9001 pa = 0;
9002 }
9003 #else
9004 tt_entry_t * ttp = NULL;
9005 tt_entry_t * ttep = NULL;
9006 tt_entry_t tte = ARM_TTE_EMPTY;
9007 pmap_paddr_t pa = 0;
9008 unsigned int cur_level;
9009
9010 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
9011
9012 ttp = pmap->tte;
9013
9014 for (cur_level = pt_attr_root_level(pt_attr); cur_level <= pt_attr_leaf_level(pt_attr); cur_level++) {
9015 ttep = &ttp[ttn_index(pmap, pt_attr, va, cur_level)];
9016
9017 tte = *ttep;
9018
9019 const uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
9020 const uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
9021 const uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
9022 const uint64_t offmask = pt_attr->pta_level_info[cur_level].offmask;
9023
9024 if ((tte & valid_mask) != valid_mask) {
9025 return (pmap_paddr_t) 0;
9026 }
9027
9028 /* This detects both leaf entries and intermediate block mappings. */
9029 if ((tte & type_mask) == type_block) {
9030 pa = ((tte & ARM_TTE_PA_MASK & ~offmask) | (va & offmask));
9031 break;
9032 }
9033
9034 ttp = (tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
9035 }
9036 #endif
9037
9038 return pa;
9039 }
9040
9041 /*
9042 * pmap_init_pte_page - Initialize a page table page.
9043 */
9044 void
9045 pmap_init_pte_page(
9046 pmap_t pmap,
9047 pt_entry_t *pte_p,
9048 vm_offset_t va,
9049 unsigned int ttlevel,
9050 boolean_t alloc_ptd)
9051 {
9052 pt_desc_t *ptdp = NULL;
9053 vm_offset_t *pvh;
9054
9055 pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));
9056
9057 if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
9058 if (alloc_ptd) {
9059 /*
9060 * This path should only be invoked from arm_vm_init. If we are emulating 16KB pages
9061 * on 4KB hardware, we may already have allocated a page table descriptor for a
9062 * bootstrap request, so we check for an existing PTD here.
9063 */
9064 ptdp = ptd_alloc(pmap);
9065 if (ptdp == NULL) {
9066 panic("%s: unable to allocate PTD", __func__);
9067 }
9068 pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
9069 } else {
9070 panic("pmap_init_pte_page(): pte_p %p", pte_p);
9071 }
9072 } else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
9073 ptdp = (pt_desc_t*)(pvh_list(pvh));
9074 } else {
9075 panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
9076 }
9077
9078 // The barrier below ensures that prior updates to this page are visible to the
9079 // page table walker before the page is linked into the previous level's entry.
9080 __builtin_arm_dmb(DMB_ISHST);
9081 ptd_init(ptdp, pmap, va, ttlevel, pte_p);
9082 }
9083
9084 /*
9085 * Routine: pmap_expand
9086 *
9087 * Expands a pmap to be able to map the specified virtual address.
9088 *
9089 * Allocates new memory for the default (COARSE) translation table
9090 * entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
9091 * also allocates space for the corresponding pv entries.
9092 *
9093 * Nothing should be locked.
9094 */
9095 static kern_return_t
9096 pmap_expand(
9097 pmap_t pmap,
9098 vm_map_address_t v,
9099 unsigned int options,
9100 unsigned int level)
9101 {
9102 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
9103
9104 #if (__ARM_VMSA__ == 7)
9105 vm_offset_t pa;
9106 tt_entry_t *tte_p;
9107 tt_entry_t *tt_p;
9108 unsigned int i;
9109
9110 #if DEVELOPMENT || DEBUG
9111 /*
9112 * We no longer support root level expansion; panic in case something
9113 * still attempts to trigger it.
9114 */
9115 i = tte_index(pmap, pt_attr, v);
9116
9117 if (i >= pmap->tte_index_max) {
9118 panic("%s: index out of range, index=%u, max=%u, "
9119 "pmap=%p, addr=%p, options=%u, level=%u",
9120 __func__, i, pmap->tte_index_max,
9121 pmap, (void *)v, options, level);
9122 }
9123 #endif /* DEVELOPMENT || DEBUG */
9124
9125 if (level == 1) {
9126 return KERN_SUCCESS;
9127 }
9128
9129 {
9130 tt_entry_t *tte_next_p;
9131
9132 pmap_lock(pmap);
9133 pa = 0;
9134 if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
9135 pmap_unlock(pmap);
9136 return KERN_SUCCESS;
9137 }
9138 tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
9139 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
9140 if (tte_to_pa(*tte_next_p)) {
9141 pa = tte_to_pa(*tte_next_p);
9142 break;
9143 }
9144 tte_next_p++;
9145 }
9146 pa = pa & ~PAGE_MASK;
9147 if (pa) {
9148 tte_p = &pmap->tte[ttenum(v)];
9149 *tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
9150 FLUSH_PTE(tte_p);
9151 PMAP_TRACE(5, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
9152 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
9153 pmap_unlock(pmap);
9154 return KERN_SUCCESS;
9155 }
9156 pmap_unlock(pmap);
9157 }
9158 v = v & ~ARM_TT_L1_PT_OFFMASK;
9159
9160
9161 while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
9162 /*
9163 * Allocate a VM page for the level 2 page table entries.
9164 */
9165 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
9166 if (options & PMAP_OPTIONS_NOWAIT) {
9167 return KERN_RESOURCE_SHORTAGE;
9168 }
9169 VM_PAGE_WAIT();
9170 }
9171
9172 pmap_lock(pmap);
9173 /*
9174 * See if someone else expanded us first
9175 */
9176 if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
9177 tt_entry_t *tte_next_p;
9178
9179 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
9180 pa = kvtophys((vm_offset_t)tt_p);
9181 tte_p = &pmap->tte[ttenum(v)];
9182 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
9183 *tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
9184 PMAP_TRACE(5, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
9185 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
9186 tte_next_p++;
9187 pa = pa + 0x400;
9188 }
9189 FLUSH_PTE_RANGE(tte_p, tte_p + 4);
9190
9191 pa = 0x0ULL;
9192 tt_p = (tt_entry_t *)NULL;
9193 }
9194 pmap_unlock(pmap);
9195 if (tt_p != (tt_entry_t *)NULL) {
9196 pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
9197 tt_p = (tt_entry_t *)NULL;
9198 }
9199 }
9200 return KERN_SUCCESS;
9201 #else
9202 pmap_paddr_t pa;
9203 unsigned int ttlevel = pt_attr_root_level(pt_attr);
9204 tt_entry_t *tte_p;
9205 tt_entry_t *tt_p;
9206
9207 pa = 0x0ULL;
9208 tt_p = (tt_entry_t *)NULL;
9209
9210 for (; ttlevel < level; ttlevel++) {
9211 pmap_lock_ro(pmap);
9212
9213 if (pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL) {
9214 pmap_unlock_ro(pmap);
9215 while (pmap_tt_allocate(pmap, &tt_p, ttlevel + 1, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
9216 if (options & PMAP_OPTIONS_NOWAIT) {
9217 return KERN_RESOURCE_SHORTAGE;
9218 }
9219 #if XNU_MONITOR
9220 panic("%s: failed to allocate tt, "
9221 "pmap=%p, v=%p, options=0x%x, level=%u",
9222 __FUNCTION__,
9223 pmap, (void *)v, options, level);
9224 #else
9225 VM_PAGE_WAIT();
9226 #endif
9227 }
9228 pmap_lock(pmap);
9229 if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
9230 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE);
9231 pa = kvtophys((vm_offset_t)tt_p);
9232 tte_p = pmap_ttne(pmap, ttlevel, v);
9233 *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
9234 PMAP_TRACE(4 + ttlevel, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
9235 VM_KERNEL_ADDRHIDE((v & ~pt_attr_ln_offmask(pt_attr, ttlevel)) + pt_attr_ln_size(pt_attr, ttlevel)), *tte_p);
9236 pa = 0x0ULL;
9237 tt_p = (tt_entry_t *)NULL;
9238 }
9239 pmap_unlock(pmap);
9240 } else {
9241 pmap_unlock_ro(pmap);
9242 }
9243
9244 if (tt_p != (tt_entry_t *)NULL) {
9245 pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
9246 tt_p = (tt_entry_t *)NULL;
9247 }
9248 }
9249
9250 return KERN_SUCCESS;
9251 #endif
9252 }
9253
9254 /*
9255 * Routine: pmap_collect
9256 * Function:
9257 * Garbage collects the physical map system for
9258 * pages which are no longer used.
9259 * Success need not be guaranteed -- that is, some
9260 * unreferenced pages may be left uncollected while
9261 * others are collected.
9262 */
9263 void
9264 pmap_collect(pmap_t pmap)
9265 {
9266 if (pmap == PMAP_NULL) {
9267 return;
9268 }
9269
9270 #if 0
9271 pmap_lock(pmap);
9272 if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
9273 /* TODO: Scan for vm page assigned to top level page tables with no reference */
9274 }
9275 pmap_unlock(pmap);
9276 #endif
9277
9278 return;
9279 }
9280
9281 /*
9282 * Routine: pmap_gc
9283 * Function:
9284 * Pmap garbage collection
9285 * Called by the pageout daemon when pages are scarce.
9286 *
9287 */
9288 void
9289 pmap_gc(
9290 void)
9291 {
9292 #if XNU_MONITOR
9293 /*
9294 * We cannot invoke the scheduler from the PPL, so for now we elide the
9295 * GC logic if the PPL is enabled.
9296 */
9297 #endif
9298 #if !XNU_MONITOR
9299 pmap_t pmap, pmap_next;
9300 boolean_t gc_wait;
9301
9302 if (pmap_gc_allowed &&
9303 (pmap_gc_allowed_by_time_throttle ||
9304 pmap_gc_forced)) {
9305 pmap_gc_forced = FALSE;
9306 pmap_gc_allowed_by_time_throttle = FALSE;
9307 pmap_simple_lock(&pmaps_lock);
9308 pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
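		/*
		 * Walk the global pmap list, marking each pmap PMAP_GC_INFLIGHT while
		 * it is being collected; if another thread set PMAP_GC_WAIT in the
		 * meantime, wake it once collection of this pmap is finished.
		 */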
9309 while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
9310 if (!(pmap->gc_status & PMAP_GC_INFLIGHT)) {
9311 pmap->gc_status |= PMAP_GC_INFLIGHT;
9312 }
9313 pmap_simple_unlock(&pmaps_lock);
9314
9315 pmap_collect(pmap);
9316
9317 pmap_simple_lock(&pmaps_lock);
9318 gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
9319 pmap->gc_status &= ~(PMAP_GC_INFLIGHT | PMAP_GC_WAIT);
9320 pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
9321 if (gc_wait) {
9322 if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next)) {
9323 pmap_next->gc_status |= PMAP_GC_INFLIGHT;
9324 }
9325 pmap_simple_unlock(&pmaps_lock);
9326 thread_wakeup((event_t) &pmap->gc_status);
9327 pmap_simple_lock(&pmaps_lock);
9328 }
9329 pmap = pmap_next;
9330 }
9331 pmap_simple_unlock(&pmaps_lock);
9332 }
9333 #endif
9334 }
9335
9336 /*
9337 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
9338 */
9339 uint64_t
9340 pmap_release_pages_fast(void)
9341 {
9342 #if XNU_MONITOR
9343 return pmap_release_ppl_pages_to_kernel();
9344 #else /* XNU_MONITOR */
9345 return 0;
9346 #endif
9347 }
9348
9349 /*
9350 * By default, don't attempt pmap GC more frequently
9351 * than once per minute.
9352 */
9353
9354 void
9355 compute_pmap_gc_throttle(
9356 void *arg __unused)
9357 {
9358 pmap_gc_allowed_by_time_throttle = TRUE;
9359 }
9360
9361 /*
9362 * pmap_attribute_cache_sync(ppnum_t pp, vm_size_t size, ...)
9363 *
9364 * Invalidates all of the instruction cache on a physical page and
9365 * pushes any dirty data from the data cache for the same physical page.
9366 */
9367
9368 kern_return_t
9369 pmap_attribute_cache_sync(
9370 ppnum_t pp,
9371 vm_size_t size,
9372 __unused vm_machine_attribute_t attribute,
9373 __unused vm_machine_attribute_val_t * value)
9374 {
9375 if (size > PAGE_SIZE) {
9376 panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
9377 } else {
9378 cache_sync_page(pp);
9379 }
9380
9381 return KERN_SUCCESS;
9382 }
9383
9384 /*
9385 * pmap_sync_page_data_phys(ppnum_t pp)
9386 *
9387 * Invalidates all of the instruction cache on a physical page and
9388 * pushes any dirty data from the data cache for the same physical page
9389 */
9390 void
9391 pmap_sync_page_data_phys(
9392 ppnum_t pp)
9393 {
9394 cache_sync_page(pp);
9395 }
9396
9397 /*
9398 * pmap_sync_page_attributes_phys(ppnum_t pp)
9399 *
9400 * Write back and invalidate all cachelines on a physical page.
9401 */
9402 void
9403 pmap_sync_page_attributes_phys(
9404 ppnum_t pp)
9405 {
9406 flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
9407 }
9408
9409 #if CONFIG_COREDUMP
9410 /* temporary workaround */
9411 boolean_t
9412 coredumpok(
9413 vm_map_t map,
9414 mach_vm_offset_t va)
9415 {
9416 pt_entry_t *pte_p;
9417 pt_entry_t spte;
9418
9419 pte_p = pmap_pte(map->pmap, va);
9420 if (0 == pte_p) {
9421 return FALSE;
9422 }
9423 spte = *pte_p;
9424 return (spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
9425 }
9426 #endif
9427
9428 void
9429 fillPage(
9430 ppnum_t pn,
9431 unsigned int fill)
9432 {
9433 unsigned int *addr;
9434 int count;
9435
9436 addr = (unsigned int *) phystokv(ptoa(pn));
9437 count = PAGE_SIZE / sizeof(unsigned int);
9438 while (count--) {
9439 *addr++ = fill;
9440 }
9441 }
9442
9443 extern void mapping_set_mod(ppnum_t pn);
9444
9445 void
9446 mapping_set_mod(
9447 ppnum_t pn)
9448 {
9449 pmap_set_modify(pn);
9450 }
9451
9452 extern void mapping_set_ref(ppnum_t pn);
9453
9454 void
9455 mapping_set_ref(
9456 ppnum_t pn)
9457 {
9458 pmap_set_reference(pn);
9459 }
9460
9461 /*
9462 * Clear specified attribute bits.
9463 *
9464 * Try to force an arm_fast_fault() for all mappings of
9465 * the page - to force attributes to be set again at fault time.
9466 * If the forcing succeeds, clear the cached bits at the head.
9467 * Otherwise, something must have been wired, so leave the cached
9468 * attributes alone.
9469 */
9470 MARK_AS_PMAP_TEXT static void
9471 phys_attribute_clear_with_flush_range(
9472 ppnum_t pn,
9473 unsigned int bits,
9474 int options,
9475 void *arg,
9476 pmap_tlb_flush_range_t *flush_range)
9477 {
9478 pmap_paddr_t pa = ptoa(pn);
9479 vm_prot_t allow_mode = VM_PROT_ALL;
9480
9481 #if XNU_MONITOR
9482 if (bits & PP_ATTR_PPL_OWNED_BITS) {
9483 panic("%s: illegal request, "
9484 "pn=%u, bits=%#x, options=%#x, arg=%p, flush_range=%p",
9485 __FUNCTION__,
9486 pn, bits, options, arg, flush_range);
9487 }
9488 #endif
9489
9490 if ((bits & PP_ATTR_MODIFIED) &&
9491 (options & PMAP_OPTIONS_NOFLUSH) &&
9492 (arg == NULL) &&
9493 (flush_range == NULL)) {
9494 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p,%p): "
9495 "should not clear 'modified' without flushing TLBs\n",
9496 pn, bits, options, arg, flush_range);
9497 }
9498
9499 assert(pn != vm_page_fictitious_addr);
9500
9501 if (options & PMAP_OPTIONS_CLEAR_WRITE) {
9502 assert(bits == PP_ATTR_MODIFIED);
9503
9504 pmap_page_protect_options_with_flush_range(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0, flush_range);
9505 /*
9506 * We short circuit this case; it should not need to
9507 * invoke arm_force_fast_fault, so just clear the modified bit.
9508 * pmap_page_protect has taken care of resetting
9509 * the state so that we'll see the next write as a fault to
9510 * the VM (i.e. we don't want a fast fault).
9511 */
9512 pa_clear_bits(pa, bits);
9513 return;
9514 }
9515 if (bits & PP_ATTR_REFERENCED) {
9516 allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
9517 }
9518 if (bits & PP_ATTR_MODIFIED) {
9519 allow_mode &= ~VM_PROT_WRITE;
9520 }
9521
9522 if (bits == PP_ATTR_NOENCRYPT) {
9523 /*
9524 * We short circuit this case; it should not need to
9525 * invoke arm_force_fast_fault, so just clear and
9526 * return. On ARM, this bit is just a debugging aid.
9527 */
9528 pa_clear_bits(pa, bits);
9529 return;
9530 }
9531
9532 if (arm_force_fast_fault_with_flush_range(pn, allow_mode, options, flush_range)) {
9533 pa_clear_bits(pa, bits);
9534 }
9535 }
9536
9537 MARK_AS_PMAP_TEXT static void
9538 phys_attribute_clear_internal(
9539 ppnum_t pn,
9540 unsigned int bits,
9541 int options,
9542 void *arg)
9543 {
9544 phys_attribute_clear_with_flush_range(pn, bits, options, arg, NULL);
9545 }
9546
9547 #if __ARM_RANGE_TLBI__
9548 MARK_AS_PMAP_TEXT static void
9549 phys_attribute_clear_twig_internal(
9550 pmap_t pmap,
9551 vm_map_address_t start,
9552 vm_map_address_t end,
9553 unsigned int bits,
9554 unsigned int options,
9555 pmap_tlb_flush_range_t *flush_range)
9556 {
9557 pmap_assert_locked_r(pmap);
9558 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
9559 assert(end >= start);
9560 assert((end - start) <= pt_attr_twig_size(pt_attr));
9561 pt_entry_t *pte_p, *start_pte_p, *end_pte_p, *curr_pte_p;
9562 tt_entry_t *tte_p;
9563 tte_p = pmap_tte(pmap, start);
9564
9565 if (tte_p == (tt_entry_t *) NULL) {
9566 return;
9567 }
9568
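	/*
	 * Visit each valid leaf mapping under this single twig table and clear
	 * the requested attribute bits on the corresponding physical page.
	 */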
9569 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
9570 pte_p = (pt_entry_t *) ttetokv(*tte_p);
9571
9572 start_pte_p = &pte_p[pte_index(pmap, pt_attr, start)];
9573 end_pte_p = start_pte_p + ((end - start) >> pt_attr_leaf_shift(pt_attr));
9574 assert(end_pte_p >= start_pte_p);
9575 for (curr_pte_p = start_pte_p; curr_pte_p < end_pte_p; curr_pte_p++) {
9576 pmap_paddr_t pa = pte_to_pa(*curr_pte_p);
9577 if (pa_valid(pa)) {
9578 ppnum_t pn = (ppnum_t) atop(pa);
9579 phys_attribute_clear_with_flush_range(pn, bits, options, NULL, flush_range);
9580 }
9581 }
9582 }
9583 }
9584
9585 MARK_AS_PMAP_TEXT static void
9586 phys_attribute_clear_range_internal(
9587 pmap_t pmap,
9588 vm_map_address_t start,
9589 vm_map_address_t end,
9590 unsigned int bits,
9591 unsigned int options)
9592 {
9593 if (__improbable(end < start)) {
9594 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
9595 }
9596 VALIDATE_PMAP(pmap);
9597
9598 vm_map_address_t va = start;
9599 pmap_tlb_flush_range_t flush_range = {
9600 .ptfr_pmap = pmap,
9601 .ptfr_start = start,
9602 .ptfr_end = end,
9603 .ptfr_flush_needed = false
9604 };
9605
9606 pmap_lock_ro(pmap);
9607 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
9608
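	/*
	 * Walk the range one twig (leaf page table) at a time so that each call
	 * below operates within a single page table, accumulating any required
	 * TLB invalidation into flush_range for a single flush at the end.
	 */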
9609 while (va < end) {
9610 vm_map_address_t curr_end;
9611
9612 curr_end = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
9613 if (curr_end > end) {
9614 curr_end = end;
9615 }
9616
9617 phys_attribute_clear_twig_internal(pmap, va, curr_end, bits, options, &flush_range);
9618 va = curr_end;
9619 }
9620 pmap_unlock_ro(pmap);
9621 if (flush_range.ptfr_flush_needed) {
9622 pmap_get_pt_ops(pmap)->flush_tlb_region_async(
9623 flush_range.ptfr_start,
9624 flush_range.ptfr_end - flush_range.ptfr_start,
9625 flush_range.ptfr_pmap);
9626 sync_tlb_flush();
9627 }
9628 }
9629
9630 static void
9631 phys_attribute_clear_range(
9632 pmap_t pmap,
9633 vm_map_address_t start,
9634 vm_map_address_t end,
9635 unsigned int bits,
9636 unsigned int options)
9637 {
9638 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE) | DBG_FUNC_START, bits);
9639
9640 #if XNU_MONITOR
9641 phys_attribute_clear_range_ppl(pmap, start, end, bits, options);
9642 #else
9643 phys_attribute_clear_range_internal(pmap, start, end, bits, options);
9644 #endif
9645
9646 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE) | DBG_FUNC_END);
9647 }
9648 #endif /* __ARM_RANGE_TLBI__ */
9649
9650 static void
9651 phys_attribute_clear(
9652 ppnum_t pn,
9653 unsigned int bits,
9654 int options,
9655 void *arg)
9656 {
9657 /*
9658 * Do we really want this tracepoint? It will be extremely chatty.
9659 * Also, should we have a corresponding trace point for the set path?
9660 */
9661 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
9662
9663 #if XNU_MONITOR
9664 phys_attribute_clear_ppl(pn, bits, options, arg);
9665 #else
9666 phys_attribute_clear_internal(pn, bits, options, arg);
9667 #endif
9668
9669 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
9670 }
9671
9672 /*
9673 * Set specified attribute bits.
9674 *
9675 * Set cached value in the pv head because we have
9676 * no per-mapping hardware support for referenced and
9677 * modify bits.
9678 */
9679 MARK_AS_PMAP_TEXT static void
9680 phys_attribute_set_internal(
9681 ppnum_t pn,
9682 unsigned int bits)
9683 {
9684 pmap_paddr_t pa = ptoa(pn);
9685 assert(pn != vm_page_fictitious_addr);
9686
9687 #if XNU_MONITOR
9688 if (bits & PP_ATTR_PPL_OWNED_BITS) {
9689 panic("%s: illegal request, "
9690 "pn=%u, bits=%#x",
9691 __FUNCTION__,
9692 pn, bits);
9693 }
9694 #endif
9695
9696 pa_set_bits(pa, (uint16_t)bits);
9697
9698 return;
9699 }
9700
9701 static void
9702 phys_attribute_set(
9703 ppnum_t pn,
9704 unsigned int bits)
9705 {
9706 #if XNU_MONITOR
9707 phys_attribute_set_ppl(pn, bits);
9708 #else
9709 phys_attribute_set_internal(pn, bits);
9710 #endif
9711 }
9712
9713
9714 /*
9715 * Check specified attribute bits.
9716 *
9717 * use the software cached bits (since no hw support).
9718 */
9719 static boolean_t
9720 phys_attribute_test(
9721 ppnum_t pn,
9722 unsigned int bits)
9723 {
9724 pmap_paddr_t pa = ptoa(pn);
9725 assert(pn != vm_page_fictitious_addr);
9726 return pa_test_bits(pa, bits);
9727 }
9728
9729
9730 /*
9731 * Set the modify/reference bits on the specified physical page.
9732 */
9733 void
9734 pmap_set_modify(ppnum_t pn)
9735 {
9736 phys_attribute_set(pn, PP_ATTR_MODIFIED);
9737 }
9738
9739
9740 /*
9741 * Clear the modify bits on the specified physical page.
9742 */
9743 void
9744 pmap_clear_modify(
9745 ppnum_t pn)
9746 {
9747 phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
9748 }
9749
9750
9751 /*
9752 * pmap_is_modified:
9753 *
9754 * Return whether or not the specified physical page is modified
9755 * by any physical maps.
9756 */
9757 boolean_t
9758 pmap_is_modified(
9759 ppnum_t pn)
9760 {
9761 return phys_attribute_test(pn, PP_ATTR_MODIFIED);
9762 }
9763
9764
9765 /*
9766 * Set the reference bit on the specified physical page.
9767 */
9768 static void
9769 pmap_set_reference(
9770 ppnum_t pn)
9771 {
9772 phys_attribute_set(pn, PP_ATTR_REFERENCED);
9773 }
9774
9775 /*
9776 * Clear the reference bits on the specified physical page.
9777 */
9778 void
9779 pmap_clear_reference(
9780 ppnum_t pn)
9781 {
9782 phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
9783 }
9784
9785
9786 /*
9787 * pmap_is_referenced:
9788 *
9789 * Return whether or not the specified physical page is referenced
9790 * by any physical maps.
9791 */
9792 boolean_t
9793 pmap_is_referenced(
9794 ppnum_t pn)
9795 {
9796 return phys_attribute_test(pn, PP_ATTR_REFERENCED);
9797 }
9798
9799 /*
9800 * pmap_get_refmod(phys)
9801 * returns the referenced and modified bits of the specified
9802 * physical page.
9803 */
9804 unsigned int
9805 pmap_get_refmod(
9806 ppnum_t pn)
9807 {
9808 return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
9809 | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
9810 }
9811
9812 static inline unsigned int
9813 pmap_clear_refmod_mask_to_modified_bits(const unsigned int mask)
9814 {
9815 return ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
9816 ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
9817 }
9818
9819 /*
9820 * pmap_clear_refmod(phys, mask)
9821 * clears the referenced and modified bits as specified by the mask
9822 * of the specified physical page.
9823 */
9824 void
9825 pmap_clear_refmod_options(
9826 ppnum_t pn,
9827 unsigned int mask,
9828 unsigned int options,
9829 void *arg)
9830 {
9831 unsigned int bits;
9832
9833 bits = pmap_clear_refmod_mask_to_modified_bits(mask);
9834 phys_attribute_clear(pn, bits, options, arg);
9835 }
9836
9837 /*
9838 * Perform pmap_clear_refmod_options on a virtual address range.
9839 * The operation is performed in bulk, and TLB flushes are coalesced
9840 * where possible.
9841 *
9842 * Returns true if the operation is supported on this platform.
9843 * If this function returns false, the operation is not supported and
9844 * nothing has been modified in the pmap.
9845 */
9846 bool
9847 pmap_clear_refmod_range_options(
9848 pmap_t pmap __unused,
9849 vm_map_address_t start __unused,
9850 vm_map_address_t end __unused,
9851 unsigned int mask __unused,
9852 unsigned int options __unused)
9853 {
9854 #if __ARM_RANGE_TLBI__
9855 unsigned int bits;
9856 bits = pmap_clear_refmod_mask_to_modified_bits(mask);
9857 phys_attribute_clear_range(pmap, start, end, bits, options);
9858 return true;
9859 #else /* __ARM_RANGE_TLBI__ */
9860 #pragma unused(pmap, start, end, mask, options)
9861 /*
9862 * This operation allows the VM to bulk modify refmod bits on a virtually
9863 * contiguous range of addresses. This is a large performance improvement
9864 * on platforms that support ranged TLBI instructions. On older platforms,
9865 * we can only flush per-page or flush the entire ASID, so we currently
9866 * support this operation only on platforms with ranged TLBI
9867 * instructions. On other platforms, we require that
9868 * the VM modify the bits on a per-page basis.
9869 */
9870 return false;
9871 #endif /* __ARM_RANGE_TLBI__ */
9872 }
9873
9874 void
9875 pmap_clear_refmod(
9876 ppnum_t pn,
9877 unsigned int mask)
9878 {
9879 pmap_clear_refmod_options(pn, mask, 0, NULL);
9880 }
9881
9882 unsigned int
9883 pmap_disconnect_options(
9884 ppnum_t pn,
9885 unsigned int options,
9886 void *arg)
9887 {
9888 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
9889 /*
9890 * On ARM, the "modified" bit is managed by software, so
9891 * we know up-front if the physical page is "modified",
9892 * without having to scan all the PTEs pointing to it.
9893 * The caller should have made the VM page "busy" so no one
9894 * should be able to establish any new mapping and "modify"
9895 * the page behind us.
9896 */
9897 if (pmap_is_modified(pn)) {
9898 /*
9899 * The page has been modified and will be sent to
9900 * the VM compressor.
9901 */
9902 options |= PMAP_OPTIONS_COMPRESSOR;
9903 } else {
9904 /*
9905 * The page hasn't been modified and will be freed
9906 * instead of compressed.
9907 */
9908 }
9909 }
9910
9911 /* disconnect the page */
9912 pmap_page_protect_options(pn, 0, options, arg);
9913
9914 /* return ref/chg status */
9915 return pmap_get_refmod(pn);
9916 }
9917
9918 /*
9919 * Routine:
9920 * pmap_disconnect
9921 *
9922 * Function:
9923 * Disconnect all mappings for this page and return reference and change status
9924 * in generic format.
9925 *
9926 */
9927 unsigned int
9928 pmap_disconnect(
9929 ppnum_t pn)
9930 {
9931 pmap_page_protect(pn, 0); /* disconnect the page */
9932 return pmap_get_refmod(pn); /* return ref/chg status */
9933 }
9934
9935 boolean_t
9936 pmap_has_managed_page(ppnum_t first, ppnum_t last)
9937 {
9938 if (ptoa(first) >= vm_last_phys) {
9939 return FALSE;
9940 }
9941 if (ptoa(last) < vm_first_phys) {
9942 return FALSE;
9943 }
9944
9945 return TRUE;
9946 }
9947
9948 /*
9949 * The state maintained by the noencrypt functions is used as a
9950 * debugging aid on ARM. This incurs some overhead on the part
9951 * of the caller. A special case check in phys_attribute_clear
9952 * (the most expensive path) currently minimizes this overhead,
9953 * but stubbing these functions out on RELEASE kernels yields
9954 * further wins.
9955 */
9956 boolean_t
9957 pmap_is_noencrypt(
9958 ppnum_t pn)
9959 {
9960 #if DEVELOPMENT || DEBUG
9961 boolean_t result = FALSE;
9962
9963 if (!pa_valid(ptoa(pn))) {
9964 return FALSE;
9965 }
9966
9967 result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));
9968
9969 return result;
9970 #else
9971 #pragma unused(pn)
9972 return FALSE;
9973 #endif
9974 }
9975
9976 void
9977 pmap_set_noencrypt(
9978 ppnum_t pn)
9979 {
9980 #if DEVELOPMENT || DEBUG
9981 if (!pa_valid(ptoa(pn))) {
9982 return;
9983 }
9984
9985 phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
9986 #else
9987 #pragma unused(pn)
9988 #endif
9989 }
9990
9991 void
9992 pmap_clear_noencrypt(
9993 ppnum_t pn)
9994 {
9995 #if DEVELOPMENT || DEBUG
9996 if (!pa_valid(ptoa(pn))) {
9997 return;
9998 }
9999
10000 phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
10001 #else
10002 #pragma unused(pn)
10003 #endif
10004 }
10005
10006 #if XNU_MONITOR
10007 boolean_t
10008 pmap_is_monitor(ppnum_t pn)
10009 {
10010 assert(pa_valid(ptoa(pn)));
10011 return phys_attribute_test(pn, PP_ATTR_MONITOR);
10012 }
10013 #endif
10014
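/*
 * Lock the per-page PV head lock for a managed physical page. For non-managed
 * pages (and always on XNU_MONITOR builds, where the PV path below is compiled
 * out), fall back to the global phys_backup_lock.
 */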
10015 void
10016 pmap_lock_phys_page(ppnum_t pn)
10017 {
10018 #if !XNU_MONITOR
10019 int pai;
10020 pmap_paddr_t phys = ptoa(pn);
10021
10022 if (pa_valid(phys)) {
10023 pai = (int)pa_index(phys);
10024 LOCK_PVH(pai);
10025 } else
10026 #else
10027 (void)pn;
10028 #endif
10029 { simple_lock(&phys_backup_lock, LCK_GRP_NULL);}
10030 }
10031
10032
10033 void
10034 pmap_unlock_phys_page(ppnum_t pn)
10035 {
10036 #if !XNU_MONITOR
10037 int pai;
10038 pmap_paddr_t phys = ptoa(pn);
10039
10040 if (pa_valid(phys)) {
10041 pai = (int)pa_index(phys);
10042 UNLOCK_PVH(pai);
10043 } else
10044 #else
10045 (void)pn;
10046 #endif
10047 { simple_unlock(&phys_backup_lock);}
10048 }
10049
10050 MARK_AS_PMAP_TEXT static void
10051 pmap_switch_user_ttb_internal(
10052 pmap_t pmap)
10053 {
10054 VALIDATE_PMAP(pmap);
10055 pmap_cpu_data_t *cpu_data_ptr;
10056 cpu_data_ptr = pmap_get_cpu_data();
10057
10058 #if (__ARM_VMSA__ == 7)
10059 cpu_data_ptr->cpu_user_pmap = pmap;
10060 cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
10061
10062 #if MACH_ASSERT && __ARM_USER_PROTECT__
10063 {
10064 unsigned int ttbr0_val, ttbr1_val;
10065 __asm__ volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
10066 __asm__ volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
10067 if (ttbr0_val != ttbr1_val) {
10068 panic("Misaligned ttbr0 %08X\n", ttbr0_val);
10069 }
10070 if (pmap->ttep & 0x1000) {
10071 panic("Misaligned ttbr0 %08X\n", pmap->ttep);
10072 }
10073 }
10074 #endif
10075 #if !__ARM_USER_PROTECT__
10076 set_mmu_ttb(pmap->ttep);
10077 set_context_id(pmap->hw_asid);
10078 #endif
10079
10080 #else /* (__ARM_VMSA__ == 7) */
10081
10082 if (pmap != kernel_pmap) {
10083 cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
10084 cpu_data_ptr->cpu_nested_pmap_attr = (cpu_data_ptr->cpu_nested_pmap == NULL) ?
10085 NULL : pmap_get_pt_attr(cpu_data_ptr->cpu_nested_pmap);
10086 cpu_data_ptr->cpu_nested_region_addr = pmap->nested_region_addr;
10087 cpu_data_ptr->cpu_nested_region_size = pmap->nested_region_size;
10088 }
10089
10090
10091 #if __ARM_MIXED_PAGE_SIZE__
10092 if ((pmap != kernel_pmap) && (pmap_get_pt_attr(pmap)->pta_tcr_value != get_tcr())) {
10093 set_tcr(pmap_get_pt_attr(pmap)->pta_tcr_value);
10094 }
10095 #endif /* __ARM_MIXED_PAGE_SIZE__ */
10096
10097 if (pmap != kernel_pmap) {
10098 set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->hw_asid) << TTBR_ASID_SHIFT));
10099 } else if (!pmap_user_ttb_is_clear()) {
10100 pmap_clear_user_ttb_internal();
10101 }
10102
10103 #if defined(HAS_APPLE_PAC) && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__)
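	/*
	 * Bring SCTLR_EL1's JOP key enable bits in line with the target pmap's
	 * disable_jop setting, requesting a context-synchronizing event only
	 * when the register value actually changes.
	 */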
10104 if (!arm_user_jop_disabled()) {
10105 uint64_t sctlr = __builtin_arm_rsr64("SCTLR_EL1");
10106 bool jop_enabled = sctlr & SCTLR_JOP_KEYS_ENABLED;
10107 if (!jop_enabled && !pmap->disable_jop) {
10108 // turn on JOP
10109 sctlr |= SCTLR_JOP_KEYS_ENABLED;
10110 __builtin_arm_wsr64("SCTLR_EL1", sctlr);
10111 arm_context_switch_requires_sync();
10112 } else if (jop_enabled && pmap->disable_jop) {
10113 // turn off JOP
10114 sctlr &= ~SCTLR_JOP_KEYS_ENABLED;
10115 __builtin_arm_wsr64("SCTLR_EL1", sctlr);
10116 arm_context_switch_requires_sync();
10117 }
10118 }
10119 #endif /* HAS_APPLE_PAC && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__) */
10120 #endif /* (__ARM_VMSA__ == 7) */
10121 }
10122
10123 void
10124 pmap_switch_user_ttb(
10125 pmap_t pmap)
10126 {
10127 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
10128 #if XNU_MONITOR
10129 pmap_switch_user_ttb_ppl(pmap);
10130 #else
10131 pmap_switch_user_ttb_internal(pmap);
10132 #endif
10133 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
10134 }
10135
10136 MARK_AS_PMAP_TEXT static void
10137 pmap_clear_user_ttb_internal(void)
10138 {
10139 #if (__ARM_VMSA__ > 7)
10140 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
10141 #else
10142 set_mmu_ttb(kernel_pmap->ttep);
10143 #endif
10144 }
10145
10146 void
10147 pmap_clear_user_ttb(void)
10148 {
10149 PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB) | DBG_FUNC_START, NULL, 0, 0);
10150 #if XNU_MONITOR
10151 pmap_clear_user_ttb_ppl();
10152 #else
10153 pmap_clear_user_ttb_internal();
10154 #endif
10155 PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB) | DBG_FUNC_END);
10156 }
10157
10158 MARK_AS_PMAP_TEXT static boolean_t
10159 arm_force_fast_fault_with_flush_range(
10160 ppnum_t ppnum,
10161 vm_prot_t allow_mode,
10162 int options,
10163 pmap_tlb_flush_range_t *flush_range)
10164 {
10165 pmap_paddr_t phys = ptoa(ppnum);
10166 pv_entry_t *pve_p;
10167 pt_entry_t *pte_p;
10168 int pai;
10169 boolean_t result;
10170 pv_entry_t **pv_h;
10171 boolean_t is_reusable, is_internal;
10172 boolean_t tlb_flush_needed = FALSE;
10173 boolean_t ref_fault;
10174 boolean_t mod_fault;
10175 boolean_t clear_write_fault = FALSE;
10176 boolean_t ref_aliases_mod = FALSE;
10177 bool mustsynch = ((options & PMAP_OPTIONS_FF_LOCKED) == 0);
10178
10179 assert(ppnum != vm_page_fictitious_addr);
10180
10181 if (!pa_valid(phys)) {
10182 return FALSE; /* Not a managed page. */
10183 }
10184
10185 result = TRUE;
10186 ref_fault = FALSE;
10187 mod_fault = FALSE;
10188 pai = (int)pa_index(phys);
10189 if (__probable(mustsynch)) {
10190 LOCK_PVH(pai);
10191 }
10192 pv_h = pai_to_pvh(pai);
10193
10194 pte_p = PT_ENTRY_NULL;
10195 pve_p = PV_ENTRY_NULL;
10196 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
10197 pte_p = pvh_ptep(pv_h);
10198 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
10199 pve_p = pvh_list(pv_h);
10200 }
10201
10202 is_reusable = IS_REUSABLE_PAGE(pai);
10203 is_internal = IS_INTERNAL_PAGE(pai);
10204
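	/*
	 * Walk every mapping of this page (either a single PTE pointer or a PV
	 * entry chain), clearing the AF bit and/or downgrading write permission
	 * so the next access takes a fast fault and re-derives ref/mod state.
	 */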
10205 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
10206 vm_map_address_t va;
10207 pt_entry_t spte;
10208 pt_entry_t tmplate;
10209 pmap_t pmap;
10210 boolean_t update_pte;
10211
10212 if (pve_p != PV_ENTRY_NULL) {
10213 pte_p = pve_get_ptep(pve_p);
10214 }
10215
10216 if (pte_p == PT_ENTRY_NULL) {
10217 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
10218 }
10219 #ifdef PVH_FLAG_IOMMU
10220 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
10221 goto fff_skip_pve;
10222 }
10223 #endif
10224 if (*pte_p == ARM_PTE_EMPTY) {
10225 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
10226 }
10227 if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
10228 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
10229 }
10230
10231 pmap = ptep_get_pmap(pte_p);
10232 const pt_attr_t * pt_attr = pmap_get_pt_attr(pmap);
10233 va = ptep_get_va(pte_p);
10234
10235 assert(va >= pmap->min && va < pmap->max);
10236
10237 /* update pmap stats and ledgers */
10238 if (IS_ALTACCT_PAGE(pai, pve_p)) {
10239 /*
10240 * We do not track "reusable" status for
10241 * "alternate accounting" mappings.
10242 */
10243 } else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
10244 is_reusable &&
10245 is_internal &&
10246 pmap != kernel_pmap) {
10247 /* one less "reusable" */
10248 __assert_only int32_t orig_reusable = OSAddAtomic(-1, &pmap->stats.reusable);
10249 PMAP_STATS_ASSERTF(orig_reusable > 0, pmap, "stats.reusable %d", orig_reusable);
10250 /* one more "internal" */
10251 __assert_only int32_t orig_internal = OSAddAtomic(+1, &pmap->stats.internal);
10252 PMAP_STATS_PEAK(pmap->stats.internal);
10253 PMAP_STATS_ASSERTF(orig_internal >= 0, pmap, "stats.internal %d", orig_internal);
10254 pmap_ledger_credit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
10255 assert(!IS_ALTACCT_PAGE(pai, pve_p));
10256 assert(IS_INTERNAL_PAGE(pai));
10257 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
10258
10259 /*
10260 * Since the page is being marked non-reusable, we assume that it will be
10261 * modified soon. Avoid the cost of another trap to handle the fast
10262 * fault when we next write to this page.
10263 */
10264 clear_write_fault = TRUE;
10265 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
10266 !is_reusable &&
10267 is_internal &&
10268 pmap != kernel_pmap) {
10269 /* one more "reusable" */
10270 __assert_only int32_t orig_reusable = OSAddAtomic(+1, &pmap->stats.reusable);
10271 PMAP_STATS_PEAK(pmap->stats.reusable);
10272 PMAP_STATS_ASSERTF(orig_reusable >= 0, pmap, "stats.reusable %d", orig_reusable);
10273 /* one less "internal" */
10274 __assert_only int32_t orig_internal = OSAddAtomic(-1, &pmap->stats.internal);
10275 PMAP_STATS_ASSERTF(orig_internal > 0, pmap, "stats.internal %d", orig_internal);
10276 pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
10277 assert(!IS_ALTACCT_PAGE(pai, pve_p));
10278 assert(IS_INTERNAL_PAGE(pai));
10279 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
10280 }
10281
10282 bool wiredskip = pte_is_wired(*pte_p) &&
10283 ((options & PMAP_OPTIONS_FF_WIRED) == 0);
10284
10285 if (wiredskip) {
10286 result = FALSE;
10287 goto fff_skip_pve;
10288 }
10289
10290 spte = *pte_p;
10291 tmplate = spte;
10292 update_pte = FALSE;
10293
10294 if ((allow_mode & VM_PROT_READ) != VM_PROT_READ) {
10295 /* read protection sets the pte to fault */
10296 tmplate = tmplate & ~ARM_PTE_AF;
10297 update_pte = TRUE;
10298 ref_fault = TRUE;
10299 }
10300 if ((allow_mode & VM_PROT_WRITE) != VM_PROT_WRITE) {
10301 /* take away write permission if set */
10302 if (pmap == kernel_pmap) {
10303 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) {
10304 tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
10305 pte_set_was_writeable(tmplate, true);
10306 update_pte = TRUE;
10307 mod_fault = TRUE;
10308 }
10309 } else {
10310 if ((tmplate & ARM_PTE_APMASK) == pt_attr_leaf_rw(pt_attr)) {
10311 tmplate = ((tmplate & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
10312 pte_set_was_writeable(tmplate, true);
10313 update_pte = TRUE;
10314 mod_fault = TRUE;
10315 }
10316 }
10317 }
10318
10319 #if MACH_ASSERT && XNU_MONITOR
10320 if (is_pte_xprr_protected(pmap, spte)) {
10321 if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
10322 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
10323 "ppnum=0x%x, options=0x%x, allow_mode=0x%x",
10324 __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
10325 ppnum, options, allow_mode);
10326 }
10327 }
10328 #endif /* MACH_ASSERT && XNU_MONITOR */
10329
10330 if (result && update_pte) {
10331 if (*pte_p != ARM_PTE_TYPE_FAULT &&
10332 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
10333 WRITE_PTE_STRONG(pte_p, tmplate);
10334 if (!flush_range ||
10335 ((flush_range->ptfr_pmap != pmap) || va >= flush_range->ptfr_end || va < flush_range->ptfr_start)) {
10336 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
10337 pt_attr_page_size(pt_attr) * PAGE_RATIO, pmap);
10338 }
10339 tlb_flush_needed = TRUE;
10340 } else {
10341 WRITE_PTE(pte_p, tmplate);
10342 __builtin_arm_isb(ISB_SY);
10343 }
10344 }
10345
10346 fff_skip_pve:
10347 pte_p = PT_ENTRY_NULL;
10348 if (pve_p != PV_ENTRY_NULL) {
10349 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
10350 }
10351 }
10352
10353 /*
10354 * If we are using the same approach for ref and mod
10355 * faults on this PTE, do not clear the write fault;
10356 * this would cause both ref and mod to be set on the
10357 * page again, and prevent us from taking ANY read/write
10358 * fault on the mapping.
10359 */
10360 if (clear_write_fault && !ref_aliases_mod) {
10361 arm_clear_fast_fault(ppnum, VM_PROT_WRITE);
10362 }
10363 if (tlb_flush_needed) {
10364 if (flush_range) {
10365 /* Delayed flush. Signal to the caller that the flush is needed. */
10366 flush_range->ptfr_flush_needed = true;
10367 } else {
10368 sync_tlb_flush();
10369 }
10370 }
10371
10372 /* update global "reusable" status for this page */
10373 if (is_internal) {
10374 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
10375 is_reusable) {
10376 CLR_REUSABLE_PAGE(pai);
10377 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
10378 !is_reusable) {
10379 SET_REUSABLE_PAGE(pai);
10380 }
10381 }
10382
10383 if (mod_fault) {
10384 SET_MODFAULT_PAGE(pai);
10385 }
10386 if (ref_fault) {
10387 SET_REFFAULT_PAGE(pai);
10388 }
10389 if (__probable(mustsynch)) {
10390 UNLOCK_PVH(pai);
10391 }
10392 return result;
10393 }
10394
10395 MARK_AS_PMAP_TEXT static boolean_t
10396 arm_force_fast_fault_internal(
10397 ppnum_t ppnum,
10398 vm_prot_t allow_mode,
10399 int options)
10400 {
10401 if (__improbable((options & PMAP_OPTIONS_FF_LOCKED) != 0)) {
10402 panic("arm_force_fast_fault(0x%x, 0x%x, 0x%x): invalid options", ppnum, allow_mode, options);
10403 }
10404 return arm_force_fast_fault_with_flush_range(ppnum, allow_mode, options, NULL);
10405 }
10406
10407 /*
10408 * Routine: arm_force_fast_fault
10409 *
10410 * Function:
10411 * Force all mappings for this page to fault according
10412 * to the access modes allowed, so we can gather ref/modify
10413 * bits again.
10414 */
10415
10416 boolean_t
10417 arm_force_fast_fault(
10418 ppnum_t ppnum,
10419 vm_prot_t allow_mode,
10420 int options,
10421 __unused void *arg)
10422 {
10423 pmap_paddr_t phys = ptoa(ppnum);
10424
10425 assert(ppnum != vm_page_fictitious_addr);
10426
10427 if (!pa_valid(phys)) {
10428 return FALSE; /* Not a managed page. */
10429 }
10430
10431 #if XNU_MONITOR
10432 return arm_force_fast_fault_ppl(ppnum, allow_mode, options);
10433 #else
10434 return arm_force_fast_fault_internal(ppnum, allow_mode, options);
10435 #endif
10436 }
10437
10438 /*
10439 * Routine: arm_clear_fast_fault
10440 *
10441 * Function:
10442 * Clear pending force fault for all mappings for this page based on
10443 * the observed fault type, update ref/modify bits.
10444 */
10445 MARK_AS_PMAP_TEXT static boolean_t
10446 arm_clear_fast_fault(
10447 ppnum_t ppnum,
10448 vm_prot_t fault_type)
10449 {
10450 pmap_paddr_t pa = ptoa(ppnum);
10451 pv_entry_t *pve_p;
10452 pt_entry_t *pte_p;
10453 int pai;
10454 boolean_t result;
10455 boolean_t tlb_flush_needed = FALSE;
10456 pv_entry_t **pv_h;
10457
10458 assert(ppnum != vm_page_fictitious_addr);
10459
10460 if (!pa_valid(pa)) {
10461 return FALSE; /* Not a managed page. */
10462 }
10463
10464 result = FALSE;
10465 pai = (int)pa_index(pa);
10466 ASSERT_PVH_LOCKED(pai);
10467 pv_h = pai_to_pvh(pai);
10468
10469 pte_p = PT_ENTRY_NULL;
10470 pve_p = PV_ENTRY_NULL;
10471 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
10472 pte_p = pvh_ptep(pv_h);
10473 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
10474 pve_p = pvh_list(pv_h);
10475 }
10476
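	/*
	 * For each mapping of the page, restore the access permissions and AF bit
	 * implied by the observed fault type, and record the corresponding
	 * referenced/modified state in the pp_attr table.
	 */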
10477 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
10478 vm_map_address_t va;
10479 pt_entry_t spte;
10480 pt_entry_t tmplate;
10481 pmap_t pmap;
10482
10483 if (pve_p != PV_ENTRY_NULL) {
10484 pte_p = pve_get_ptep(pve_p);
10485 }
10486
10487 if (pte_p == PT_ENTRY_NULL) {
10488 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
10489 }
10490 #ifdef PVH_FLAG_IOMMU
10491 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
10492 goto cff_skip_pve;
10493 }
10494 #endif
10495 if (*pte_p == ARM_PTE_EMPTY) {
10496 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
10497 }
10498
10499 pmap = ptep_get_pmap(pte_p);
10500 va = ptep_get_va(pte_p);
10501
10502 assert(va >= pmap->min && va < pmap->max);
10503
10504 spte = *pte_p;
10505 tmplate = spte;
10506
10507 if ((fault_type & VM_PROT_WRITE) && (pte_was_writeable(spte))) {
10508 {
10509 if (pmap == kernel_pmap) {
10510 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
10511 } else {
10512 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap)));
10513 }
10514 }
10515
10516 tmplate |= ARM_PTE_AF;
10517
10518 pte_set_was_writeable(tmplate, false);
10519 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
10520 } else if ((fault_type & VM_PROT_READ) && ((spte & ARM_PTE_AF) != ARM_PTE_AF)) {
10521 tmplate = spte | ARM_PTE_AF;
10522
10523 {
10524 pa_set_bits(pa, PP_ATTR_REFERENCED);
10525 }
10526 }
10527
10528 #if MACH_ASSERT && XNU_MONITOR
10529 if (is_pte_xprr_protected(pmap, spte)) {
10530 if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
10531 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
10532 "ppnum=0x%x, fault_type=0x%x",
10533 __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
10534 ppnum, fault_type);
10535 }
10536 }
10537 #endif /* MACH_ASSERT && XNU_MONITOR */
10538
10539 if (spte != tmplate) {
10540 if (spte != ARM_PTE_TYPE_FAULT) {
10541 WRITE_PTE_STRONG(pte_p, tmplate);
10542 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
10543 pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap);
10544 tlb_flush_needed = TRUE;
10545 } else {
10546 WRITE_PTE(pte_p, tmplate);
10547 __builtin_arm_isb(ISB_SY);
10548 }
10549 result = TRUE;
10550 }
10551
10552 #ifdef PVH_FLAG_IOMMU
10553 cff_skip_pve:
10554 #endif
10555 pte_p = PT_ENTRY_NULL;
10556 if (pve_p != PV_ENTRY_NULL) {
10557 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
10558 }
10559 }
10560 if (tlb_flush_needed) {
10561 sync_tlb_flush();
10562 }
10563 return result;
10564 }
10565
10566 /*
10567 * Determine if the fault was induced by software tracking of
10568 * modify/reference bits. If so, re-enable the mapping (and set
10569 * the appropriate bits).
10570 *
10571 * Returns KERN_SUCCESS if the fault was induced and was
10572 * successfully handled.
10573 *
10574 * Returns KERN_FAILURE if the fault was not induced and
10575 * the function was unable to deal with it.
10576 *
10577 * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
10578 * disallows this type of access.
10579 */
10580 MARK_AS_PMAP_TEXT static kern_return_t
10581 arm_fast_fault_internal(
10582 pmap_t pmap,
10583 vm_map_address_t va,
10584 vm_prot_t fault_type,
10585 __unused bool was_af_fault,
10586 __unused bool from_user)
10587 {
10588 kern_return_t result = KERN_FAILURE;
10589 pt_entry_t *ptep;
10590 pt_entry_t spte = ARM_PTE_TYPE_FAULT;
10591 int pai;
10592 pmap_paddr_t pa;
10593 VALIDATE_PMAP(pmap);
10594
10595 pmap_lock(pmap);
10596
10597 /*
10598 * If the entry doesn't exist, is completely invalid, or is already
10599 * valid, we can't fix it here.
10600 */
10601
10602 ptep = pmap_pte(pmap, va);
10603 if (ptep != PT_ENTRY_NULL) {
10604 while (true) {
10605 spte = *ptep;
10606
10607 pa = pte_to_pa(spte);
10608
10609 if ((spte == ARM_PTE_TYPE_FAULT) ||
10610 ARM_PTE_IS_COMPRESSED(spte, ptep)) {
10611 pmap_unlock(pmap);
10612 return result;
10613 }
10614
10615 if (!pa_valid(pa)) {
10616 pmap_unlock(pmap);
10617 #if XNU_MONITOR
10618 if (pmap_cache_attributes((ppnum_t)atop(pa)) & PP_ATTR_MONITOR) {
10619 return KERN_PROTECTION_FAILURE;
10620 } else
10621 #endif
10622 return result;
10623 }
10624 pai = (int)pa_index(pa);
10625 LOCK_PVH(pai);
10626 #if __APRR_SUPPORTED__
10627 if (*ptep == spte) {
10628 /*
10629 * Double-check the spte value, as we care
10630 * about the AF bit.
10631 */
10632 break;
10633 }
10634 UNLOCK_PVH(pai);
10635 #else /* !__APRR_SUPPORTED__ */
10636 break;
10637 #endif /* !__APRR_SUPPORTED__ */
10638 }
10639 } else {
10640 pmap_unlock(pmap);
10641 return result;
10642 }
10643
10644 #if __APRR_SUPPORTED__
10645 /* Check to see if this mapping had APRR restrictions. */
10646 if (is_pte_xprr_protected(pmap, spte)) {
10647 /*
10648 * We have faulted on an XPRR managed mapping; decide if the access should be
10649 * reattempted or if it should cause an exception. Now that all JIT entitled
10650 * task threads always have MPRR enabled we're only here because of
10651 * an AF fault or an actual permission fault. AF faults will have result
10652 * changed to KERN_SUCCESS below upon arm_clear_fast_fault return.
10653 */
10654 if (was_af_fault && (spte & ARM_PTE_AF)) {
10655 result = KERN_SUCCESS;
10656 goto out;
10657 } else {
10658 result = KERN_PROTECTION_FAILURE;
10659 }
10660 }
10661 #endif /* __APRR_SUPPORTED__*/
10662
10663 if ((IS_REFFAULT_PAGE(pai)) ||
10664 ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
10665 /*
10666 * An attempted access will always clear ref/mod fault state, as
10667 * appropriate for the fault type. arm_clear_fast_fault will
10668 * update the associated PTEs for the page as appropriate; if
10669 * any PTEs are updated, we redrive the access. If the mapping
10670 * does not actually allow for the attempted access, the
10671 * following fault will (hopefully) fail to update any PTEs, and
10672 * thus cause arm_fast_fault to decide that it failed to handle
10673 * the fault.
10674 */
10675 if (IS_REFFAULT_PAGE(pai)) {
10676 CLR_REFFAULT_PAGE(pai);
10677 }
10678 if ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)) {
10679 CLR_MODFAULT_PAGE(pai);
10680 }
10681
10682 if (arm_clear_fast_fault((ppnum_t)atop(pa), fault_type)) {
10683 /*
10684 * Should this preserve KERN_PROTECTION_FAILURE? The
10685 * cost of not doing so is another fault in a case
10686 * that should already result in an exception.
10687 */
10688 result = KERN_SUCCESS;
10689 }
10690 }
10691
10692 #if __APRR_SUPPORTED__
10693 out:
10694 #endif /* __APRR_SUPPORTED__*/
10695 UNLOCK_PVH(pai);
10696 pmap_unlock(pmap);
10697 return result;
10698 }
10699
10700 kern_return_t
10701 arm_fast_fault(
10702 pmap_t pmap,
10703 vm_map_address_t va,
10704 vm_prot_t fault_type,
10705 bool was_af_fault,
10706 __unused bool from_user)
10707 {
10708 kern_return_t result = KERN_FAILURE;
10709
10710 if (va < pmap->min || va >= pmap->max) {
10711 return result;
10712 }
10713
10714 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
10715 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
10716 from_user);
10717
10718 #if (__ARM_VMSA__ == 7)
10719 if (pmap != kernel_pmap) {
10720 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
10721 pmap_t cur_pmap;
10722 pmap_t cur_user_pmap;
10723
10724 cur_pmap = current_pmap();
10725 cur_user_pmap = cpu_data_ptr->cpu_user_pmap;
10726
10727 if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
10728 if (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp) {
10729 pmap_set_pmap(pmap, current_thread());
10730 result = KERN_SUCCESS;
10731 goto done;
10732 }
10733 }
10734 }
10735 #endif
10736
10737 #if XNU_MONITOR
10738 result = arm_fast_fault_ppl(pmap, va, fault_type, was_af_fault, from_user);
10739 #else
10740 result = arm_fast_fault_internal(pmap, va, fault_type, was_af_fault, from_user);
10741 #endif
10742
10743 #if (__ARM_VMSA__ == 7)
10744 done:
10745 #endif
10746
10747 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);
10748
10749 return result;
10750 }
10751
10752 void
10753 pmap_copy_page(
10754 ppnum_t psrc,
10755 ppnum_t pdst)
10756 {
10757 bcopy_phys((addr64_t) (ptoa(psrc)),
10758 (addr64_t) (ptoa(pdst)),
10759 PAGE_SIZE);
10760 }
10761
10762
10763 /*
10764 * pmap_copy_part_page copies part of the specified (machine independent) page.
10765 */
10766 void
10767 pmap_copy_part_page(
10768 ppnum_t psrc,
10769 vm_offset_t src_offset,
10770 ppnum_t pdst,
10771 vm_offset_t dst_offset,
10772 vm_size_t len)
10773 {
10774 bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
10775 (addr64_t) (ptoa(pdst) + dst_offset),
10776 len);
10777 }
10778
10779
10780 /*
10781 * pmap_zero_page zeros the specified (machine independent) page.
10782 */
10783 void
10784 pmap_zero_page(
10785 ppnum_t pn)
10786 {
10787 assert(pn != vm_page_fictitious_addr);
10788 bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
10789 }
10790
10791 /*
10792 * pmap_zero_part_page
10793 * zeros the specified (machine independent) part of a page.
10794 */
10795 void
10796 pmap_zero_part_page(
10797 ppnum_t pn,
10798 vm_offset_t offset,
10799 vm_size_t len)
10800 {
10801 assert(pn != vm_page_fictitious_addr);
10802 assert(offset + len <= PAGE_SIZE);
10803 bzero_phys((addr64_t) (ptoa(pn) + offset), len);
10804 }
10805
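/*
 * Establish the LOWGLOBAL_ALIAS mapping of the lowGlo structure in the kernel
 * pmap: read-only, non-executable, and writeback-cacheable.
 */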
10806 void
10807 pmap_map_globals(
10808 void)
10809 {
10810 pt_entry_t *ptep, pte;
10811
10812 ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
10813 assert(ptep != PT_ENTRY_NULL);
10814 assert(*ptep == ARM_PTE_EMPTY);
10815
10816 pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
10817 #if __ARM_KERNEL_PROTECT__
10818 pte |= ARM_PTE_NG;
10819 #endif /* __ARM_KERNEL_PROTECT__ */
10820 pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
10821 #if (__ARM_VMSA__ > 7)
10822 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
10823 #else
10824 pte |= ARM_PTE_SH;
10825 #endif
10826 *ptep = pte;
10827 FLUSH_PTE_RANGE(ptep, (ptep + 1));
10828 PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE, false);
10829 }
10830
10831 vm_offset_t
10832 pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
10833 {
10834 if (__improbable(index >= CPUWINDOWS_MAX)) {
10835 panic("%s: invalid index %u", __func__, index);
10836 }
10837 return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
10838 }
10839
10840 MARK_AS_PMAP_TEXT static unsigned int
10841 pmap_map_cpu_windows_copy_internal(
10842 ppnum_t pn,
10843 vm_prot_t prot,
10844 unsigned int wimg_bits)
10845 {
10846 pt_entry_t *ptep = NULL, pte;
10847 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
10848 unsigned int cpu_num;
10849 unsigned int i;
10850 vm_offset_t cpu_copywindow_vaddr = 0;
10851 bool need_strong_sync = false;
10852
10853 #if XNU_MONITOR
10854 unsigned int cacheattr = (!pa_valid(ptoa(pn)) ? pmap_cache_attributes(pn) : 0);
10855 need_strong_sync = ((cacheattr & PMAP_IO_RANGE_STRONG_SYNC) != 0);
10856 #endif
10857
10858 #if XNU_MONITOR
10859 #ifdef __ARM_COHERENT_IO__
10860 if (__improbable(pa_valid(ptoa(pn)) && !pmap_ppl_disable)) {
10861 panic("%s: attempted to map a managed page, "
10862 "pn=%u, prot=0x%x, wimg_bits=0x%x",
10863 __FUNCTION__,
10864 pn, prot, wimg_bits);
10865 }
10866 if (__improbable((cacheattr & PP_ATTR_MONITOR) && (prot != VM_PROT_READ) && !pmap_ppl_disable)) {
10867 panic("%s: attempt to map PPL-protected I/O address 0x%llx as writable", __func__, (uint64_t)ptoa(pn));
10868 }
10869
10870 #else /* __ARM_COHERENT_IO__ */
10871 #error CPU copy windows are not properly supported with both the PPL and incoherent IO
10872 #endif /* __ARM_COHERENT_IO__ */
10873 #endif /* XNU_MONITOR */
10874 cpu_num = pmap_cpu_data->cpu_number;
10875
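	/*
	 * Find an unused copy window for this CPU: a window is free when its
	 * kernel PTE is currently the fault (empty) entry.
	 */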
10876 for (i = 0; i < CPUWINDOWS_MAX; i++) {
10877 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
10878 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
10879 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
10880 if (*ptep == ARM_PTE_TYPE_FAULT) {
10881 break;
10882 }
10883 }
10884 if (i == CPUWINDOWS_MAX) {
10885 panic("pmap_map_cpu_windows_copy: out of window\n");
10886 }
10887
10888 pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
10889 #if __ARM_KERNEL_PROTECT__
10890 pte |= ARM_PTE_NG;
10891 #endif /* __ARM_KERNEL_PROTECT__ */
10892
10893 pte |= wimg_to_pte(wimg_bits);
10894
10895 if (prot & VM_PROT_WRITE) {
10896 pte |= ARM_PTE_AP(AP_RWNA);
10897 } else {
10898 pte |= ARM_PTE_AP(AP_RONA);
10899 }
10900
10901 WRITE_PTE_FAST(ptep, pte);
10902 /*
10903 * Invalidate the TLB. This also covers nested use of cpu_copywindow_vaddr by an interrupted
10904 * context in pmap_unmap_cpu_windows_copy(), after it clears the PTE but before its TLB invalidate.
10905 */
10906 FLUSH_PTE_STRONG(ptep);
10907 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[i]);
10908 pmap_cpu_data->copywindow_strong_sync[i] = need_strong_sync;
10909
10910 return i;
10911 }
10912
10913 unsigned int
10914 pmap_map_cpu_windows_copy(
10915 ppnum_t pn,
10916 vm_prot_t prot,
10917 unsigned int wimg_bits)
10918 {
10919 #if XNU_MONITOR
10920 return pmap_map_cpu_windows_copy_ppl(pn, prot, wimg_bits);
10921 #else
10922 return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
10923 #endif
10924 }
10925
10926 MARK_AS_PMAP_TEXT static void
10927 pmap_unmap_cpu_windows_copy_internal(
10928 unsigned int index)
10929 {
10930 pt_entry_t *ptep;
10931 unsigned int cpu_num;
10932 vm_offset_t cpu_copywindow_vaddr = 0;
10933 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
10934
10935 cpu_num = pmap_cpu_data->cpu_number;
10936
10937 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
10938 /* Issue full-system DSB to ensure prior operations on the per-CPU window
10939 * (which are likely to have been on I/O memory) are complete before
10940 * tearing down the mapping. */
10941 __builtin_arm_dsb(DSB_SY);
10942 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
10943 WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
10944 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index]);
10945 }
10946
10947 void
10948 pmap_unmap_cpu_windows_copy(
10949 unsigned int index)
10950 {
10951 #if XNU_MONITOR
10952 return pmap_unmap_cpu_windows_copy_ppl(index);
10953 #else
10954 return pmap_unmap_cpu_windows_copy_internal(index);
10955 #endif
10956 }
10957
10958 #if XNU_MONITOR
10959
10960 /*
10961 * The HMAC SHA driver needs to be able to operate on physical pages in
10962 * place without copying them out. This function provides an interface
10963 * to run a callback on a given page, making use of a CPU copy window
10964 * if necessary.
10965 *
10966 * This should only be used during the hibernation process since every DRAM page
10967 * will be mapped as VM_WIMG_DEFAULT. This can cause coherency issues if the pages
10968 * were originally mapped as VM_WIMG_IO/RT. In the hibernation case, by the time
10969 * we start copying memory all other agents shouldn't be writing to memory so we
10970 * can ignore these coherency issues. Regardless of this code, if other agents
10971 * were modifying memory during the image creation process, there would be
10972 * issues anyway.
10973 */
10974 MARK_AS_PMAP_TEXT void
10975 pmap_invoke_with_page(
10976 ppnum_t page_number,
10977 void *ctx,
10978 void (*callback)(void *ctx, ppnum_t page_number, const void *page))
10979 {
10980 #if HIBERNATION
10981 /* This function should only be used from within a hibernation context. */
10982 assert((gIOHibernateState == kIOHibernateStateHibernating) ||
10983 (gIOHibernateState == kIOHibernateStateWakingFromHibernate));
10984
10985 /* from bcopy_phys_internal */
10986 vm_offset_t src = ptoa_64(page_number);
10987 vm_offset_t tmp_src;
10988 bool use_copy_window_src = !pmap_valid_address(src);
10989 unsigned int src_index;
10990 if (use_copy_window_src) {
10991 unsigned int wimg_bits_src = pmap_cache_attributes(page_number);
10992
10993 /**
10994 * Always map DRAM as VM_WIMG_DEFAULT (regardless of whether it's
10995 * kernel-managed) to denote that it's safe to use memcpy on it.
10996 */
10997 if (is_dram_addr(src)) {
10998 wimg_bits_src = VM_WIMG_DEFAULT;
10999 }
11000
11001 src_index = pmap_map_cpu_windows_copy_internal(page_number, VM_PROT_READ, wimg_bits_src);
11002 tmp_src = pmap_cpu_windows_copy_addr(pmap_get_cpu_data()->cpu_number, src_index);
11003 } else {
11004 vm_size_t count = PAGE_SIZE;
11005 tmp_src = phystokv_range((pmap_paddr_t)src, &count);
11006 }
11007
11008 callback(ctx, page_number, (const void *)tmp_src);
11009
11010 if (use_copy_window_src) {
11011 pmap_unmap_cpu_windows_copy_internal(src_index);
11012 }
11013 #else
11014 #pragma unused(page_number, ctx, callback)
11015 #endif /* HIBERNATION */
11016 }
11017
11018 /*
11019 * Loop over every pmap_io_range (I/O ranges marked as owned by
11020 * the PPL in the device tree) and conditionally call callback() on each range
11021 * that needs to be included in the hibernation image.
11022 *
11023 * @param ctx Will be passed as-is into the callback method. Use NULL if no
11024 * context is needed in the callback.
11025 * @param callback Callback function invoked on each range (gated by flag).
11026 */
11027 MARK_AS_PMAP_TEXT void
11028 pmap_hibernate_invoke(void *ctx, void (*callback)(void *ctx, uint64_t addr, uint64_t len))
11029 {
11030 for (unsigned int i = 0; i < num_io_rgns; ++i) {
11031 if (io_attr_table[i].wimg & PMAP_IO_RANGE_NEEDS_HIBERNATING) {
11032 callback(ctx, io_attr_table[i].addr, io_attr_table[i].len);
11033 }
11034 }
11035 }
11036
11037 /**
11038 * Set the HASHED pv_head_table flag for the passed in physical page if it's a
11039 * PPL-owned page. Otherwise, do nothing.
11040 *
11041 * @param addr Physical address of the page to set the HASHED flag on.
11042 */
11043 MARK_AS_PMAP_TEXT void
11044 pmap_set_ppl_hashed_flag(const pmap_paddr_t addr)
11045 {
11046 /* Ignore non-managed kernel memory. */
11047 if (!pa_valid(addr)) {
11048 return;
11049 }
11050
11051 const int pai = (int)pa_index(addr);
11052 if (pp_attr_table[pai] & PP_ATTR_MONITOR) {
11053 pv_entry_t **pv_h = pai_to_pvh(pai);
11054
11055 /* Mark that the PPL-owned page has been hashed into the hibernation image. */
11056 LOCK_PVH(pai);
11057 pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_HASHED);
11058 UNLOCK_PVH(pai);
11059 }
11060 }
11061
11062 /**
11063 * Loop through every physical page in the system and clear out the HASHED flag
11064 * on every PPL-owned page. That flag is used to keep track of which pages have
11065 * been hashed into the hibernation image during the hibernation entry process.
11066 *
11067 * The HASHED flag needs to be cleared out between hibernation cycles because the
11068 * pv_head_table and pp_attr_table might have been copied into the hibernation
11069 * image with the HASHED flag set on certain pages. It's important to clear the
11070 * HASHED flag to ensure that the enforcement of all PPL-owned memory being hashed
11071 * into the hibernation image can't be compromised across hibernation cycles.
11072 */
11073 MARK_AS_PMAP_TEXT void
11074 pmap_clear_ppl_hashed_flag_all(void)
11075 {
11076 const int last_index = (int)pa_index(vm_last_phys);
11077 pv_entry_t **pv_h = NULL;
11078
11079 for (int pai = 0; pai < last_index; ++pai) {
11080 pv_h = pai_to_pvh(pai);
11081
11082 /* Test for a PPL-owned page that has the HASHED flag set in its pv_head_table entry. */
11083 if ((pvh_get_flags(pv_h) & PVH_FLAG_HASHED) &&
11084 (pp_attr_table[pai] & PP_ATTR_MONITOR)) {
11085 LOCK_PVH(pai);
11086 pvh_set_flags(pv_h, pvh_get_flags(pv_h) & ~PVH_FLAG_HASHED);
11087 UNLOCK_PVH(pai);
11088 }
11089 }
11090 }
11091
11092 /**
11093 * Enforce that all PPL-owned pages were hashed into the hibernation image. The
11094 * ppl_hib driver will call this after all wired pages have been copied into the
11095 * hibernation image.
11096 */
11097 MARK_AS_PMAP_TEXT void
11098 pmap_check_ppl_hashed_flag_all(void)
11099 {
11100 const int last_index = (int)pa_index(vm_last_phys);
11101 pv_entry_t **pv_h = NULL;
11102
11103 for (int pai = 0; pai < last_index; ++pai) {
11104 pv_h = pai_to_pvh(pai);
11105
11106 /**
11107 * The PMAP stacks are explicitly not saved into the image so skip checking
11108 * the pages that contain the PMAP stacks.
11109 */
11110 const bool is_pmap_stack = (pai >= (int)pa_index(pmap_stacks_start_pa)) &&
11111 (pai < (int)pa_index(pmap_stacks_end_pa));
11112
11113 if (!is_pmap_stack &&
11114 (pp_attr_table[pai] & PP_ATTR_MONITOR) &&
11115 !(pvh_get_flags(pv_h) & PVH_FLAG_HASHED)) {
11116 panic("Found PPL-owned page that was not hashed into the hibernation image: pai %d", pai);
11117 }
11118 }
11119 }
11120
11121 #endif /* XNU_MONITOR */
11122
11123 /*
11124 * Indicate that a pmap is intended to be used as a nested pmap
11125 * within one or more larger address spaces. This must be set
11126 * before pmap_nest() is called with this pmap as the 'subordinate'.
11127 */
11128 MARK_AS_PMAP_TEXT static void
11129 pmap_set_nested_internal(
11130 pmap_t pmap)
11131 {
11132 VALIDATE_PMAP(pmap);
11133 pmap->nested = TRUE;
11134 pmap_get_pt_ops(pmap)->free_id(pmap);
11135 }
11136
11137 void
11138 pmap_set_nested(
11139 pmap_t pmap)
11140 {
11141 #if XNU_MONITOR
11142 pmap_set_nested_ppl(pmap);
11143 #else
11144 pmap_set_nested_internal(pmap);
11145 #endif
11146 }
11147
11148 /*
11149 * pmap_trim_range(pmap, start, end)
11150 *
11151 * pmap = pmap to operate on
11152 * start = start of the range
11153 * end = end of the range
11154 *
11155 * Attempts to deallocate TTEs for the given range within the nested region.
11156 */
11157 MARK_AS_PMAP_TEXT static void
11158 pmap_trim_range(
11159 pmap_t pmap,
11160 addr64_t start,
11161 addr64_t end)
11162 {
11163 addr64_t cur;
11164 addr64_t nested_region_start;
11165 addr64_t nested_region_end;
11166 addr64_t adjusted_start;
11167 addr64_t adjusted_end;
11168 addr64_t adjust_offmask;
11169 tt_entry_t * tte_p;
11170 pt_entry_t * pte_p;
11171 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
11172
11173 if (__improbable(end < start)) {
11174 panic("%s: invalid address range, "
11175 "pmap=%p, start=%p, end=%p",
11176 __func__,
11177 pmap, (void*)start, (void*)end);
11178 }
11179
11180 nested_region_start = pmap->nested_region_addr;
11181 nested_region_end = nested_region_start + pmap->nested_region_size;
11182
11183 if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
11184 panic("%s: range outside nested region %p-%p, "
11185 "pmap=%p, start=%p, end=%p",
11186 __func__, (void *)nested_region_start, (void *)nested_region_end,
11187 pmap, (void*)start, (void*)end);
11188 }
11189
11190 /* Contract the range to TT page boundaries. */
11191 adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
11192 adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
11193 adjusted_end = end & ~adjust_offmask;
11194 bool modified = false;
11195
11196 /* Iterate over the range, trying to remove TTEs. */
11197 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
11198 pmap_lock(pmap);
11199
11200 tte_p = pmap_tte(pmap, cur);
11201
11202 if (tte_p == (tt_entry_t *) NULL) {
11203 goto done;
11204 }
11205
11206 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
11207 pte_p = (pt_entry_t *) ttetokv(*tte_p);
11208
11209 if ((ptep_get_info(pte_p)->refcnt == 0) &&
11210 (pmap != kernel_pmap)) {
11211 if (pmap->nested == TRUE) {
11212 /* Deallocate for the nested map. */
11213 pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
11214 } else {
11215 /* Just remove for the parent map. */
11216 pmap_tte_remove(pmap, tte_p, pt_attr_twig_level(pt_attr));
11217 }
11218
11219 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(cur, pmap);
11220 modified = true;
11221 }
11222 }
11223
11224 done:
11225 pmap_unlock(pmap);
11226 }
11227
11228 if (modified) {
11229 sync_tlb_flush();
11230 }
11231
11232 #if (__ARM_VMSA__ > 7)
11233 /* Remove empty L2 TTs. */
11234 adjusted_start = ((start + pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL)) & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL));
11235 adjusted_end = end & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL);
11236
11237 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_ln_size(pt_attr, PMAP_TT_L1_LEVEL)) {
11238 /* For each L1 entry in our range... */
11239 pmap_lock(pmap);
11240
11241 bool remove_tt1e = true;
11242 tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
11243 tt_entry_t * tt2e_start;
11244 tt_entry_t * tt2e_end;
11245 tt_entry_t * tt2e_p;
11246 tt_entry_t tt1e;
11247
11248 if (tt1e_p == NULL) {
11249 pmap_unlock(pmap);
11250 continue;
11251 }
11252
11253 tt1e = *tt1e_p;
11254
11255 if (tt1e == ARM_TTE_TYPE_FAULT) {
11256 pmap_unlock(pmap);
11257 continue;
11258 }
11259
11260 tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
11261 tt2e_end = &tt2e_start[pt_attr_page_size(pt_attr) / sizeof(*tt2e_start)];
11262
11263 for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
11264 if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
11265 /*
11266 * If any TTEs are populated, don't remove the
11267 * L1 TT.
11268 */
11269 remove_tt1e = false;
11270 }
11271 }
11272
11273 if (remove_tt1e) {
11274 pmap_tte_deallocate(pmap, tt1e_p, PMAP_TT_L1_LEVEL);
11275 PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE, false);
11276 }
11277
11278 pmap_unlock(pmap);
11279 }
11280 #endif /* (__ARM_VMSA__ > 7) */
11281 }
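/*
 * A minimal sketch (not part of the build) of the boundary contraction that
 * pmap_trim_range() performs above: the requested [start, end) range is
 * rounded inward to twig-table boundaries so that only fully-covered table
 * entries are considered for removal. The 2MB twig size and the example
 * addresses are assumed values for illustration; the real code derives the
 * mask from pt_attr_leaf_table_offmask().
 */
#if 0
static void
pmap_trim_range_contraction_sketch(void)
{
	const uint64_t twig_size = 0x200000ULL;         /* assumed 2MB per twig entry */
	const uint64_t offmask   = twig_size - 1;

	uint64_t start = 0x180123000ULL;                /* hypothetical request */
	uint64_t end   = 0x180756000ULL;

	/* Round start up and end down; partially-covered twigs are left alone. */
	uint64_t adjusted_start = (start + offmask) & ~offmask;  /* 0x180200000 */
	uint64_t adjusted_end   = end & ~offmask;                /* 0x180600000 */

	for (uint64_t cur = adjusted_start; cur < adjusted_end; cur += twig_size) {
		/* One iteration per candidate TTE, as in the loop above. */
		(void)cur;
	}
}
#endif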
11282
11283 /*
11284 * pmap_trim_internal(grand, subord, vstart, size)
11285 *
11286 * grand = pmap subord is nested in
11287 * subord = nested pmap
11288 * vstart = start of the used range in grand
11289 * size = size of the used range
11290 *
11291 * Attempts to trim the shared region page tables down to only cover the given
11292 * range in subord and grand.
11293 */
11294 MARK_AS_PMAP_TEXT static void
11295 pmap_trim_internal(
11296 pmap_t grand,
11297 pmap_t subord,
11298 addr64_t vstart,
11299 uint64_t size)
11300 {
11301 addr64_t vend;
11302 addr64_t adjust_offmask;
11303
11304 if (__improbable(os_add_overflow(vstart, size, &vend))) {
11305 panic("%s: grand addr wraps around, "
11306 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11307 __func__, grand, subord, (void*)vstart, size);
11308 }
11309
11310 VALIDATE_PMAP(grand);
11311 VALIDATE_PMAP(subord);
11312
11313 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
11314
11315 pmap_lock(subord);
11316
11317 if (__improbable(!subord->nested)) {
11318 panic("%s: subord is not nestable, "
11319 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11320 __func__, grand, subord, (void*)vstart, size);
11321 }
11322
11323 if (__improbable(grand->nested)) {
11324 panic("%s: grand is nestable, "
11325 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11326 __func__, grand, subord, (void*)vstart, size);
11327 }
11328
11329 if (__improbable(grand->nested_pmap != subord)) {
11330 panic("%s: grand->nested != subord, "
11331 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11332 __func__, grand, subord, (void*)vstart, size);
11333 }
11334
11335 if (__improbable((size != 0) &&
11336 ((vstart < grand->nested_region_addr) || (vend > (grand->nested_region_addr + grand->nested_region_size))))) {
11337 panic("%s: grand range not in nested region, "
11338 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11339 __func__, grand, subord, (void*)vstart, size);
11340 }
11341
11342
11343 if (!grand->nested_has_no_bounds_ref) {
11344 assert(subord->nested_bounds_set);
11345
11346 if (!grand->nested_bounds_set) {
11347 /* Inherit the bounds from subord. */
11348 grand->nested_region_true_start = subord->nested_region_true_start;
11349 grand->nested_region_true_end = subord->nested_region_true_end;
11350 grand->nested_bounds_set = true;
11351 }
11352
11353 pmap_unlock(subord);
11354 return;
11355 }
11356
11357 if ((!subord->nested_bounds_set) && size) {
11358 adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
11359
11360 subord->nested_region_true_start = vstart;
11361 subord->nested_region_true_end = vend;
11362 subord->nested_region_true_start &= ~adjust_offmask;
11363
11364 if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
11365 panic("%s: padded true end wraps around, "
11366 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11367 __func__, grand, subord, (void*)vstart, size);
11368 }
11369
11370 subord->nested_region_true_end &= ~adjust_offmask;
11371 subord->nested_bounds_set = true;
11372 }
11373
11374 if (subord->nested_bounds_set) {
11375 /* Inherit the bounds from subord. */
11376 grand->nested_region_true_start = subord->nested_region_true_start;
11377 grand->nested_region_true_end = subord->nested_region_true_end;
11378 grand->nested_bounds_set = true;
11379
11380 /* If we know the bounds, we can trim the pmap. */
11381 grand->nested_has_no_bounds_ref = false;
11382 pmap_unlock(subord);
11383 } else {
11384 /* Don't trim if we don't know the bounds. */
11385 pmap_unlock(subord);
11386 return;
11387 }
11388
11389 /* Trim grand to only cover the given range. */
11390 pmap_trim_range(grand, grand->nested_region_addr, grand->nested_region_true_start);
11391 pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_addr + grand->nested_region_size));
11392
11393 /* Try to trim subord. */
11394 pmap_trim_subord(subord);
11395 }
11396
11397 MARK_AS_PMAP_TEXT static void
11398 pmap_trim_self(pmap_t pmap)
11399 {
11400 if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
11401 /* If we have a no bounds ref, we need to drop it. */
11402 pmap_lock_ro(pmap->nested_pmap);
11403 pmap->nested_has_no_bounds_ref = false;
11404 boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
11405 vm_map_offset_t nested_region_true_start = pmap->nested_pmap->nested_region_true_start;
11406 vm_map_offset_t nested_region_true_end = pmap->nested_pmap->nested_region_true_end;
11407 pmap_unlock_ro(pmap->nested_pmap);
11408
11409 if (nested_bounds_set) {
11410 pmap_trim_range(pmap, pmap->nested_region_addr, nested_region_true_start);
11411 pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_addr + pmap->nested_region_size));
11412 }
11413 /*
11414 * Try trimming the nested pmap, in case we had the
11415 * last reference.
11416 */
11417 pmap_trim_subord(pmap->nested_pmap);
11418 }
11419 }
11420
11421 /*
11422 * pmap_trim_subord(subord)
11423 *
11424 * subord = nested pmap we are attempting to trim
11425 *
11426 * Trims subord's nested region once the last no-bounds
11427 * reference to it has been dropped.
11428 */
11429 MARK_AS_PMAP_TEXT static void
11430 pmap_trim_subord(pmap_t subord)
11431 {
11432 bool contract_subord = false;
11433
11434 pmap_lock(subord);
11435
11436 subord->nested_no_bounds_refcnt--;
11437
11438 if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
11439 /* If this was the last no bounds reference, trim subord. */
11440 contract_subord = true;
11441 }
11442
11443 pmap_unlock(subord);
11444
11445 if (contract_subord) {
11446 pmap_trim_range(subord, subord->nested_region_addr, subord->nested_region_true_start);
11447 pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_addr + subord->nested_region_size);
11448 }
11449 }
11450
11451 void
11452 pmap_trim(
11453 pmap_t grand,
11454 pmap_t subord,
11455 addr64_t vstart,
11456 uint64_t size)
11457 {
11458 #if XNU_MONITOR
11459 pmap_trim_ppl(grand, subord, vstart, size);
11460
11461 pmap_ledger_check_balance(grand);
11462 pmap_ledger_check_balance(subord);
11463 #else
11464 pmap_trim_internal(grand, subord, vstart, size);
11465 #endif
11466 }
11467
11468 #if HAS_APPLE_PAC && XNU_MONITOR
11469 static void *
11470 pmap_sign_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
11471 {
11472 void *res = NULL;
11473 boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);
11474
11475 uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
11476 switch (key) {
11477 case ptrauth_key_asia:
11478 res = ptrauth_sign_unauthenticated(value, ptrauth_key_asia, discriminator);
11479 break;
11480 case ptrauth_key_asda:
11481 res = ptrauth_sign_unauthenticated(value, ptrauth_key_asda, discriminator);
11482 break;
11483 default:
11484 panic("attempt to sign user pointer without process independent key");
11485 }
11486 ml_disable_user_jop_key(jop_key, saved_jop_state);
11487
11488 ml_set_interrupts_enabled(current_intr_state);
11489
11490 return res;
11491 }
11492
11493 void *
11494 pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
11495 {
11496 return pmap_sign_user_ptr_internal(value, key, discriminator, jop_key);
11497 }
11498
11499 static void *
11500 pmap_auth_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
11501 {
11502 if ((key != ptrauth_key_asia) && (key != ptrauth_key_asda)) {
11503 panic("attempt to auth user pointer without process independent key");
11504 }
11505
11506 void *res = NULL;
11507 boolean_t current_intr_state = ml_set_interrupts_enabled(FALSE);
11508
11509 uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
11510 res = ml_auth_ptr_unchecked(value, key, discriminator);
11511 ml_disable_user_jop_key(jop_key, saved_jop_state);
11512
11513 ml_set_interrupts_enabled(current_intr_state);
11514
11515 return res;
11516 }
11517
11518 void *
11519 pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
11520 {
11521 return pmap_auth_user_ptr_internal(value, key, discriminator, jop_key);
11522 }
11523 #endif /* HAS_APPLE_PAC && XNU_MONITOR */
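/*
 * A minimal sketch (not built) of the sign-then-authenticate shape used by the
 * PPL entry points above, assuming an arm64e target where the <ptrauth.h>
 * intrinsics are available. It deliberately omits the user JOP-key switching
 * and interrupt masking that the real routines perform; the function name is
 * hypothetical.
 */
#if 0
static void *
pmap_ptrauth_roundtrip_sketch(void *value, uint64_t discriminator)
{
	/* Sign the pointer with the process-independent data key (ASDA)... */
	void *signed_ptr = ptrauth_sign_unauthenticated(value, ptrauth_key_asda, discriminator);

	/* ...and authenticate it with the same key and discriminator. */
	return ptrauth_auth_data(signed_ptr, ptrauth_key_asda, discriminator);
}
#endif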
11524
11525 /*
11526 * kern_return_t pmap_nest(grand, subord, vstart, size)
11527 *
11528 * grand = the pmap that we will nest subord into
11529 * subord = the pmap that goes into the grand
11530 * vstart = start of the range to be nested (same VA in both grand and subord)
11531 * size = size of the nested area (up to 16TB)
11532 *
11533 * Inserts a pmap into another. This is used to implement shared segments.
11534 *
11535 */
11536
11537 MARK_AS_PMAP_TEXT static kern_return_t
11538 pmap_nest_internal(
11539 pmap_t grand,
11540 pmap_t subord,
11541 addr64_t vstart,
11542 uint64_t size)
11543 {
11544 kern_return_t kr = KERN_FAILURE;
11545 vm_map_offset_t vaddr;
11546 tt_entry_t *stte_p;
11547 tt_entry_t *gtte_p;
11548 unsigned int i;
11549 unsigned int num_tte;
11550 unsigned int nested_region_asid_bitmap_size;
11551 unsigned int* nested_region_asid_bitmap;
11552 int expand_options = 0;
11553 bool deref_subord = true;
11554 pmap_t __ptrauth_only subord_addr;
11555
11556 addr64_t vend;
11557 if (__improbable(os_add_overflow(vstart, size, &vend))) {
11558 panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
11559 }
11560
11561 VALIDATE_PMAP(grand);
11562 pmap_reference_internal(subord); // This call will also validate subord
11563
11564 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
11565 assert(pmap_get_pt_attr(subord) == pt_attr);
11566
11567 #if XNU_MONITOR
11568 expand_options |= PMAP_TT_ALLOCATE_NOWAIT;
11569 #endif
11570
11571 if (__improbable(((size | vstart) & (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL)) {
11572 panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx\n", grand, vstart, size);
11573 }
11574
11575 if (__improbable(!subord->nested)) {
11576 panic("%s: subordinate pmap %p is not nestable", __func__, subord);
11577 }
11578
11579 if (subord->nested_region_asid_bitmap == NULL) {
11580 nested_region_asid_bitmap_size = (unsigned int)(size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY);
11581
11582 #if XNU_MONITOR
11583 pmap_paddr_t pa = 0;
11584
11585 if (__improbable((nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE)) {
11586 panic("%s: nested_region_asid_bitmap_size=%u will not fit in a page, "
11587 "grand=%p, subord=%p, vstart=0x%llx, size=%llx",
11588 __FUNCTION__, nested_region_asid_bitmap_size,
11589 grand, subord, vstart, size);
11590 }
11591
11592 kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
11593
11594 if (kr != KERN_SUCCESS) {
11595 goto nest_cleanup;
11596 }
11597
11598 assert(pa);
11599
11600 nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
11601 #else
11602 nested_region_asid_bitmap = kheap_alloc(KHEAP_DATA_BUFFERS,
11603 nested_region_asid_bitmap_size * sizeof(unsigned int),
11604 Z_WAITOK | Z_ZERO);
11605 #endif
11606
11607 pmap_lock(subord);
11608 if (subord->nested_region_asid_bitmap == NULL) {
11609 subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
11610 subord->nested_region_addr = vstart;
11611 subord->nested_region_size = (mach_vm_offset_t) size;
11612
11613 /**
11614 * Ensure that the rest of the subord->nested_region_* fields are
11615 * initialized and visible before setting the nested_region_asid_bitmap
11616 * field (which is used as the flag to say that the rest are initialized).
11617 */
11618 __builtin_arm_dmb(DMB_ISHST);
11619 subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
11620 nested_region_asid_bitmap = NULL;
11621 }
11622 pmap_unlock(subord);
11623 if (nested_region_asid_bitmap != NULL) {
11624 #if XNU_MONITOR
11625 pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
11626 #else
11627 kheap_free(KHEAP_DATA_BUFFERS, nested_region_asid_bitmap,
11628 nested_region_asid_bitmap_size * sizeof(unsigned int));
11629 #endif
11630 }
11631 }
11632
11633 /**
11634 * Ensure subsequent reads of the subord->nested_region_* fields don't get
11635 * speculated before their initialization.
11636 */
11637 __builtin_arm_dmb(DMB_ISHLD);
11638
11639 if ((subord->nested_region_addr + subord->nested_region_size) < vend) {
11640 uint64_t new_size;
11641 unsigned int new_nested_region_asid_bitmap_size;
11642 unsigned int* new_nested_region_asid_bitmap;
11643
11644 nested_region_asid_bitmap = NULL;
11645 nested_region_asid_bitmap_size = 0;
11646 new_size = vend - subord->nested_region_addr;
11647
11648 /* We explicitly add 1 to the bitmap allocation size to avoid under-allocating due to integer truncation. */
11649 new_nested_region_asid_bitmap_size = (unsigned int)((new_size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY)) + 1;
11650
11651 #if XNU_MONITOR
11652 pmap_paddr_t pa = 0;
11653
11654 if (__improbable((new_nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE)) {
11655 panic("%s: new_nested_region_asid_bitmap_size=%u will not fit in a page, "
11656 "grand=%p, subord=%p, vstart=0x%llx, new_size=%llx",
11657 __FUNCTION__, new_nested_region_asid_bitmap_size,
11658 grand, subord, vstart, new_size);
11659 }
11660
11661 kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
11662
11663 if (kr != KERN_SUCCESS) {
11664 goto nest_cleanup;
11665 }
11666
11667 assert(pa);
11668
11669 new_nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
11670 #else
11671 new_nested_region_asid_bitmap = kheap_alloc(KHEAP_DATA_BUFFERS,
11672 new_nested_region_asid_bitmap_size * sizeof(unsigned int),
11673 Z_WAITOK | Z_ZERO);
11674 #endif
11675 pmap_lock(subord);
11676 if (subord->nested_region_size < new_size) {
11677 bcopy(subord->nested_region_asid_bitmap,
11678 new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
11679 nested_region_asid_bitmap_size = subord->nested_region_asid_bitmap_size;
11680 nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
11681 subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
11682 subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
11683 subord->nested_region_size = new_size;
11684 new_nested_region_asid_bitmap = NULL;
11685 }
11686 pmap_unlock(subord);
11687 if (nested_region_asid_bitmap != NULL) {
11688 #if XNU_MONITOR
11689 pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
11690 #else
11691 kheap_free(KHEAP_DATA_BUFFERS, nested_region_asid_bitmap,
11692 nested_region_asid_bitmap_size * sizeof(unsigned int));
11693 #endif
11694 }
11695 if (new_nested_region_asid_bitmap != NULL) {
11696 #if XNU_MONITOR
11697 pmap_pages_free(kvtophys((vm_offset_t)new_nested_region_asid_bitmap), PAGE_SIZE);
11698 #else
11699 kheap_free(KHEAP_DATA_BUFFERS, new_nested_region_asid_bitmap,
11700 new_nested_region_asid_bitmap_size * sizeof(unsigned int));
11701 #endif
11702 }
11703 }
11704
11705 pmap_lock(subord);
11706
11707 #if __has_feature(ptrauth_calls)
11708 subord_addr = ptrauth_sign_unauthenticated(subord,
11709 ptrauth_key_process_independent_data,
11710 ptrauth_blend_discriminator(&grand->nested_pmap, ptrauth_string_discriminator("pmap.nested_pmap")));
11711 #else
11712 subord_addr = subord;
11713 #endif // __has_feature(ptrauth_calls)
11714
11715 if (os_atomic_cmpxchg(&grand->nested_pmap, PMAP_NULL, subord_addr, relaxed)) {
11716 /*
11717 * If this is grand's first nesting operation, keep the reference on subord.
11718 * It will be released by pmap_destroy_internal() when grand is destroyed.
11719 */
11720 deref_subord = false;
11721
11722 if (!subord->nested_bounds_set) {
11723 /*
11724 * We are nesting without the shared regions bounds
11725 * being known. We'll have to trim the pmap later.
11726 */
11727 grand->nested_has_no_bounds_ref = true;
11728 subord->nested_no_bounds_refcnt++;
11729 }
11730
11731 grand->nested_region_addr = vstart;
11732 grand->nested_region_size = (mach_vm_offset_t) size;
11733 } else {
11734 if (__improbable(grand->nested_pmap != subord)) {
11735 panic("pmap_nest() pmap %p has a nested pmap\n", grand);
11736 } else if (__improbable(grand->nested_region_addr > vstart)) {
11737 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand);
11738 } else if ((grand->nested_region_addr + grand->nested_region_size) < vend) {
11739 grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_addr + size);
11740 }
11741 }
11742
11743 #if (__ARM_VMSA__ == 7)
11744 vaddr = (vm_map_offset_t) vstart;
11745 num_tte = size >> ARM_TT_L1_SHIFT;
11746
11747 for (i = 0; i < num_tte; i++) {
11748 if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
11749 goto expand_next;
11750 }
11751
11752 stte_p = pmap_tte(subord, vaddr);
11753 if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
11754 pmap_unlock(subord);
11755 kr = pmap_expand(subord, vaddr, expand_options, PMAP_TT_L2_LEVEL);
11756
11757 if (kr != KERN_SUCCESS) {
11758 pmap_lock(grand);
11759 goto done;
11760 }
11761
11762 pmap_lock(subord);
11763 }
11764 pmap_unlock(subord);
11765 pmap_lock(grand);
11766 stte_p = pmap_tte(grand, vaddr);
11767 if (stte_p == (tt_entry_t *)NULL) {
11768 pmap_unlock(grand);
11769 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);
11770
11771 if (kr != KERN_SUCCESS) {
11772 pmap_lock(grand);
11773 goto done;
11774 }
11775 } else {
11776 pmap_unlock(grand);
11777 kr = KERN_SUCCESS;
11778 }
11779 pmap_lock(subord);
11780
11781 expand_next:
11782 vaddr += ARM_TT_L1_SIZE;
11783 }
11784
11785 #else
11786 vaddr = (vm_map_offset_t) vstart;
11787 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
11788
11789 for (i = 0; i < num_tte; i++) {
11790 if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
11791 goto expand_next;
11792 }
11793
11794 stte_p = pmap_tte(subord, vaddr);
11795 if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
11796 pmap_unlock(subord);
11797 kr = pmap_expand(subord, vaddr, expand_options, pt_attr_leaf_level(pt_attr));
11798
11799 if (kr != KERN_SUCCESS) {
11800 pmap_lock(grand);
11801 goto done;
11802 }
11803
11804 pmap_lock(subord);
11805 }
11806 expand_next:
11807 vaddr += pt_attr_twig_size(pt_attr);
11808 }
11809 #endif
11810 pmap_unlock(subord);
11811
11812 /*
11813 * copy tte's from subord pmap into grand pmap
11814 */
11815
11816 pmap_lock(grand);
11817 vaddr = (vm_map_offset_t) vstart;
11818
11819
11820 #if (__ARM_VMSA__ == 7)
11821 for (i = 0; i < num_tte; i++) {
11822 if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
11823 goto nest_next;
11824 }
11825
11826 stte_p = pmap_tte(subord, vaddr);
11827 gtte_p = pmap_tte(grand, vaddr);
11828 *gtte_p = *stte_p;
11829
11830 nest_next:
11831 vaddr += ARM_TT_L1_SIZE;
11832 }
11833 #else
11834 for (i = 0; i < num_tte; i++) {
11835 if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
11836 goto nest_next;
11837 }
11838
11839 stte_p = pmap_tte(subord, vaddr);
11840 gtte_p = pmap_tte(grand, vaddr);
11841 if (gtte_p == PT_ENTRY_NULL) {
11842 pmap_unlock(grand);
11843 kr = pmap_expand(grand, vaddr, expand_options, pt_attr_twig_level(pt_attr));
11844 pmap_lock(grand);
11845
11846 if (kr != KERN_SUCCESS) {
11847 goto done;
11848 }
11849
11850 gtte_p = pmap_tt2e(grand, vaddr);
11851 }
11852 *gtte_p = *stte_p;
11853
11854 nest_next:
11855 vaddr += pt_attr_twig_size(pt_attr);
11856 }
11857 #endif
11858
11859 kr = KERN_SUCCESS;
11860 done:
11861
11862 stte_p = pmap_tte(grand, vstart);
11863 FLUSH_PTE_RANGE_STRONG(stte_p, stte_p + num_tte);
11864 PMAP_UPDATE_TLBS(grand, vstart, vend, false);
11865
11866 pmap_unlock(grand);
11867 #if XNU_MONITOR
11868 nest_cleanup:
11869 #endif
11870 if (deref_subord) {
11871 pmap_destroy_internal(subord);
11872 }
11873 return kr;
11874 }
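/*
 * A minimal sketch (not built) of the ASID bitmap sizing arithmetic used by
 * pmap_nest_internal() above, with concrete example numbers. The 1GB region
 * size and the twig shift of 25 (one twig entry covers 32MB with 16K pages)
 * are assumed values for illustration only.
 */
#if 0
static void
pmap_nest_bitmap_sizing_sketch(void)
{
	const uint64_t size          = 0x40000000ULL;   /* assumed 1GB nested region */
	const unsigned twig_shift    = 25;              /* assumed: 32MB per twig entry */
	const unsigned bits_per_word = sizeof(unsigned int) * 8;

	/* One bit per twig entry: 1GB / 32MB = 32 twig entries = 32 bits. */
	unsigned int words = (unsigned int)((size >> twig_shift) / bits_per_word);

	/* The "grow" path above adds one extra word so that integer truncation
	 * can never under-allocate the bitmap. */
	unsigned int grown_words = words + 1;

	(void)grown_words;
}
#endif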
11875
11876 kern_return_t
11877 pmap_nest(
11878 pmap_t grand,
11879 pmap_t subord,
11880 addr64_t vstart,
11881 uint64_t size)
11882 {
11883 kern_return_t kr = KERN_FAILURE;
11884
11885 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
11886 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
11887 VM_KERNEL_ADDRHIDE(vstart));
11888
11889 #if XNU_MONITOR
11890 while ((kr = pmap_nest_ppl(grand, subord, vstart, size)) == KERN_RESOURCE_SHORTAGE) {
11891 pmap_alloc_page_for_ppl(0);
11892 }
11893
11894 pmap_ledger_check_balance(grand);
11895 pmap_ledger_check_balance(subord);
11896 #else
11897 kr = pmap_nest_internal(grand, subord, vstart, size);
11898 #endif
11899
11900 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);
11901
11902 return kr;
11903 }
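/*
 * A minimal usage sketch (not built) of the nesting lifecycle implemented by
 * the routines above. The base address and size are placeholder example
 * values; in practice the VM layer drives this sequence when the shared
 * region is set up, trimmed to its used bounds, and finally torn down.
 */
#if 0
static void
pmap_nesting_lifecycle_sketch(pmap_t grand, pmap_t subord)
{
	const addr64_t vstart = 0x180000000ULL;  /* assumed shared-region base */
	const uint64_t size   = 0x40000000ULL;   /* assumed 1GB nested region */

	pmap_set_nested(subord);                 /* mark subord as nestable */

	kern_return_t kr = pmap_nest(grand, subord, vstart, size);
	assert(kr == KERN_SUCCESS);

	/* Once the truly-used bounds are known, release the unused tables. */
	pmap_trim(grand, subord, vstart, size);

	/* Teardown: remove the nested TTEs from grand. */
	kr = pmap_unnest(grand, vstart, size);
	assert(kr == KERN_SUCCESS);
}
#endif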
11904
11905 /*
11906 * kern_return_t pmap_unnest(grand, vaddr, size)
11907 *
11908 * grand = the pmap that will have the virtual range unnested
11909 * vaddr = start of range in pmap to be unnested
11910 * size = size of range in pmap to be unnested
11911 *
11912 */
11913
11914 kern_return_t
11915 pmap_unnest(
11916 pmap_t grand,
11917 addr64_t vaddr,
11918 uint64_t size)
11919 {
11920 return pmap_unnest_options(grand, vaddr, size, 0);
11921 }
11922
11923 MARK_AS_PMAP_TEXT static kern_return_t
11924 pmap_unnest_options_internal(
11925 pmap_t grand,
11926 addr64_t vaddr,
11927 uint64_t size,
11928 unsigned int option)
11929 {
11930 vm_map_offset_t start;
11931 vm_map_offset_t addr;
11932 tt_entry_t *tte_p;
11933 unsigned int current_index;
11934 unsigned int start_index;
11935 unsigned int max_index;
11936 unsigned int num_tte;
11937 unsigned int i;
11938
11939 addr64_t vend;
11940 if (__improbable(os_add_overflow(vaddr, size, &vend))) {
11941 panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__, grand, vaddr, size);
11942 }
11943
11944 VALIDATE_PMAP(grand);
11945
11946 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
11947
11948 if (((size | vaddr) & pt_attr_twig_offmask(pt_attr)) != 0x0ULL) {
11949 panic("pmap_unnest(): unaligned request");
11950 }
11951
11952 if ((option & PMAP_UNNEST_CLEAN) == 0) {
11953 if (grand->nested_pmap == NULL) {
11954 panic("%s: %p has no nested pmap", __func__, grand);
11955 }
11956
11957 if ((vaddr < grand->nested_region_addr) || (vend > (grand->nested_region_addr + grand->nested_region_size))) {
11958 panic("%s: %p: unnest request to not-fully-nested region [%p, %p)", __func__, grand, (void*)vaddr, (void*)vend);
11959 }
11960
11961 pmap_lock(grand->nested_pmap);
11962
11963 start = vaddr;
11964 start_index = (unsigned int)((vaddr - grand->nested_region_addr) >> pt_attr_twig_shift(pt_attr));
11965 max_index = (unsigned int)(start_index + (size >> pt_attr_twig_shift(pt_attr)));
11966 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
11967
11968 for (current_index = start_index, addr = start; current_index < max_index; current_index++, addr += pt_attr_twig_size(pt_attr)) {
11969 pt_entry_t *bpte, *epte, *cpte;
11970
11971 if (addr < grand->nested_pmap->nested_region_true_start) {
11972 /* We haven't reached the interesting range. */
11973 continue;
11974 }
11975
11976 if (addr >= grand->nested_pmap->nested_region_true_end) {
11977 /* We're done with the interesting range. */
11978 break;
11979 }
11980
11981 bpte = pmap_pte(grand->nested_pmap, addr);
11982 epte = bpte + (pt_attr_leaf_index_mask(pt_attr) >> pt_attr_leaf_shift(pt_attr));
11983
11984 if (!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap)) {
11985 setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);
11986
11987 for (cpte = bpte; cpte <= epte; cpte++) {
11988 pmap_paddr_t pa;
11989 int pai = 0;
11990 boolean_t managed = FALSE;
11991 pt_entry_t spte;
11992
11993 if ((*cpte != ARM_PTE_TYPE_FAULT)
11994 && (!ARM_PTE_IS_COMPRESSED(*cpte, cpte))) {
11995 spte = *cpte;
11996 while (!managed) {
11997 pa = pte_to_pa(spte);
11998 if (!pa_valid(pa)) {
11999 break;
12000 }
12001 pai = (int)pa_index(pa);
12002 LOCK_PVH(pai);
12003 spte = *cpte;
12004 pa = pte_to_pa(spte);
12005 if (pai == (int)pa_index(pa)) {
12006 managed = TRUE;
12007 break; // Leave the PVH locked as we'll unlock it after we update the PTE
12008 }
12009 UNLOCK_PVH(pai);
12010 }
12011
12012 if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
12013 WRITE_PTE_FAST(cpte, (spte | ARM_PTE_NG));
12014 }
12015
12016 if (managed) {
12017 ASSERT_PVH_LOCKED(pai);
12018 UNLOCK_PVH(pai);
12019 }
12020 }
12021 }
12022 }
12023
12024 FLUSH_PTE_RANGE_STRONG(bpte, epte);
12025 }
12026
12027 flush_mmu_tlb_region_asid_async(vaddr, (unsigned)size, grand->nested_pmap);
12028 sync_tlb_flush();
12029
12030 pmap_unlock(grand->nested_pmap);
12031 }
12032
12033 pmap_lock(grand);
12034
12035 /*
12036 * invalidate all pdes for segment at vaddr in pmap grand
12037 */
12038 start = vaddr;
12039 addr = vaddr;
12040
12041 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
12042
12043 for (i = 0; i < num_tte; i++, addr += pt_attr_twig_size(pt_attr)) {
12044 if (addr < grand->nested_pmap->nested_region_true_start) {
12045 /* We haven't reached the interesting range. */
12046 continue;
12047 }
12048
12049 if (addr >= grand->nested_pmap->nested_region_true_end) {
12050 /* We're done with the interesting range. */
12051 break;
12052 }
12053
12054 tte_p = pmap_tte(grand, addr);
12055 *tte_p = ARM_TTE_TYPE_FAULT;
12056 }
12057
12058 tte_p = pmap_tte(grand, start);
12059 FLUSH_PTE_RANGE_STRONG(tte_p, tte_p + num_tte);
12060 PMAP_UPDATE_TLBS(grand, start, vend, false);
12061
12062 pmap_unlock(grand);
12063
12064 return KERN_SUCCESS;
12065 }
12066
12067 kern_return_t
12068 pmap_unnest_options(
12069 pmap_t grand,
12070 addr64_t vaddr,
12071 uint64_t size,
12072 unsigned int option)
12073 {
12074 kern_return_t kr = KERN_FAILURE;
12075
12076 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
12077 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
12078
12079 #if XNU_MONITOR
12080 kr = pmap_unnest_options_ppl(grand, vaddr, size, option);
12081 #else
12082 kr = pmap_unnest_options_internal(grand, vaddr, size, option);
12083 #endif
12084
12085 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);
12086
12087 return kr;
12088 }
12089
12090 boolean_t
12091 pmap_adjust_unnest_parameters(
12092 __unused pmap_t p,
12093 __unused vm_map_offset_t *s,
12094 __unused vm_map_offset_t *e)
12095 {
12096 return TRUE; /* to get to log_unnest_badness()... */
12097 }
12098
12099 /*
12100 * disable no-execute capability on
12101 * the specified pmap
12102 */
12103 #if DEVELOPMENT || DEBUG
12104 void
12105 pmap_disable_NX(
12106 pmap_t pmap)
12107 {
12108 pmap->nx_enabled = FALSE;
12109 }
12110 #else
12111 void
12112 pmap_disable_NX(
12113 __unused pmap_t pmap)
12114 {
12115 }
12116 #endif
12117
12118 /*
12119 * flush a range of hardware TLB entries.
12120 * NOTE: assumes the smallest TLB entry in use will be for
12121 * an ARM small page (4K).
12122 */
12123
12124 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
12125
12126 #if __ARM_RANGE_TLBI__
12127 #define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
12128 #define ARM64_FULL_TLB_FLUSH_THRESHOLD ARM64_TLB_RANGE_PAGES
12129 #else
12130 #define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
12131 #endif // __ARM_RANGE_TLBI__
12132
12133 static void
12134 flush_mmu_tlb_region_asid_async(
12135 vm_offset_t va,
12136 size_t length,
12137 pmap_t pmap)
12138 {
12139 #if (__ARM_VMSA__ == 7)
12140 vm_offset_t end = va + length;
12141 uint32_t asid;
12142
12143 asid = pmap->hw_asid;
12144
12145 if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
12146 boolean_t flush_all = FALSE;
12147
12148 if ((asid == 0) || (pmap->nested == TRUE)) {
12149 flush_all = TRUE;
12150 }
12151 if (flush_all) {
12152 flush_mmu_tlb_async();
12153 } else {
12154 flush_mmu_tlb_asid_async(asid);
12155 }
12156
12157 return;
12158 }
12159 if (pmap->nested == TRUE) {
12160 #if !__ARM_MP_EXT__
12161 flush_mmu_tlb();
12162 #else
12163 va = arm_trunc_page(va);
12164 while (va < end) {
12165 flush_mmu_tlb_mva_entries_async(va);
12166 va += ARM_SMALL_PAGE_SIZE;
12167 }
12168 #endif
12169 return;
12170 }
12171 va = arm_trunc_page(va) | (asid & 0xff);
12172 flush_mmu_tlb_entries_async(va, end);
12173
12174 #else
12175 unsigned long pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
12176 const uint64_t pmap_page_size = 1ULL << pmap_page_shift;
12177 ppnum_t npages = (ppnum_t)(length >> pmap_page_shift);
12178 uint32_t asid;
12179
12180 asid = pmap->hw_asid;
12181
12182 if (npages > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
12183 boolean_t flush_all = FALSE;
12184
12185 if ((asid == 0) || (pmap->nested == TRUE)) {
12186 flush_all = TRUE;
12187 }
12188 if (flush_all) {
12189 flush_mmu_tlb_async();
12190 } else {
12191 flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
12192 }
12193 return;
12194 }
12195 #if __ARM_RANGE_TLBI__
12196 if (npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
12197 va = generate_rtlbi_param(npages, asid, va, pmap_page_shift);
12198 if (pmap->nested == TRUE) {
12199 flush_mmu_tlb_allrange_async(va);
12200 } else {
12201 flush_mmu_tlb_range_async(va);
12202 }
12203 return;
12204 }
12205 #endif
12206 vm_offset_t end = tlbi_asid(asid) | tlbi_addr(va + length);
12207 va = tlbi_asid(asid) | tlbi_addr(va);
12208
12209 if (pmap->nested == TRUE) {
12210 flush_mmu_tlb_allentries_async(va, end, pmap_page_size);
12211 } else {
12212 flush_mmu_tlb_entries_async(va, end, pmap_page_size);
12213 }
12214
12215 #endif
12216 }
12217
12218 MARK_AS_PMAP_TEXT static void
12219 flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap)
12220 {
12221 #if (__ARM_VMSA__ == 7)
12222 flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
12223 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
12224 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
12225 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
12226 #else
12227 flush_mmu_tlb_entry_async(tlbi_addr(va & ~pt_attr_twig_offmask(pmap_get_pt_attr(pmap))) | tlbi_asid(pmap->hw_asid));
12228 #endif
12229 }
12230
12231 MARK_AS_PMAP_TEXT static void
12232 flush_mmu_tlb_full_asid_async(pmap_t pmap)
12233 {
12234 #if (__ARM_VMSA__ == 7)
12235 flush_mmu_tlb_asid_async(pmap->hw_asid);
12236 #else /* (__ARM_VMSA__ == 7) */
12237 flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
12238 #endif /* (__ARM_VMSA__ == 7) */
12239 }
12240
12241 void
12242 flush_mmu_tlb_region(
12243 vm_offset_t va,
12244 unsigned length)
12245 {
12246 flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
12247 sync_tlb_flush();
12248 }
12249
12250 static pmap_io_range_t*
12251 pmap_find_io_attr(pmap_paddr_t paddr)
12252 {
12253 pmap_io_range_t find_range = {.addr = paddr & ~PAGE_MASK, .len = PAGE_SIZE};
12254 unsigned int begin = 0, end = num_io_rgns - 1;
12255 if ((num_io_rgns == 0) || (paddr < io_attr_table[begin].addr) ||
12256 (paddr >= (io_attr_table[end].addr + io_attr_table[end].len))) {
12257 return NULL;
12258 }
12259
12260 for (;;) {
12261 unsigned int middle = (begin + end) / 2;
12262 int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
12263 if (cmp == 0) {
12264 return &io_attr_table[middle];
12265 } else if (begin == end) {
12266 break;
12267 } else if (cmp > 0) {
12268 begin = middle + 1;
12269 } else {
12270 end = middle;
12271 }
12272 }
12273
12274 return NULL;
12275 }
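/*
 * A minimal standalone sketch (not built) of the binary search over sorted,
 * non-overlapping ranges that pmap_find_io_attr() performs above. The
 * example_range type, table, and function name are hypothetical stand-ins;
 * the real code searches io_attr_table using cmp_io_rgns().
 */
#if 0
struct example_range {
	uint64_t addr;
	uint64_t len;
};

static const struct example_range *
example_find_range(const struct example_range *table, unsigned int count, uint64_t paddr)
{
	if (count == 0) {
		return NULL;
	}

	unsigned int begin = 0, end = count - 1;
	for (;;) {
		unsigned int middle = (begin + end) / 2;
		if ((paddr >= table[middle].addr) &&
		    (paddr < (table[middle].addr + table[middle].len))) {
			return &table[middle];               /* paddr falls inside this range */
		} else if (begin == end) {
			return NULL;                         /* interval exhausted, no match */
		} else if (paddr >= (table[middle].addr + table[middle].len)) {
			begin = middle + 1;                  /* search the upper half */
		} else {
			end = middle;                        /* search the lower half */
		}
	}
}
#endif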
12276
12277 unsigned int
12278 pmap_cache_attributes(
12279 ppnum_t pn)
12280 {
12281 pmap_paddr_t paddr;
12282 int pai;
12283 unsigned int result;
12284 pp_attr_t pp_attr_current;
12285
12286 paddr = ptoa(pn);
12287
12288 assert(vm_last_phys > vm_first_phys); // Check that pmap has been bootstrapped
12289
12290 if (!pa_valid(paddr)) {
12291 pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
12292 return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
12293 }
12294
12295 result = VM_WIMG_DEFAULT;
12296
12297 pai = (int)pa_index(paddr);
12298
12299 pp_attr_current = pp_attr_table[pai];
12300 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
12301 result = pp_attr_current & PP_ATTR_WIMG_MASK;
12302 }
12303 return result;
12304 }
12305
12306 MARK_AS_PMAP_TEXT static void
12307 pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
12308 {
12309 if ((wimg_bits_prev != wimg_bits_new)
12310 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
12311 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
12312 && (wimg_bits_new != VM_WIMG_COPYBACK))
12313 || ((wimg_bits_prev == VM_WIMG_WTHRU)
12314 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
12315 pmap_sync_page_attributes_phys(pn);
12316 }
12317
12318 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
12319 pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
12320 }
12321 }
12322
12323 MARK_AS_PMAP_TEXT static __unused void
12324 pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
12325 {
12326 pmap_paddr_t paddr = ptoa(pn);
12327 int pai = (int)pa_index(paddr);
12328
12329 if (__improbable(!pa_valid(paddr))) {
12330 panic("%s called on non-managed page 0x%08x", __func__, pn);
12331 }
12332
12333 LOCK_PVH(pai);
12334
12335 #if XNU_MONITOR
12336 if (__improbable(pa_test_monitor(paddr))) {
12337 panic("%s invoked on PPL page 0x%08x", __func__, pn);
12338 }
12339 #endif
12340
12341 pmap_update_cache_attributes_locked(pn, new_cacheattr);
12342
12343 UNLOCK_PVH(pai);
12344
12345 pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
12346 }
12347
12348 void *
12349 pmap_map_compressor_page(ppnum_t pn)
12350 {
12351 #if __ARM_PTE_PHYSMAP__
12352 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
12353 if (cacheattr != VM_WIMG_DEFAULT) {
12354 #if XNU_MONITOR
12355 pmap_update_compressor_page_ppl(pn, cacheattr, VM_WIMG_DEFAULT);
12356 #else
12357 pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
12358 #endif
12359 }
12360 #endif
12361 return (void*)phystokv(ptoa(pn));
12362 }
12363
12364 void
12365 pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
12366 {
12367 #if __ARM_PTE_PHYSMAP__
12368 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
12369 if (cacheattr != VM_WIMG_DEFAULT) {
12370 #if XNU_MONITOR
12371 pmap_update_compressor_page_ppl(pn, VM_WIMG_DEFAULT, cacheattr);
12372 #else
12373 pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
12374 #endif
12375 }
12376 #endif
12377 }
12378
12379 MARK_AS_PMAP_TEXT static boolean_t
12380 pmap_batch_set_cache_attributes_internal(
12381 ppnum_t pn,
12382 unsigned int cacheattr,
12383 unsigned int page_cnt,
12384 unsigned int page_index,
12385 boolean_t doit,
12386 unsigned int *res)
12387 {
12388 pmap_paddr_t paddr;
12389 int pai;
12390 pp_attr_t pp_attr_current;
12391 pp_attr_t pp_attr_template;
12392 unsigned int wimg_bits_prev, wimg_bits_new;
12393
12394 if (cacheattr & VM_WIMG_USE_DEFAULT) {
12395 cacheattr = VM_WIMG_DEFAULT;
12396 }
12397
12398 if ((doit == FALSE) && (*res == 0)) {
12399 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
12400 *res = page_cnt;
12401 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
12402 if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt << PAGE_SHIFT) == FALSE) {
12403 return FALSE;
12404 }
12405 }
12406
12407 paddr = ptoa(pn);
12408
12409 if (!pa_valid(paddr)) {
12410 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn);
12411 }
12412
12413 pai = (int)pa_index(paddr);
12414
12415 if (doit) {
12416 LOCK_PVH(pai);
12417 #if XNU_MONITOR
12418 if (pa_test_monitor(paddr)) {
12419 panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
12420 }
12421 #endif
12422 }
12423
12424 do {
12425 pp_attr_current = pp_attr_table[pai];
12426 wimg_bits_prev = VM_WIMG_DEFAULT;
12427 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
12428 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
12429 }
12430
12431 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
12432
12433 if (!doit) {
12434 break;
12435 }
12436
12437 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
12438 * to avoid losing simultaneous updates to other bits like refmod. */
12439 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
12440
12441 wimg_bits_new = VM_WIMG_DEFAULT;
12442 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
12443 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
12444 }
12445
12446 if (doit) {
12447 if (wimg_bits_new != wimg_bits_prev) {
12448 pmap_update_cache_attributes_locked(pn, cacheattr);
12449 }
12450 UNLOCK_PVH(pai);
12451 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
12452 pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
12453 }
12454 } else {
12455 if (wimg_bits_new == VM_WIMG_COPYBACK) {
12456 return FALSE;
12457 }
12458 if (wimg_bits_prev == wimg_bits_new) {
12459 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
12460 *res = *res - 1;
12461 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
12462 if (!platform_cache_batch_wimg(wimg_bits_new, (*res) << PAGE_SHIFT)) {
12463 return FALSE;
12464 }
12465 }
12466 return TRUE;
12467 }
12468
12469 if (page_cnt == (page_index + 1)) {
12470 wimg_bits_prev = VM_WIMG_COPYBACK;
12471 if (((wimg_bits_prev != wimg_bits_new))
12472 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
12473 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
12474 && (wimg_bits_new != VM_WIMG_COPYBACK))
12475 || ((wimg_bits_prev == VM_WIMG_WTHRU)
12476 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
12477 platform_cache_flush_wimg(wimg_bits_new);
12478 }
12479 }
12480
12481 return TRUE;
12482 }
12483
12484 boolean_t
12485 pmap_batch_set_cache_attributes(
12486 ppnum_t pn,
12487 unsigned int cacheattr,
12488 unsigned int page_cnt,
12489 unsigned int page_index,
12490 boolean_t doit,
12491 unsigned int *res)
12492 {
12493 #if XNU_MONITOR
12494 return pmap_batch_set_cache_attributes_ppl(pn, cacheattr, page_cnt, page_index, doit, res);
12495 #else
12496 return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
12497 #endif
12498 }
12499
12500 MARK_AS_PMAP_TEXT static void
12501 pmap_set_cache_attributes_priv(
12502 ppnum_t pn,
12503 unsigned int cacheattr,
12504 boolean_t external __unused)
12505 {
12506 pmap_paddr_t paddr;
12507 int pai;
12508 pp_attr_t pp_attr_current;
12509 pp_attr_t pp_attr_template;
12510 unsigned int wimg_bits_prev, wimg_bits_new;
12511
12512 paddr = ptoa(pn);
12513
12514 if (!pa_valid(paddr)) {
12515 return; /* Not a managed page. */
12516 }
12517
12518 if (cacheattr & VM_WIMG_USE_DEFAULT) {
12519 cacheattr = VM_WIMG_DEFAULT;
12520 }
12521
12522 pai = (int)pa_index(paddr);
12523
12524 LOCK_PVH(pai);
12525
12526 #if XNU_MONITOR
12527 if (external && pa_test_monitor(paddr)) {
12528 panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
12529 } else if (!external && !pa_test_monitor(paddr)) {
12530 panic("%s invoked on non-PPL page 0x%llx", __func__, (uint64_t)paddr);
12531 }
12532 #endif
12533
12534 do {
12535 pp_attr_current = pp_attr_table[pai];
12536 wimg_bits_prev = VM_WIMG_DEFAULT;
12537 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
12538 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
12539 }
12540
12541 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
12542
12543 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
12544 * to avoid losing simultaneous updates to other bits like refmod. */
12545 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
12546
12547 wimg_bits_new = VM_WIMG_DEFAULT;
12548 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
12549 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
12550 }
12551
12552 if (wimg_bits_new != wimg_bits_prev) {
12553 pmap_update_cache_attributes_locked(pn, cacheattr);
12554 }
12555
12556 UNLOCK_PVH(pai);
12557
12558 pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
12559 }
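/*
 * A minimal standalone sketch (not built) of the read-modify-CAS pattern used
 * above to replace only the WIMG bits of a packed per-page attribute word
 * while preserving concurrent updates to the other bits (e.g. ref/mod). It is
 * written with C11 atomics and a made-up mask so it can be read in isolation;
 * the real code uses OSCompareAndSwap16() on pp_attr_table[pai].
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

#define EXAMPLE_WIMG_MASK 0x003Fu                    /* hypothetical field layout */

static void
example_set_wimg(_Atomic uint16_t *attr, uint16_t new_wimg)
{
	uint16_t current = atomic_load(attr);
	uint16_t template;

	do {
		/* Recompute the template from the freshly observed value on every
		 * attempt, so updates to the non-WIMG bits are never lost. */
		template = (uint16_t)((current & ~EXAMPLE_WIMG_MASK) | (new_wimg & EXAMPLE_WIMG_MASK));
	} while (!atomic_compare_exchange_weak(attr, &current, template));
}
#endif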
12560
12561 MARK_AS_PMAP_TEXT static void
12562 pmap_set_cache_attributes_internal(
12563 ppnum_t pn,
12564 unsigned int cacheattr)
12565 {
12566 pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
12567 }
12568
12569 void
12570 pmap_set_cache_attributes(
12571 ppnum_t pn,
12572 unsigned int cacheattr)
12573 {
12574 #if XNU_MONITOR
12575 pmap_set_cache_attributes_ppl(pn, cacheattr);
12576 #else
12577 pmap_set_cache_attributes_internal(pn, cacheattr);
12578 #endif
12579 }
12580
12581 MARK_AS_PMAP_TEXT void
12582 pmap_update_cache_attributes_locked(
12583 ppnum_t ppnum,
12584 unsigned attributes)
12585 {
12586 pmap_paddr_t phys = ptoa(ppnum);
12587 pv_entry_t *pve_p;
12588 pt_entry_t *pte_p;
12589 pv_entry_t **pv_h;
12590 pt_entry_t tmplate;
12591 unsigned int pai;
12592 boolean_t tlb_flush_needed = FALSE;
12593
12594 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);
12595
12596 if (pmap_panic_dev_wimg_on_managed) {
12597 switch (attributes & VM_WIMG_MASK) {
12598 case VM_WIMG_IO: // nGnRnE
12599 case VM_WIMG_POSTED: // nGnRE
12600 /* supported on DRAM, but slow, so we disallow */
12601
12602 case VM_WIMG_POSTED_REORDERED: // nGRE
12603 case VM_WIMG_POSTED_COMBINED_REORDERED: // GRE
12604 /* unsupported on DRAM */
12605
12606 panic("%s: trying to use unsupported VM_WIMG type for managed page, VM_WIMG=%x, ppnum=%#x",
12607 __FUNCTION__, attributes & VM_WIMG_MASK, ppnum);
12608 break;
12609
12610 default:
12611 /* not device type memory, all good */
12612
12613 break;
12614 }
12615 }
12616
12617 #if __ARM_PTE_PHYSMAP__
12618 vm_offset_t kva = phystokv(phys);
12619 pte_p = pmap_pte(kernel_pmap, kva);
12620
12621 tmplate = *pte_p;
12622 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
12623 #if XNU_MONITOR
12624 tmplate |= (wimg_to_pte(attributes) & ~ARM_PTE_XPRR_MASK);
12625 #else
12626 tmplate |= wimg_to_pte(attributes);
12627 #endif
12628 #if (__ARM_VMSA__ > 7)
12629 if (tmplate & ARM_PTE_HINT_MASK) {
12630 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
12631 __FUNCTION__, pte_p, (void *)kva, tmplate);
12632 }
12633 #endif
12634 WRITE_PTE_STRONG(pte_p, tmplate);
12635 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
12636 tlb_flush_needed = TRUE;
12637 #endif
12638
12639 pai = (unsigned int)pa_index(phys);
12640
12641 pv_h = pai_to_pvh(pai);
12642
12643 pte_p = PT_ENTRY_NULL;
12644 pve_p = PV_ENTRY_NULL;
12645 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
12646 pte_p = pvh_ptep(pv_h);
12647 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
12648 pve_p = pvh_list(pv_h);
12649 pte_p = PT_ENTRY_NULL;
12650 }
12651
12652 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
12653 vm_map_address_t va;
12654 pmap_t pmap;
12655
12656 if (pve_p != PV_ENTRY_NULL) {
12657 pte_p = pve_get_ptep(pve_p);
12658 }
12659 #ifdef PVH_FLAG_IOMMU
12660 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
12661 goto cache_skip_pve;
12662 }
12663 #endif
12664 pmap = ptep_get_pmap(pte_p);
12665 va = ptep_get_va(pte_p);
12666
12667 tmplate = *pte_p;
12668 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
12669 tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes);
12670
12671 WRITE_PTE_STRONG(pte_p, tmplate);
12672 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
12673 pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap);
12674 tlb_flush_needed = TRUE;
12675
12676 #ifdef PVH_FLAG_IOMMU
12677 cache_skip_pve:
12678 #endif
12679 pte_p = PT_ENTRY_NULL;
12680 if (pve_p != PV_ENTRY_NULL) {
12681 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
12682 }
12683 }
12684 if (tlb_flush_needed) {
12685 /* For targets that distinguish between mild and strong DSB, mild DSB
12686 * will not drain the prefetcher. This can lead to prefetch-driven
12687 * cache fills that defeat the uncacheable requirement of the RT memory type.
12688 * In those cases, strong DSB must instead be employed to drain the prefetcher. */
12689 pmap_sync_tlb((attributes & VM_WIMG_MASK) == VM_WIMG_RT);
12690 }
12691
12692 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);
12693 }
12694
12695 #if (__ARM_VMSA__ == 7)
12696 void
12697 pmap_create_sharedpages(vm_map_address_t *kernel_data_addr, vm_map_address_t *kernel_text_addr,
12698 vm_map_address_t *user_commpage_addr)
12699 {
12700 pmap_paddr_t pa;
12701 kern_return_t kr;
12702
12703 assert(kernel_data_addr != NULL);
12704 assert(kernel_text_addr != NULL);
12705 assert(user_commpage_addr != NULL);
12706
12707 (void) pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, 0);
12708
12709 kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12710 assert(kr == KERN_SUCCESS);
12711
12712 *kernel_data_addr = phystokv(pa);
12713 // We don't have PFZ for 32 bit arm, always NULL
12714 *kernel_text_addr = 0;
12715 *user_commpage_addr = 0;
12716 }
12717
12718 #else /* __ARM_VMSA__ == 7 */
12719
12720 static void
12721 pmap_update_tt3e(
12722 pmap_t pmap,
12723 vm_address_t address,
12724 tt_entry_t template)
12725 {
12726 tt_entry_t *ptep, pte;
12727
12728 ptep = pmap_tt3e(pmap, address);
12729 if (ptep == NULL) {
12730 panic("%s: no ptep?\n", __FUNCTION__);
12731 }
12732
12733 pte = *ptep;
12734 pte = tte_to_pa(pte) | template;
12735 WRITE_PTE_STRONG(ptep, pte);
12736 }
12737
12738 /* Note absence of non-global bit */
12739 #define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
12740 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
12741 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
12742 | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
12743
12744 /* Note absence of non-global bit and no-execute bit. */
12745 #define PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
12746 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
12747 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_PNX \
12748 | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
12749
12750 void
12751 pmap_create_sharedpages(vm_map_address_t *kernel_data_addr, vm_map_address_t *kernel_text_addr,
12752 vm_map_address_t *user_text_addr)
12753 {
12754 kern_return_t kr;
12755 pmap_paddr_t data_pa = 0; // data address
12756 pmap_paddr_t text_pa = 0; // text address
12757
12758 *kernel_data_addr = 0;
12759 *kernel_text_addr = 0;
12760 *user_text_addr = 0;
12761
12762 #if XNU_MONITOR
12763 data_pa = pmap_alloc_page_for_kern(0);
12764 assert(data_pa);
12765 memset((char *) phystokv(data_pa), 0, PAGE_SIZE);
12766 #if CONFIG_ARM_PFZ
12767 text_pa = pmap_alloc_page_for_kern(0);
12768 assert(text_pa);
12769 memset((char *) phystokv(text_pa), 0, PAGE_SIZE);
12770 #endif
12771
12772 #else /* XNU_MONITOR */
12773 (void) pmap_pages_alloc_zeroed(&data_pa, PAGE_SIZE, 0);
12774 #if CONFIG_ARM_PFZ
12775 (void) pmap_pages_alloc_zeroed(&text_pa, PAGE_SIZE, 0);
12776 #endif
12777
12778 #endif /* XNU_MONITOR */
12779
12780 #ifdef CONFIG_XNUPOST
12781 /*
12782 * The kernel pmap maintains a user accessible mapping of the commpage
12783 * to test PAN.
12784 */
12785 kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(data_pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12786 assert(kr == KERN_SUCCESS);
12787
12788 /*
12789 * This mapping should not be global (as we only expect to reference it
12790 * during testing).
12791 */
12792 pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);
12793
12794 #if KASAN
12795 kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
12796 #endif
12797 #endif /* CONFIG_XNUPOST */
12798
12799 /*
12800 * In order to avoid burning extra pages on mapping the shared page, we
12801 * create a dedicated pmap for the shared page. We forcibly nest the
12802 * translation tables from this pmap into other pmaps. The level we
12803 * will nest at depends on the MMU configuration (page size, TTBR range,
12804 * etc). Typically, this is at L1 for 4K tasks and L2 for 16K tasks.
12805 *
12806 * Note that this is NOT "the nested pmap" (which is used to nest the
12807 * shared cache).
12808 *
12809 * Note that we update parameters of the entry for our unique needs (NG
12810 * entry, etc.).
12811 */
12812 sharedpage_pmap_default = pmap_create_options(NULL, 0x0, 0);
12813 assert(sharedpage_pmap_default != NULL);
12814
12815 /* The user 64-bit mapping... */
12816 kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12817 assert(kr == KERN_SUCCESS);
12818 pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
12819 #if CONFIG_ARM_PFZ
12820 /* User mapping of comm page text section for 64 bit mapping only
12821 *
12822 * We don't insert it into the 32 bit mapping because we don't want 32 bit
12823 * user processes to get this page mapped in; they should never call into
12824 * this page.
12825 *
12826 * The data comm page is in a pre-reserved L3 VA range and the text commpage
12827 * is slid in the same L3 as the data commpage. It is either outside the
12828 * max of user VA or is pre-reserved in the vm_map_exec(). This means that
12829 * it is reserved and unavailable to mach VM for future mappings.
12830 */
12831 const pt_attr_t * const pt_attr = pmap_get_pt_attr(sharedpage_pmap_default);
12832 int num_ptes = pt_attr_leaf_size(pt_attr) >> PTE_SHIFT;
12833
12834 vm_map_address_t commpage_text_va = 0;
12835
12836 do {
12837 int text_leaf_index = random() % num_ptes;
12838
12839 // Generate a VA for the commpage text with the same root and twig index as data
12840 // comm page, but with new leaf index we've just generated.
12841 commpage_text_va = (_COMM_PAGE64_BASE_ADDRESS & ~pt_attr_leaf_index_mask(pt_attr));
12842 commpage_text_va |= (text_leaf_index << pt_attr_leaf_shift(pt_attr));
12843 } while (commpage_text_va == _COMM_PAGE64_BASE_ADDRESS); // Try again if we collide (should be unlikely)
12844
12845 // Assert that this is empty
12846 __assert_only pt_entry_t *ptep = pmap_pte(sharedpage_pmap_default, commpage_text_va);
12847 assert(ptep != PT_ENTRY_NULL);
12848 assert(*ptep == ARM_TTE_EMPTY);
12849
12850 // At this point, we've found the address we want to insert our comm page at
12851 kr = pmap_enter_addr(sharedpage_pmap_default, commpage_text_va, text_pa, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12852 assert(kr == KERN_SUCCESS);
12853 // Mark it as global page R/X so that it doesn't get thrown out on tlb flush
12854 pmap_update_tt3e(sharedpage_pmap_default, commpage_text_va, PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE);
12855
12856 *user_text_addr = commpage_text_va;
12857 #endif
12858
12859 /* ...and the user 32-bit mapping. */
12860 kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE32_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12861 assert(kr == KERN_SUCCESS);
12862 pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
12863
12864 #if __ARM_MIXED_PAGE_SIZE__
12865 /**
12866 * To handle 4K tasks a new view/pmap of the shared page is needed. These are a
12867 * new set of page tables that point to the exact same 16K shared page as
12868 * before. Only the first 4K of the 16K shared page is mapped since that's
12869 * the only part that contains relevant data.
12870 */
12871 sharedpage_pmap_4k = pmap_create_options(NULL, 0x0, PMAP_CREATE_FORCE_4K_PAGES);
12872 assert(sharedpage_pmap_4k != NULL);
12873
12874 /* The user 64-bit mapping... */
12875 kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE64_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12876 assert(kr == KERN_SUCCESS);
12877 pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
12878
12879 /* ...and the user 32-bit mapping. */
12880 kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE32_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12881 assert(kr == KERN_SUCCESS);
12882 pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
12883
12884 #endif
12885
12886 /* For manipulation in kernel, go straight to physical page */
12887 *kernel_data_addr = phystokv(data_pa);
12888 *kernel_text_addr = (text_pa) ? phystokv(text_pa) : 0;
12889
12890 return;
12891 }
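/*
 * A minimal sketch (not built) of how the commpage text VA is derived above:
 * keep the data commpage's root and twig indices, and substitute a random
 * leaf index so the text page slides within the same leaf table. The leaf
 * shift of 14 and 2048 PTEs per table are assumed 16K-page example values;
 * the real code uses pt_attr_leaf_shift()/pt_attr_leaf_index_mask().
 */
#if 0
static uint64_t
commpage_text_slide_sketch(uint64_t data_commpage_va, unsigned int random_value)
{
	const unsigned leaf_shift      = 14;             /* assumed 16K page geometry */
	const unsigned ptes_per_table  = 2048;
	const uint64_t leaf_index_mask = (uint64_t)(ptes_per_table - 1) << leaf_shift;

	unsigned int leaf_index = random_value % ptes_per_table;

	/* Same L1/L2 walk as the data commpage, different L3 slot. */
	return (data_commpage_va & ~leaf_index_mask) | ((uint64_t)leaf_index << leaf_shift);
}
#endif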
12892
12893
12894 /*
12895 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
12896 * with user controlled TTEs for regions that aren't explicitly reserved by the
12897 * VM (e.g., _COMM_PAGE64_NESTING_START/_COMM_PAGE64_BASE_ADDRESS).
12898 */
12899 #if (ARM_PGSHIFT == 14)
12900 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
12901 #elif (ARM_PGSHIFT == 12)
12902 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
12903 #else
12904 #error Nested shared page mapping is unsupported on this config
12905 #endif
12906
12907 MARK_AS_PMAP_TEXT static kern_return_t
12908 pmap_insert_sharedpage_internal(
12909 pmap_t pmap)
12910 {
12911 kern_return_t kr = KERN_SUCCESS;
12912 vm_offset_t sharedpage_vaddr;
12913 pt_entry_t *ttep, *src_ttep;
12914 int options = 0;
12915 pmap_t sharedpage_pmap = sharedpage_pmap_default;
12916
12917 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
12918 const unsigned int sharedpage_level = pt_attr_sharedpage_level(pt_attr);
12919
12920 #if __ARM_MIXED_PAGE_SIZE__
12921 #if !__ARM_16K_PG__
12922 /* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
12923 #error "pmap_insert_sharedpage_internal requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
12924 #endif /* !__ARM_16K_PG__ */
12925
12926 /* Choose the correct shared page pmap to use. */
12927 const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
12928 if (pmap_page_size == 16384) {
12929 sharedpage_pmap = sharedpage_pmap_default;
12930 } else if (pmap_page_size == 4096) {
12931 sharedpage_pmap = sharedpage_pmap_4k;
12932 } else {
12933 panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size);
12934 }
12935 #endif /* __ARM_MIXED_PAGE_SIZE__ */
12936
12937 VALIDATE_PMAP(pmap);
12938 #if XNU_MONITOR
12939 options |= PMAP_OPTIONS_NOWAIT;
12940 #endif /* XNU_MONITOR */
12941
12942 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
12943 #error We assume a single page.
12944 #endif
12945
12946 if (pmap_is_64bit(pmap)) {
12947 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
12948 } else {
12949 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
12950 }
12951
12952
12953 pmap_lock(pmap);
12954
12955 /*
12956 * For 4KB pages, we either "nest" at the level one page table (1GB) or level
12957 * two (2MB) depending on the address space layout. For 16KB pages, each level
12958 * one entry is 64GB, so we must go to the second level entry (32MB) in order
12959 * to "nest".
12960 *
12961 * Note: This is not "nesting" in the shared cache sense. This definition of
12962 * nesting just means inserting pointers to pre-allocated tables inside of
12963 * the passed in pmap to allow us to share page tables (which map the shared
12964 * page) for every task. This saves at least one page of memory per process
12965 * compared to creating new page tables in every process for mapping the
12966 * shared page.
12967 */
12968
12969 /**
12970 * Allocate the twig page tables if needed, and slam a pointer to the shared
12971 * page's tables into place.
12972 */
12973 while ((ttep = pmap_ttne(pmap, sharedpage_level, sharedpage_vaddr)) == TT_ENTRY_NULL) {
12974 pmap_unlock(pmap);
12975
12976 kr = pmap_expand(pmap, sharedpage_vaddr, options, sharedpage_level);
12977
12978 if (kr != KERN_SUCCESS) {
12979 #if XNU_MONITOR
12980 if (kr == KERN_RESOURCE_SHORTAGE) {
12981 return kr;
12982 } else
12983 #endif
12984 {
12985 panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
12986 }
12987 }
12988
12989 pmap_lock(pmap);
12990 }
12991
12992 if (*ttep != ARM_PTE_EMPTY) {
12993 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
12994 }
12995
12996 src_ttep = pmap_ttne(sharedpage_pmap, sharedpage_level, sharedpage_vaddr);
12997
12998 *ttep = *src_ttep;
12999 FLUSH_PTE_STRONG(ttep);
13000
13001 pmap_unlock(pmap);
13002
13003 return kr;
13004 }
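/*
 * Illustrative sketch (not part of the build): the "nesting" performed above
 * boils down to copying a single twig-level entry from the pre-built shared
 * page pmap into the task pmap, so both point at the same leaf table. The
 * names 'task_pmap' and 'level' below are hypothetical stand-ins for the
 * arguments handled by pmap_insert_sharedpage_internal().
 *
 *      pt_entry_t *dst = pmap_ttne(task_pmap, level, _COMM_PAGE64_BASE_ADDRESS);
 *      pt_entry_t *src = pmap_ttne(sharedpage_pmap_default, level, _COMM_PAGE64_BASE_ADDRESS);
 *      *dst = *src;            // both pmaps now share the commpage leaf table
 *      FLUSH_PTE_STRONG(dst);
 */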
13005
13006 static void
13007 pmap_unmap_sharedpage(
13008 pmap_t pmap)
13009 {
13010 pt_entry_t *ttep;
13011 vm_offset_t sharedpage_vaddr;
13012 pmap_t sharedpage_pmap = sharedpage_pmap_default;
13013
13014 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
13015 const unsigned int sharedpage_level = pt_attr_sharedpage_level(pt_attr);
13016
13017 #if __ARM_MIXED_PAGE_SIZE__
13018 #if !__ARM_16K_PG__
13019 /* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
13020 #error "pmap_unmap_sharedpage requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
13021 #endif /* !__ARM_16K_PG__ */
13022
13023 /* Choose the correct shared page pmap to use. */
13024 const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
13025 if (pmap_page_size == 16384) {
13026 sharedpage_pmap = sharedpage_pmap_default;
13027 } else if (pmap_page_size == 4096) {
13028 sharedpage_pmap = sharedpage_pmap_4k;
13029 } else {
13030 panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size);
13031 }
13032 #endif /* __ARM_MIXED_PAGE_SIZE__ */
13033
13034 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
13035 #error We assume a single page.
13036 #endif
13037
13038 if (pmap_is_64bit(pmap)) {
13039 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
13040 } else {
13041 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
13042 }
13043
13044
13045 ttep = pmap_ttne(pmap, sharedpage_level, sharedpage_vaddr);
13046
13047 if (ttep == NULL) {
13048 return;
13049 }
13050
13051 /* It had better be mapped to the shared page. */
13052 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_ttne(sharedpage_pmap, sharedpage_level, sharedpage_vaddr)) {
13053 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
13054 }
13055
13056 *ttep = ARM_TTE_EMPTY;
13057 FLUSH_PTE_STRONG(ttep);
13058
13059 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, pmap);
13060 sync_tlb_flush();
13061 }
13062
13063 void
13064 pmap_insert_sharedpage(
13065 pmap_t pmap)
13066 {
13067 #if XNU_MONITOR
13068 kern_return_t kr = KERN_FAILURE;
13069
13070 while ((kr = pmap_insert_sharedpage_ppl(pmap)) == KERN_RESOURCE_SHORTAGE) {
13071 pmap_alloc_page_for_ppl(0);
13072 }
13073
13074 pmap_ledger_check_balance(pmap);
13075
13076 if (kr != KERN_SUCCESS) {
13077 panic("%s: failed to insert the shared page, kr=%d, "
13078 "pmap=%p",
13079 __FUNCTION__, kr,
13080 pmap);
13081 }
13082 #else
13083 pmap_insert_sharedpage_internal(pmap);
13084 #endif
13085 }
13086
13087 static boolean_t
13088 pmap_is_64bit(
13089 pmap_t pmap)
13090 {
13091 return pmap->is_64bit;
13092 }
13093
13094 bool
13095 pmap_is_exotic(
13096 pmap_t pmap __unused)
13097 {
13098 return false;
13099 }
13100
13101 #endif
13102
13103 /* ARMTODO -- provide an implementation that accounts for
13104 * holes in the physical map, if any.
13105 */
13106 boolean_t
13107 pmap_valid_page(
13108 ppnum_t pn)
13109 {
13110 return pa_valid(ptoa(pn));
13111 }
13112
13113 boolean_t
13114 pmap_bootloader_page(
13115 ppnum_t pn)
13116 {
13117 pmap_paddr_t paddr = ptoa(pn);
13118
13119 if (pa_valid(paddr)) {
13120 return FALSE;
13121 }
13122 pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
13123 return (io_rgn != NULL) && (io_rgn->wimg & PMAP_IO_RANGE_CARVEOUT);
13124 }
13125
13126 MARK_AS_PMAP_TEXT static boolean_t
13127 pmap_is_empty_internal(
13128 pmap_t pmap,
13129 vm_map_offset_t va_start,
13130 vm_map_offset_t va_end)
13131 {
13132 vm_map_offset_t block_start, block_end;
13133 tt_entry_t *tte_p;
13134
13135 if (pmap == NULL) {
13136 return TRUE;
13137 }
13138
13139 VALIDATE_PMAP(pmap);
13140
13141 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
13142 unsigned int initial_not_in_kdp = not_in_kdp;
13143
13144 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
13145 pmap_lock_ro(pmap);
13146 }
13147
13148 #if (__ARM_VMSA__ == 7)
13149 if (tte_index(pmap, pt_attr, va_end) >= pmap->tte_index_max) {
13150 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
13151 pmap_unlock_ro(pmap);
13152 }
13153 return TRUE;
13154 }
13155 #endif
13156
13157 /* TODO: This will be faster if we increment ttep at each level. */
13158 block_start = va_start;
13159
13160 while (block_start < va_end) {
13161 pt_entry_t *bpte_p, *epte_p;
13162 pt_entry_t *pte_p;
13163
13164 block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
13165 if (block_end > va_end) {
13166 block_end = va_end;
13167 }
13168
13169 tte_p = pmap_tte(pmap, block_start);
13170 if ((tte_p != PT_ENTRY_NULL)
13171 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
13172 pte_p = (pt_entry_t *) ttetokv(*tte_p);
13173 bpte_p = &pte_p[pte_index(pmap, pt_attr, block_start)];
13174 epte_p = &pte_p[pte_index(pmap, pt_attr, block_end)];
13175
13176 for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
13177 if (*pte_p != ARM_PTE_EMPTY) {
13178 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
13179 pmap_unlock_ro(pmap);
13180 }
13181 return FALSE;
13182 }
13183 }
13184 }
13185 block_start = block_end;
13186 }
13187
13188 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
13189 pmap_unlock_ro(pmap);
13190 }
13191
13192 return TRUE;
13193 }
13194
13195 boolean_t
13196 pmap_is_empty(
13197 pmap_t pmap,
13198 vm_map_offset_t va_start,
13199 vm_map_offset_t va_end)
13200 {
13201 #if XNU_MONITOR
13202 return pmap_is_empty_ppl(pmap, va_start, va_end);
13203 #else
13204 return pmap_is_empty_internal(pmap, va_start, va_end);
13205 #endif
13206 }
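/*
 * Illustrative usage sketch (not part of the build): a caller can verify that
 * a VA range carries no mappings before reusing it. 'map' below is a
 * hypothetical vm_map_t.
 *
 *      assert(pmap_is_empty(vm_map_pmap(map), start, end));
 */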
13207
13208 vm_map_offset_t
13209 pmap_max_offset(
13210 boolean_t is64,
13211 unsigned int option)
13212 {
13213 return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
13214 }
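/*
 * Illustrative usage sketch (not part of the build): the VM layer picks an
 * address space ceiling for a new map with a call like the following;
 * 'is_64bit_task' is a hypothetical flag.
 *
 *      vm_map_offset_t ceiling =
 *          pmap_max_offset(is_64bit_task, ARM_PMAP_MAX_OFFSET_DEVICE);
 */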
13215
13216 vm_map_offset_t
13217 pmap_max_64bit_offset(
13218 __unused unsigned int option)
13219 {
13220 vm_map_offset_t max_offset_ret = 0;
13221
13222 #if defined(__arm64__)
13223 #define ARM64_MIN_MAX_ADDRESS (SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000) // end of shared region + 512MB for various purposes
13224 _Static_assert((ARM64_MIN_MAX_ADDRESS > SHARED_REGION_BASE_ARM64) && (ARM64_MIN_MAX_ADDRESS <= MACH_VM_MAX_ADDRESS),
13225 "Minimum address space size outside allowable range");
13226 const vm_map_offset_t min_max_offset = ARM64_MIN_MAX_ADDRESS; // end of shared region + 512MB for various purposes
13227 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
13228 max_offset_ret = arm64_pmap_max_offset_default;
13229 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
13230 max_offset_ret = min_max_offset;
13231 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
13232 max_offset_ret = MACH_VM_MAX_ADDRESS;
13233 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
13234 if (arm64_pmap_max_offset_default) {
13235 max_offset_ret = arm64_pmap_max_offset_default;
13236 } else if (max_mem > 0xC0000000) {
13237 max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
13238 } else if (max_mem > 0x40000000) {
13239 max_offset_ret = min_max_offset + 0x38000000; // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
13240 } else {
13241 max_offset_ret = min_max_offset;
13242 }
13243 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
13244 if (arm64_pmap_max_offset_default) {
13245 // Allow the boot-arg to override jumbo size
13246 max_offset_ret = arm64_pmap_max_offset_default;
13247 } else {
13248 max_offset_ret = MACH_VM_MAX_ADDRESS; // Max offset is 64GB for pmaps with special "jumbo" blessing
13249 }
13250 } else {
13251 panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
13252 }
13253
13254 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
13255 assert(max_offset_ret >= min_max_offset);
13256 #else
13257 panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
13258 #endif
13259
13260 return max_offset_ret;
13261 }
13262
13263 vm_map_offset_t
13264 pmap_max_32bit_offset(
13265 unsigned int option)
13266 {
13267 vm_map_offset_t max_offset_ret = 0;
13268
13269 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
13270 max_offset_ret = arm_pmap_max_offset_default;
13271 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
13272 max_offset_ret = 0x80000000;
13273 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
13274 max_offset_ret = VM_MAX_ADDRESS;
13275 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
13276 if (arm_pmap_max_offset_default) {
13277 max_offset_ret = arm_pmap_max_offset_default;
13278 } else if (max_mem > 0x20000000) {
13279 max_offset_ret = 0x80000000;
13280 } else {
13281 max_offset_ret = 0x80000000;
13282 }
13283 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
13284 max_offset_ret = 0x80000000;
13285 } else {
13286 panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
13287 }
13288
13289 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
13290 return max_offset_ret;
13291 }
13292
13293 #if CONFIG_DTRACE
13294 /*
13295 * Constrain DTrace copyin/copyout actions
13296 */
13297 extern kern_return_t dtrace_copyio_preflight(addr64_t);
13298 extern kern_return_t dtrace_copyio_postflight(addr64_t);
13299
13300 kern_return_t
13301 dtrace_copyio_preflight(
13302 __unused addr64_t va)
13303 {
13304 if (current_map() == kernel_map) {
13305 return KERN_FAILURE;
13306 } else {
13307 return KERN_SUCCESS;
13308 }
13309 }
13310
13311 kern_return_t
13312 dtrace_copyio_postflight(
13313 __unused addr64_t va)
13314 {
13315 return KERN_SUCCESS;
13316 }
13317 #endif /* CONFIG_DTRACE */
13318
13319
13320 void
13321 pmap_flush_context_init(__unused pmap_flush_context *pfc)
13322 {
13323 }
13324
13325
13326 void
13327 pmap_flush(
13328 __unused pmap_flush_context *cpus_to_flush)
13329 {
13330 /* not implemented yet */
13331 return;
13332 }
13333
13334 #if XNU_MONITOR
13335
13336 /*
13337 * Enforce that the address range described by kva and nbytes is not currently
13338 * PPL-owned, and won't become PPL-owned while pinned. This is to prevent
13339 * unintentionally writing to PPL-owned memory.
13340 */
13341 static void
13342 pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes)
13343 {
13344 vm_offset_t end;
13345 if (os_add_overflow(kva, nbytes, &end)) {
13346 panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
13347 }
13348 for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
13349 pmap_paddr_t pa = kvtophys(ckva);
13350 if (!pa_valid(pa)) {
13351 panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
13352 }
13353 pp_attr_t attr;
13354 unsigned int pai = (unsigned int)pa_index(pa);
13355 if (ckva == phystokv(pa)) {
13356 panic("%s(%p): attempt to pin static mapping for page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
13357 }
13358 do {
13359 attr = pp_attr_table[pai] & ~PP_ATTR_NO_MONITOR;
13360 if (attr & PP_ATTR_MONITOR) {
13361 panic("%s(%p): physical page 0x%llx belongs to PPL", __func__, (void*)kva, (uint64_t)pa);
13362 }
13363 } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_NO_MONITOR, &pp_attr_table[pai]));
13364 }
13365 }
13366
13367 static void
13368 pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes)
13369 {
13370 vm_offset_t end;
13371 if (os_add_overflow(kva, nbytes, &end)) {
13372 panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
13373 }
13374 for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
13375 pmap_paddr_t pa = kvtophys(ckva);
13376 if (!pa_valid(pa)) {
13377 panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
13378 }
13379 if (!(pp_attr_table[pa_index(pa)] & PP_ATTR_NO_MONITOR)) {
13380 panic("%s(%p): physical page 0x%llx not pinned", __func__, (void*)kva, (uint64_t)pa);
13381 }
13382 assert(!(pp_attr_table[pa_index(pa)] & PP_ATTR_MONITOR));
13383 pa_clear_no_monitor(pa);
13384 }
13385 }
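/*
 * Illustrative sketch (not part of the build): the pin/unpin pair brackets
 * PPL writes to kernel-supplied output buffers, as the *_internal routines
 * later in this file do. 'out_p' and 'value' are hypothetical.
 *
 *      pmap_pin_kernel_pages((vm_offset_t)out_p, sizeof(*out_p));
 *      *out_p = value;
 *      pmap_unpin_kernel_pages((vm_offset_t)out_p, sizeof(*out_p));
 */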
13386
13387 /*
13388 * Lock down a page, making all mappings read-only, and preventing
13389 * further mappings or removal of this particular kva's mapping.
13390 * Effectively, it makes the page at kva immutable.
13391 */
13392 MARK_AS_PMAP_TEXT static void
13393 pmap_ppl_lockdown_page(vm_address_t kva)
13394 {
13395 pmap_paddr_t pa = kvtophys(kva);
13396 unsigned int pai = (unsigned int)pa_index(pa);
13397 LOCK_PVH(pai);
13398 pv_entry_t **pv_h = pai_to_pvh(pai);
13399
13400 if (__improbable(pa_test_monitor(pa))) {
13401 panic("%#lx: page %llx belongs to PPL", kva, pa);
13402 }
13403
13404 if (__improbable(pvh_get_flags(pv_h) & (PVH_FLAG_LOCKDOWN | PVH_FLAG_EXEC))) {
13405 panic("%#lx: already locked down/executable (%#llx)", kva, pvh_get_flags(pv_h));
13406 }
13407
13408 pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
13409
13410 if (pte_p == PT_ENTRY_NULL) {
13411 panic("%#lx: NULL pte", kva);
13412 }
13413
13414 pt_entry_t tmplate = *pte_p;
13415 if (__improbable((tmplate & ARM_PTE_APMASK) != ARM_PTE_AP(AP_RWNA))) {
13416 panic("%#lx: not a kernel r/w page (%#llx)", kva, tmplate & ARM_PTE_APMASK);
13417 }
13418
13419 pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_LOCKDOWN);
13420
13421 pmap_set_ptov_ap(pai, AP_RONA, FALSE);
13422
13423 UNLOCK_PVH(pai);
13424
13425 pmap_page_protect_options_internal((ppnum_t)atop(pa), VM_PROT_READ, 0);
13426 }
13427
13428 /*
13429 * Release a page from being locked down to the PPL, making it writable
13430 * to the kernel once again.
13431 */
13432 MARK_AS_PMAP_TEXT static void
13433 pmap_ppl_unlockdown_page(vm_address_t kva)
13434 {
13435 pmap_paddr_t pa = kvtophys(kva);
13436 unsigned int pai = (unsigned int)pa_index(pa);
13437 LOCK_PVH(pai);
13438 pv_entry_t **pv_h = pai_to_pvh(pai);
13439
13440 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
13441
13442 if (__improbable(!(pvh_flags & PVH_FLAG_LOCKDOWN))) {
13443 panic("unlockdown attempt on not locked down virtual %#lx/pai %d", kva, pai);
13444 }
13445
13446 pvh_set_flags(pv_h, pvh_flags & ~PVH_FLAG_LOCKDOWN);
13447 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
13448 UNLOCK_PVH(pai);
13449 }
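/*
 * Illustrative sketch (not part of the build): lockdown/unlockdown are used
 * as a pair when handing a kernel page to the PPL for read-only consumption.
 * 'page_kva' is a hypothetical page-aligned kernel address.
 *
 *      pmap_ppl_lockdown_page(page_kva);       // page becomes read-only to the kernel
 *      // ... PPL consumes the page ...
 *      pmap_ppl_unlockdown_page(page_kva);     // page becomes writable again
 */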
13450
13451 #else /* XNU_MONITOR */
13452
13453 static void __unused
13454 pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
13455 {
13456 }
13457
13458 static void __unused
13459 pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
13460 {
13461 }
13462
13463 #endif /* !XNU_MONITOR */
13464
13465
13466 #define PMAP_RESIDENT_INVALID ((mach_vm_size_t)-1)
13467
13468 MARK_AS_PMAP_TEXT static mach_vm_size_t
13469 pmap_query_resident_internal(
13470 pmap_t pmap,
13471 vm_map_address_t start,
13472 vm_map_address_t end,
13473 mach_vm_size_t *compressed_bytes_p)
13474 {
13475 mach_vm_size_t resident_bytes = 0;
13476 mach_vm_size_t compressed_bytes = 0;
13477
13478 pt_entry_t *bpte, *epte;
13479 pt_entry_t *pte_p;
13480 tt_entry_t *tte_p;
13481
13482 if (pmap == NULL) {
13483 return PMAP_RESIDENT_INVALID;
13484 }
13485
13486 VALIDATE_PMAP(pmap);
13487
13488 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
13489
13490 /* Ensure that this request is valid, and addresses exactly one TTE. */
13491 if (__improbable((start % pt_attr_page_size(pt_attr)) ||
13492 (end % pt_attr_page_size(pt_attr)))) {
13493 panic("%s: address range %p, %p not page-aligned to 0x%llx", __func__, (void*)start, (void*)end, pt_attr_page_size(pt_attr));
13494 }
13495
13496 if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
13497 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
13498 }
13499
13500 pmap_lock_ro(pmap);
13501 tte_p = pmap_tte(pmap, start);
13502 if (tte_p == (tt_entry_t *) NULL) {
13503 pmap_unlock_ro(pmap);
13504 return PMAP_RESIDENT_INVALID;
13505 }
13506 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
13507 pte_p = (pt_entry_t *) ttetokv(*tte_p);
13508 bpte = &pte_p[pte_index(pmap, pt_attr, start)];
13509 epte = &pte_p[pte_index(pmap, pt_attr, end)];
13510
13511 for (; bpte < epte; bpte++) {
13512 if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
13513 compressed_bytes += pt_attr_page_size(pt_attr);
13514 } else if (pa_valid(pte_to_pa(*bpte))) {
13515 resident_bytes += pt_attr_page_size(pt_attr);
13516 }
13517 }
13518 }
13519 pmap_unlock_ro(pmap);
13520
13521 if (compressed_bytes_p) {
13522 pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
13523 *compressed_bytes_p += compressed_bytes;
13524 pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
13525 }
13526
13527 return resident_bytes;
13528 }
13529
13530 mach_vm_size_t
13531 pmap_query_resident(
13532 pmap_t pmap,
13533 vm_map_address_t start,
13534 vm_map_address_t end,
13535 mach_vm_size_t *compressed_bytes_p)
13536 {
13537 mach_vm_size_t total_resident_bytes;
13538 mach_vm_size_t compressed_bytes;
13539 vm_map_address_t va;
13540
13541
13542 if (pmap == PMAP_NULL) {
13543 if (compressed_bytes_p) {
13544 *compressed_bytes_p = 0;
13545 }
13546 return 0;
13547 }
13548
13549 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
13550
13551 total_resident_bytes = 0;
13552 compressed_bytes = 0;
13553
13554 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
13555 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
13556 VM_KERNEL_ADDRHIDE(end));
13557
13558 va = start;
13559 while (va < end) {
13560 vm_map_address_t l;
13561 mach_vm_size_t resident_bytes;
13562
13563 l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
13564
13565 if (l > end) {
13566 l = end;
13567 }
13568 #if XNU_MONITOR
13569 resident_bytes = pmap_query_resident_ppl(pmap, va, l, compressed_bytes_p);
13570 #else
13571 resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
13572 #endif
13573 if (resident_bytes == PMAP_RESIDENT_INVALID) {
13574 break;
13575 }
13576
13577 total_resident_bytes += resident_bytes;
13578
13579 va = l;
13580 }
13581
13582 if (compressed_bytes_p) {
13583 *compressed_bytes_p = compressed_bytes;
13584 }
13585
13586 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
13587 total_resident_bytes);
13588
13589 return total_resident_bytes;
13590 }
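/*
 * Illustrative usage sketch (not part of the build; 'task_pmap', 'start' and
 * 'end' are hypothetical):
 *
 *      mach_vm_size_t compressed = 0;
 *      mach_vm_size_t resident =
 *          pmap_query_resident(task_pmap, start, end, &compressed);
 *      // Both results are byte counts, accumulated one twig-sized chunk at a
 *      // time by the loop above.
 */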
13591
13592 #if MACH_ASSERT
13593 static void
13594 pmap_check_ledgers(
13595 pmap_t pmap)
13596 {
13597 int pid;
13598 char *procname;
13599
13600 if (pmap->pmap_pid == 0) {
13601 /*
13602 * This pmap was not or is no longer fully associated
13603 * with a task (e.g. the old pmap after a fork()/exec() or
13604 * spawn()). Its "ledger" still points at a task that is
13605 * now using a different (and active) address space, so
13606 * we can't check that all the pmap ledgers are balanced here.
13607 *
13608 * If the "pid" is set, that means that we went through
13609 * pmap_set_process() in task_terminate_internal(), so
13610 * this task's ledger should not have been re-used and
13611 * all the pmap ledgers should be back to 0.
13612 */
13613 return;
13614 }
13615
13616 pid = pmap->pmap_pid;
13617 procname = pmap->pmap_procname;
13618
13619 vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);
13620
13621 PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
13622 #if 00
13623 PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
13624 #endif
13625 PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
13626 PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
13627 PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
13628 PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
13629 PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
13630 }
13631 #endif /* MACH_ASSERT */
13632
13633 void
13634 pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
13635 {
13636 }
13637
13638
13639 #if CONFIG_PGTRACE
13640 #define PROF_START uint64_t t, nanot;\
13641 t = mach_absolute_time();
13642
13643 #define PROF_END absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
13644 kprintf("%s: took %llu ns\n", __func__, nanot);
13645
13646 #define PMAP_PGTRACE_LOCK(p) \
13647 do { \
13648 *(p) = ml_set_interrupts_enabled(false); \
13649 if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
13650 ml_set_interrupts_enabled(*(p)); \
13651 } while (true)
13652
13653 #define PMAP_PGTRACE_UNLOCK(p) \
13654 do { \
13655 simple_unlock(&(pmap_pgtrace.lock)); \
13656 ml_set_interrupts_enabled(*(p)); \
13657 } while (0)
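/*
 * Illustrative sketch (not part of the build): the lock macros above are used
 * as a pair around pgtrace bookkeeping, stashing the previous interrupt state
 * in a caller-provided bool:
 *
 *      bool ints;
 *      PMAP_PGTRACE_LOCK(&ints);
 *      // ... walk or modify pmap_pgtrace.pages ...
 *      PMAP_PGTRACE_UNLOCK(&ints);
 */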
13658
13659 #define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
13660 do { \
13661 *(pte_p) = (pte_entry); \
13662 FLUSH_PTE(pte_p); \
13663 } while (0)
13664
13665 #define PGTRACE_MAX_MAP 16 // maximum number of VAs supported mapping to the same PA
13666
13667 typedef enum {
13668 UNDEFINED,
13669 PA_UNDEFINED,
13670 VA_UNDEFINED,
13671 DEFINED
13672 } pmap_pgtrace_page_state_t;
13673
13674 typedef struct {
13675 queue_chain_t chain;
13676
13677 /*
13678 * pa - physical address being traced
13679 * maps - list of va maps to the above pa
13680 * map_pool - pool of unused map entries
13681 * map_waste - discarded maps awaiting cleanup
13682 * state - tracing state of this page
13683 */
13684 pmap_paddr_t pa;
13685 queue_head_t maps;
13686 queue_head_t map_pool;
13687 queue_head_t map_waste;
13688 pmap_pgtrace_page_state_t state;
13689 } pmap_pgtrace_page_t;
13690
13691 typedef struct {
13692 queue_chain_t chain;
13693 pmap_t pmap;
13694 vm_map_offset_t va;
13695 } pmap_va_t;
13696
13697 static ZONE_VIEW_DEFINE(ZV_PMAP_VA, "pmap va",
13698 KHEAP_ID_DEFAULT, sizeof(pmap_va_t));
13699
13700 static ZONE_VIEW_DEFINE(ZV_PMAP_PGTRACE, "pmap pgtrace",
13701 KHEAP_ID_DEFAULT, sizeof(pmap_pgtrace_page_t));
13702
13703 static struct {
13704 /*
13705 * pages - list of tracing page info
13706 */
13707 queue_head_t pages;
13708 decl_simple_lock_data(, lock);
13709 } pmap_pgtrace = {};
13710
13711 static void
13712 pmap_pgtrace_init(void)
13713 {
13714 queue_init(&(pmap_pgtrace.pages));
13715 simple_lock_init(&(pmap_pgtrace.lock), 0);
13716
13717 boolean_t enabled;
13718
13719 if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
13720 pgtrace_enabled = enabled;
13721 }
13722 }
13723
13724 // find a page with the given pa - the pmap_pgtrace lock should be held
13725 inline static pmap_pgtrace_page_t *
13726 pmap_pgtrace_find_page(pmap_paddr_t pa)
13727 {
13728 queue_head_t *q = &(pmap_pgtrace.pages);
13729 pmap_pgtrace_page_t *p;
13730
13731 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
13732 if (p->state == UNDEFINED) {
13733 continue;
13734 }
13735 if (p->state == PA_UNDEFINED) {
13736 continue;
13737 }
13738 if (p->pa == pa) {
13739 return p;
13740 }
13741 }
13742
13743 return NULL;
13744 }
13745
13746 // enter a clone of the given pmap, va page and range - the pmap should be locked
13747 static bool
13748 pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
13749 {
13750 bool ints;
13751 queue_head_t *q = &(pmap_pgtrace.pages);
13752 pmap_paddr_t pa_page;
13753 pt_entry_t *ptep, *cptep;
13754 pmap_pgtrace_page_t *p;
13755 bool found = false;
13756
13757 pmap_assert_locked_w(pmap);
13758 assert(va_page == arm_trunc_page(va_page));
13759
13760 PMAP_PGTRACE_LOCK(&ints);
13761
13762 ptep = pmap_pte(pmap, va_page);
13763
13764 // target pte should exist
13765 if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
13766 PMAP_PGTRACE_UNLOCK(&ints);
13767 return false;
13768 }
13769
13770 queue_head_t *mapq;
13771 queue_head_t *mappool;
13772 pmap_pgtrace_map_t *map = NULL;
13773
13774 pa_page = pte_to_pa(*ptep);
13775
13776 // check whether we already have page info defined for this pa
13777 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
13778 mapq = &(p->maps);
13779 mappool = &(p->map_pool);
13780
13781 switch (p->state) {
13782 case PA_UNDEFINED:
13783 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
13784 if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
13785 p->pa = pa_page;
13786 map->range.start = start;
13787 map->range.end = end;
13788 found = true;
13789 break;
13790 }
13791 }
13792 break;
13793
13794 case VA_UNDEFINED:
13795 if (p->pa != pa_page) {
13796 break;
13797 }
13798 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
13799 if (map->cloned == false) {
13800 map->pmap = pmap;
13801 map->ova = va_page;
13802 map->range.start = start;
13803 map->range.end = end;
13804 found = true;
13805 break;
13806 }
13807 }
13808 break;
13809
13810 case DEFINED:
13811 if (p->pa != pa_page) {
13812 break;
13813 }
13814 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
13815 if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
13816 kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
13817 break;
13818 } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
13819 kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
13820 break;
13821 } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
13822 // the range should already be defined as well
13823 found = true;
13824 break;
13825 }
13826 }
13827 break;
13828
13829 default:
13830 panic("invalid state p->state=%x\n", p->state);
13831 }
13832
13833 if (found == true) {
13834 break;
13835 }
13836 }
13837
13838 // do not clone if no page info found
13839 if (found == false) {
13840 PMAP_PGTRACE_UNLOCK(&ints);
13841 return false;
13842 }
13843
13844 // copy pre, target and post ptes to clone ptes
13845 for (int i = 0; i < 3; i++) {
13846 ptep = pmap_pte(pmap, va_page + (i - 1) * ARM_PGBYTES);
13847 cptep = pmap_pte(kernel_pmap, map->cva[i]);
13848 assert(cptep != NULL);
13849 if (ptep == NULL) {
13850 PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
13851 } else {
13852 PGTRACE_WRITE_PTE(cptep, *ptep);
13853 }
13854 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
13855 }
13856
13857 // get ptes for original and clone
13858 ptep = pmap_pte(pmap, va_page);
13859 cptep = pmap_pte(kernel_pmap, map->cva[1]);
13860
13861 // invalidate original pte and mark it as a pgtrace page
13862 PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
13863 PMAP_UPDATE_TLBS(pmap, map->ova, map->ova + ARM_PGBYTES, false);
13864
13865 map->cloned = true;
13866 p->state = DEFINED;
13867
13868 kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);
13869
13870 PMAP_PGTRACE_UNLOCK(&ints);
13871
13872 return true;
13873 }
13874
13875 // This function removes the trace bit and revalidates the pte if applicable. The pmap must be locked.
13876 static void
13877 pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
13878 {
13879 bool ints, found = false;
13880 pmap_pgtrace_page_t *p;
13881 pt_entry_t *ptep;
13882
13883 PMAP_PGTRACE_LOCK(&ints);
13884
13885 // we must have this page info
13886 p = pmap_pgtrace_find_page(pa);
13887 if (p == NULL) {
13888 goto unlock_exit;
13889 }
13890
13891 // find matching map
13892 queue_head_t *mapq = &(p->maps);
13893 queue_head_t *mappool = &(p->map_pool);
13894 pmap_pgtrace_map_t *map;
13895
13896 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
13897 if (map->pmap == pmap && map->ova == va) {
13898 found = true;
13899 break;
13900 }
13901 }
13902
13903 if (!found) {
13904 goto unlock_exit;
13905 }
13906
13907 if (map->cloned == true) {
13908 // Restore the pte to its original state
13909 ptep = pmap_pte(pmap, map->ova);
13910 assert(ptep);
13911 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
13912 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);
13913
13914 // revert clone pages
13915 for (int i = 0; i < 3; i++) {
13916 ptep = pmap_pte(kernel_pmap, map->cva[i]);
13917 assert(ptep != NULL);
13918 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
13919 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
13920 }
13921 }
13922
13923 queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
13924 map->pmap = NULL;
13925 map->ova = (vm_map_offset_t)NULL;
13926 map->cloned = false;
13927 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
13928
13929 kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);
13930
13931 unlock_exit:
13932 PMAP_PGTRACE_UNLOCK(&ints);
13933 }
13934
13935 // remove all clones of given pa - pmap must be locked
13936 static void
13937 pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
13938 {
13939 bool ints;
13940 pmap_pgtrace_page_t *p;
13941 pt_entry_t *ptep;
13942
13943 PMAP_PGTRACE_LOCK(&ints);
13944
13945 // we must have this page info
13946 p = pmap_pgtrace_find_page(pa);
13947 if (p == NULL) {
13948 PMAP_PGTRACE_UNLOCK(&ints);
13949 return;
13950 }
13951
13952 queue_head_t *mapq = &(p->maps);
13953 queue_head_t *mappool = &(p->map_pool);
13954 queue_head_t *mapwaste = &(p->map_waste);
13955 pmap_pgtrace_map_t *map;
13956
13957 // move maps to waste
13958 while (!queue_empty(mapq)) {
13959 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
13960 queue_enter_first(mapwaste, map, pmap_pgtrace_map_t*, chain);
13961 }
13962
13963 PMAP_PGTRACE_UNLOCK(&ints);
13964
13965 // sanitize maps in waste
13966 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
13967 if (map->cloned == true) {
13968 pmap_lock(map->pmap);
13969
13970 // restore the original pte
13971 ptep = pmap_pte(map->pmap, map->ova);
13972 assert(ptep);
13973 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
13974 PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova + ARM_PGBYTES, false);
13975
13976 // revert clone ptes
13977 for (int i = 0; i < 3; i++) {
13978 ptep = pmap_pte(kernel_pmap, map->cva[i]);
13979 assert(ptep != NULL);
13980 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
13981 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
13982 }
13983
13984 pmap_unlock(map->pmap);
13985 }
13986
13987 map->pmap = NULL;
13988 map->ova = (vm_map_offset_t)NULL;
13989 map->cloned = false;
13990 }
13991
13992 PMAP_PGTRACE_LOCK(&ints);
13993
13994 // recycle maps back to map_pool
13995 while (!queue_empty(mapwaste)) {
13996 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
13997 queue_enter_first(mappool, map, pmap_pgtrace_map_t*, chain);
13998 }
13999
14000 PMAP_PGTRACE_UNLOCK(&ints);
14001 }
14002
14003 inline static void
14004 pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
14005 {
14006 uint64_t tsz;
14007 vm_map_offset_t end;
14008
14009 if (pmap == kernel_pmap) {
14010 tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
14011 *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64 - tsz)) << (64 - tsz));
14012 *endp = VM_MAX_KERNEL_ADDRESS;
14013 } else {
14014 tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
14015 if (tsz == 64) {
14016 end = 0;
14017 } else {
14018 end = ((uint64_t)1 << (64 - tsz)) - 1;
14019 }
14020
14021 *startp = 0;
14022 *endp = end;
14023 }
14024
14025 assert(*endp > *startp);
14026
14027 return;
14028 }
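/*
 * Illustrative sketch (not part of the build): with a hypothetical T0SZ of
 * 25, the user search space computed above spans [0, 2^(64-25) - 1]:
 *
 *      uint64_t tsz = 25;
 *      vm_map_offset_t end = ((uint64_t)1 << (64 - tsz)) - 1;  // 0x7fffffffff
 */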
14029
14030 // if pa is mapped in the given pmap, clone it
14031 static uint64_t
14032 pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
14033 {
14034 uint64_t ret = 0;
14035 vm_map_offset_t min, max;
14036 vm_map_offset_t cur_page, end_page;
14037 pt_entry_t *ptep;
14038 tt_entry_t *ttep;
14039 tt_entry_t tte;
14040 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
14041
14042 pmap_pgtrace_get_search_space(pmap, &min, &max);
14043
14044 cur_page = arm_trunc_page(min);
14045 end_page = arm_trunc_page(max);
14046 while (cur_page <= end_page) {
14047 vm_map_offset_t add = 0;
14048
14049 pmap_lock(pmap);
14050
14051 // skip address space we are not interested in
14052 if (pmap == kernel_pmap &&
14053 ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
14054 (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
14055 add = ARM_PGBYTES;
14056 goto unlock_continue;
14057 }
14058
14059 // check whether we can skip l1
14060 ttep = pmap_tt1e(pmap, cur_page);
14061 assert(ttep);
14062 tte = *ttep;
14063 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
14064 add = ARM_TT_L1_SIZE;
14065 goto unlock_continue;
14066 }
14067
14068 // how about l2
14069 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, cur_page)];
14070
14071 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
14072 add = ARM_TT_L2_SIZE;
14073 goto unlock_continue;
14074 }
14075
14076 // ptep finally
14077 ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, cur_page)]);
14078 if (ptep == PT_ENTRY_NULL) {
14079 add = ARM_TT_L3_SIZE;
14080 goto unlock_continue;
14081 }
14082
14083 if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
14084 if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
14085 ret++;
14086 }
14087 }
14088
14089 add = ARM_PGBYTES;
14090
14091 unlock_continue:
14092 pmap_unlock(pmap);
14093
14094 // overflow
14095 if (cur_page + add < cur_page) {
14096 break;
14097 }
14098
14099 cur_page += add;
14100 }
14101
14102
14103 return ret;
14104 }
14105
14106 // search pv table and clone vas of given pa
14107 static uint64_t
14108 pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
14109 {
14110 uint64_t ret = 0;
14111 unsigned long pai;
14112 pv_entry_t **pvh;
14113 pt_entry_t *ptep;
14114 pmap_t pmap;
14115
14116 queue_head_t pmapvaq;
14117 pmap_va_t *pmapva;
14118
14119 queue_init(&pmapvaq);
14120
14121 pai = pa_index(pa);
14122 LOCK_PVH(pai);
14123 pvh = pai_to_pvh(pai);
14124
14125 // collect pmap/va pair from pvh
14126 if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
14127 ptep = pvh_ptep(pvh);
14128 pmap = ptep_get_pmap(ptep);
14129
14130 pmapva = (pmap_va_t *)zalloc(ZV_PMAP_VA);
14131 pmapva->pmap = pmap;
14132 pmapva->va = ptep_get_va(ptep);
14133
14134 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
14135 } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
14136 pv_entry_t *pvep;
14137
14138 pvep = pvh_list(pvh);
14139 while (pvep) {
14140 ptep = pve_get_ptep(pvep);
14141 pmap = ptep_get_pmap(ptep);
14142
14143 pmapva = (pmap_va_t *)zalloc(ZV_PMAP_VA);
14144 pmapva->pmap = pmap;
14145 pmapva->va = ptep_get_va(ptep);
14146
14147 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
14148
14149 pvep = PVE_NEXT_PTR(pve_next(pvep));
14150 }
14151 }
14152
14153 UNLOCK_PVH(pai);
14154
14155 // clone them while making sure mapping still exists
14156 queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
14157 pmap_lock(pmapva->pmap);
14158 ptep = pmap_pte(pmapva->pmap, pmapva->va);
14159 if (pte_to_pa(*ptep) == pa) {
14160 if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
14161 ret++;
14162 }
14163 }
14164 pmap_unlock(pmapva->pmap);
14165
14166 zfree(ZV_PMAP_VA, pmapva);
14167 }
14168
14169 return ret;
14170 }
14171
14172 // allocate a page info
14173 static pmap_pgtrace_page_t *
14174 pmap_pgtrace_alloc_page(void)
14175 {
14176 pmap_pgtrace_page_t *p;
14177 queue_head_t *mapq;
14178 queue_head_t *mappool;
14179 queue_head_t *mapwaste;
14180 pmap_pgtrace_map_t *map;
14181
14182 p = zalloc(ZV_PMAP_PGTRACE);
14183 assert(p);
14184
14185 p->state = UNDEFINED;
14186
14187 mapq = &(p->maps);
14188 mappool = &(p->map_pool);
14189 mapwaste = &(p->map_waste);
14190 queue_init(mapq);
14191 queue_init(mappool);
14192 queue_init(mapwaste);
14193
14194 for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
14195 vm_map_offset_t newcva;
14196 pt_entry_t *cptep;
14197 kern_return_t kr;
14198 vm_map_entry_t entry;
14199
14200 // get a clone va
14201 vm_object_reference(kernel_object);
14202 kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3 * ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
14203 if (kr != KERN_SUCCESS) {
14204 panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
14205 }
14206 VME_OBJECT_SET(entry, kernel_object);
14207 VME_OFFSET_SET(entry, newcva);
14208 vm_map_unlock(kernel_map);
14209
14210 // fill default clone page info and add to pool
14211 map = zalloc(ZV_PMAP_PGTRACE);
14212 for (int j = 0; j < 3; j++) {
14213 vm_map_offset_t addr = newcva + j * ARM_PGBYTES;
14214
14215 // pre-expand the pmap while preemption is still enabled
14216 kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_L3_LEVEL);
14217 if (kr != KERN_SUCCESS) {
14218 panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
14219 }
14220
14221 cptep = pmap_pte(kernel_pmap, addr);
14222 assert(cptep != NULL);
14223
14224 map->cva[j] = addr;
14225 map->cva_spte[j] = *cptep;
14226 }
14227 map->range.start = map->range.end = 0;
14228 map->cloned = false;
14229 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
14230 }
14231
14232 return p;
14233 }
14234
14235 // free a page info
14236 static void
14237 pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
14238 {
14239 queue_head_t *mapq;
14240 queue_head_t *mappool;
14241 queue_head_t *mapwaste;
14242 pmap_pgtrace_map_t *map;
14243
14244 assert(p);
14245
14246 mapq = &(p->maps);
14247 mappool = &(p->map_pool);
14248 mapwaste = &(p->map_waste);
14249
14250 while (!queue_empty(mapq)) {
14251 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
14252 zfree(ZV_PMAP_PGTRACE, map);
14253 }
14254
14255 while (!queue_empty(mappool)) {
14256 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
14257 zfree(ZV_PMAP_PGTRACE, map);
14258 }
14259
14260 while (!queue_empty(mapwaste)) {
14261 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
14262 zfree(ZV_PMAP_PGTRACE, map);
14263 }
14264
14265 zfree(ZV_PMAP_PGTRACE, p);
14266 }
14267
14268 // construct page infos with the given address range
14269 int
14270 pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
14271 {
14272 int ret = 0;
14273 pt_entry_t *ptep;
14274 queue_head_t *q = &(pmap_pgtrace.pages);
14275 bool ints;
14276 vm_map_offset_t cur_page, end_page;
14277
14278 if (start > end) {
14279 kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
14280 return -1;
14281 }
14282
14283 PROF_START
14284
14285 // add each page in given range
14286 cur_page = arm_trunc_page(start);
14287 end_page = arm_trunc_page(end);
14288 while (cur_page <= end_page) {
14289 pmap_paddr_t pa_page = 0;
14290 uint64_t num_cloned = 0;
14291 pmap_pgtrace_page_t *p = NULL, *newp;
14292 bool free_newp = true;
14293 pmap_pgtrace_page_state_t state;
14294
14295 // do all allocations outside of spinlocks
14296 newp = pmap_pgtrace_alloc_page();
14297
14298 // maintain lock ordering: pmap, then kernel_pmap, then the pgtrace lock
14299 if (pmap != NULL) {
14300 pmap_lock_ro(pmap);
14301 }
14302 if (pmap != kernel_pmap) {
14303 pmap_lock_ro(kernel_pmap);
14304 }
14305
14306 // addresses are physical if pmap is null
14307 if (pmap == NULL) {
14308 ptep = NULL;
14309 pa_page = cur_page;
14310 state = VA_UNDEFINED;
14311 } else {
14312 ptep = pmap_pte(pmap, cur_page);
14313 if (ptep != NULL) {
14314 pa_page = pte_to_pa(*ptep);
14315 state = DEFINED;
14316 } else {
14317 state = PA_UNDEFINED;
14318 }
14319 }
14320
14321 // check whether we already have page info for this pa
14322 PMAP_PGTRACE_LOCK(&ints);
14323 if (state != PA_UNDEFINED) {
14324 p = pmap_pgtrace_find_page(pa_page);
14325 }
14326
14327 // add pre-allocated page info if nothing found
14328 if (p == NULL) {
14329 queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
14330 p = newp;
14331 free_newp = false;
14332 }
14333
14334 // now p points to the page info we want
14335 p->state = state;
14336
14337 queue_head_t *mapq = &(p->maps);
14338 queue_head_t *mappool = &(p->map_pool);
14339 pmap_pgtrace_map_t *map;
14340 vm_map_offset_t start_offset, end_offset;
14341
14342 // calculate trace offsets in the page
14343 if (cur_page > start) {
14344 start_offset = 0;
14345 } else {
14346 start_offset = start - cur_page;
14347 }
14348 if (cur_page == end_page) {
14349 end_offset = end - end_page;
14350 } else {
14351 end_offset = ARM_PGBYTES - 1;
14352 }
14353
14354 kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);
14355
14356 // fill map info
14357 assert(!queue_empty(mappool));
14358 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
14359 if (p->state == PA_UNDEFINED) {
14360 map->pmap = pmap;
14361 map->ova = cur_page;
14362 map->range.start = start_offset;
14363 map->range.end = end_offset;
14364 } else if (p->state == VA_UNDEFINED) {
14365 p->pa = pa_page;
14366 map->range.start = start_offset;
14367 map->range.end = end_offset;
14368 } else if (p->state == DEFINED) {
14369 p->pa = pa_page;
14370 map->pmap = pmap;
14371 map->ova = cur_page;
14372 map->range.start = start_offset;
14373 map->range.end = end_offset;
14374 } else {
14375 panic("invalid p->state=%d\n", p->state);
14376 }
14377
14378 // not cloned yet
14379 map->cloned = false;
14380 queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);
14381
14382 // unlock locks
14383 PMAP_PGTRACE_UNLOCK(&ints);
14384 if (pmap != kernel_pmap) {
14385 pmap_unlock_ro(kernel_pmap);
14386 }
14387 if (pmap != NULL) {
14388 pmap_unlock_ro(pmap);
14389 }
14390
14391 // now clone it
14392 if (pa_valid(pa_page)) {
14393 num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
14394 }
14395 if (pmap == NULL) {
14396 num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
14397 } else {
14398 num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
14399 }
14400
14401 // free the pre-allocated page info if we didn't add it to the queue
14402 if (free_newp) {
14403 pmap_pgtrace_free_page(newp);
14404 }
14405
14406 if (num_cloned == 0) {
14407 kprintf("%s: no mapping found for pa_page=%llx but will be added when a page entered\n", __func__, pa_page);
14408 }
14409
14410 ret += num_cloned;
14411
14412 // overflow
14413 if (cur_page + ARM_PGBYTES < cur_page) {
14414 break;
14415 } else {
14416 cur_page += ARM_PGBYTES;
14417 }
14418 }
14419
14420 PROF_END
14421
14422 return ret;
14423 }
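/*
 * Illustrative usage sketch (not part of the build): trace one page of a task
 * and later remove the trace. 'task_pmap' and 'va' are hypothetical.
 *
 *      int added = pmap_pgtrace_add_page(task_pmap, va, va + ARM_PGBYTES);
 *      // ... run the workload; accesses to the page are decoded and logged ...
 *      int removed = pmap_pgtrace_delete_page(task_pmap, va, va + ARM_PGBYTES);
 */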
14424
14425 // delete page infos for given address range
14426 int
14427 pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
14428 {
14429 int ret = 0;
14430 bool ints;
14431 queue_head_t *q = &(pmap_pgtrace.pages);
14432 pmap_pgtrace_page_t *p;
14433 vm_map_offset_t cur_page, end_page;
14434
14435 kprintf("%s start=%llx end=%llx\n", __func__, start, end);
14436
14437 PROF_START
14438
14439 pt_entry_t *ptep;
14440 pmap_paddr_t pa_page;
14441
14442 // remove page info from start to end
14443 cur_page = arm_trunc_page(start);
14444 end_page = arm_trunc_page(end);
14445 while (cur_page <= end_page) {
14446 p = NULL;
14447
14448 if (pmap == NULL) {
14449 pa_page = cur_page;
14450 } else {
14451 pmap_lock(pmap);
14452 ptep = pmap_pte(pmap, cur_page);
14453 if (ptep == NULL) {
14454 pmap_unlock(pmap);
14455 goto cont;
14456 }
14457 pa_page = pte_to_pa(*ptep);
14458 pmap_unlock(pmap);
14459 }
14460
14461 // remove all clones and validate
14462 pmap_pgtrace_remove_all_clone(pa_page);
14463
14464 // find page info and delete
14465 PMAP_PGTRACE_LOCK(&ints);
14466 p = pmap_pgtrace_find_page(pa_page);
14467 if (p != NULL) {
14468 queue_remove(q, p, pmap_pgtrace_page_t *, chain);
14469 ret++;
14470 }
14471 PMAP_PGTRACE_UNLOCK(&ints);
14472
14473 // free outside of locks
14474 if (p != NULL) {
14475 pmap_pgtrace_free_page(p);
14476 }
14477
14478 cont:
14479 // overflow
14480 if (cur_page + ARM_PGBYTES < cur_page) {
14481 break;
14482 } else {
14483 cur_page += ARM_PGBYTES;
14484 }
14485 }
14486
14487 PROF_END
14488
14489 return ret;
14490 }
14491
14492 kern_return_t
14493 pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
14494 {
14495 pt_entry_t *ptep;
14496 pgtrace_run_result_t res;
14497 pmap_pgtrace_page_t *p;
14498 bool ints, found = false;
14499 pmap_paddr_t pa;
14500
14501 // Quick check if we are interested
14502 ptep = pmap_pte(pmap, va);
14503 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
14504 return KERN_FAILURE;
14505 }
14506
14507 PMAP_PGTRACE_LOCK(&ints);
14508
14509 // Check again since access is serialized
14510 ptep = pmap_pte(pmap, va);
14511 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
14512 PMAP_PGTRACE_UNLOCK(&ints);
14513 return KERN_FAILURE;
14514 } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
14515 // Somehow this cpu's TLB has not been updated yet
14516 kprintf("%s Somehow this cpu's tlb has not updated?\n", __func__);
14517 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);
14518
14519 PMAP_PGTRACE_UNLOCK(&ints);
14520 return KERN_SUCCESS;
14521 }
14522
14523 // Find if this pa is what we are tracing
14524 pa = pte_to_pa(*ptep);
14525
14526 p = pmap_pgtrace_find_page(arm_trunc_page(pa));
14527 if (p == NULL) {
14528 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
14529 }
14530
14531 // find if pmap and va are also matching
14532 queue_head_t *mapq = &(p->maps);
14533 queue_head_t *mapwaste = &(p->map_waste);
14534 pmap_pgtrace_map_t *map;
14535
14536 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
14537 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
14538 found = true;
14539 break;
14540 }
14541 }
14542
14543 // if not found, search map waste as they are still valid
14544 if (!found) {
14545 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
14546 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
14547 found = true;
14548 break;
14549 }
14550 }
14551 }
14552
14553 if (!found) {
14554 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
14555 }
14556
14557 // Decode and run it on the clone map
14558 bzero(&res, sizeof(res));
14559 pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss), // instruction
14560 va, map->cva, // fault va and clone page vas
14561 ss, &res);
14562
14563 // write a log if in range
14564 vm_map_offset_t offset = va - map->ova;
14565 if (map->range.start <= offset && offset <= map->range.end) {
14566 pgtrace_write_log(res);
14567 }
14568
14569 PMAP_PGTRACE_UNLOCK(&ints);
14570
14571 // Return to next instruction
14572 add_saved_state_pc(ss, sizeof(uint32_t));
14573
14574 return KERN_SUCCESS;
14575 }
14576 #endif
14577
14578 /**
14579 * The minimum shared region nesting size is used by the VM to determine when to
14580 * break up large mappings to nested regions. The smallest size that these
14581 * mappings can be broken into is determined by the page table level at which
14582 * those regions are being nested and the size of the page tables.
14583 *
14584 * For instance, if a nested region is nesting at L2 for a process utilizing
14585 * 16KB page tables, then the minimum nesting size would be 32MB (size of an L2
14586 * block entry).
14587 *
14588 * @param pmap The target pmap to determine the block size based on whether it's
14589 * using 16KB or 4KB page tables.
14590 */
14591 uint64_t
14592 pmap_shared_region_size_min(__unused pmap_t pmap)
14593 {
14594 #if (__ARM_VMSA__ > 7)
14595 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
14596
14597 /**
14598 * We always nest the shared region at L2 (32MB for 16KB pages, 2MB for
14599 * 4KB pages). This means that a target pmap will contain L2 entries that
14600 * point to shared L3 page tables in the shared region pmap.
14601 */
14602 return pt_attr_twig_size(pt_attr);
14603
14604 #else
14605 return ARM_NESTING_SIZE_MIN;
14606 #endif
14607 }
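/*
 * Illustrative sketch (not part of the build): for a 16KB-page pmap the twig
 * (L2) size, and therefore the minimum nesting size, is 32MB; for a 4KB-page
 * pmap it is 2MB. 'task_pmap' is hypothetical.
 *
 *      uint64_t min_nest = pmap_shared_region_size_min(task_pmap);
 */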
14608
14609 /**
14610 * The concept of a nesting size maximum was made to accommodate restrictions in
14611 * place for nesting regions on PowerPC. There are no restrictions on max nesting
14612 * sizes on x86/armv7/armv8, so this should get removed.
14613 *
14614 * TODO: <rdar://problem/65247502> Completely remove pmap_nesting_size_max()
14615 */
14616 uint64_t
14617 pmap_nesting_size_max(__unused pmap_t pmap)
14618 {
14619 return ARM_NESTING_SIZE_MAX;
14620 }
14621
14622 boolean_t
14623 pmap_enforces_execute_only(
14624 #if (__ARM_VMSA__ == 7)
14625 __unused
14626 #endif
14627 pmap_t pmap)
14628 {
14629 #if (__ARM_VMSA__ > 7)
14630 return pmap != kernel_pmap;
14631 #else
14632 return FALSE;
14633 #endif
14634 }
14635
14636 MARK_AS_PMAP_TEXT void
14637 pmap_set_vm_map_cs_enforced_internal(
14638 pmap_t pmap,
14639 bool new_value)
14640 {
14641 VALIDATE_PMAP(pmap);
14642 pmap->pmap_vm_map_cs_enforced = new_value;
14643 }
14644
14645 void
14646 pmap_set_vm_map_cs_enforced(
14647 pmap_t pmap,
14648 bool new_value)
14649 {
14650 #if XNU_MONITOR
14651 pmap_set_vm_map_cs_enforced_ppl(pmap, new_value);
14652 #else
14653 pmap_set_vm_map_cs_enforced_internal(pmap, new_value);
14654 #endif
14655 }
14656
14657 extern int cs_process_enforcement_enable;
14658 bool
14659 pmap_get_vm_map_cs_enforced(
14660 pmap_t pmap)
14661 {
14662 if (cs_process_enforcement_enable) {
14663 return true;
14664 }
14665 return pmap->pmap_vm_map_cs_enforced;
14666 }
14667
14668 MARK_AS_PMAP_TEXT void
14669 pmap_set_jit_entitled_internal(
14670 __unused pmap_t pmap)
14671 {
14672 return;
14673 }
14674
14675 void
14676 pmap_set_jit_entitled(
14677 pmap_t pmap)
14678 {
14679 #if XNU_MONITOR
14680 pmap_set_jit_entitled_ppl(pmap);
14681 #else
14682 pmap_set_jit_entitled_internal(pmap);
14683 #endif
14684 }
14685
14686 bool
14687 pmap_get_jit_entitled(
14688 __unused pmap_t pmap)
14689 {
14690 return false;
14691 }
14692
14693 MARK_AS_PMAP_TEXT static kern_return_t
14694 pmap_query_page_info_internal(
14695 pmap_t pmap,
14696 vm_map_offset_t va,
14697 int *disp_p)
14698 {
14699 pmap_paddr_t pa;
14700 int disp;
14701 int pai;
14702 pt_entry_t *pte;
14703 pv_entry_t **pv_h, *pve_p;
14704
14705 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
14706 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
14707 *disp_p = 0;
14708 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
14709 return KERN_INVALID_ARGUMENT;
14710 }
14711
14712 disp = 0;
14713
14714 VALIDATE_PMAP(pmap);
14715 pmap_lock_ro(pmap);
14716
14717 pte = pmap_pte(pmap, va);
14718 if (pte == PT_ENTRY_NULL) {
14719 goto done;
14720 }
14721
14722 pa = pte_to_pa(*pte);
14723 if (pa == 0) {
14724 if (ARM_PTE_IS_COMPRESSED(*pte, pte)) {
14725 disp |= PMAP_QUERY_PAGE_COMPRESSED;
14726 if (*pte & ARM_PTE_COMPRESSED_ALT) {
14727 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
14728 }
14729 }
14730 } else {
14731 disp |= PMAP_QUERY_PAGE_PRESENT;
14732 pai = (int) pa_index(pa);
14733 if (!pa_valid(pa)) {
14734 goto done;
14735 }
14736 LOCK_PVH(pai);
14737 pv_h = pai_to_pvh(pai);
14738 pve_p = PV_ENTRY_NULL;
14739 if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
14740 pve_p = pvh_list(pv_h);
14741 while (pve_p != PV_ENTRY_NULL &&
14742 pve_get_ptep(pve_p) != pte) {
14743 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
14744 }
14745 }
14746 if (IS_ALTACCT_PAGE(pai, pve_p)) {
14747 disp |= PMAP_QUERY_PAGE_ALTACCT;
14748 } else if (IS_REUSABLE_PAGE(pai)) {
14749 disp |= PMAP_QUERY_PAGE_REUSABLE;
14750 } else if (IS_INTERNAL_PAGE(pai)) {
14751 disp |= PMAP_QUERY_PAGE_INTERNAL;
14752 }
14753 UNLOCK_PVH(pai);
14754 }
14755
14756 done:
14757 pmap_unlock_ro(pmap);
14758 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
14759 *disp_p = disp;
14760 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
14761 return KERN_SUCCESS;
14762 }
14763
14764 kern_return_t
14765 pmap_query_page_info(
14766 pmap_t pmap,
14767 vm_map_offset_t va,
14768 int *disp_p)
14769 {
14770 #if XNU_MONITOR
14771 return pmap_query_page_info_ppl(pmap, va, disp_p);
14772 #else
14773 return pmap_query_page_info_internal(pmap, va, disp_p);
14774 #endif
14775 }
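/*
 * Illustrative usage sketch (not part of the build; 'task_pmap' and 'va' are
 * hypothetical):
 *
 *      int disp = 0;
 *      if (pmap_query_page_info(task_pmap, va, &disp) == KERN_SUCCESS &&
 *          (disp & PMAP_QUERY_PAGE_COMPRESSED)) {
 *              // the page is in the compressor rather than resident
 *      }
 */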
14776
14777 MARK_AS_PMAP_TEXT kern_return_t
14778 pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
14779 {
14780
14781 return KERN_SUCCESS;
14782 }
14783
14784 kern_return_t
14785 pmap_return(boolean_t do_panic, boolean_t do_recurse)
14786 {
14787 #if XNU_MONITOR
14788 return pmap_return_ppl(do_panic, do_recurse);
14789 #else
14790 return pmap_return_internal(do_panic, do_recurse);
14791 #endif
14792 }
14793
14794
14795
14796
14797 MARK_AS_PMAP_TEXT static void
14798 pmap_footprint_suspend_internal(
14799 vm_map_t map,
14800 boolean_t suspend)
14801 {
14802 #if DEVELOPMENT || DEBUG
14803 if (suspend) {
14804 current_thread()->pmap_footprint_suspended = TRUE;
14805 map->pmap->footprint_was_suspended = TRUE;
14806 } else {
14807 current_thread()->pmap_footprint_suspended = FALSE;
14808 }
14809 #else /* DEVELOPMENT || DEBUG */
14810 (void) map;
14811 (void) suspend;
14812 #endif /* DEVELOPMENT || DEBUG */
14813 }
14814
14815 void
14816 pmap_footprint_suspend(
14817 vm_map_t map,
14818 boolean_t suspend)
14819 {
14820 #if XNU_MONITOR
14821 pmap_footprint_suspend_ppl(map, suspend);
14822 #else
14823 pmap_footprint_suspend_internal(map, suspend);
14824 #endif
14825 }
14826
14827 #if defined(__arm64__) && (DEVELOPMENT || DEBUG)
14828
14829 struct page_table_dump_header {
14830 uint64_t pa;
14831 uint64_t num_entries;
14832 uint64_t start_va;
14833 uint64_t end_va;
14834 };
14835
14836 static kern_return_t
14837 pmap_dump_page_tables_recurse(pmap_t pmap,
14838 const tt_entry_t *ttp,
14839 unsigned int cur_level,
14840 unsigned int level_mask,
14841 uint64_t start_va,
14842 void *buf_start,
14843 void *buf_end,
14844 size_t *bytes_copied)
14845 {
14846 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
14847 uint64_t num_entries = pt_attr_page_size(pt_attr) / sizeof(*ttp);
14848
14849 uint64_t size = pt_attr->pta_level_info[cur_level].size;
14850 uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
14851 uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
14852 uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
14853
14854 void *bufp = (uint8_t*)buf_start + *bytes_copied;
14855
14856 if (cur_level == pt_attr_root_level(pt_attr)) {
14857 num_entries = pmap_root_alloc_size(pmap) / sizeof(tt_entry_t);
14858 }
14859
14860 uint64_t tt_size = num_entries * sizeof(tt_entry_t);
14861 const tt_entry_t *tt_end = &ttp[num_entries];
14862
14863 if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
14864 return KERN_INSUFFICIENT_BUFFER_SIZE;
14865 }
14866
14867 if (level_mask & (1U << cur_level)) {
14868 struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
14869 header->pa = ml_static_vtop((vm_offset_t)ttp);
14870 header->num_entries = num_entries;
14871 header->start_va = start_va;
14872 header->end_va = start_va + (num_entries * size);
14873
14874 bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
14875 *bytes_copied = *bytes_copied + sizeof(*header) + tt_size;
14876 }
14877 uint64_t current_va = start_va;
14878
14879 for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
14880 tt_entry_t tte = *ttep;
14881
14882 if (!(tte & valid_mask)) {
14883 continue;
14884 }
14885
14886 if ((tte & type_mask) == type_block) {
14887 continue;
14888 } else {
14889 if (cur_level >= pt_attr_leaf_level(pt_attr)) {
14890 panic("%s: corrupt entry %#llx at %p, "
14891 "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
14892 __FUNCTION__, tte, ttep,
14893 ttp, cur_level, bufp, buf_end);
14894 }
14895
14896 const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
14897
14898 kern_return_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1,
14899 level_mask, current_va, buf_start, buf_end, bytes_copied);
14900
14901 if (recurse_result != KERN_SUCCESS) {
14902 return recurse_result;
14903 }
14904 }
14905 }
14906
14907 return KERN_SUCCESS;
14908 }
14909
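/*
 * Debugger-only entry point: dump the page tables of 'pmap' into
 * [bufp, buf_end), starting from the root table at pmap->tte.  Must be
 * called from kernel debugger (KDP) context; panics otherwise.
 */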
14910 kern_return_t
14911 pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end, unsigned int level_mask, size_t *bytes_copied)
14912 {
14913 if (not_in_kdp) {
14914 panic("pmap_dump_page_tables must only be called from kernel debugger context");
14915 }
14916 return pmap_dump_page_tables_recurse(pmap, pmap->tte, pt_attr_root_level(pmap_get_pt_attr(pmap)),
14917 level_mask, pmap->min, bufp, buf_end, bytes_copied);
14918 }
14919
14920 #else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
14921
14922 kern_return_t
14923 pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused,
14924 unsigned int level_mask __unused, size_t *bytes_copied __unused)
14925 {
14926 return KERN_NOT_SUPPORTED;
14927 }
14928 #endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
14929
14930
14931 #ifdef CONFIG_XNUPOST
14932 #ifdef __arm64__
14933 static volatile bool pmap_test_took_fault = false;
14934
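/*
 * Expected-fault handler armed around the test accesses below.  It
 * recognizes EL1 data aborts caused by L3 permission or access-flag faults,
 * records that a fault occurred, and advances the PC past the faulting
 * instruction so the test can continue.
 */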
14935 static bool
14936 pmap_test_fault_handler(arm_saved_state_t * state)
14937 {
14938 bool retval = false;
14939 uint32_t esr = get_saved_state_esr(state);
14940 esr_exception_class_t class = ESR_EC(esr);
14941 fault_status_t fsc = ISS_IA_FSC(ESR_ISS(esr));
14942
14943 if ((class == ESR_EC_DABORT_EL1) &&
14944 ((fsc == FSC_PERMISSION_FAULT_L3) || (fsc == FSC_ACCESS_FLAG_FAULT_L3))) {
14945 pmap_test_took_fault = true;
14946 /* return to the instruction immediately after the faulting load/store */
14947 set_saved_state_pc(state, get_saved_state_pc(state) + 4);
14948 retval = true;
14949 }
14950
14951 return retval;
14952 }
14953
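/*
 * Perform a single read or write probe at 'va' in 'pmap' and report whether
 * the observed behaviour matched 'should_fault'.  The probe runs with
 * interrupts and preemption disabled, with the test pmap switched in, with
 * PAN disabled, and with the expected-fault handler armed.
 */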
14954 static bool
14955 pmap_test_access(pmap_t pmap, vm_map_address_t va, bool should_fault, bool is_write)
14956 {
14957 /*
14958 * We're switching pmaps without using the normal thread mechanism;
14959 * disable interrupts and preemption to avoid any unexpected memory
14960 * accesses.
14961 */
14962 boolean_t old_int_state = ml_set_interrupts_enabled(false);
14963 pmap_t old_pmap = current_pmap();
14964 mp_disable_preemption();
14965 pmap_switch(pmap);
14966
14967 pmap_test_took_fault = false;
14968
14969 /* Disable PAN; pmap shouldn't be the kernel pmap. */
14970 #if __ARM_PAN_AVAILABLE__
14971 __builtin_arm_wsr("pan", 0);
14972 #endif /* __ARM_PAN_AVAILABLE__ */
14973 ml_expect_fault_begin(pmap_test_fault_handler, va);
14974
14975 if (is_write) {
14976 *((volatile uint64_t*)(va)) = 0xdec0de;
14977 } else {
14978 volatile uint64_t tmp = *((volatile uint64_t*)(va));
14979 (void)tmp;
14980 }
14981
14982 /* Save the fault bool, and undo the gross stuff we did. */
14983 bool took_fault = pmap_test_took_fault;
14984 ml_expect_fault_end();
14985 #if __ARM_PAN_AVAILABLE__
14986 __builtin_arm_wsr("pan", 1);
14987 #endif /* __ARM_PAN_AVAILABLE__ */
14988
14989 pmap_switch(old_pmap);
14990 mp_enable_preemption();
14991 ml_set_interrupts_enabled(old_int_state);
14992 bool retval = (took_fault == should_fault);
14993 return retval;
14994 }
14995
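/*
 * Read/write wrappers around pmap_test_access(); each logs a T_FAIL when
 * the access does not behave as expected.
 */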
14996 static bool
14997 pmap_test_read(pmap_t pmap, vm_map_address_t va, bool should_fault)
14998 {
14999 bool retval = pmap_test_access(pmap, va, should_fault, false);
15000
15001 if (!retval) {
15002 T_FAIL("%s: %s, "
15003 "pmap=%p, va=%p, should_fault=%u",
15004 __func__, should_fault ? "did not fault" : "faulted",
15005 pmap, (void*)va, (unsigned)should_fault);
15006 }
15007
15008 return retval;
15009 }
15010
15011 static bool
15012 pmap_test_write(pmap_t pmap, vm_map_address_t va, bool should_fault)
15013 {
15014 bool retval = pmap_test_access(pmap, va, should_fault, true);
15015
15016 if (!retval) {
15017 T_FAIL("%s: %s, "
15018 "pmap=%p, va=%p, should_fault=%u",
15019 __func__, should_fault ? "did not fault" : "faulted",
15020 pmap, (void*)va, (unsigned)should_fault);
15021 }
15022
15023 return retval;
15024 }
15025
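/*
 * Check that, of the referenced/modified bits, exactly those in
 * 'should_be_set' are set for the physical page 'pa'.
 */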
15026 static bool
15027 pmap_test_check_refmod(pmap_paddr_t pa, unsigned int should_be_set)
15028 {
15029 unsigned int should_be_clear = (~should_be_set) & (VM_MEM_REFERENCED | VM_MEM_MODIFIED);
15030 unsigned int bits = pmap_get_refmod((ppnum_t)atop(pa));
15031
15032 bool retval = (((bits & should_be_set) == should_be_set) && ((bits & should_be_clear) == 0));
15033
15034 if (!retval) {
15035 T_FAIL("%s: bits=%u, "
15036 "pa=%p, should_be_set=%u",
15037 __func__, bits,
15038 (void*)pa, should_be_set);
15039 }
15040
15041 return retval;
15042 }
15043
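/*
 * Probe both read and write access at 'va'.  Note the bitwise OR: both
 * probes are always executed, even if the first one reports a failure.
 */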
15044 static __attribute__((noinline)) bool
15045 pmap_test_read_write(pmap_t pmap, vm_map_address_t va, bool allow_read, bool allow_write)
15046 {
15047 bool retval = (pmap_test_read(pmap, va, !allow_read) | pmap_test_write(pmap, va, !allow_write));
15048 return retval;
15049 }
15050
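/*
 * Core of the pmap XNUPOST test: create a pmap with the given flags, map
 * test pages, and exercise PTE lookup, fault behaviour, the ref/mod state
 * machine, pmap_protect()/pmap_page_protect(), and pmap_disconnect()
 * before tearing everything down.
 */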
15051 static int
15052 pmap_test_test_config(unsigned int flags)
15053 {
15054 T_LOG("running pmap_test_test_config flags=0x%X", flags);
15055 unsigned int map_count = 0;
15056 unsigned long page_ratio = 0;
15057 pmap_t pmap = pmap_create_options(NULL, 0, flags);
15058
15059 if (!pmap) {
15060 panic("Failed to allocate pmap");
15061 }
15062
15063 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
15064 uintptr_t native_page_size = pt_attr_page_size(native_pt_attr);
15065 uintptr_t pmap_page_size = pt_attr_page_size(pt_attr);
15066 uintptr_t pmap_twig_size = pt_attr_twig_size(pt_attr);
15067
15068 if (pmap_page_size <= native_page_size) {
15069 page_ratio = native_page_size / pmap_page_size;
15070 } else {
15071 /*
15072 * A page_ratio of less than 1 (i.e. a pmap page size larger than the
15073 * native page size) is not currently supported by the pmap layer; panic.
15074 */
15075 panic("%s: page_ratio < 1, native_page_size=%lu, pmap_page_size=%lu, "
15076 "flags=%u",
15077 __func__, native_page_size, pmap_page_size,
15078 flags);
15079 }
15080
15081 if (PAGE_RATIO > 1) {
15082 /*
15083 * The kernel is deliberately pretending to have 16KB pages.
15084 * The pmap layer has code that supports this, so pretend the
15085 * page size is larger than it is.
15086 */
15087 pmap_page_size = PAGE_SIZE;
15088 native_page_size = PAGE_SIZE;
15089 }
15090
15091 /*
15092 * Get two pages from the VM; one to be mapped wired, and one to be
15093 * mapped nonwired.
15094 */
15095 vm_page_t unwired_vm_page = vm_page_grab();
15096 vm_page_t wired_vm_page = vm_page_grab();
15097
15098 if ((unwired_vm_page == VM_PAGE_NULL) || (wired_vm_page == VM_PAGE_NULL)) {
15099 panic("Failed to grab VM pages");
15100 }
15101
15102 ppnum_t pn = VM_PAGE_GET_PHYS_PAGE(unwired_vm_page);
15103 ppnum_t wired_pn = VM_PAGE_GET_PHYS_PAGE(wired_vm_page);
15104
15105 pmap_paddr_t pa = ptoa(pn);
15106 pmap_paddr_t wired_pa = ptoa(wired_pn);
15107
15108 /*
15109 * We'll start mappings at the second twig TT. This keeps us from only
15110 * using the first entry in each TT, which would trivially be address
15111 * 0; one of the things we will need to test is retrieving the VA for
15112 * a given PTE.
15113 */
15114 vm_map_address_t va_base = pmap_twig_size;
15115 vm_map_address_t wired_va_base = ((2 * pmap_twig_size) - pmap_page_size);
15116
15117 if (wired_va_base < (va_base + (page_ratio * pmap_page_size))) {
15118 /*
15119 * Not exactly a functional failure, but this test relies on
15120 * there being a spare PTE slot we can use to pin the TT.
15121 */
15122 panic("Cannot pin translation table");
15123 }
15124
15125 /*
15126 * Create the wired mapping; this will prevent the pmap layer from
15127 * reclaiming our test TTs, which would interfere with this test
15128 * ("interfere" -> "make it panic").
15129 */
15130 pmap_enter_addr(pmap, wired_va_base, wired_pa, VM_PROT_READ, VM_PROT_READ, 0, true);
15131
15132 /*
15133 * Create read-only mappings of the nonwired page; if the pmap does
15134 * not use the same page size as the kernel, create multiple mappings
15135 * so that the kernel page is fully mapped.
15136 */
15137 for (map_count = 0; map_count < page_ratio; map_count++) {
15138 pmap_enter_addr(pmap, va_base + (pmap_page_size * map_count), pa + (pmap_page_size * (map_count)), VM_PROT_READ, VM_PROT_READ, 0, false);
15139 }
15140
15141 /* Validate that all the PTEs have the expected PA and VA. */
15142 for (map_count = 0; map_count < page_ratio; map_count++) {
15143 pt_entry_t * ptep = pmap_pte(pmap, va_base + (pmap_page_size * map_count));
15144
15145 if (pte_to_pa(*ptep) != (pa + (pmap_page_size * map_count))) {
15146 T_FAIL("Unexpected pa=%p, expected %p, map_count=%u",
15147 (void*)pte_to_pa(*ptep), (void*)(pa + (pmap_page_size * map_count)), map_count);
15148 }
15149
15150 if (ptep_get_va(ptep) != (va_base + (pmap_page_size * map_count))) {
15151 T_FAIL("Unexpected va=%p, expected %p, map_count=%u",
15152 (void*)ptep_get_va(ptep), (void*)(va_base + (pmap_page_size * map_count)), map_count);
15153 }
15154 }
15155
15156 T_LOG("Validate that reads to our mapping do not fault.");
15157 pmap_test_read(pmap, va_base, false);
15158
15159 T_LOG("Validate that writes to our mapping fault.");
15160 pmap_test_write(pmap, va_base, true);
15161
15162 T_LOG("Make the first mapping writable.");
15163 pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
15164
15165 T_LOG("Validate that writes to our mapping do not fault.");
15166 pmap_test_write(pmap, va_base, false);
15167
15168 #if PMAP_CS
15169 bool pmap_cs_enforced = pmap->pmap_cs_enforced;
15170
15171 T_LOG("Disable PMAP CS enforcement");
15172 pmap_cs_configure_enforcement(pmap, false);
15173 #endif
15174
15175 T_LOG("Make the first mapping XO.");
15176 pmap_enter_addr(pmap, va_base, pa, VM_PROT_EXECUTE, VM_PROT_EXECUTE, 0, false);
15177
15178 #if __APRR_SUPPORTED__
15179 T_LOG("Validate that reads to our mapping fault.");
15180 pmap_test_read(pmap, va_base, true);
15181 #else
15182 T_LOG("Validate that reads to our mapping do not fault.");
15183 pmap_test_read(pmap, va_base, false);
15184 #endif
15185
15186 T_LOG("Validate that writes to our mapping fault.");
15187 pmap_test_write(pmap, va_base, true);
15188
15189 #if PMAP_CS
15190 T_LOG("Set PMAP CS enforcement configuration to previous value.");
15191 pmap_cs_configure_enforcement(pmap, pmap_cs_enforced);
15192 #endif
15193
15194 /*
15195 * For page ratios of greater than 1: validate that writes to the other
15196 * mappings still fault. Remove the mappings afterwards (we're done
15197 * with page ratio testing).
15198 */
15199 for (map_count = 1; map_count < page_ratio; map_count++) {
15200 pmap_test_write(pmap, va_base + (pmap_page_size * map_count), true);
15201 pmap_remove(pmap, va_base + (pmap_page_size * map_count), va_base + (pmap_page_size * map_count) + pmap_page_size);
15202 }
15203
15204 T_LOG("Mark the page unreferenced and unmodified.");
15205 pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
15206 pmap_test_check_refmod(pa, 0);
15207
15208 /*
15209 * Begin testing the ref/mod state machine. Re-enter the mapping with
15210 * different protection/fault_type settings, and confirm that the
15211 * ref/mod state matches our expectations at each step.
15212 */
15213 T_LOG("!ref/!mod: read, no fault. Expect ref/!mod");
15214 pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_NONE, 0, false);
15215 pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
15216
15217 T_LOG("!ref/!mod: read, read fault. Expect ref/!mod");
15218 pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
15219 pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
15220 pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
15221
15222 T_LOG("!ref/!mod: rw, read fault. Expect ref/!mod");
15223 pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
15224 pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, false);
15225 pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
15226
15227 T_LOG("ref/!mod: rw, read fault. Expect ref/!mod");
15228 pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ, 0, false);
15229 pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
15230
15231 T_LOG("!ref/!mod: rw, rw fault. Expect ref/mod");
15232 pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
15233 pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
15234 pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
15235
15236 /*
15237 * Shared memory testing; we'll have two mappings; one read-only,
15238 * one read-write.
15239 */
15240 vm_map_address_t rw_base = va_base;
15241 vm_map_address_t ro_base = va_base + pmap_page_size;
15242
15243 pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
15244 pmap_enter_addr(pmap, ro_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
15245
15246 /*
15247 * Test that we take faults as expected for unreferenced/unmodified
15248 * pages. Also test the arm_fast_fault interface, to ensure that
15249 * mapping permissions change as expected.
15250 */
15251 T_LOG("!ref/!mod: expect no access");
15252 pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
15253 pmap_test_read_write(pmap, ro_base, false, false);
15254 pmap_test_read_write(pmap, rw_base, false, false);
15255
15256 T_LOG("Read fault; expect !ref/!mod -> ref/!mod, read access");
15257 arm_fast_fault(pmap, rw_base, VM_PROT_READ, false, false);
15258 pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
15259 pmap_test_read_write(pmap, ro_base, true, false);
15260 pmap_test_read_write(pmap, rw_base, true, false);
15261
15262 T_LOG("Write fault; expect ref/!mod -> ref/mod, read and write access");
15263 arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
15264 pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
15265 pmap_test_read_write(pmap, ro_base, true, false);
15266 pmap_test_read_write(pmap, rw_base, true, true);
15267
15268 T_LOG("Write fault; expect !ref/!mod -> ref/mod, read and write access");
15269 pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
15270 arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
15271 pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
15272 pmap_test_read_write(pmap, ro_base, true, false);
15273 pmap_test_read_write(pmap, rw_base, true, true);
15274
15275 T_LOG("RW protect both mappings; should not change protections.");
15276 pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
15277 pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
15278 pmap_test_read_write(pmap, ro_base, true, false);
15279 pmap_test_read_write(pmap, rw_base, true, true);
15280
15281 T_LOG("Read protect both mappings; RW mapping should become RO.");
15282 pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ);
15283 pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ);
15284 pmap_test_read_write(pmap, ro_base, true, false);
15285 pmap_test_read_write(pmap, rw_base, true, false);
15286
15287 T_LOG("RW protect the page; mappings should not change protections.");
15288 pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
15289 pmap_page_protect(pn, VM_PROT_ALL);
15290 pmap_test_read_write(pmap, ro_base, true, false);
15291 pmap_test_read_write(pmap, rw_base, true, true);
15292
15293 T_LOG("Read protect the page; RW mapping should become RO.");
15294 pmap_page_protect(pn, VM_PROT_READ);
15295 pmap_test_read_write(pmap, ro_base, true, false);
15296 pmap_test_read_write(pmap, rw_base, true, false);
15297
15298 T_LOG("Validate that disconnect removes all known mappings of the page.");
15299 pmap_disconnect(pn);
15300 if (!pmap_verify_free(pn)) {
15301 T_FAIL("Page still has mappings");
15302 }
15303
15304 T_LOG("Remove the wired mapping, so we can tear down the test map.");
15305 pmap_remove(pmap, wired_va_base, wired_va_base + pmap_page_size);
15306 pmap_destroy(pmap);
15307
15308 T_LOG("Release the pages back to the VM.");
15309 vm_page_lock_queues();
15310 vm_page_free(unwired_vm_page);
15311 vm_page_free(wired_vm_page);
15312 vm_page_unlock_queues();
15313
15314 T_LOG("Testing successful!");
15315 return 0;
15316 }
15317 #endif /* __arm64__ */
15318
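/*
 * XNUPOST entry point: on arm64, run the configuration test for each
 * supported page size (both 4K and 16K when __ARM_MIXED_PAGE_SIZE__ is set).
 */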
15319 kern_return_t
15320 pmap_test(void)
15321 {
15322 T_LOG("Starting pmap_tests");
15323 #ifdef __arm64__
15324 int flags = 0;
15325 flags |= PMAP_CREATE_64BIT;
15326
15327 #if __ARM_MIXED_PAGE_SIZE__
15328 T_LOG("Testing VM_PAGE_SIZE_4KB");
15329 pmap_test_test_config(flags | PMAP_CREATE_FORCE_4K_PAGES);
15330 T_LOG("Testing VM_PAGE_SIZE_16KB");
15331 pmap_test_test_config(flags);
15332 #else /* __ARM_MIXED_PAGE_SIZE__ */
15333 pmap_test_test_config(flags);
15334 #endif /* __ARM_MIXED_PAGE_SIZE__ */
15335
15336 #endif /* __arm64__ */
15337 T_PASS("completed pmap_test successfully");
15338 return KERN_SUCCESS;
15339 }
15340 #endif /* CONFIG_XNUPOST */