1 /*
2 * Copyright (c) 2011-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <string.h>
29 #include <mach_assert.h>
30 #include <mach_ldebug.h>
31
32 #include <mach/shared_region.h>
33 #include <mach/vm_param.h>
34 #include <mach/vm_prot.h>
35 #include <mach/vm_map.h>
36 #include <mach/machine/vm_param.h>
37 #include <mach/machine/vm_types.h>
38
39 #include <mach/boolean.h>
40 #include <kern/bits.h>
41 #include <kern/thread.h>
42 #include <kern/sched.h>
43 #include <kern/zalloc.h>
44 #include <kern/kalloc.h>
45 #include <kern/ledger.h>
46 #include <kern/spl.h>
47 #include <kern/trustcache.h>
48
49 #include <os/overflow.h>
50
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_protos.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_pageout.h>
58 #include <vm/cpm.h>
59
60 #include <libkern/img4/interface.h>
61 #include <libkern/section_keywords.h>
62 #include <sys/errno.h>
63
64 #include <machine/atomic.h>
65 #include <machine/thread.h>
66 #include <machine/lowglobals.h>
67
68 #include <arm/caches_internal.h>
69 #include <arm/cpu_data.h>
70 #include <arm/cpu_data_internal.h>
71 #include <arm/cpu_capabilities.h>
72 #include <arm/cpu_number.h>
73 #include <arm/machine_cpu.h>
74 #include <arm/misc_protos.h>
75 #include <arm/trap.h>
76
77 #if (__ARM_VMSA__ > 7)
78 #include <arm64/proc_reg.h>
79 #include <pexpert/arm64/boot.h>
80 #if CONFIG_PGTRACE
81 #include <stdint.h>
82 #include <arm64/pgtrace.h>
83 #if CONFIG_PGTRACE_NONKEXT
84 #include <arm64/pgtrace_decoder.h>
85 #endif // CONFIG_PGTRACE_NONKEXT
86 #endif // CONFIG_PGTRACE
87 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
88 #include <arm64/amcc_rorgn.h>
89 #endif // defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
90 #endif
91
92 #include <pexpert/device_tree.h>
93
94 #include <san/kasan.h>
95 #include <sys/cdefs.h>
96
97 #if defined(HAS_APPLE_PAC)
98 #include <ptrauth.h>
99 #endif
100
101 #ifdef CONFIG_XNUPOST
102 #include <tests/xnupost.h>
103 #endif
104
105
106 #if HIBERNATION
107 #include <IOKit/IOHibernatePrivate.h>
108 #endif /* HIBERNATION */
109
110 #define PMAP_TT_L0_LEVEL 0x0
111 #define PMAP_TT_L1_LEVEL 0x1
112 #define PMAP_TT_L2_LEVEL 0x2
113 #define PMAP_TT_L3_LEVEL 0x3
114
115 #ifdef __ARM64_PMAP_SUBPAGE_L1__
116 #if (__ARM_VMSA__ <= 7)
117 #error This is not supported for old-style page tables
118 #endif
119 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
120 #else
121 #if (__ARM_VMSA__ <= 7)
122 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
123 #else
124 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
125 #endif
126 #endif
127
128 extern u_int32_t random(void); /* from <libkern/libkern.h> */
129
130 static bool alloc_asid(pmap_t pmap);
131 static void free_asid(pmap_t pmap);
132 static void flush_mmu_tlb_region_asid_async(vm_offset_t va, size_t length, pmap_t pmap);
133 static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
134 static pt_entry_t wimg_to_pte(unsigned int wimg);
135
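/*
 * Per-format page table operations. These are reached through
 * pmap_get_pt_ops() so that a pmap whose page tables use a different
 * geometry (see page_table_attr below) can supply its own ASID
 * allocation and TLB maintenance routines; native_pt_ops simply
 * forwards to the standard helpers declared above.
 */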
136 struct page_table_ops {
137 bool (*alloc_id)(pmap_t pmap);
138 void (*free_id)(pmap_t pmap);
139 void (*flush_tlb_region_async)(vm_offset_t va, size_t length, pmap_t pmap);
140 void (*flush_tlb_async)(pmap_t pmap);
141 pt_entry_t (*wimg_to_pte)(unsigned int wimg);
142 };
143
144 static const struct page_table_ops native_pt_ops =
145 {
146 .alloc_id = alloc_asid,
147 .free_id = free_asid,
148 .flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
149 .flush_tlb_async = flush_mmu_tlb_full_asid_async,
150 .wimg_to_pte = wimg_to_pte,
151 };
152
153 #if (__ARM_VMSA__ > 7)
154 const struct page_table_level_info pmap_table_level_info_16k[] =
155 {
156 [0] = {
157 .size = ARM_16K_TT_L0_SIZE,
158 .offmask = ARM_16K_TT_L0_OFFMASK,
159 .shift = ARM_16K_TT_L0_SHIFT,
160 .index_mask = ARM_16K_TT_L0_INDEX_MASK,
161 .valid_mask = ARM_TTE_VALID,
162 .type_mask = ARM_TTE_TYPE_MASK,
163 .type_block = ARM_TTE_TYPE_BLOCK
164 },
165 [1] = {
166 .size = ARM_16K_TT_L1_SIZE,
167 .offmask = ARM_16K_TT_L1_OFFMASK,
168 .shift = ARM_16K_TT_L1_SHIFT,
169 .index_mask = ARM_16K_TT_L1_INDEX_MASK,
170 .valid_mask = ARM_TTE_VALID,
171 .type_mask = ARM_TTE_TYPE_MASK,
172 .type_block = ARM_TTE_TYPE_BLOCK
173 },
174 [2] = {
175 .size = ARM_16K_TT_L2_SIZE,
176 .offmask = ARM_16K_TT_L2_OFFMASK,
177 .shift = ARM_16K_TT_L2_SHIFT,
178 .index_mask = ARM_16K_TT_L2_INDEX_MASK,
179 .valid_mask = ARM_TTE_VALID,
180 .type_mask = ARM_TTE_TYPE_MASK,
181 .type_block = ARM_TTE_TYPE_BLOCK
182 },
183 [3] = {
184 .size = ARM_16K_TT_L3_SIZE,
185 .offmask = ARM_16K_TT_L3_OFFMASK,
186 .shift = ARM_16K_TT_L3_SHIFT,
187 .index_mask = ARM_16K_TT_L3_INDEX_MASK,
188 .valid_mask = ARM_PTE_TYPE_VALID,
189 .type_mask = ARM_PTE_TYPE_MASK,
190 .type_block = ARM_TTE_TYPE_L3BLOCK
191 }
192 };
193
194 const struct page_table_level_info pmap_table_level_info_4k[] =
195 {
196 [0] = {
197 .size = ARM_4K_TT_L0_SIZE,
198 .offmask = ARM_4K_TT_L0_OFFMASK,
199 .shift = ARM_4K_TT_L0_SHIFT,
200 .index_mask = ARM_4K_TT_L0_INDEX_MASK,
201 .valid_mask = ARM_TTE_VALID,
202 .type_mask = ARM_TTE_TYPE_MASK,
203 .type_block = ARM_TTE_TYPE_BLOCK
204 },
205 [1] = {
206 .size = ARM_4K_TT_L1_SIZE,
207 .offmask = ARM_4K_TT_L1_OFFMASK,
208 .shift = ARM_4K_TT_L1_SHIFT,
209 .index_mask = ARM_4K_TT_L1_INDEX_MASK,
210 .valid_mask = ARM_TTE_VALID,
211 .type_mask = ARM_TTE_TYPE_MASK,
212 .type_block = ARM_TTE_TYPE_BLOCK
213 },
214 [2] = {
215 .size = ARM_4K_TT_L2_SIZE,
216 .offmask = ARM_4K_TT_L2_OFFMASK,
217 .shift = ARM_4K_TT_L2_SHIFT,
218 .index_mask = ARM_4K_TT_L2_INDEX_MASK,
219 .valid_mask = ARM_TTE_VALID,
220 .type_mask = ARM_TTE_TYPE_MASK,
221 .type_block = ARM_TTE_TYPE_BLOCK
222 },
223 [3] = {
224 .size = ARM_4K_TT_L3_SIZE,
225 .offmask = ARM_4K_TT_L3_OFFMASK,
226 .shift = ARM_4K_TT_L3_SHIFT,
227 .index_mask = ARM_4K_TT_L3_INDEX_MASK,
228 .valid_mask = ARM_PTE_TYPE_VALID,
229 .type_mask = ARM_PTE_TYPE_MASK,
230 .type_block = ARM_TTE_TYPE_L3BLOCK
231 }
232 };
233
234 struct page_table_attr {
235 const struct page_table_level_info * const pta_level_info;
236 const struct page_table_ops * const pta_ops;
237 const uintptr_t ap_ro;
238 const uintptr_t ap_rw;
239 const uintptr_t ap_rona;
240 const uintptr_t ap_rwna;
241 const uintptr_t ap_xn;
242 const uintptr_t ap_x;
243 const unsigned int pta_root_level;
244 const unsigned int pta_sharedpage_level;
245 const unsigned int pta_max_level;
246 #if __ARM_MIXED_PAGE_SIZE__
247 const uint64_t pta_tcr_value;
248 #endif /* __ARM_MIXED_PAGE_SIZE__ */
249 const uint64_t pta_page_size;
250 const uint64_t pta_page_shift;
251 };
252
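/*
 * For the 4K geometry, each table level resolves 9 bits of VA and the
 * leaf level maps bits [20:12], so translation for a (64 - T0SZ_BOOT)-bit
 * address space must begin at level (T0SZ_BOOT - 16) / 9. For example, a
 * 39-bit space (T0SZ_BOOT == 25) starts at L1, while a 48-bit space
 * (T0SZ_BOOT == 16) starts at L0; these T0SZ values are illustrative, not
 * a statement of what any particular configuration uses.
 */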
253 const struct page_table_attr pmap_pt_attr_4k = {
254 .pta_level_info = pmap_table_level_info_4k,
255 .pta_root_level = (T0SZ_BOOT - 16) / 9,
256 #if __ARM_MIXED_PAGE_SIZE__
257 .pta_sharedpage_level = PMAP_TT_L2_LEVEL,
258 #else /* __ARM_MIXED_PAGE_SIZE__ */
259 #if __ARM_16K_PG__
260 .pta_sharedpage_level = PMAP_TT_L2_LEVEL,
261 #else /* __ARM_16K_PG__ */
262 .pta_sharedpage_level = PMAP_TT_L1_LEVEL,
263 #endif /* __ARM_16K_PG__ */
264 #endif /* __ARM_MIXED_PAGE_SIZE__ */
265 .pta_max_level = PMAP_TT_L3_LEVEL,
266 .pta_ops = &native_pt_ops,
267 .ap_ro = ARM_PTE_AP(AP_RORO),
268 .ap_rw = ARM_PTE_AP(AP_RWRW),
269 .ap_rona = ARM_PTE_AP(AP_RONA),
270 .ap_rwna = ARM_PTE_AP(AP_RWNA),
271 .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
272 .ap_x = ARM_PTE_PNX,
273 #if __ARM_MIXED_PAGE_SIZE__
274 .pta_tcr_value = TCR_EL1_4KB,
275 #endif /* __ARM_MIXED_PAGE_SIZE__ */
276 .pta_page_size = 4096,
277 .pta_page_shift = 12,
278 };
279
280 const struct page_table_attr pmap_pt_attr_16k = {
281 .pta_level_info = pmap_table_level_info_16k,
282 .pta_root_level = PMAP_TT_L1_LEVEL,
283 .pta_sharedpage_level = PMAP_TT_L2_LEVEL,
284 .pta_max_level = PMAP_TT_L3_LEVEL,
285 .pta_ops = &native_pt_ops,
286 .ap_ro = ARM_PTE_AP(AP_RORO),
287 .ap_rw = ARM_PTE_AP(AP_RWRW),
288 .ap_rona = ARM_PTE_AP(AP_RONA),
289 .ap_rwna = ARM_PTE_AP(AP_RWNA),
290 .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
291 .ap_x = ARM_PTE_PNX,
292 #if __ARM_MIXED_PAGE_SIZE__
293 .pta_tcr_value = TCR_EL1_16KB,
294 #endif /* __ARM_MIXED_PAGE_SIZE__ */
295 .pta_page_size = 16384,
296 .pta_page_shift = 14,
297 };
298
299 #if __ARM_16K_PG__
300 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
301 #else /* !__ARM_16K_PG__ */
302 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
303 #endif /* !__ARM_16K_PG__ */
304
305
306 #else /* (__ARM_VMSA__ > 7) */
307 /*
308 * We don't support pmap parameterization for VMSA7, so use an opaque
309 * page_table_attr structure.
310 */
311 const struct page_table_attr * const native_pt_attr = NULL;
312 #endif /* (__ARM_VMSA__ > 7) */
313
314 typedef struct page_table_attr pt_attr_t;
315
316 /* Macro for getting pmap attributes; not a function for const propagation. */
317 #if ARM_PARAMETERIZED_PMAP
318 /* The page table attributes are linked to the pmap */
319 #define pmap_get_pt_attr(pmap) ((pmap)->pmap_pt_attr)
320 #define pmap_get_pt_ops(pmap) ((pmap)->pmap_pt_attr->pta_ops)
321 #else /* !ARM_PARAMETERIZED_PMAP */
322 /* The page table attributes are fixed (to allow for const propagation) */
323 #define pmap_get_pt_attr(pmap) (native_pt_attr)
324 #define pmap_get_pt_ops(pmap) (&native_pt_ops)
325 #endif /* !ARM_PARAMETERIZED_PMAP */
326
327 #if (__ARM_VMSA__ > 7)
328 static inline uint64_t
329 pt_attr_page_size(const pt_attr_t * const pt_attr)
330 {
331 return pt_attr->pta_page_size;
332 }
333
334 __unused static inline uint64_t
335 pt_attr_ln_size(const pt_attr_t * const pt_attr, unsigned int level)
336 {
337 return pt_attr->pta_level_info[level].size;
338 }
339
340 __unused static inline uint64_t
341 pt_attr_ln_shift(const pt_attr_t * const pt_attr, unsigned int level)
342 {
343 return pt_attr->pta_level_info[level].shift;
344 }
345
346 static inline uint64_t
347 pt_attr_ln_offmask(const pt_attr_t * const pt_attr, unsigned int level)
348 {
349 return pt_attr->pta_level_info[level].offmask;
350 }
351
352 __unused static inline uint64_t
353 pt_attr_ln_pt_offmask(const pt_attr_t * const pt_attr, unsigned int level)
354 {
355 return pt_attr_ln_offmask(pt_attr, level);
356 }
357
358 __unused static inline uint64_t
359 pt_attr_ln_index_mask(const pt_attr_t * const pt_attr, unsigned int level)
360 {
361 return pt_attr->pta_level_info[level].index_mask;
362 }
363
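/*
 * Naming convention for the accessors below: the "leaf" level is the last
 * level of the hierarchy (pta_max_level, whose entries map pages), and the
 * "twig" level is the one immediately above it (whose entries point at
 * leaf tables).
 */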
364 static inline unsigned int
365 pt_attr_twig_level(const pt_attr_t * const pt_attr)
366 {
367 return pt_attr->pta_max_level - 1;
368 }
369
370 static inline unsigned int
371 pt_attr_root_level(const pt_attr_t * const pt_attr)
372 {
373 return pt_attr->pta_root_level;
374 }
375
376 /**
377 * This is the level at which to copy a pt_entry from the sharedpage_pmap into
378 * the user pmap. Typically L1 for 4K pages, and L2 for 16K pages. In this way,
379  * the sharedpage's L2/L3 page tables are reused in every 4K task, whereas only
380 * the L3 page table is reused in 16K tasks.
381 */
382 static inline unsigned int
383 pt_attr_sharedpage_level(const pt_attr_t * const pt_attr)
384 {
385 return pt_attr->pta_sharedpage_level;
386 }
387
388 static __unused inline uint64_t
389 pt_attr_leaf_size(const pt_attr_t * const pt_attr)
390 {
391 return pt_attr->pta_level_info[pt_attr->pta_max_level].size;
392 }
393
394 static __unused inline uint64_t
395 pt_attr_leaf_offmask(const pt_attr_t * const pt_attr)
396 {
397 return pt_attr->pta_level_info[pt_attr->pta_max_level].offmask;
398 }
399
400 static inline uint64_t
401 pt_attr_leaf_shift(const pt_attr_t * const pt_attr)
402 {
403 return pt_attr->pta_level_info[pt_attr->pta_max_level].shift;
404 }
405
406 static __unused inline uint64_t
407 pt_attr_leaf_index_mask(const pt_attr_t * const pt_attr)
408 {
409 return pt_attr->pta_level_info[pt_attr->pta_max_level].index_mask;
410 }
411
412 static inline uint64_t
413 pt_attr_twig_size(const pt_attr_t * const pt_attr)
414 {
415 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].size;
416 }
417
418 static inline uint64_t
419 pt_attr_twig_offmask(const pt_attr_t * const pt_attr)
420 {
421 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].offmask;
422 }
423
424 static inline uint64_t
425 pt_attr_twig_shift(const pt_attr_t * const pt_attr)
426 {
427 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].shift;
428 }
429
430 static __unused inline uint64_t
431 pt_attr_twig_index_mask(const pt_attr_t * const pt_attr)
432 {
433 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].index_mask;
434 }
435
436 static inline uint64_t
437 pt_attr_leaf_table_size(const pt_attr_t * const pt_attr)
438 {
439 return pt_attr_twig_size(pt_attr);
440 }
441
442 static inline uint64_t
443 pt_attr_leaf_table_offmask(const pt_attr_t * const pt_attr)
444 {
445 return pt_attr_twig_offmask(pt_attr);
446 }
447
448 static inline uintptr_t
449 pt_attr_leaf_rw(const pt_attr_t * const pt_attr)
450 {
451 return pt_attr->ap_rw;
452 }
453
454 static inline uintptr_t
455 pt_attr_leaf_ro(const pt_attr_t * const pt_attr)
456 {
457 return pt_attr->ap_ro;
458 }
459
460 static inline uintptr_t
461 pt_attr_leaf_rona(const pt_attr_t * const pt_attr)
462 {
463 return pt_attr->ap_rona;
464 }
465
466 static inline uintptr_t
467 pt_attr_leaf_rwna(const pt_attr_t * const pt_attr)
468 {
469 return pt_attr->ap_rwna;
470 }
471
472 static inline uintptr_t
473 pt_attr_leaf_xn(const pt_attr_t * const pt_attr)
474 {
475 return pt_attr->ap_xn;
476 }
477
478 static inline uintptr_t
479 pt_attr_leaf_x(const pt_attr_t * const pt_attr)
480 {
481 return pt_attr->ap_x;
482 }
483
484 #else /* (__ARM_VMSA__ > 7) */
485 static inline uint64_t
486 pt_attr_page_size(__unused const pt_attr_t * const pt_attr)
487 {
488 return PAGE_SIZE;
489 }
490
491 __unused static inline unsigned int
492 pt_attr_root_level(__unused const pt_attr_t * const pt_attr)
493 {
494 return PMAP_TT_L1_LEVEL;
495 }
496
497 __unused static inline unsigned int
498 pt_attr_sharedpage_level(__unused const pt_attr_t * const pt_attr)
499 {
500 return PMAP_TT_L1_LEVEL;
501 }
502
503 static inline unsigned int
504 pt_attr_twig_level(__unused const pt_attr_t * const pt_attr)
505 {
506 return PMAP_TT_L1_LEVEL;
507 }
508
509 static inline uint64_t
510 pt_attr_twig_size(__unused const pt_attr_t * const pt_attr)
511 {
512 return ARM_TT_TWIG_SIZE;
513 }
514
515 static inline uint64_t
516 pt_attr_twig_offmask(__unused const pt_attr_t * const pt_attr)
517 {
518 return ARM_TT_TWIG_OFFMASK;
519 }
520
521 static inline uint64_t
522 pt_attr_twig_shift(__unused const pt_attr_t * const pt_attr)
523 {
524 return ARM_TT_TWIG_SHIFT;
525 }
526
527 static __unused inline uint64_t
528 pt_attr_twig_index_mask(__unused const pt_attr_t * const pt_attr)
529 {
530 return ARM_TT_TWIG_INDEX_MASK;
531 }
532
533 __unused static inline uint64_t
534 pt_attr_leaf_size(__unused const pt_attr_t * const pt_attr)
535 {
536 return ARM_TT_LEAF_SIZE;
537 }
538
539 __unused static inline uint64_t
540 pt_attr_leaf_offmask(__unused const pt_attr_t * const pt_attr)
541 {
542 return ARM_TT_LEAF_OFFMASK;
543 }
544
545 static inline uint64_t
546 pt_attr_leaf_shift(__unused const pt_attr_t * const pt_attr)
547 {
548 return ARM_TT_LEAF_SHIFT;
549 }
550
551 static __unused inline uint64_t
552 pt_attr_leaf_index_mask(__unused const pt_attr_t * const pt_attr)
553 {
554 return ARM_TT_LEAF_INDEX_MASK;
555 }
556
557 static inline uint64_t
558 pt_attr_leaf_table_size(__unused const pt_attr_t * const pt_attr)
559 {
560 return ARM_TT_L1_PT_SIZE;
561 }
562
563 static inline uint64_t
564 pt_attr_leaf_table_offmask(__unused const pt_attr_t * const pt_attr)
565 {
566 return ARM_TT_L1_PT_OFFMASK;
567 }
568
569 static inline uintptr_t
570 pt_attr_leaf_rw(__unused const pt_attr_t * const pt_attr)
571 {
572 return ARM_PTE_AP(AP_RWRW);
573 }
574
575 static inline uintptr_t
576 pt_attr_leaf_ro(__unused const pt_attr_t * const pt_attr)
577 {
578 return ARM_PTE_AP(AP_RORO);
579 }
580
581 static inline uintptr_t
582 pt_attr_leaf_rona(__unused const pt_attr_t * const pt_attr)
583 {
584 return ARM_PTE_AP(AP_RONA);
585 }
586
587 static inline uintptr_t
588 pt_attr_leaf_rwna(__unused const pt_attr_t * const pt_attr)
589 {
590 return ARM_PTE_AP(AP_RWNA);
591 }
592
593 static inline uintptr_t
594 pt_attr_leaf_xn(__unused const pt_attr_t * const pt_attr)
595 {
596 return ARM_PTE_NX;
597 }
598
599 static inline uintptr_t
600 pt_attr_leaf_x(__unused const pt_attr_t * const pt_attr)
601 {
602 return ARM_PTE_PNX;
603 }
604
605 __unused static inline uintptr_t
606 pt_attr_ln_offmask(__unused const pt_attr_t * const pt_attr, unsigned int level)
607 {
608 if (level == PMAP_TT_L1_LEVEL) {
609 return ARM_TT_L1_OFFMASK;
610 } else if (level == PMAP_TT_L2_LEVEL) {
611 return ARM_TT_L2_OFFMASK;
612 }
613
614 return 0;
615 }
616
617 static inline uintptr_t
618 pt_attr_ln_pt_offmask(__unused const pt_attr_t * const pt_attr, unsigned int level)
619 {
620 if (level == PMAP_TT_L1_LEVEL) {
621 return ARM_TT_L1_PT_OFFMASK;
622 } else if (level == PMAP_TT_L2_LEVEL) {
623 return ARM_TT_L2_OFFMASK;
624 }
625
626 return 0;
627 }
628
629 #endif /* (__ARM_VMSA__ > 7) */
630
631 static inline unsigned int
632 pt_attr_leaf_level(const pt_attr_t * const pt_attr)
633 {
634 return pt_attr_twig_level(pt_attr) + 1;
635 }
636
637
638 static inline void
639 pmap_sync_tlb(bool strong __unused)
640 {
641 sync_tlb_flush();
642 }
643
644 #if MACH_ASSERT
645 int vm_footprint_suspend_allowed = 1;
646
647 extern int pmap_ledgers_panic;
648 extern int pmap_ledgers_panic_leeway;
649
650 int pmap_stats_assert = 1;
651 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...) \
652 MACRO_BEGIN \
653 if (pmap_stats_assert && (pmap)->pmap_stats_assert) \
654 assertf(cond, fmt, ##__VA_ARGS__); \
655 MACRO_END
656 #else /* MACH_ASSERT */
657 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
658 #endif /* MACH_ASSERT */
659
660 #if DEVELOPMENT || DEBUG
661 #define PMAP_FOOTPRINT_SUSPENDED(pmap) \
662 (current_thread()->pmap_footprint_suspended)
663 #else /* DEVELOPMENT || DEBUG */
664 #define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
665 #endif /* DEVELOPMENT || DEBUG */
666
667
668 #ifdef PLATFORM_BridgeOS
669 static struct pmap_legacy_trust_cache *pmap_legacy_trust_caches MARK_AS_PMAP_DATA = NULL;
670 #endif
671 static struct pmap_image4_trust_cache *pmap_image4_trust_caches MARK_AS_PMAP_DATA = NULL;
672
673 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_loaded_trust_caches_lock, 0);
674
675
676 /*
677  * Represents a TLB range that will be flushed before exiting
678  * the PPL.
679 * Used by phys_attribute_clear_range to defer flushing pages in
680 * this range until the end of the operation.
681 */
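/*
 * The typical pattern (a sketch, not a contract): the caller records the
 * pmap and the VA bounds of the batch being modified, sets
 * ptfr_flush_needed as mappings in the range are changed, and performs a
 * single ranged TLB invalidation for [ptfr_start, ptfr_end) once the
 * whole batch is done.
 */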
682 typedef struct pmap_tlb_flush_range {
683 pmap_t ptfr_pmap;
684 vm_map_address_t ptfr_start;
685 vm_map_address_t ptfr_end;
686 bool ptfr_flush_needed;
687 } pmap_tlb_flush_range_t;
688
689 #if XNU_MONITOR
690 /*
691 * PPL External References.
692 */
693 extern vm_offset_t segPPLDATAB;
694 extern unsigned long segSizePPLDATA;
695 extern vm_offset_t segPPLTEXTB;
696 extern unsigned long segSizePPLTEXT;
697 extern vm_offset_t segPPLDATACONSTB;
698 extern unsigned long segSizePPLDATACONST;
699
700
701 /*
702 * PPL Global Variables
703 */
704
705 #if (DEVELOPMENT || DEBUG) || CONFIG_CSR_FROM_DT
706 /* Indicates if the PPL will enforce mapping policies; set by -unsafe_kernel_text */
707 SECURITY_READ_ONLY_LATE(boolean_t) pmap_ppl_disable = FALSE;
708 #else
709 const boolean_t pmap_ppl_disable = FALSE;
710 #endif
711
712 /* Indicates if the PPL has started applying APRR. */
713 boolean_t pmap_ppl_locked_down MARK_AS_PMAP_DATA = FALSE;
714
715 /*
716 * The PPL cannot invoke the kernel in order to allocate memory, so we must
717 * maintain a list of free pages that the PPL owns. The kernel can give the PPL
718 * additional pages.
719 */
720 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_ppl_free_page_lock, 0);
721 void ** pmap_ppl_free_page_list MARK_AS_PMAP_DATA = NULL;
722 uint64_t pmap_ppl_free_page_count MARK_AS_PMAP_DATA = 0;
723 uint64_t pmap_ppl_pages_returned_to_kernel_count_total = 0;
724
725 struct pmap_cpu_data_array_entry pmap_cpu_data_array[MAX_CPUS] MARK_AS_PMAP_DATA = {0};
726
727 extern void *pmap_stacks_start;
728 extern void *pmap_stacks_end;
729 SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_start_pa = 0;
730 SECURITY_READ_ONLY_LATE(pmap_paddr_t) pmap_stacks_end_pa = 0;
731 SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_start = 0;
732 SECURITY_READ_ONLY_LATE(pmap_paddr_t) ppl_cpu_save_area_end = 0;
733
734 /* Allocation data/locks for pmap structures. */
735 #if XNU_MONITOR
736 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_free_list_lock, 0);
737 #endif
738 SECURITY_READ_ONLY_LATE(unsigned long) pmap_array_count = 0;
739 SECURITY_READ_ONLY_LATE(void *) pmap_array_begin = NULL;
740 SECURITY_READ_ONLY_LATE(void *) pmap_array_end = NULL;
741 SECURITY_READ_ONLY_LATE(pmap_t) pmap_array = NULL;
742 pmap_t pmap_free_list MARK_AS_PMAP_DATA = NULL;
743
744 /* Allocation data/locks/structs for task ledger structures. */
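/*
 * Sizing note: task_ledgers is a struct of int indices, one per ledger
 * entry tracked for a task, so sizeof(task_ledgers) / sizeof(int) counts
 * the entries; the block below holds that many ledger_entry structures
 * plus the ledger header itself.
 */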
745 #define PMAP_LEDGER_DATA_BYTES \
746 (((sizeof(task_ledgers) / sizeof(int)) * sizeof(struct ledger_entry)) + sizeof(struct ledger))
747
748 /*
749  * The maximum number of ledgers allowed is the maximum number of tasks
750  * allowed on the system plus some more, i.e. ~10% of total tasks = 200.
751 */
752 #define MAX_PMAP_LEDGERS (pmap_max_asids + 200)
753 #define PMAP_ARRAY_SIZE (pmap_max_asids)
754
755 typedef struct pmap_ledger_data {
756 char pld_data[PMAP_LEDGER_DATA_BYTES];
757 } pmap_ledger_data_t;
758
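/*
 * While a pmap_ledger is on pmap_ledger_free_list it is chained through
 * 'next'; once handed out, ple_data holds the ledger contents. back_ptr
 * points back into pmap_ledger_ptr_array so that pmap_ledger_validate()
 * can confirm that a ledger pointer handed in by the kernel really refers
 * to a PPL-managed ledger.
 */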
759 typedef struct pmap_ledger {
760 union {
761 struct pmap_ledger_data ple_data;
762 struct pmap_ledger * next;
763 };
764
765 struct pmap_ledger ** back_ptr;
766 } pmap_ledger_t;
767
768 SECURITY_READ_ONLY_LATE(bool) pmap_ledger_alloc_initialized = false;
769 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_ledger_lock, 0);
770 SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_begin = NULL;
771 SECURITY_READ_ONLY_LATE(void *) pmap_ledger_refcnt_end = NULL;
772 SECURITY_READ_ONLY_LATE(os_refcnt_t *) pmap_ledger_refcnt = NULL;
773 SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_begin = NULL;
774 SECURITY_READ_ONLY_LATE(void *) pmap_ledger_ptr_array_end = NULL;
775 SECURITY_READ_ONLY_LATE(pmap_ledger_t * *) pmap_ledger_ptr_array = NULL;
776 uint64_t pmap_ledger_ptr_array_free_index MARK_AS_PMAP_DATA = 0;
777 pmap_ledger_t * pmap_ledger_free_list MARK_AS_PMAP_DATA = NULL;
778
779 #define pmap_ledger_debit(p, e, a) ledger_debit_nocheck((p)->ledger, e, a)
780 #define pmap_ledger_credit(p, e, a) ledger_credit_nocheck((p)->ledger, e, a)
781
782 static inline void
783 pmap_check_ledger_fields(ledger_t ledger)
784 {
785 if (ledger == NULL) {
786 return;
787 }
788
789 thread_t cur_thread = current_thread();
790 ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting);
791 ledger_check_new_balance(cur_thread, ledger, task_ledgers.alternate_accounting_compressed);
792 ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal);
793 ledger_check_new_balance(cur_thread, ledger, task_ledgers.internal_compressed);
794 ledger_check_new_balance(cur_thread, ledger, task_ledgers.page_table);
795 ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_footprint);
796 ledger_check_new_balance(cur_thread, ledger, task_ledgers.phys_mem);
797 ledger_check_new_balance(cur_thread, ledger, task_ledgers.tkm_private);
798 ledger_check_new_balance(cur_thread, ledger, task_ledgers.wired_mem);
799 }
800
801 #define pmap_ledger_check_balance(p) pmap_check_ledger_fields((p)->ledger)
802
803 #else /* XNU_MONITOR */
804
805 #define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
806 #define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
807
808 #endif /* !XNU_MONITOR */
809
810
811 /* Virtual memory region for early allocation */
812 #if (__ARM_VMSA__ == 7)
813 #define VREGION1_HIGH_WINDOW (0)
814 #else
815 #define VREGION1_HIGH_WINDOW (PE_EARLY_BOOT_VA)
816 #endif
817 #define VREGION1_START ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
818 #define VREGION1_SIZE (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
819
820 extern uint8_t bootstrap_pagetables[];
821
822 extern unsigned int not_in_kdp;
823
824 extern vm_offset_t first_avail;
825
826 extern pmap_paddr_t avail_start;
827 extern pmap_paddr_t avail_end;
828
829 extern vm_offset_t virtual_space_start; /* Next available kernel VA */
830 extern vm_offset_t virtual_space_end; /* End of kernel address space */
831 extern vm_offset_t static_memory_end;
832
833 extern const vm_map_address_t physmap_base;
834 extern const vm_map_address_t physmap_end;
835
836 extern int maxproc, hard_maxproc;
837
838 vm_address_t MARK_AS_PMAP_DATA image4_slab = 0;
839
840 #if (__ARM_VMSA__ > 7)
841 /* The number of address bits one TTBR can cover. */
842 #define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
843
844 /*
845 * The bounds on our TTBRs. These are for sanity checking that
846 * an address is accessible by a TTBR before we attempt to map it.
847 */
848 #define ARM64_TTBR0_MIN_ADDR (0ULL)
849 #define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
850 #define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
851 #define ARM64_TTBR1_MAX_ADDR (~0ULL)
852
853 /* The level of the root of a page table. */
854 const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));
855
856 /* The number of entries in the root TT of a page table. */
857 const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
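/*
 * Worked example (illustrative values): with ARM_PGSHIFT == 12 and
 * TTE_SHIFT == 3, each level resolves 9 bits; a 39-bit PGTABLE_ADDR_BITS
 * gives (39 - 1 - 12) / 9 == 2 levels below the root, i.e. a root at L1,
 * and 2 << (26 % 9) == 512 root entries. With 16K pages (11 bits per
 * level) the same VA width yields a root at L1 with 8 entries.
 */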
858 #else
859 const uint64_t arm64_root_pgtable_level = 0;
860 const uint64_t arm64_root_pgtable_num_ttes = 0;
861 #endif
862
863 struct pmap kernel_pmap_store MARK_AS_PMAP_DATA;
864 SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;
865
866 struct vm_object pmap_object_store VM_PAGE_PACKED_ALIGNED; /* store pt pages */
867 SECURITY_READ_ONLY_LATE(vm_object_t) pmap_object = &pmap_object_store;
868
869 static SECURITY_READ_ONLY_LATE(zone_t) pmap_zone; /* zone of pmap structures */
870
871 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmaps_lock, 0);
872 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(tt1_lock, 0);
873 unsigned int pmap_stamp MARK_AS_PMAP_DATA;
874 queue_head_t map_pmap_list MARK_AS_PMAP_DATA;
875
876 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pt_pages_lock, 0);
877 queue_head_t pt_page_list MARK_AS_PMAP_DATA; /* pt page ptd entries list */
878
879 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pmap_pages_lock, 0);
880
881 typedef struct page_free_entry {
882 struct page_free_entry *next;
883 } page_free_entry_t;
884
885 #define PAGE_FREE_ENTRY_NULL ((page_free_entry_t *) 0)
886
887 page_free_entry_t *pmap_pages_reclaim_list MARK_AS_PMAP_DATA; /* Reclaimed pt page list */
888 unsigned int pmap_pages_request_count MARK_AS_PMAP_DATA; /* Pending requests to reclaim pt page */
889 unsigned long long pmap_pages_request_acum MARK_AS_PMAP_DATA;
890
891
892 typedef struct tt_free_entry {
893 struct tt_free_entry *next;
894 } tt_free_entry_t;
895
896 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
897
898 tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
899 unsigned int free_page_size_tt_count MARK_AS_PMAP_DATA;
900 unsigned int free_page_size_tt_max MARK_AS_PMAP_DATA;
901 #define FREE_PAGE_SIZE_TT_MAX 4
902 tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
903 unsigned int free_two_page_size_tt_count MARK_AS_PMAP_DATA;
904 unsigned int free_two_page_size_tt_max MARK_AS_PMAP_DATA;
905 #define FREE_TWO_PAGE_SIZE_TT_MAX 4
906 tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
907 unsigned int free_tt_count MARK_AS_PMAP_DATA;
908 unsigned int free_tt_max MARK_AS_PMAP_DATA;
909
910 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
911
912 boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
913 boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
914 boolean_t pmap_gc_allowed_by_time_throttle = TRUE;
915
916 unsigned int inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
917 unsigned int inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf user pagetable pages, in units of PAGE_SIZE */
918 unsigned int inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0; /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
919 unsigned int inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
920 unsigned int inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
921 unsigned int inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0; /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
922 unsigned int inuse_pmap_pages_count = 0; /* debugging */
923
924 SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte = 0;
925 SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;
926
927 SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte = 0; /* set by arm_vm_init() - keep out of bss */
928 SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0; /* set by arm_vm_init() - phys tte addr */
929
930 #if DEVELOPMENT || DEBUG
931 int nx_enabled = 1; /* enable no-execute protection */
932 int allow_data_exec = 0; /* No apps may execute data */
933 int allow_stack_exec = 0; /* No apps may execute from the stack */
934 unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
935 unsigned long pmap_asid_hits MARK_AS_PMAP_DATA = 0;
936 unsigned long pmap_asid_misses MARK_AS_PMAP_DATA = 0;
937 #else /* DEVELOPMENT || DEBUG */
938 const int nx_enabled = 1; /* enable no-execute protection */
939 const int allow_data_exec = 0; /* No apps may execute data */
940 const int allow_stack_exec = 0; /* No apps may execute from the stack */
941 #endif /* DEVELOPMENT || DEBUG */
942
943 /**
944  * This variable is set to true during hibernation entry to protect pmap data structures
945  * during image copying, and reset to false on hibernation exit.
946 */
947 bool hib_entry_pmap_lockdown MARK_AS_PMAP_DATA = false;
948
949 /* Macro used to ensure that pmap data structures aren't modified during hibernation image copying. */
950 #if HIBERNATION
951 #define ASSERT_NOT_HIBERNATING() (assertf(!hib_entry_pmap_lockdown, \
952 "Attempted to modify PMAP data structures after hibernation image copying has begun."))
953 #else
954 #define ASSERT_NOT_HIBERNATING()
955 #endif /* HIBERNATION */
956
957 #define PV_ENTRY_NULL ((pv_entry_t *) 0)
958
959 /*
960 * PMAP LEDGERS:
961 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
962 * as a marker for pages mapped through an "alternate accounting" mapping.
963 * These macros set, clear and test for this marker and extract the actual
964 * value of the "pve_next" pointer.
965 */
966 #define PVE_NEXT_ALTACCT ((uintptr_t) 0x1)
967 #define PVE_NEXT_SET_ALTACCT(pve_next_p) \
968 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
969 PVE_NEXT_ALTACCT)
970 #define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
971 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
972 ~PVE_NEXT_ALTACCT)
973 #define PVE_NEXT_IS_ALTACCT(pve_next) \
974 ((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
975 #define PVE_NEXT_PTR(pve_next) \
976 ((struct pv_entry *)(((uintptr_t) (pve_next)) & \
977 ~PVE_NEXT_ALTACCT))
978 #if MACH_ASSERT
979 static void pmap_check_ledgers(pmap_t pmap);
980 #else
981 static inline void
982 pmap_check_ledgers(__unused pmap_t pmap)
983 {
984 }
985 #endif /* MACH_ASSERT */
986
987 SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table; /* array of pv entry pointers */
988
989 pv_free_list_t pv_free MARK_AS_PMAP_DATA = {0};
990 pv_free_list_t pv_kern_free MARK_AS_PMAP_DATA = {0};
991 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pv_free_list_lock, 0);
992 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(pv_kern_free_list_lock, 0);
993
994 SIMPLE_LOCK_DECLARE(phys_backup_lock, 0);
995
996 /*
997 * pt_desc - structure to keep info on page assigned to page tables
998 */
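/*
 * PT_INDEX_MAX is the number of hardware page tables that can share one
 * kernel VM page: when the page-table page size is smaller than PAGE_SIZE
 * (e.g. 4K tables on a 16K-page kernel), a single pt_desc must carry
 * per-table bookkeeping for each sub-page, hence the ptd_info[] array
 * below and the sub-page indexing in ptd_get_info().
 */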
999 #if (__ARM_VMSA__ == 7)
1000 #define PT_INDEX_MAX 1
1001 #else /* (__ARM_VMSA__ != 7) */
1002
1003 #if __ARM_MIXED_PAGE_SIZE__
1004 #define PT_INDEX_MAX (ARM_PGBYTES / 4096)
1005 #elif (ARM_PGSHIFT == 14)
1006 #define PT_INDEX_MAX 1
1007 #elif (ARM_PGSHIFT == 12)
1008 #define PT_INDEX_MAX 4
1009 #else
1010 #error Unsupported ARM_PGSHIFT
1011 #endif /* (ARM_PGSHIFT != 14) */
1012
1013 #endif /* (__ARM_VMSA__ != 7) */
1014
1015 #define PT_DESC_REFCOUNT 0x4000U
1016 #define PT_DESC_IOMMU_REFCOUNT 0x8000U
1017
1018 typedef struct {
1019 /*
1020 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
1021 * For leaf pagetables, should reflect the number of non-empty PTEs
1022 * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
1023 */
1024 unsigned short refcnt;
1025 /*
1026 * For non-leaf pagetables, should be 0
1027 * For leaf pagetables, should reflect the number of wired entries
1028 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
1029 */
1030 unsigned short wiredcnt;
1031 vm_offset_t va;
1032 } ptd_info_t;
1033
1034 typedef struct pt_desc {
1035 queue_chain_t pt_page;
1036 union {
1037 struct pmap *pmap;
1038 };
1039 ptd_info_t ptd_info[PT_INDEX_MAX];
1040 } pt_desc_t;
1041
1042
1043 #define PTD_ENTRY_NULL ((pt_desc_t *) 0)
1044
1045 SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;
1046
1047 pt_desc_t *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
1048 SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
1049 unsigned int ptd_free_count MARK_AS_PMAP_DATA = 0;
1050 decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA);
1051
1052 /*
1053 * physical page attribute
1054 */
1055 typedef u_int16_t pp_attr_t;
1056
1057 #define PP_ATTR_WIMG_MASK 0x003F
1058 #define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
1059
1060 #define PP_ATTR_REFERENCED 0x0040
1061 #define PP_ATTR_MODIFIED 0x0080
1062
1063 #define PP_ATTR_INTERNAL 0x0100
1064 #define PP_ATTR_REUSABLE 0x0200
1065 #define PP_ATTR_ALTACCT 0x0400
1066 #define PP_ATTR_NOENCRYPT 0x0800
1067
1068 #define PP_ATTR_REFFAULT 0x1000
1069 #define PP_ATTR_MODFAULT 0x2000
1070
1071 #if XNU_MONITOR
1072 /*
1073 * Denotes that a page is owned by the PPL. This is modified/checked with the
1074 * PVH lock held, to avoid ownership related races. This does not need to be a
1075 * PP_ATTR bit (as we have the lock), but for now this is a convenient place to
1076 * put the bit.
1077 */
1078 #define PP_ATTR_MONITOR 0x4000
1079
1080 /*
1081 * Denotes that a page *cannot* be owned by the PPL. This is required in order
1082 * to temporarily 'pin' kernel pages that are used to store PPL output parameters.
1083 * Otherwise a malicious or buggy caller could pass PPL-owned memory for these
1084 * parameters and in so doing stage a write gadget against the PPL.
1085 */
1086 #define PP_ATTR_NO_MONITOR 0x8000
1087
1088 /*
1089 * All of the bits owned by the PPL; kernel requests to set or clear these bits
1090 * are illegal.
1091 */
1092 #define PP_ATTR_PPL_OWNED_BITS (PP_ATTR_MONITOR | PP_ATTR_NO_MONITOR)
1093 #endif
1094
1095 SECURITY_READ_ONLY_LATE(volatile pp_attr_t*) pp_attr_table;
1096
1097 /**
1098 * The layout of this structure needs to map 1-to-1 with the pmap-io-range device
1099 * tree nodes. Astris (through the LowGlobals) also depends on the consistency
1100 * of this structure.
1101 */
1102 typedef struct pmap_io_range {
1103 uint64_t addr;
1104 uint64_t len;
1105 #define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31) // Strong DSB required for pages in this range
1106 #define PMAP_IO_RANGE_CARVEOUT (1UL << 30) // Corresponds to memory carved out by bootloader
1107 #define PMAP_IO_RANGE_NEEDS_HIBERNATING (1UL << 29) // Pages in this range need to be included in the hibernation image
1108 uint32_t wimg; // lower 16 bits treated as pp_attr_t, upper 16 bits contain additional mapping flags
1109 uint32_t signature; // 4CC
1110 } __attribute__((packed)) pmap_io_range_t;
1111
1112 SECURITY_READ_ONLY_LATE(pmap_io_range_t*) io_attr_table = (pmap_io_range_t*)0;
1113
1114 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_first_phys = (pmap_paddr_t) 0;
1115 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_last_phys = (pmap_paddr_t) 0;
1116
1117 SECURITY_READ_ONLY_LATE(unsigned int) num_io_rgns = 0;
1118
1119 SECURITY_READ_ONLY_LATE(boolean_t) pmap_initialized = FALSE; /* Has pmap_init completed? */
1120
1121 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default = 0x0;
1122 #if defined(__arm64__)
1123 # ifdef XNU_TARGET_OS_OSX
1124 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = MACH_VM_MAX_ADDRESS;
1125 # else
1126 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
1127 # endif
1128 #endif /* __arm64__ */
1129
1130 #if PMAP_PANIC_DEV_WIMG_ON_MANAGED && (DEVELOPMENT || DEBUG)
1131 SECURITY_READ_ONLY_LATE(boolean_t) pmap_panic_dev_wimg_on_managed = TRUE;
1132 #else
1133 SECURITY_READ_ONLY_LATE(boolean_t) pmap_panic_dev_wimg_on_managed = FALSE;
1134 #endif
1135
1136 MARK_AS_PMAP_DATA SIMPLE_LOCK_DECLARE(asid_lock, 0);
1137 SECURITY_READ_ONLY_LATE(static uint32_t) pmap_max_asids = 0;
1138 SECURITY_READ_ONLY_LATE(int) pmap_asid_plru = 1;
1139 SECURITY_READ_ONLY_LATE(uint16_t) asid_chunk_size = 0;
1140 SECURITY_READ_ONLY_LATE(static bitmap_t*) asid_bitmap;
1141 static bitmap_t asid_plru_bitmap[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA;
1142 static uint64_t asid_plru_generation[BITMAP_LEN(MAX_HW_ASIDS)] MARK_AS_PMAP_DATA = {0};
1143 static uint64_t asid_plru_gencount MARK_AS_PMAP_DATA = 0;
1144
1145
1146 #if (__ARM_VMSA__ > 7)
1147 #if __ARM_MIXED_PAGE_SIZE__
1148 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_4k;
1149 #endif
1150 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap_default;
1151 #endif
1152
1153 #if XNU_MONITOR
1154 /*
1155 * We define our target as 8 pages; enough for 2 page table pages, a PTD page,
1156 * and a PV page; in essence, twice as many pages as may be necessary to satisfy
1157 * a single pmap_enter request.
1158 */
1159 #define PMAP_MIN_FREE_PPL_PAGES 8
1160 #endif
1161
1162 #define pa_index(pa) \
1163 (atop((pa) - vm_first_phys))
1164
1165 #define pai_to_pvh(pai) \
1166 (&pv_head_table[pai])
1167
1168 #define pa_valid(x) \
1169 ((x) >= vm_first_phys && (x) < vm_last_phys)
1170
1171 /* PTE Define Macros */
1172
1173 #define pte_is_wired(pte) \
1174 (((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)
1175
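/*
 * ARM_PTE_WRITEABLE serves as a software "was writeable" marker: it
 * remembers that a mapping is logically writeable even though its
 * hardware permissions may have been temporarily downgraded to read-only
 * (e.g. for modified-bit emulation), so the fault path knows that write
 * access may be restored.
 */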
1176 #define pte_was_writeable(pte) \
1177 (((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)
1178
1179 #define pte_set_was_writeable(pte, was_writeable) \
1180 do { \
1181 if ((was_writeable)) { \
1182 (pte) |= ARM_PTE_WRITEABLE; \
1183 } else { \
1184 (pte) &= ~ARM_PTE_WRITEABLE; \
1185 } \
1186 } while(0)
1187
1188 /* PVE Define Macros */
1189
1190 #define pve_next(pve) \
1191 ((pve)->pve_next)
1192
1193 #define pve_link_field(pve) \
1194 (&pve_next(pve))
1195
1196 #define pve_link(pp, e) \
1197 ((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))
1198
1199 #define pve_unlink(pp, e) \
1200 (pve_next(pp) = pve_next(e))
1201
1202 /* bits held in the ptep pointer field */
1203
1204 #define pve_get_ptep(pve) \
1205 ((pve)->pve_ptep)
1206
1207 #define pve_set_ptep(pve, ptep_new) \
1208 do { \
1209 (pve)->pve_ptep = (ptep_new); \
1210 } while (0)
1211
1212 /* PTEP Define Macros */
1213
1214 /* mask for page descriptor index */
1215 #define ARM_TT_PT_INDEX_MASK ARM_PGMASK
1216
1217 #if (__ARM_VMSA__ == 7)
1218
1219 /*
1220 * Shift value used for reconstructing the virtual address for a PTE.
1221 */
1222 #define ARM_TT_PT_ADDR_SHIFT (10U)
1223
1224 #define ptep_get_pmap(ptep) \
1225 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
1226
1227 #else
1228
1229 #if (ARM_PGSHIFT == 12)
1230 /*
1231 * Shift value used for reconstructing the virtual address for a PTE.
1232 */
1233 #define ARM_TT_PT_ADDR_SHIFT (9ULL)
1234 #else
1235
1236 /*
1237 * Shift value used for reconstructing the virtual address for a PTE.
1238 */
1239 #define ARM_TT_PT_ADDR_SHIFT (11ULL)
1240 #endif
1241
1242 #define ptep_get_pmap(ptep) \
1243 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
1244
1245 #endif
1246
1247 #define ptep_get_ptd(ptep) \
1248 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
1249
1250
1251 /* PVH Define Macros */
1252
1253 /* pvhead type */
1254 #define PVH_TYPE_NULL 0x0UL
1255 #define PVH_TYPE_PVEP 0x1UL
1256 #define PVH_TYPE_PTEP 0x2UL
1257 #define PVH_TYPE_PTDP 0x3UL
1258
1259 #define PVH_TYPE_MASK (0x3UL)
1260
1261 #ifdef __arm64__
1262
1263 /* All flags listed below are stored in the PV head pointer unless otherwise noted */
1264 #define PVH_FLAG_IOMMU 0x4UL /* Stored in each PTE, or in PV head for single-PTE PV heads */
1265 #define PVH_FLAG_IOMMU_TABLE (1ULL << 63) /* Stored in each PTE, or in PV head for single-PTE PV heads */
1266 #define PVH_FLAG_CPU (1ULL << 62)
1267 #define PVH_LOCK_BIT 61
1268 #define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
1269 #define PVH_FLAG_EXEC (1ULL << 60)
1270 #define PVH_FLAG_LOCKDOWN (1ULL << 59)
1271 #define PVH_FLAG_HASHED (1ULL << 58) /* Used to mark that a page has been hashed into the hibernation image. */
1272 #define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN | PVH_FLAG_HASHED)
1273
1274 #else /* !__arm64__ */
1275
1276 #define PVH_LOCK_BIT 31
1277 #define PVH_FLAG_LOCK (1UL << PVH_LOCK_BIT)
1278 #define PVH_HIGH_FLAGS PVH_FLAG_LOCK
1279
1280 #endif
1281
1282 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
1283
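/*
 * A pv_head_table entry is a pointer (pt_entry_t *, pv_entry_t *, or
 * pt_desc_t *) with metadata packed into bits the pointer itself does not
 * need: the low two bits encode the PVH_TYPE, and the PVH_HIGH_FLAGS bits
 * occupy high bits that are set in any kernel virtual address. pvh_ptep()
 * and pvh_list() therefore mask off the low type bits and force the high
 * flag bits back to all-ones to recover the original pointer.
 */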
1284 #define pvh_test_type(h, b) \
1285 ((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))
1286
1287 #define pvh_ptep(h) \
1288 ((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
1289
1290 #define pvh_list(h) \
1291 ((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
1292
1293 #define pvh_get_flags(h) \
1294 (*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)
1295
1296 #define pvh_set_flags(h, f) \
1297 do { \
1298 os_atomic_store((vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
1299 relaxed); \
1300 } while (0)
1301
1302 #define pvh_update_head(h, e, t) \
1303 do { \
1304 assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK); \
1305 os_atomic_store((vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
1306 relaxed); \
1307 } while (0)
1308
1309 #define pvh_update_head_unlocked(h, e, t) \
1310 do { \
1311 assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK)); \
1312 *(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK; \
1313 } while (0)
1314
1315 #define pvh_add(h, e) \
1316 do { \
1317 assert(!pvh_test_type((h), PVH_TYPE_PTEP)); \
1318 pve_next(e) = pvh_list(h); \
1319 pvh_update_head((h), (e), PVH_TYPE_PVEP); \
1320 } while (0)
1321
1322 #define pvh_remove(h, p, e) \
1323 do { \
1324 assert(!PVE_NEXT_IS_ALTACCT(pve_next((e)))); \
1325 if ((p) == (h)) { \
1326 if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
1327 pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
1328 } else { \
1329 pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
1330 } \
1331 } else { \
1332 /* \
1333 * PMAP LEDGERS: \
1334 * preserve the "alternate accounting" bit \
1335 * when updating "p" (the previous entry's \
1336 * "pve_next"). \
1337 */ \
1338 boolean_t __is_altacct; \
1339 __is_altacct = PVE_NEXT_IS_ALTACCT(*(p)); \
1340 *(p) = PVE_NEXT_PTR(pve_next((e))); \
1341 if (__is_altacct) { \
1342 PVE_NEXT_SET_ALTACCT((p)); \
1343 } else { \
1344 PVE_NEXT_CLR_ALTACCT((p)); \
1345 } \
1346 } \
1347 } while (0)
1348
1349
1350 /* PPATTR Define Macros */
1351
1352 #define ppattr_set_bits(h, b) os_atomic_or((h), (pp_attr_t)(b), acq_rel)
1353 #define ppattr_clear_bits(h, b) os_atomic_andnot((h), (pp_attr_t)(b), acq_rel)
1354
1355 #define ppattr_test_bits(h, b) \
1356 ((*(h) & (pp_attr_t)(b)) == (pp_attr_t)(b))
1357
1358 #define pa_set_bits(x, b) \
1359 do { \
1360 if (pa_valid(x)) \
1361 ppattr_set_bits(&pp_attr_table[pa_index(x)], \
1362 (b)); \
1363 } while (0)
1364
1365 #define pa_test_bits(x, b) \
1366 (pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
1367 (b)) : FALSE)
1368
1369 #define pa_clear_bits(x, b) \
1370 do { \
1371 if (pa_valid(x)) \
1372 ppattr_clear_bits(&pp_attr_table[pa_index(x)], \
1373 (b)); \
1374 } while (0)
1375
1376 #define pa_set_modify(x) \
1377 pa_set_bits(x, PP_ATTR_MODIFIED)
1378
1379 #define pa_clear_modify(x) \
1380 pa_clear_bits(x, PP_ATTR_MODIFIED)
1381
1382 #define pa_set_reference(x) \
1383 pa_set_bits(x, PP_ATTR_REFERENCED)
1384
1385 #define pa_clear_reference(x) \
1386 pa_clear_bits(x, PP_ATTR_REFERENCED)
1387
1388 #if XNU_MONITOR
1389 #define pa_set_monitor(x) \
1390 pa_set_bits((x), PP_ATTR_MONITOR)
1391
1392 #define pa_clear_monitor(x) \
1393 pa_clear_bits((x), PP_ATTR_MONITOR)
1394
1395 #define pa_test_monitor(x) \
1396 pa_test_bits((x), PP_ATTR_MONITOR)
1397
1398 #define pa_set_no_monitor(x) \
1399 pa_set_bits((x), PP_ATTR_NO_MONITOR)
1400
1401 #define pa_clear_no_monitor(x) \
1402 pa_clear_bits((x), PP_ATTR_NO_MONITOR)
1403
1404 #define pa_test_no_monitor(x) \
1405 pa_test_bits((x), PP_ATTR_NO_MONITOR)
1406 #endif
1407
1408 #define IS_INTERNAL_PAGE(pai) \
1409 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1410 #define SET_INTERNAL_PAGE(pai) \
1411 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1412 #define CLR_INTERNAL_PAGE(pai) \
1413 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1414
1415 #define IS_REUSABLE_PAGE(pai) \
1416 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1417 #define SET_REUSABLE_PAGE(pai) \
1418 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1419 #define CLR_REUSABLE_PAGE(pai) \
1420 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1421
1422 #define IS_ALTACCT_PAGE(pai, pve_p) \
1423 (((pve_p) == NULL) \
1424 ? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
1425 : PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
1426 #define SET_ALTACCT_PAGE(pai, pve_p) \
1427 if ((pve_p) == NULL) { \
1428 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1429 } else { \
1430 PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
1431 }
1432 #define CLR_ALTACCT_PAGE(pai, pve_p) \
1433 if ((pve_p) == NULL) { \
1434 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1435 } else { \
1436 PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
1437 }
1438
1439 #define IS_REFFAULT_PAGE(pai) \
1440 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1441 #define SET_REFFAULT_PAGE(pai) \
1442 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1443 #define CLR_REFFAULT_PAGE(pai) \
1444 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1445
1446 #define IS_MODFAULT_PAGE(pai) \
1447 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1448 #define SET_MODFAULT_PAGE(pai) \
1449 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1450 #define CLR_MODFAULT_PAGE(pai) \
1451 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1452
1453 #define tte_get_ptd(tte) \
1454 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))
1455
1456
1457 #if (__ARM_VMSA__ == 7)
1458
1459 #define tte_index(pmap, pt_attr, addr) \
1460 ttenum((addr))
1461
1462 #define pte_index(pmap, pt_attr, addr) \
1463 ptenum((addr))
1464
1465 #else
1466
1467 #define ttn_index(pmap, pt_attr, addr, pt_level) \
1468 (((addr) & (pt_attr)->pta_level_info[(pt_level)].index_mask) >> (pt_attr)->pta_level_info[(pt_level)].shift)
1469
1470 #define tt0_index(pmap, pt_attr, addr) \
1471 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L0_LEVEL)
1472
1473 #define tt1_index(pmap, pt_attr, addr) \
1474 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L1_LEVEL)
1475
1476 #define tt2_index(pmap, pt_attr, addr) \
1477 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L2_LEVEL)
1478
1479 #define tt3_index(pmap, pt_attr, addr) \
1480 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L3_LEVEL)
1481
1482 #define tte_index(pmap, pt_attr, addr) \
1483 tt2_index((pmap), (pt_attr), (addr))
1484
1485 #define pte_index(pmap, pt_attr, addr) \
1486 tt3_index((pmap), (pt_attr), (addr))
1487
1488 #endif
1489
1490
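/*
 * Look up the ptd_info slot for a page-table entry. When one kernel VM
 * page holds several hardware page tables (PT_INDEX_MAX > 1), the slot is
 * selected by the sub-page bits of the table address: e.g. with 16K kernel
 * pages (PAGE_SHIFT == 14) containing 4K page tables, ttep bits [13:12]
 * pick one of four ptd_info entries.
 */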
1491 static inline ptd_info_t *
1492 ptd_get_info(pt_desc_t *ptd, const tt_entry_t *ttep)
1493 {
1494 assert(ptd->ptd_info[0].refcnt != PT_DESC_IOMMU_REFCOUNT);
1495 #if PT_INDEX_MAX == 1
1496 #pragma unused(ttep)
1497 return &ptd->ptd_info[0];
1498 #else
1499 uint64_t pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(ptd->pmap));
1500 vm_offset_t ttep_page = (vm_offset_t)ttep >> pmap_page_shift;
1501 unsigned int ttep_index = ttep_page & ((1U << (PAGE_SHIFT - pmap_page_shift)) - 1);
1502 assert(ttep_index < PT_INDEX_MAX);
1503 return &ptd->ptd_info[ttep_index];
1504 #endif
1505 }
1506
1507 static inline ptd_info_t *
1508 ptep_get_info(const pt_entry_t *ptep)
1509 {
1510 return ptd_get_info(ptep_get_ptd(ptep), ptep);
1511 }
1512
1513 static inline vm_map_address_t
1514 ptep_get_va(const pt_entry_t *ptep)
1515 {
1516 pv_entry_t **pv_h;
1517 const pt_attr_t * const pt_attr = pmap_get_pt_attr(ptep_get_pmap(ptep));
1518 pv_h = pai_to_pvh(pa_index(ml_static_vtop(((vm_offset_t)ptep))));
1519
1520 assert(pvh_test_type(pv_h, PVH_TYPE_PTDP));
1521 pt_desc_t *ptdp = (pt_desc_t *)(pvh_list(pv_h));
1522
1523 vm_map_address_t va = ptd_get_info(ptdp, ptep)->va;
1524 vm_offset_t ptep_index = ((vm_offset_t)ptep & pt_attr_leaf_offmask(pt_attr)) / sizeof(*ptep);
1525
1526 va += (ptep_index << pt_attr_leaf_shift(pt_attr));
1527
1528 return va;
1529 }
1530
1531 static inline void
1532 pte_set_wired(pmap_t pmap, pt_entry_t *ptep, boolean_t wired)
1533 {
1534 if (wired) {
1535 *ptep |= ARM_PTE_WIRED;
1536 } else {
1537 *ptep &= ~ARM_PTE_WIRED;
1538 }
1539 /*
1540 * Do not track wired page count for kernel pagetable pages. Kernel mappings are
1541 * not guaranteed to have PTDs in the first place, and kernel pagetable pages are
1542 * never reclaimed.
1543 */
1544 if (pmap == kernel_pmap) {
1545 return;
1546 }
1547 unsigned short *ptd_wiredcnt_ptr;
1548 ptd_wiredcnt_ptr = &(ptep_get_info(ptep)->wiredcnt);
1549 if (wired) {
1550 os_atomic_add(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
1551 } else {
1552 unsigned short prev_wired = os_atomic_sub_orig(ptd_wiredcnt_ptr, (unsigned short)1, relaxed);
1553 if (__improbable(prev_wired == 0)) {
1554 panic("pmap %p (pte %p): wired count underflow", pmap, ptep);
1555 }
1556 }
1557 }
1558
1559 /*
1560 * Lock on pmap system
1561 */
1562
1563 lck_grp_t pmap_lck_grp MARK_AS_PMAP_DATA;
1564
1565 static inline void
1566 pmap_lock_init(pmap_t pmap)
1567 {
1568 lck_rw_init(&pmap->rwlock, &pmap_lck_grp, 0);
1569 pmap->rwlock.lck_rw_can_sleep = FALSE;
1570 }
1571
1572 static inline void
1573 pmap_lock_destroy(pmap_t pmap)
1574 {
1575 lck_rw_destroy(&pmap->rwlock, &pmap_lck_grp);
1576 }
1577
1578 static inline void
1579 pmap_lock(pmap_t pmap)
1580 {
1581 #if !XNU_MONITOR
1582 mp_disable_preemption();
1583 #endif
1584 lck_rw_lock_exclusive(&pmap->rwlock);
1585 }
1586
1587 static inline void
1588 pmap_lock_ro(pmap_t pmap)
1589 {
1590 #if !XNU_MONITOR
1591 mp_disable_preemption();
1592 #endif
1593 lck_rw_lock_shared(&pmap->rwlock);
1594 }
1595
1596 static inline void
1597 pmap_unlock(pmap_t pmap)
1598 {
1599 lck_rw_unlock_exclusive(&pmap->rwlock);
1600 #if !XNU_MONITOR
1601 mp_enable_preemption();
1602 #endif
1603 }
1604
1605 static inline void
1606 pmap_unlock_ro(pmap_t pmap)
1607 {
1608 lck_rw_unlock_shared(&pmap->rwlock);
1609 #if !XNU_MONITOR
1610 mp_enable_preemption();
1611 #endif
1612 }
1613
1614 static inline bool
1615 pmap_try_lock(pmap_t pmap)
1616 {
1617 bool ret;
1618
1619 #if !XNU_MONITOR
1620 mp_disable_preemption();
1621 #endif
1622 ret = lck_rw_try_lock_exclusive(&pmap->rwlock);
1623 if (!ret) {
1624 #if !XNU_MONITOR
1625 mp_enable_preemption();
1626 #endif
1627 }
1628
1629 return ret;
1630 }
1631
1632 // Assert that only the READ lock is held
1633 __unused static inline void
1634 pmap_assert_locked_r(__unused pmap_t pmap)
1635 {
1636 #if MACH_ASSERT
1637 lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_SHARED);
1638 #else
1639 (void)pmap;
1640 #endif
1641 }
1642 // Assert that only the WRITE lock is held
1643 __unused static inline void
1644 pmap_assert_locked_w(__unused pmap_t pmap)
1645 {
1646 #if MACH_ASSERT
1647 lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_EXCLUSIVE);
1648 #else
1649 (void)pmap;
1650 #endif
1651 }
1652
1653 // Assert that either the READ or WRITE lock is held
1654 __unused static inline void
1655 pmap_assert_locked_any(__unused pmap_t pmap)
1656 {
1657 #if MACH_ASSERT
1658 lck_rw_assert(&pmap->rwlock, LCK_RW_ASSERT_HELD);
1659 #endif
1660 }
1661
1662
1663 #if defined(__arm64__)
1664 #define PVH_LOCK_WORD 1 /* Assumes little-endian */
1665 #else
1666 #define PVH_LOCK_WORD 0
1667 #endif
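/*
 * The PVH lock lives in a single bit of the pv_head_table entry, but
 * pmap_lock_bit()/pmap_unlock_bit() operate on 32-bit words. LOCK_PVH and
 * UNLOCK_PVH therefore select the word containing PVH_LOCK_BIT (word 1 on
 * little-endian arm64) and the bit offset within it (61 - 32 == 29 on
 * arm64, 31 of word 0 on 32-bit ARM).
 */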
1668
1669 #define ASSERT_PVH_LOCKED(index) \
1670 do { \
1671 assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK); \
1672 } while (0)
1673
1674 #define LOCK_PVH(index) \
1675 do { \
1676 pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
1677 } while (0)
1678
1679 #define UNLOCK_PVH(index) \
1680 do { \
1681 ASSERT_PVH_LOCKED(index); \
1682 pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
1683 } while (0)
1684
1685 #define PMAP_UPDATE_TLBS(pmap, s, e, strong) { \
1686 pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (size_t)((e) - (s)), pmap); \
1687 pmap_sync_tlb(strong); \
1688 }
1689
1690 #define FLUSH_PTE_RANGE(spte, epte) \
1691 __builtin_arm_dmb(DMB_ISH);
1692
1693 #define FLUSH_PTE(pte_p) \
1694 __builtin_arm_dmb(DMB_ISH);
1695
1696 #define FLUSH_PTE_STRONG(pte_p) \
1697 __builtin_arm_dsb(DSB_ISH);
1698
1699 #define FLUSH_PTE_RANGE_STRONG(spte, epte) \
1700 __builtin_arm_dsb(DSB_ISH);
1701
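/*
 * When TEST_PAGE_RATIO_4 is true, the VM page size is four times the
 * hardware page size (16K pages built from 4K PTEs), so one logical
 * mapping spans four consecutive hardware PTEs. Valid entries are written
 * with the output address stepped by 4K for each sub-page (the | 0x1000 /
 * 0x2000 / 0x3000 below), while empty/compressed markers are replicated
 * unchanged; the alignment check ensures the four 8-byte PTEs sit in the
 * same naturally aligned 32-byte group.
 */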
1702 #define WRITE_PTE_FAST(pte_p, pte_entry) \
1703 __unreachable_ok_push \
1704 if (TEST_PAGE_RATIO_4) { \
1705 if (((unsigned)(pte_p)) & 0x1f) { \
1706 panic("%s: WRITE_PTE_FAST is unaligned, " \
1707 "pte_p=%p, pte_entry=%p", \
1708 __FUNCTION__, \
1709 pte_p, (void*)pte_entry); \
1710 } \
1711 if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
1712 *(pte_p) = (pte_entry); \
1713 *((pte_p)+1) = (pte_entry); \
1714 *((pte_p)+2) = (pte_entry); \
1715 *((pte_p)+3) = (pte_entry); \
1716 } else { \
1717 *(pte_p) = (pte_entry); \
1718 *((pte_p)+1) = (pte_entry) | 0x1000; \
1719 *((pte_p)+2) = (pte_entry) | 0x2000; \
1720 *((pte_p)+3) = (pte_entry) | 0x3000; \
1721 } \
1722 } else { \
1723 *(pte_p) = (pte_entry); \
1724 } \
1725 __unreachable_ok_pop
1726
1727 #define WRITE_PTE(pte_p, pte_entry) \
1728 WRITE_PTE_FAST(pte_p, pte_entry); \
1729 FLUSH_PTE(pte_p);
1730
1731 #define WRITE_PTE_STRONG(pte_p, pte_entry) \
1732 WRITE_PTE_FAST(pte_p, pte_entry); \
1733 FLUSH_PTE_STRONG(pte_p);
1734
1735 /*
1736 * Other useful macros.
1737 */
1738 #define current_pmap() \
1739 (vm_map_pmap(current_thread()->map))
1740
1741 #if XNU_MONITOR
1742 /*
1743 * PPL-related macros.
1744 */
1745 #define ARRAY_ELEM_PTR_IS_VALID(_ptr_, _elem_size_, _array_begin_, _array_end_) \
1746 (((_ptr_) >= (typeof(_ptr_))_array_begin_) && \
1747 ((_ptr_) < (typeof(_ptr_))_array_end_) && \
1748 !((((void *)(_ptr_)) - ((void *)_array_begin_)) % (_elem_size_)))
1749
1750 #define PMAP_PTR_IS_VALID(x) ARRAY_ELEM_PTR_IS_VALID(x, sizeof(struct pmap), pmap_array_begin, pmap_array_end)
1751
1752 #define USER_PMAP_IS_VALID(x) (PMAP_PTR_IS_VALID(x) && (os_atomic_load(&(x)->ref_count, relaxed) > 0))
1753
1754 #define VALIDATE_PMAP(x) \
1755 if (__improbable(((x) != kernel_pmap) && !USER_PMAP_IS_VALID(x))) \
1756 panic("%s: invalid pmap %p", __func__, (x));
1757
1758 #define VALIDATE_LEDGER_PTR(x) \
1759 if (__improbable(!ARRAY_ELEM_PTR_IS_VALID(x, sizeof(void *), pmap_ledger_ptr_array_begin, pmap_ledger_ptr_array_end))) \
1760 panic("%s: invalid ledger ptr %p", __func__, (x));
1761
1762 #define ARRAY_ELEM_INDEX(x, _elem_size_, _array_begin_) ((uint64_t)((((void *)(x)) - (_array_begin_)) / (_elem_size_)))
1763
1764 static uint64_t
1765 pmap_ledger_validate(void * ledger)
1766 {
1767 uint64_t array_index;
1768 pmap_ledger_t ** ledger_ptr_array_ptr = ((pmap_ledger_t*)ledger)->back_ptr;
1769 VALIDATE_LEDGER_PTR(ledger_ptr_array_ptr);
1770 array_index = ARRAY_ELEM_INDEX(ledger_ptr_array_ptr, sizeof(pmap_ledger_t *), pmap_ledger_ptr_array_begin);
1771
1772 if (array_index >= MAX_PMAP_LEDGERS) {
1773 panic("%s: ledger %p array index invalid, index was %#llx", __func__, ledger, array_index);
1774 }
1775
1776 pmap_ledger_t *ledger_ptr = *ledger_ptr_array_ptr;
1777
1778 if (__improbable(ledger_ptr != ledger)) {
1779 panic("%s: ledger pointer mismatch, %p != %p", __func__, ledger, ledger_ptr);
1780 }
1781
1782 return array_index;
1783 }
1784
1785 #else /* XNU_MONITOR */
1786
1787 #define VALIDATE_PMAP(x) assert((x) != NULL);
1788
1789 #endif /* XNU_MONITOR */
1790
1791 #if DEVELOPMENT || DEBUG
1792
1793 /*
1794 * Trace levels are controlled by a bitmask in which each
1795 * level can be enabled/disabled via its (1 << level) bit
1796 * in the boot-arg:
1797 * Level 0: PPL extension functionality
1798 * Level 1: pmap lifecycle (create/destroy/switch)
1799 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
1800 * Level 3: internal state management (attributes/fast-fault)
1801 * Levels 4-7: TTE traces for paging levels 0-3. TTBs are traced at level 4.
1802 */
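/*
 * Purely as an illustration: a trace mask of 0x06, i.e. (1 << 1) | (1 << 2),
 * would enable the pmap lifecycle and mapping lifecycle traces while leaving
 * the other levels disabled.
 */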
1803
1804 SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;
1805
1806 #define PMAP_TRACE(level, ...) \
1807 if (__improbable((1 << (level)) & pmap_trace_mask)) { \
1808 KDBG_RELEASE(__VA_ARGS__); \
1809 }
1810 #else /* DEVELOPMENT || DEBUG */
1811
1812 #define PMAP_TRACE(level, ...)
1813
1814 #endif /* DEVELOPMENT || DEBUG */
1815
1816
1817 /*
1818 * Internal function prototypes (forward declarations).
1819 */
1820
1821 typedef enum {
1822 PV_ALLOC_SUCCESS,
1823 PV_ALLOC_RETRY,
1824 PV_ALLOC_FAIL
1825 } pv_alloc_return_t;
1826
1827 static pv_alloc_return_t pv_alloc(
1828 pmap_t pmap,
1829 unsigned int pai,
1830 pv_entry_t **pvepp);
1831
1832 static void ptd_bootstrap(
1833 pt_desc_t *ptdp, unsigned int ptd_cnt);
1834
1835 static inline pt_desc_t *ptd_alloc_unlinked(void);
1836
1837 static pt_desc_t *ptd_alloc(pmap_t pmap);
1838
1839 static void ptd_deallocate(pt_desc_t *ptdp);
1840
1841 static void ptd_init(
1842 pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);
1843
1844 static void pmap_set_reference(
1845 ppnum_t pn);
1846
1847 pmap_paddr_t pmap_vtophys(
1848 pmap_t pmap, addr64_t va);
1849
1850 void pmap_switch_user_ttb(
1851 pmap_t pmap);
1852
1853 static kern_return_t pmap_expand(
1854 pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
1855
1856 static int pmap_remove_range(
1857 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *);
1858
1859 static int pmap_remove_range_options(
1860 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, vm_map_address_t *, bool *, int);
1861
1862 static tt_entry_t *pmap_tt1_allocate(
1863 pmap_t, vm_size_t, unsigned int);
1864
1865 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1866
1867 static void pmap_tt1_deallocate(
1868 pmap_t, tt_entry_t *, vm_size_t, unsigned int);
1869
1870 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1871
1872 static kern_return_t pmap_tt_allocate(
1873 pmap_t, tt_entry_t **, unsigned int, unsigned int);
1874
1875 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1876
1877 static void pmap_tte_deallocate(
1878 pmap_t, vm_offset_t, vm_offset_t, bool, tt_entry_t *, unsigned int);
1879
1880 const unsigned int arm_hardware_page_size = ARM_PGBYTES;
1881 const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
1882 const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;
1883
1884 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1885
1886 #if (__ARM_VMSA__ > 7)
1887
1888 static inline tt_entry_t *pmap_tt1e(
1889 pmap_t, vm_map_address_t);
1890
1891 static inline tt_entry_t *pmap_tt2e(
1892 pmap_t, vm_map_address_t);
1893
1894 static inline pt_entry_t *pmap_tt3e(
1895 pmap_t, vm_map_address_t);
1896
1897 static inline pt_entry_t *pmap_ttne(
1898 pmap_t, unsigned int, vm_map_address_t);
1899
1900 static void pmap_unmap_sharedpage(
1901 pmap_t pmap);
1902
1903 static boolean_t
1904 pmap_is_64bit(pmap_t);
1905
1906
1907 #endif /* (__ARM_VMSA__ > 7) */
1908
1909 static inline tt_entry_t *pmap_tte(
1910 pmap_t, vm_map_address_t);
1911
1912 static inline pt_entry_t *pmap_pte(
1913 pmap_t, vm_map_address_t);
1914
1915 static void pmap_update_cache_attributes_locked(
1916 ppnum_t, unsigned);
1917
1918 static boolean_t arm_clear_fast_fault(
1919 ppnum_t ppnum,
1920 vm_prot_t fault_type);
1921
1922 static pmap_paddr_t pmap_pages_reclaim(
1923 void);
1924
1925 static kern_return_t pmap_pages_alloc_zeroed(
1926 pmap_paddr_t *pa,
1927 unsigned size,
1928 unsigned option);
1929
1930 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1931 #define PMAP_PAGES_RECLAIM_NOWAIT 0x2
1932
1933 static void pmap_pages_free(
1934 pmap_paddr_t pa,
1935 unsigned size);
1936
1937 static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);
1938
1939 static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);
1940
1941 static void pmap_trim_self(pmap_t pmap);
1942 static void pmap_trim_subord(pmap_t subord);
1943
1944
1945 /*
1946 * Temporary prototypes, while we wait for pmap_enter to move to taking an
1947 * address instead of a page number.
1948 */
1949 static kern_return_t
1950 pmap_enter_addr(
1951 pmap_t pmap,
1952 vm_map_address_t v,
1953 pmap_paddr_t pa,
1954 vm_prot_t prot,
1955 vm_prot_t fault_type,
1956 unsigned int flags,
1957 boolean_t wired);
1958
1959 kern_return_t
1960 pmap_enter_options_addr(
1961 pmap_t pmap,
1962 vm_map_address_t v,
1963 pmap_paddr_t pa,
1964 vm_prot_t prot,
1965 vm_prot_t fault_type,
1966 unsigned int flags,
1967 boolean_t wired,
1968 unsigned int options,
1969 __unused void *arg);
1970
1971 #ifdef CONFIG_XNUPOST
1972 kern_return_t pmap_test(void);
1973 #endif /* CONFIG_XNUPOST */
1974
1975 #if XNU_MONITOR
1976 static pmap_paddr_t pmap_alloc_page_for_kern(unsigned int options);
1977 static void pmap_alloc_page_for_ppl(unsigned int options);
1978
1979
1980 /*
1981 * This macro generates prototypes for the *_internal functions, which
1982 * represent the PPL interface. When the PPL is enabled, this will also
1983 * generate prototypes for the PPL entrypoints (*_ppl), as well as generating
1984 * the entrypoints.
1985 */
1986 #define GEN_ASM_NAME(__function_name) _##__function_name##_ppl
1987
1988 #define PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
1989 static __return_type __function_name##_internal __function_args; \
1990 extern __return_type __function_name##_ppl __function_args; \
1991 __asm__ (".text \n" \
1992 ".align 2 \n" \
1993 ".globl " #__assembly_function_name "\n" \
1994 #__assembly_function_name ":\n" \
1995 "mov x15, " #__function_index "\n" \
1996 "b _aprr_ppl_enter\n")
1997
1998 #define PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, __assembly_function_name) \
1999 PMAP_SUPPORT_PROTOTYPES_WITH_ASM_INTERNAL(__return_type, __function_name, __function_args, __function_index, __assembly_function_name)
2000
2001 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
2002 PMAP_SUPPORT_PROTOTYPES_WITH_ASM(__return_type, __function_name, __function_args, __function_index, GEN_ASM_NAME(__function_name))
2003 #else /* XNU_MONITOR */
2004 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
2005 static __return_type __function_name##_internal __function_args
2006 #endif /* XNU_MONITOR */
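/*
 * Illustrative expansion (not generated code, only a sketch): for an
 * invocation such as
 *
 * PMAP_SUPPORT_PROTOTYPES(void, pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
 *
 * the non-XNU_MONITOR build simply declares
 *
 * static void pmap_destroy_internal(pmap_t pmap);
 *
 * while an XNU_MONITOR build additionally declares
 *
 * extern void pmap_destroy_ppl(pmap_t pmap);
 *
 * and emits an assembly trampoline, _pmap_destroy_ppl, that moves the value of
 * PMAP_DESTROY_INDEX into x15 and branches to _aprr_ppl_enter.
 */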
2007
2008 PMAP_SUPPORT_PROTOTYPES(
2009 kern_return_t,
2010 arm_fast_fault, (pmap_t pmap,
2011 vm_map_address_t va,
2012 vm_prot_t fault_type,
2013 bool was_af_fault,
2014 bool from_user), ARM_FAST_FAULT_INDEX);
2015
2016
2017 PMAP_SUPPORT_PROTOTYPES(
2018 boolean_t,
2019 arm_force_fast_fault, (ppnum_t ppnum,
2020 vm_prot_t allow_mode,
2021 int options), ARM_FORCE_FAST_FAULT_INDEX);
2022
2023 MARK_AS_PMAP_TEXT static boolean_t
2024 arm_force_fast_fault_with_flush_range(
2025 ppnum_t ppnum,
2026 vm_prot_t allow_mode,
2027 int options,
2028 pmap_tlb_flush_range_t *flush_range);
2029
2030 PMAP_SUPPORT_PROTOTYPES(
2031 kern_return_t,
2032 mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);
2033
2034 PMAP_SUPPORT_PROTOTYPES(
2035 boolean_t,
2036 pmap_batch_set_cache_attributes, (ppnum_t pn,
2037 unsigned int cacheattr,
2038 unsigned int page_cnt,
2039 unsigned int page_index,
2040 boolean_t doit,
2041 unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);
2042
2043 PMAP_SUPPORT_PROTOTYPES(
2044 void,
2045 pmap_change_wiring, (pmap_t pmap,
2046 vm_map_address_t v,
2047 boolean_t wired), PMAP_CHANGE_WIRING_INDEX);
2048
2049 PMAP_SUPPORT_PROTOTYPES(
2050 pmap_t,
2051 pmap_create_options, (ledger_t ledger,
2052 vm_map_size_t size,
2053 unsigned int flags,
2054 kern_return_t * kr), PMAP_CREATE_INDEX);
2055
2056 PMAP_SUPPORT_PROTOTYPES(
2057 void,
2058 pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
2059
2060 PMAP_SUPPORT_PROTOTYPES(
2061 kern_return_t,
2062 pmap_enter_options, (pmap_t pmap,
2063 vm_map_address_t v,
2064 pmap_paddr_t pa,
2065 vm_prot_t prot,
2066 vm_prot_t fault_type,
2067 unsigned int flags,
2068 boolean_t wired,
2069 unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
2070
2071 PMAP_SUPPORT_PROTOTYPES(
2072 pmap_paddr_t,
2073 pmap_find_pa, (pmap_t pmap,
2074 addr64_t va), PMAP_FIND_PA_INDEX);
2075
2076 #if (__ARM_VMSA__ > 7)
2077 PMAP_SUPPORT_PROTOTYPES(
2078 kern_return_t,
2079 pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
2080 #endif
2081
2082
2083 PMAP_SUPPORT_PROTOTYPES(
2084 boolean_t,
2085 pmap_is_empty, (pmap_t pmap,
2086 vm_map_offset_t va_start,
2087 vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
2088
2089
2090 PMAP_SUPPORT_PROTOTYPES(
2091 unsigned int,
2092 pmap_map_cpu_windows_copy, (ppnum_t pn,
2093 vm_prot_t prot,
2094 unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);
2095
2096 PMAP_SUPPORT_PROTOTYPES(
2097 kern_return_t,
2098 pmap_nest, (pmap_t grand,
2099 pmap_t subord,
2100 addr64_t vstart,
2101 uint64_t size), PMAP_NEST_INDEX);
2102
2103 PMAP_SUPPORT_PROTOTYPES(
2104 void,
2105 pmap_page_protect_options, (ppnum_t ppnum,
2106 vm_prot_t prot,
2107 unsigned int options,
2108 void *arg), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
2109
2110 PMAP_SUPPORT_PROTOTYPES(
2111 vm_map_address_t,
2112 pmap_protect_options, (pmap_t pmap,
2113 vm_map_address_t start,
2114 vm_map_address_t end,
2115 vm_prot_t prot,
2116 unsigned int options,
2117 void *args), PMAP_PROTECT_OPTIONS_INDEX);
2118
2119 PMAP_SUPPORT_PROTOTYPES(
2120 kern_return_t,
2121 pmap_query_page_info, (pmap_t pmap,
2122 vm_map_offset_t va,
2123 int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);
2124
2125 PMAP_SUPPORT_PROTOTYPES(
2126 mach_vm_size_t,
2127 pmap_query_resident, (pmap_t pmap,
2128 vm_map_address_t start,
2129 vm_map_address_t end,
2130 mach_vm_size_t * compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);
2131
2132 PMAP_SUPPORT_PROTOTYPES(
2133 void,
2134 pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
2135
2136 PMAP_SUPPORT_PROTOTYPES(
2137 vm_map_address_t,
2138 pmap_remove_options, (pmap_t pmap,
2139 vm_map_address_t start,
2140 vm_map_address_t end,
2141 int options), PMAP_REMOVE_OPTIONS_INDEX);
2142
2143 PMAP_SUPPORT_PROTOTYPES(
2144 kern_return_t,
2145 pmap_return, (boolean_t do_panic,
2146 boolean_t do_recurse), PMAP_RETURN_INDEX);
2147
2148 PMAP_SUPPORT_PROTOTYPES(
2149 void,
2150 pmap_set_cache_attributes, (ppnum_t pn,
2151 unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);
2152
2153 PMAP_SUPPORT_PROTOTYPES(
2154 void,
2155 pmap_update_compressor_page, (ppnum_t pn,
2156 unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);
2157
2158 PMAP_SUPPORT_PROTOTYPES(
2159 void,
2160 pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
2161
2162 #if MACH_ASSERT || XNU_MONITOR
2163 PMAP_SUPPORT_PROTOTYPES(
2164 void,
2165 pmap_set_process, (pmap_t pmap,
2166 int pid,
2167 char *procname), PMAP_SET_PROCESS_INDEX);
2168 #endif
2169
2170 PMAP_SUPPORT_PROTOTYPES(
2171 void,
2172 pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);
2173
2174 PMAP_SUPPORT_PROTOTYPES(
2175 kern_return_t,
2176 pmap_unnest_options, (pmap_t grand,
2177 addr64_t vaddr,
2178 uint64_t size,
2179 unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
2180
2181 #if XNU_MONITOR
2182 PMAP_SUPPORT_PROTOTYPES(
2183 void,
2184 pmap_cpu_data_init, (unsigned int cpu_number), PMAP_CPU_DATA_INIT_INDEX);
2185 #endif
2186
2187 PMAP_SUPPORT_PROTOTYPES(
2188 void,
2189 phys_attribute_set, (ppnum_t pn,
2190 unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
2191
2192 #if XNU_MONITOR
2193 PMAP_SUPPORT_PROTOTYPES(
2194 void,
2195 pmap_mark_page_as_ppl_page, (pmap_paddr_t pa, bool initially_free), PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX);
2196 #endif
2197
2198 PMAP_SUPPORT_PROTOTYPES(
2199 void,
2200 phys_attribute_clear, (ppnum_t pn,
2201 unsigned int bits,
2202 int options,
2203 void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);
2204
2205 #if __ARM_RANGE_TLBI__
2206 PMAP_SUPPORT_PROTOTYPES(
2207 vm_map_address_t,
2208 phys_attribute_clear_range, (pmap_t pmap,
2209 vm_map_address_t start,
2210 vm_map_address_t end,
2211 unsigned int bits,
2212 unsigned int options), PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX);
2213 #endif /* __ARM_RANGE_TLBI__ */
2214
2215
2216 PMAP_SUPPORT_PROTOTYPES(
2217 void,
2218 pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);
2219
2220 PMAP_SUPPORT_PROTOTYPES(
2221 void,
2222 pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);
2223
2224 PMAP_SUPPORT_PROTOTYPES(
2225 void,
2226 pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);
2227
2228 #if XNU_MONITOR
2229 PMAP_SUPPORT_PROTOTYPES(
2230 uint64_t,
2231 pmap_release_ppl_pages_to_kernel, (void), PMAP_RELEASE_PAGES_TO_KERNEL_INDEX);
2232 #endif
2233
2234 PMAP_SUPPORT_PROTOTYPES(
2235 void,
2236 pmap_set_vm_map_cs_enforced, (pmap_t pmap, bool new_value), PMAP_SET_VM_MAP_CS_ENFORCED_INDEX);
2237
2238 PMAP_SUPPORT_PROTOTYPES(
2239 void,
2240 pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);
2241
2242 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
2243 PMAP_SUPPORT_PROTOTYPES(
2244 void,
2245 pmap_disable_user_jop, (pmap_t pmap), PMAP_DISABLE_USER_JOP_INDEX);
2246 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
2247
2248 PMAP_SUPPORT_PROTOTYPES(
2249 void,
2250 pmap_trim, (pmap_t grand,
2251 pmap_t subord,
2252 addr64_t vstart,
2253 uint64_t size), PMAP_TRIM_INDEX);
2254
2255 #if HAS_APPLE_PAC
2256 PMAP_SUPPORT_PROTOTYPES(
2257 void *,
2258 pmap_sign_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key), PMAP_SIGN_USER_PTR);
2259 PMAP_SUPPORT_PROTOTYPES(
2260 void *,
2261 pmap_auth_user_ptr, (void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key), PMAP_AUTH_USER_PTR);
2262 #endif /* HAS_APPLE_PAC */
2263
2264
2265
2266
2267 PMAP_SUPPORT_PROTOTYPES(
2268 bool,
2269 pmap_is_trust_cache_loaded, (const uuid_t uuid), PMAP_IS_TRUST_CACHE_LOADED_INDEX);
2270
2271 PMAP_SUPPORT_PROTOTYPES(
2272 uint32_t,
2273 pmap_lookup_in_static_trust_cache, (const uint8_t cdhash[CS_CDHASH_LEN]), PMAP_LOOKUP_IN_STATIC_TRUST_CACHE_INDEX);
2274
2275 PMAP_SUPPORT_PROTOTYPES(
2276 bool,
2277 pmap_lookup_in_loaded_trust_caches, (const uint8_t cdhash[CS_CDHASH_LEN]), PMAP_LOOKUP_IN_LOADED_TRUST_CACHES_INDEX);
2278
2279 PMAP_SUPPORT_PROTOTYPES(
2280 void,
2281 pmap_set_compilation_service_cdhash, (const uint8_t cdhash[CS_CDHASH_LEN]),
2282 PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX);
2283
2284 PMAP_SUPPORT_PROTOTYPES(
2285 bool,
2286 pmap_match_compilation_service_cdhash, (const uint8_t cdhash[CS_CDHASH_LEN]),
2287 PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX);
2288
2289 #if XNU_MONITOR
2290 static void pmap_mark_page_as_ppl_page(pmap_paddr_t pa);
2291 #endif
2292
2293 void pmap_footprint_suspend(vm_map_t map,
2294 boolean_t suspend);
2295 PMAP_SUPPORT_PROTOTYPES(
2296 void,
2297 pmap_footprint_suspend, (vm_map_t map,
2298 boolean_t suspend),
2299 PMAP_FOOTPRINT_SUSPEND_INDEX);
2300
2301 #if XNU_MONITOR
2302 PMAP_SUPPORT_PROTOTYPES(
2303 void,
2304 pmap_ledger_alloc_init, (size_t),
2305 PMAP_LEDGER_ALLOC_INIT_INDEX);
2306
2307 PMAP_SUPPORT_PROTOTYPES(
2308 ledger_t,
2309 pmap_ledger_alloc, (void),
2310 PMAP_LEDGER_ALLOC_INDEX);
2311
2312 PMAP_SUPPORT_PROTOTYPES(
2313 void,
2314 pmap_ledger_free, (ledger_t),
2315 PMAP_LEDGER_FREE_INDEX);
2316 #endif
2317
2318
2319
2320
2321 #if CONFIG_PGTRACE
2322 boolean_t pgtrace_enabled = 0;
2323
2324 typedef struct {
2325 queue_chain_t chain;
2326
2327 /*
2328 * pmap - pmap for below addresses
2329 * ova - original va page address
2330 * cva - clone va addresses for pre, target and post pages
2331 * cva_spte - clone saved ptes
2332 * range - trace range in this map
2333 * cloned - has been cloned or not
2334 */
2335 pmap_t pmap;
2336 vm_map_offset_t ova;
2337 vm_map_offset_t cva[3];
2338 pt_entry_t cva_spte[3];
2339 struct {
2340 pmap_paddr_t start;
2341 pmap_paddr_t end;
2342 } range;
2343 bool cloned;
2344 } pmap_pgtrace_map_t;
2345
2346 static void pmap_pgtrace_init(void);
2347 static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
2348 static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
2349 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
2350 #endif
2351
2352 #if DEVELOPMENT || DEBUG
2353 PMAP_SUPPORT_PROTOTYPES(
2354 kern_return_t,
2355 pmap_test_text_corruption, (pmap_paddr_t),
2356 PMAP_TEST_TEXT_CORRUPTION_INDEX);
2357 #endif /* DEVELOPMENT || DEBUG */
2358
2359 #if (__ARM_VMSA__ > 7)
2360 /*
2361 * The low global vector page is mapped at a fixed alias.
2362 * Since the page size is 16k for H8 and newer, we map the globals to a
2363 * 16k-aligned address. Readers of the globals (e.g. lldb, panic server) need
2364 * to check both addresses anyway for backward compatibility. So for now
2365 * we leave H6 and H7 where they were.
2366 */
2367 #if (ARM_PGSHIFT == 14)
2368 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
2369 #else
2370 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
2371 #endif
2372
2373 #else
2374 #define LOWGLOBAL_ALIAS (0xFFFF1000)
2375 #endif
2376
2377 long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
2378 long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
2379 long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
2380 long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;
2381
2382 #if XNU_MONITOR
2383
2384 #if __has_feature(ptrauth_calls)
2385 #define __ptrauth_ppl_handler __ptrauth(ptrauth_key_function_pointer, true, 0)
2386 #else
2387 #define __ptrauth_ppl_handler
2388 #endif
2389
2390 /*
2391 * Table of function pointers used for PPL dispatch.
2392 */
2393 const void * __ptrauth_ppl_handler const ppl_handler_table[PMAP_COUNT] = {
2394 [ARM_FAST_FAULT_INDEX] = arm_fast_fault_internal,
2395 [ARM_FORCE_FAST_FAULT_INDEX] = arm_force_fast_fault_internal,
2396 [MAPPING_FREE_PRIME_INDEX] = mapping_free_prime_internal,
2397 [PHYS_ATTRIBUTE_CLEAR_INDEX] = phys_attribute_clear_internal,
2398 [PHYS_ATTRIBUTE_SET_INDEX] = phys_attribute_set_internal,
2399 [PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX] = pmap_batch_set_cache_attributes_internal,
2400 [PMAP_CHANGE_WIRING_INDEX] = pmap_change_wiring_internal,
2401 [PMAP_CREATE_INDEX] = pmap_create_options_internal,
2402 [PMAP_DESTROY_INDEX] = pmap_destroy_internal,
2403 [PMAP_ENTER_OPTIONS_INDEX] = pmap_enter_options_internal,
2404 [PMAP_FIND_PA_INDEX] = pmap_find_pa_internal,
2405 [PMAP_INSERT_SHAREDPAGE_INDEX] = pmap_insert_sharedpage_internal,
2406 [PMAP_IS_EMPTY_INDEX] = pmap_is_empty_internal,
2407 [PMAP_MAP_CPU_WINDOWS_COPY_INDEX] = pmap_map_cpu_windows_copy_internal,
2408 [PMAP_MARK_PAGE_AS_PMAP_PAGE_INDEX] = pmap_mark_page_as_ppl_page_internal,
2409 [PMAP_NEST_INDEX] = pmap_nest_internal,
2410 [PMAP_PAGE_PROTECT_OPTIONS_INDEX] = pmap_page_protect_options_internal,
2411 [PMAP_PROTECT_OPTIONS_INDEX] = pmap_protect_options_internal,
2412 [PMAP_QUERY_PAGE_INFO_INDEX] = pmap_query_page_info_internal,
2413 [PMAP_QUERY_RESIDENT_INDEX] = pmap_query_resident_internal,
2414 [PMAP_REFERENCE_INDEX] = pmap_reference_internal,
2415 [PMAP_REMOVE_OPTIONS_INDEX] = pmap_remove_options_internal,
2416 [PMAP_RETURN_INDEX] = pmap_return_internal,
2417 [PMAP_SET_CACHE_ATTRIBUTES_INDEX] = pmap_set_cache_attributes_internal,
2418 [PMAP_UPDATE_COMPRESSOR_PAGE_INDEX] = pmap_update_compressor_page_internal,
2419 [PMAP_SET_NESTED_INDEX] = pmap_set_nested_internal,
2420 [PMAP_SET_PROCESS_INDEX] = pmap_set_process_internal,
2421 [PMAP_SWITCH_INDEX] = pmap_switch_internal,
2422 [PMAP_SWITCH_USER_TTB_INDEX] = pmap_switch_user_ttb_internal,
2423 [PMAP_CLEAR_USER_TTB_INDEX] = pmap_clear_user_ttb_internal,
2424 [PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX] = pmap_unmap_cpu_windows_copy_internal,
2425 [PMAP_UNNEST_OPTIONS_INDEX] = pmap_unnest_options_internal,
2426 [PMAP_FOOTPRINT_SUSPEND_INDEX] = pmap_footprint_suspend_internal,
2427 [PMAP_CPU_DATA_INIT_INDEX] = pmap_cpu_data_init_internal,
2428 [PMAP_RELEASE_PAGES_TO_KERNEL_INDEX] = pmap_release_ppl_pages_to_kernel_internal,
2429 [PMAP_SET_VM_MAP_CS_ENFORCED_INDEX] = pmap_set_vm_map_cs_enforced_internal,
2430 [PMAP_SET_JIT_ENTITLED_INDEX] = pmap_set_jit_entitled_internal,
2431 [PMAP_IS_TRUST_CACHE_LOADED_INDEX] = pmap_is_trust_cache_loaded_internal,
2432 [PMAP_LOOKUP_IN_STATIC_TRUST_CACHE_INDEX] = pmap_lookup_in_static_trust_cache_internal,
2433 [PMAP_LOOKUP_IN_LOADED_TRUST_CACHES_INDEX] = pmap_lookup_in_loaded_trust_caches_internal,
2434 [PMAP_SET_COMPILATION_SERVICE_CDHASH_INDEX] = pmap_set_compilation_service_cdhash_internal,
2435 [PMAP_MATCH_COMPILATION_SERVICE_CDHASH_INDEX] = pmap_match_compilation_service_cdhash_internal,
2436 [PMAP_TRIM_INDEX] = pmap_trim_internal,
2437 [PMAP_LEDGER_ALLOC_INIT_INDEX] = pmap_ledger_alloc_init_internal,
2438 [PMAP_LEDGER_ALLOC_INDEX] = pmap_ledger_alloc_internal,
2439 [PMAP_LEDGER_FREE_INDEX] = pmap_ledger_free_internal,
2440 #if HAS_APPLE_PAC
2441 [PMAP_SIGN_USER_PTR] = pmap_sign_user_ptr_internal,
2442 [PMAP_AUTH_USER_PTR] = pmap_auth_user_ptr_internal,
2443 #endif /* HAS_APPLE_PAC */
2444 #if __ARM_RANGE_TLBI__
2445 [PHYS_ATTRIBUTE_CLEAR_RANGE_INDEX] = phys_attribute_clear_range_internal,
2446 #endif /* __ARM_RANGE_TLBI__ */
2447 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
2448 [PMAP_DISABLE_USER_JOP_INDEX] = pmap_disable_user_jop_internal,
2449 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
2450
2451 #if DEVELOPMENT || DEBUG
2452 [PMAP_TEST_TEXT_CORRUPTION_INDEX] = pmap_test_text_corruption_internal,
2453 #endif /* DEVELOPMENT || DEBUG */
2454 };
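/*
 * Rough sketch of the dispatch path implied by the pieces above (the body of
 * _aprr_ppl_enter is not shown in this file): a kernel-side call to, say,
 * pmap_destroy_ppl() lands in the generated trampoline, which places the
 * function's dispatch index in x15 and enters the PPL; the PPL side is then
 * expected to use that index to select the corresponding *_internal handler
 * from ppl_handler_table.
 */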
2455 #endif
2456
2457
2458 /*
2459 * Initializes the per-CPU pmap data structure for the given CPU number.
2460 */
2461 MARK_AS_PMAP_TEXT static void
2462 pmap_cpu_data_init_internal(unsigned int cpu_number)
2463 {
2464 pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();
2465
2466 #if XNU_MONITOR
2467 /* Verify cacheline-aligned */
2468 assert(((vm_offset_t)pmap_cpu_data & ((1 << MAX_L2_CLINE) - 1)) == 0);
2469 if (pmap_cpu_data->cpu_number != PMAP_INVALID_CPU_NUM) {
2470 panic("%s: pmap_cpu_data->cpu_number=%u, "
2471 "cpu_number=%u",
2472 __FUNCTION__, pmap_cpu_data->cpu_number,
2473 cpu_number);
2474 }
2475 #endif
2476 pmap_cpu_data->cpu_number = cpu_number;
2477 }
2478
2479 void
2480 pmap_cpu_data_init(void)
2481 {
2482 #if XNU_MONITOR
2483 pmap_cpu_data_init_ppl(cpu_number());
2484 #else
2485 pmap_cpu_data_init_internal(cpu_number());
2486 #endif
2487 }
2488
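/*
 * Layout note for the XNU_MONITOR stack setup below: stack_va starts one page
 * above pmap_stacks_start, and each iteration advances by PPL_STACK_SIZE plus
 * one extra ARM_PGBYTES, so an unmapped page is left below every PPL stack
 * (and between consecutive stacks), presumably so that a stack overflow faults
 * instead of silently corrupting the neighboring stack.
 */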
2489 static void
2490 pmap_cpu_data_array_init(void)
2491 {
2492 #if XNU_MONITOR
2493 unsigned int i = 0;
2494 pmap_paddr_t ppl_cpu_save_area_cur = 0;
2495 pt_entry_t template, *pte_p;
2496 vm_offset_t stack_va = (vm_offset_t)pmap_stacks_start + ARM_PGBYTES;
2497 assert((pmap_stacks_start != NULL) && (pmap_stacks_end != NULL));
2498 pmap_stacks_start_pa = avail_start;
2499
2500 for (i = 0; i < MAX_CPUS; i++) {
2501 for (vm_offset_t cur_va = stack_va; cur_va < (stack_va + PPL_STACK_SIZE); cur_va += ARM_PGBYTES) {
2502 assert(cur_va < (vm_offset_t)pmap_stacks_end);
2503 pte_p = pmap_pte(kernel_pmap, cur_va);
2504 assert(*pte_p == ARM_PTE_EMPTY);
2505 template = pa_to_pte(avail_start) | ARM_PTE_AF | ARM_PTE_SH(SH_OUTER_MEMORY) | ARM_PTE_TYPE |
2506 ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT) | xprr_perm_to_pte(XPRR_PPL_RW_PERM);
2507 #if __ARM_KERNEL_PROTECT__
2508 template |= ARM_PTE_NG;
2509 #endif /* __ARM_KERNEL_PROTECT__ */
2510 WRITE_PTE(pte_p, template);
2511 __builtin_arm_isb(ISB_SY);
2512 avail_start += ARM_PGBYTES;
2513 }
2514 #if KASAN
2515 kasan_map_shadow(stack_va, PPL_STACK_SIZE, false);
2516 #endif
2517 pmap_cpu_data_array[i].cpu_data.cpu_number = PMAP_INVALID_CPU_NUM;
2518 pmap_cpu_data_array[i].cpu_data.ppl_state = PPL_STATE_KERNEL;
2519 pmap_cpu_data_array[i].cpu_data.ppl_stack = (void*)(stack_va + PPL_STACK_SIZE);
2520 stack_va += (PPL_STACK_SIZE + ARM_PGBYTES);
2521 }
2522 sync_tlb_flush();
2523 pmap_stacks_end_pa = avail_start;
2524
2525 ppl_cpu_save_area_start = avail_start;
2526 ppl_cpu_save_area_end = ppl_cpu_save_area_start;
2527 ppl_cpu_save_area_cur = ppl_cpu_save_area_start;
2528
2529 for (i = 0; i < MAX_CPUS; i++) {
2530 while ((ppl_cpu_save_area_end - ppl_cpu_save_area_cur) < sizeof(arm_context_t)) {
2531 avail_start += PAGE_SIZE;
2532 ppl_cpu_save_area_end = avail_start;
2533 }
2534
2535 pmap_cpu_data_array[i].cpu_data.save_area = (arm_context_t *)phystokv(ppl_cpu_save_area_cur);
2536 ppl_cpu_save_area_cur += sizeof(arm_context_t);
2537 }
2538 #endif
2539
2540 pmap_cpu_data_init();
2541 }
2542
2543 pmap_cpu_data_t *
2544 pmap_get_cpu_data(void)
2545 {
2546 pmap_cpu_data_t * pmap_cpu_data = NULL;
2547
2548 #if XNU_MONITOR
2549 extern pmap_cpu_data_t* ml_get_ppl_cpu_data(void);
2550 pmap_cpu_data = ml_get_ppl_cpu_data();
2551 #else
2552 pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
2553 #endif
2554
2555 return pmap_cpu_data;
2556 }
2557
2558 #if __arm64__
2559 /*
2560 * Disable interrupts and return previous state.
2561 *
2562 * The PPL has its own interrupt state facility, separate from
2563 * ml_set_interrupts_enable(), since that function is not part of the
2564 * PPL and does things like manipulating untrusted data and
2565 * taking ASTs.
2566 *
2567 * @return The previous interrupt state, to be restored with
2568 * pmap_interrupts_restore().
2569 */
2570 static uint64_t __attribute__((warn_unused_result)) __used
2571 pmap_interrupts_disable(void)
2572 {
2573 uint64_t state = __builtin_arm_rsr64("DAIF");
2574
2575 if ((state & DAIF_STANDARD_DISABLE) != DAIF_STANDARD_DISABLE) {
2576 __builtin_arm_wsr64("DAIFSet", DAIFSC_STANDARD_DISABLE);
2577 }
2578
2579 return state;
2580 }
2581
2582 /*
2583 * Restore previous interrupt state.
2584 *
2585 * @param state The previous interrupt state to restore.
2586 */
2587 static void __used
2588 pmap_interrupts_restore(uint64_t state)
2589 {
2590 // no unknown bits?
2591 assert((state & ~DAIF_ALL) == 0);
2592
2593 if (state != DAIF_STANDARD_DISABLE) {
2594 __builtin_arm_wsr64("DAIF", state);
2595 }
2596 }
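/*
 * Typical usage sketch for the pair above (illustrative only):
 *
 * uint64_t istate = pmap_interrupts_disable();
 * ...touch state that must not be interrupted...
 * pmap_interrupts_restore(istate);
 */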
2597
2598 /*
2599 * Query interrupt state.
2600 *
2601 * ml_get_interrupts_enabled() is safe enough at the time of writing
2602 * this comment, but because it is not considered part of the PPL (and
2603 * so could change without notice), and because it presently only checks
2604 * DAIF_IRQ, we have our own version.
2605 *
2606 * @return true if interrupts are enabled (not fully disabled).
2607 */
2608
2609 static bool __attribute__((warn_unused_result)) __used
2610 pmap_interrupts_enabled(void)
2611 {
2612 return (__builtin_arm_rsr64("DAIF") & DAIF_STANDARD_DISABLE) != DAIF_STANDARD_DISABLE;
2613 }
2614 #endif /* __arm64__ */
2615
2616 #if XNU_MONITOR
2617 /*
2618 * pmap_set_range_xprr_perm takes a range (specified using start and end) that
2619 * falls within the physical aperture. All mappings within this range have
2620 * their protections changed from those specified by the expected_perm to those
2621 * specified by the new_perm.
2622 */
2623 static void
2624 pmap_set_range_xprr_perm(vm_address_t start,
2625 vm_address_t end,
2626 unsigned int expected_perm,
2627 unsigned int new_perm)
2628 {
2629 #if (__ARM_VMSA__ == 7)
2630 #error This function is not supported on older ARM hardware
2631 #else
2632 pmap_t pmap = NULL;
2633
2634 vm_address_t va = 0;
2635 vm_address_t tte_start = 0;
2636 vm_address_t tte_end = 0;
2637
2638 tt_entry_t *tte_p = NULL;
2639 pt_entry_t *pte_p = NULL;
2640 pt_entry_t *cpte_p = NULL;
2641 pt_entry_t *bpte_p = NULL;
2642 pt_entry_t *epte_p = NULL;
2643
2644 tt_entry_t tte = 0;
2645 pt_entry_t cpte = 0;
2646 pt_entry_t template = 0;
2647
2648 pmap = kernel_pmap;
2649
2650 va = start;
2651
2652 /*
2653 * Validate our arguments; any invalid argument will be grounds for a
2654 * panic.
2655 */
2656 if ((start | end) % ARM_PGBYTES) {
2657 panic("%s: start or end not page aligned, "
2658 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2659 __FUNCTION__,
2660 (void *)start, (void *)end, new_perm, expected_perm);
2661 }
2662
2663 if (start > end) {
2664 panic("%s: start > end, "
2665 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2666 __FUNCTION__,
2667 (void *)start, (void *)end, new_perm, expected_perm);
2668 }
2669
2670 bool in_physmap = (start >= physmap_base) && (end < physmap_end);
2671 bool in_static = (start >= gVirtBase) && (end < static_memory_end);
2672
2673 if (!(in_physmap || in_static)) {
2674 panic("%s: address not in static region or physical aperture, "
2675 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2676 __FUNCTION__,
2677 (void *)start, (void *)end, new_perm, expected_perm);
2678 }
2679
2680 if ((new_perm > XPRR_MAX_PERM) || (expected_perm > XPRR_MAX_PERM)) {
2681 panic("%s: invalid XPRR index, "
2682 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2683 __FUNCTION__,
2684 (void *)start, (void *)end, new_perm, expected_perm);
2685 }
2686
2687 /*
2688 * Walk over the PTEs for the given range, and set the protections on
2689 * those PTEs.
2690 */
2691 while (va < end) {
2692 tte_start = va;
2693 tte_end = ((va + pt_attr_twig_size(native_pt_attr)) & ~pt_attr_twig_offmask(native_pt_attr));
2694
2695 if (tte_end > end) {
2696 tte_end = end;
2697 }
2698
2699 tte_p = pmap_tte(pmap, va);
2700
2701 /*
2702 * The physical aperture should not have holes.
2703 * The physical aperture should be contiguous.
2704 * Do not make eye contact with the physical aperture.
2705 */
2706 if (tte_p == NULL) {
2707 panic("%s: physical aperture tte is NULL, "
2708 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2709 __FUNCTION__,
2710 (void *)start, (void *)end, new_perm, expected_perm);
2711 }
2712
2713 tte = *tte_p;
2714
2715 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
2716 /*
2717 * Walk over the given L3 page table page and update the
2718 * PTEs.
2719 */
2720 pte_p = (pt_entry_t *)ttetokv(tte);
2721 bpte_p = &pte_p[pte_index(pmap, native_pt_attr, va)];
2722 epte_p = bpte_p + ((tte_end - va) >> pt_attr_leaf_shift(native_pt_attr));
2723
2724 for (cpte_p = bpte_p; cpte_p < epte_p;
2725 cpte_p += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
2726 int pai = (int)pa_index(pte_to_pa(*cpte_p));
2727 LOCK_PVH(pai);
2728 cpte = *cpte_p;
2729
2730 /*
2731 * Every PTE involved should be valid,
2732 * should not have the hint bit set,
2733 * and should have the expected APRR
2734 * index, since these are mappings
2735 * of the physical aperture.
2736 */
2737 if ((cpte & ARM_PTE_TYPE_MASK) ==
2738 ARM_PTE_TYPE_FAULT) {
2739 panic("%s: physical aperture PTE is invalid, va=%p, "
2740 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2741 __FUNCTION__,
2742 (void *)va,
2743 (void *)start, (void *)end, new_perm, expected_perm);
2744 UNLOCK_PVH(pai);
2745 continue;
2746 }
2747
2748 if (cpte & ARM_PTE_HINT_MASK) {
2749 panic("%s: physical aperture PTE has hint bit set, va=%p, cpte=0x%llx, "
2750 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2751 __FUNCTION__,
2752 (void *)va, cpte,
2753 (void *)start, (void *)end, new_perm, expected_perm);
2754 }
2755
2756 if (pte_to_xprr_perm(cpte) != expected_perm) {
2757 panic("%s: perm=%llu does not match expected_perm, cpte=0x%llx, "
2758 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2759 __FUNCTION__,
2760 pte_to_xprr_perm(cpte), cpte,
2761 (void *)start, (void *)end, new_perm, expected_perm);
2762 }
2763
2764 template = cpte;
2765 template &= ~ARM_PTE_XPRR_MASK;
2766 template |= xprr_perm_to_pte(new_perm);
2767
2768 WRITE_PTE_STRONG(cpte_p, template);
2769 UNLOCK_PVH(pai);
2770 }
2771 } else {
2772 panic("%s: tte=0x%llx is not a table type entry, "
2773 "start=%p, end=%p, new_perm=%u, expected_perm=%u",
2774 __FUNCTION__,
2775 tte,
2776 (void *)start, (void *)end, new_perm, expected_perm);
2777 }
2778
2779 va = tte_end;
2780 }
2781
2782 PMAP_UPDATE_TLBS(pmap, start, end, false);
2783 #endif /* (__ARM_VMSA__ == 7) */
2784 }
2785
2786 /*
2787 * A convenience function for setting protections on a single page.
2788 */
2789 static inline void
2790 pmap_set_xprr_perm(vm_address_t page_kva,
2791 unsigned int expected_perm,
2792 unsigned int new_perm)
2793 {
2794 pmap_set_range_xprr_perm(page_kva, page_kva + PAGE_SIZE, expected_perm, new_perm);
2795 }
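/*
 * For example, handing a page to the PPL flips its physical-aperture mapping
 * from XPRR_KERN_RW_PERM to XPRR_PPL_RW_PERM via pmap_set_xprr_perm() (see
 * pmap_mark_page_as_ppl_page_internal() below), and giving it back to the
 * kernel performs the reverse transition.
 */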
2796 #endif /* XNU_MONITOR */
2797
2798
2799 /*
2800 * pmap_pages_reclaim(): return a page by freeing an active pagetable page.
2801 * To be eligible, a pt page must be assigned to a non-kernel pmap.
2802 * It must not have any wired PTEs and must contain at least one valid PTE.
2803 * If no eligible page is found in the pt page list, return 0.
2804 */
2805 pmap_paddr_t
2806 pmap_pages_reclaim(
2807 void)
2808 {
2809 boolean_t found_page;
2810 unsigned i;
2811 pt_desc_t *ptdp;
2812
2813 /*
2814 * In a loop, check for a page in the reclaimed pt page list.
2815 * If one is present, unlink that page and return its physical address.
2816 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
2817 * If found, invoke pmap_remove_range() on its pmap and address range, then
2818 * deallocate that pt page. This will end up adding the pt page to the
2819 * reclaimed pt page list.
2820 */
2821
2822 pmap_simple_lock(&pmap_pages_lock);
2823 pmap_pages_request_count++;
2824 pmap_pages_request_acum++;
2825
2826 while (1) {
2827 if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
2828 page_free_entry_t *page_entry;
2829
2830 page_entry = pmap_pages_reclaim_list;
2831 pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
2832 pmap_simple_unlock(&pmap_pages_lock);
2833
2834 return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
2835 }
2836
2837 pmap_simple_unlock(&pmap_pages_lock);
2838
2839 pmap_simple_lock(&pt_pages_lock);
2840 ptdp = (pt_desc_t *)queue_first(&pt_page_list);
2841 found_page = FALSE;
2842
2843 while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
2844 if ((ptdp->pmap->nested == FALSE)
2845 && (pmap_try_lock(ptdp->pmap))) {
2846 assert(ptdp->pmap != kernel_pmap);
2847 unsigned refcnt_acc = 0;
2848 unsigned wiredcnt_acc = 0;
2849
2850 for (i = 0; i < PT_INDEX_MAX; i++) {
2851 if (ptdp->ptd_info[i].refcnt == PT_DESC_REFCOUNT) {
2852 /* Do not attempt to free a page that contains an L2 table */
2853 refcnt_acc = 0;
2854 break;
2855 }
2856 refcnt_acc += ptdp->ptd_info[i].refcnt;
2857 wiredcnt_acc += ptdp->ptd_info[i].wiredcnt;
2858 }
2859 if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
2860 found_page = TRUE;
2861 /* Leave ptdp->pmap locked here. We're about to reclaim
2862 * a tt page from it, so we don't want anyone else messing
2863 * with it while we do that. */
2864 break;
2865 }
2866 pmap_unlock(ptdp->pmap);
2867 }
2868 ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
2869 }
2870 if (!found_page) {
2871 pmap_simple_unlock(&pt_pages_lock);
2872 return (pmap_paddr_t)0;
2873 } else {
2874 bool need_strong_sync = false;
2875 vm_map_address_t va;
2876 pmap_t pmap;
2877 pt_entry_t *bpte, *epte;
2878 pt_entry_t *pte_p;
2879 tt_entry_t *tte_p;
2880
2881 pmap_simple_unlock(&pt_pages_lock);
2882 pmap = ptdp->pmap;
2883 pmap_assert_locked_w(pmap); // pmap write lock should be held from loop above
2884
2885 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
2886
2887 for (i = 0; i < (PAGE_SIZE / pt_attr_page_size(pt_attr)); i++) {
2888 va = ptdp->ptd_info[i].va;
2889
2890 /* If the VA is bogus, this may represent an unallocated region
2891 * or one which is in transition (already being freed or expanded).
2892 * Don't try to remove mappings here. */
2893 if (va == (vm_offset_t)-1) {
2894 continue;
2895 }
2896
2897 tte_p = pmap_tte(pmap, va);
2898 if ((tte_p != (tt_entry_t *) NULL)
2899 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
2900 pte_p = (pt_entry_t *) ttetokv(*tte_p);
2901 bpte = &pte_p[pte_index(pmap, pt_attr, va)];
2902 epte = bpte + pt_attr_page_size(pt_attr) / sizeof(pt_entry_t);
2903 /*
2904 * Use PMAP_OPTIONS_REMOVE to clear any
2905 * "compressed" markers and update the
2906 * "compressed" counter in pmap->stats.
2907 * This means that we lose accounting for
2908 * any compressed pages in this range
2909 * but the alternative is to not be able
2910 * to account for their future decompression,
2911 * which could cause the counter to drift
2912 * more and more.
2913 */
2914 pmap_remove_range_options(
2915 pmap, va, bpte, epte, NULL,
2916 &need_strong_sync, PMAP_OPTIONS_REMOVE);
2917 if (ptd_get_info(ptdp, pte_p)->refcnt != 0) {
2918 panic("%s: ptdp %p, count %d", __FUNCTION__, ptdp, ptd_get_info(ptdp, pte_p)->refcnt);
2919 }
2920
2921 pmap_tte_deallocate(pmap, va, va + (size_t)pt_attr_leaf_table_size(pt_attr), need_strong_sync,
2922 tte_p, pt_attr_twig_level(pt_attr));
2923 }
2924 }
2925 // Undo the lock we grabbed when we found ptdp above
2926 pmap_unlock(pmap);
2927 }
2928 pmap_simple_lock(&pmap_pages_lock);
2929 }
2930 }
2931
2932 #if XNU_MONITOR
2933 /*
2934 * Return a PPL page to the free list.
2935 */
2936 MARK_AS_PMAP_TEXT static void
2937 pmap_give_free_ppl_page(pmap_paddr_t paddr)
2938 {
2939 assert((paddr & ARM_PGMASK) == 0);
2940 void ** new_head = (void **)phystokv(paddr);
2941 pmap_simple_lock(&pmap_ppl_free_page_lock);
2942
2943 void * cur_head = pmap_ppl_free_page_list;
2944 *new_head = cur_head;
2945 pmap_ppl_free_page_list = new_head;
2946 pmap_ppl_free_page_count++;
2947
2948 pmap_simple_unlock(&pmap_ppl_free_page_lock);
2949 }
2950
2951 /*
2952 * Get a PPL page from the free list.
2953 */
2954 MARK_AS_PMAP_TEXT static pmap_paddr_t
2955 pmap_get_free_ppl_page(void)
2956 {
2957 pmap_paddr_t result = 0;
2958
2959 pmap_simple_lock(&pmap_ppl_free_page_lock);
2960
2961 if (pmap_ppl_free_page_list != NULL) {
2962 void ** new_head = NULL;
2963 new_head = *((void**)pmap_ppl_free_page_list);
2964 result = kvtophys((vm_offset_t)pmap_ppl_free_page_list);
2965 pmap_ppl_free_page_list = new_head;
2966 pmap_ppl_free_page_count--;
2967 } else {
2968 result = 0L;
2969 }
2970
2971 pmap_simple_unlock(&pmap_ppl_free_page_lock);
2972 assert((result & ARM_PGMASK) == 0);
2973
2974 return result;
2975 }
2976
2977 /*
2978 * pmap_mark_page_as_ppl_page claims a page on behalf of the PPL by marking it
2979 * as PPL-owned and only allowing the PPL to write to it.
2980 */
2981 MARK_AS_PMAP_TEXT static void
2982 pmap_mark_page_as_ppl_page_internal(pmap_paddr_t pa, bool initially_free)
2983 {
2984 vm_offset_t kva = 0;
2985 unsigned int pai = 0;
2986 pp_attr_t attr;
2987
2988 /*
2989 * Mark each page that we allocate as belonging to the monitor, as we
2990 * intend to use it for monitor-y stuff (page tables, table pages, that
2991 * sort of thing).
2992 */
2993 if (!pa_valid(pa)) {
2994 panic("%s: bad address, "
2995 "pa=%p",
2996 __func__,
2997 (void *)pa);
2998 }
2999
3000 pai = (unsigned int)pa_index(pa);
3001 LOCK_PVH(pai);
3002
3003 /* A page that the PPL already owns can't be given to the PPL. */
3004 if (pa_test_monitor(pa)) {
3005 panic("%s: page already belongs to PPL, "
3006 "pa=0x%llx",
3007 __FUNCTION__,
3008 pa);
3009 }
3010 /* The page cannot be mapped outside of the physical aperture. */
3011 if (!pmap_verify_free((ppnum_t)atop(pa))) {
3012 panic("%s: page is not free, "
3013 "pa=0x%llx",
3014 __FUNCTION__,
3015 pa);
3016 }
3017
3018 do {
3019 attr = pp_attr_table[pai];
3020 if (attr & PP_ATTR_NO_MONITOR) {
3021 panic("%s: page excluded from PPL, "
3022 "pa=0x%llx",
3023 __FUNCTION__,
3024 pa);
3025 }
3026 } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_MONITOR, &pp_attr_table[pai]));
3027
3028 UNLOCK_PVH(pai);
3029
3030 kva = phystokv(pa);
3031 pmap_set_xprr_perm(kva, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
3032
3033 if (initially_free) {
3034 pmap_give_free_ppl_page(pa);
3035 }
3036 }
3037
3038 static void
3039 pmap_mark_page_as_ppl_page(pmap_paddr_t pa)
3040 {
3041 pmap_mark_page_as_ppl_page_ppl(pa, true);
3042 }
3043
3044 MARK_AS_PMAP_TEXT static void
3045 pmap_mark_page_as_kernel_page(pmap_paddr_t pa)
3046 {
3047 vm_offset_t kva = 0;
3048 unsigned int pai = 0;
3049
3050 pai = (unsigned int)pa_index(pa);
3051 LOCK_PVH(pai);
3052
3053 if (!pa_test_monitor(pa)) {
3054 panic("%s: page is not a PPL page, "
3055 "pa=%p",
3056 __FUNCTION__,
3057 (void *)pa);
3058 }
3059
3060 pa_clear_monitor(pa);
3061 UNLOCK_PVH(pai);
3062
3063 kva = phystokv(pa);
3064 pmap_set_xprr_perm(kva, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
3065 }
3066
3067 MARK_AS_PMAP_TEXT static pmap_paddr_t
3068 pmap_release_ppl_pages_to_kernel_internal(void)
3069 {
3070 pmap_paddr_t pa = 0;
3071
3072 if (pmap_ppl_free_page_count <= PMAP_MIN_FREE_PPL_PAGES) {
3073 goto done;
3074 }
3075
3076 pa = pmap_get_free_ppl_page();
3077
3078 if (!pa) {
3079 goto done;
3080 }
3081
3082 pmap_mark_page_as_kernel_page(pa);
3083
3084 done:
3085 return pa;
3086 }
3087
3088 static uint64_t
3089 pmap_release_ppl_pages_to_kernel(void)
3090 {
3091 pmap_paddr_t pa = 0;
3092 vm_page_t m = VM_PAGE_NULL;
3093 vm_page_t local_freeq = VM_PAGE_NULL;
3094 uint64_t pmap_ppl_pages_returned_to_kernel_count = 0;
3095
3096 while (pmap_ppl_free_page_count > PMAP_MIN_FREE_PPL_PAGES) {
3097 pa = pmap_release_ppl_pages_to_kernel_ppl();
3098
3099 if (!pa) {
3100 break;
3101 }
3102
3103 /* If we retrieved a page, add it to the free queue. */
3104 vm_object_lock(pmap_object);
3105 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
3106 assert(m != VM_PAGE_NULL);
3107 assert(VM_PAGE_WIRED(m));
3108
3109 m->vmp_busy = TRUE;
3110 m->vmp_snext = local_freeq;
3111 local_freeq = m;
3112 pmap_ppl_pages_returned_to_kernel_count++;
3113 pmap_ppl_pages_returned_to_kernel_count_total++;
3114
3115 vm_object_unlock(pmap_object);
3116 }
3117
3118 if (local_freeq) {
3119 /* We need to hold the object lock for freeing pages. */
3120 vm_object_lock(pmap_object);
3121 vm_page_free_list(local_freeq, TRUE);
3122 vm_object_unlock(pmap_object);
3123 }
3124
3125 return pmap_ppl_pages_returned_to_kernel_count;
3126 }
3127 #endif
3128
3129 static inline void
3130 pmap_enqueue_pages(vm_page_t m)
3131 {
3132 vm_page_t m_prev;
3133 vm_object_lock(pmap_object);
3134 while (m != VM_PAGE_NULL) {
3135 vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
3136 m_prev = m;
3137 m = NEXT_PAGE(m_prev);
3138 *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
3139 }
3140 vm_object_unlock(pmap_object);
3141 }
3142
3143 static kern_return_t
3144 pmap_pages_alloc_zeroed(
3145 pmap_paddr_t *pa,
3146 unsigned size,
3147 unsigned option)
3148 {
3149 #if XNU_MONITOR
3150 ASSERT_NOT_HIBERNATING();
3151
3152 if (size != PAGE_SIZE) {
3153 panic("%s: size != PAGE_SIZE, "
3154 "pa=%p, size=%u, option=%u",
3155 __FUNCTION__,
3156 pa, size, option);
3157 }
3158
3159
3160 assert(option & PMAP_PAGES_ALLOCATE_NOWAIT);
3161
3162 *pa = pmap_get_free_ppl_page();
3163
3164 if ((*pa == 0) && (option & PMAP_PAGES_RECLAIM_NOWAIT)) {
3165 *pa = pmap_pages_reclaim();
3166 }
3167
3168 if (*pa == 0) {
3169 return KERN_RESOURCE_SHORTAGE;
3170 } else {
3171 bzero((void*)phystokv(*pa), size);
3172 return KERN_SUCCESS;
3173 }
3174 #else
3175 vm_page_t m = VM_PAGE_NULL;
3176
3177 thread_t self = current_thread();
3178 // We qualify to allocate reserved memory
3179 uint16_t thread_options = self->options;
3180 self->options |= TH_OPT_VMPRIV;
3181 if (__probable(size == PAGE_SIZE)) {
3182 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
3183 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
3184 break;
3185 }
3186
3187 VM_PAGE_WAIT();
3188 }
3189 if (m != VM_PAGE_NULL) {
3190 vm_page_lock_queues();
3191 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
3192 vm_page_unlock_queues();
3193 }
3194 } else if (size == 2 * PAGE_SIZE) {
3195 while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
3196 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
3197 break;
3198 }
3199
3200 VM_PAGE_WAIT();
3201 }
3202 } else {
3203 panic("%s: invalid size %u", __func__, size);
3204 }
3205
3206 self->options = thread_options;
3207
3208 if ((m == VM_PAGE_NULL) && (option & PMAP_PAGES_RECLAIM_NOWAIT)) {
3209 assert(size == PAGE_SIZE);
3210 *pa = pmap_pages_reclaim();
3211 if (*pa != 0) {
3212 bzero((void*)phystokv(*pa), size);
3213 return KERN_SUCCESS;
3214 }
3215 }
3216
3217 if (m == VM_PAGE_NULL) {
3218 return KERN_RESOURCE_SHORTAGE;
3219 }
3220
3221 *pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
3222
3223 pmap_enqueue_pages(m);
3224
3225 OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
3226 OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);
3227
3228 bzero((void*)phystokv(*pa), size);
3229 return KERN_SUCCESS;
3230 #endif
3231 }
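/*
 * Minimal usage sketch (illustrative only): a caller that must not block
 * passes the NOWAIT options and handles a resource shortage itself, e.g.
 *
 * pmap_paddr_t pa;
 * kern_return_t kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE,
 *     PMAP_PAGES_ALLOCATE_NOWAIT | PMAP_PAGES_RECLAIM_NOWAIT);
 * if (kr != KERN_SUCCESS) {
 *     // back off; retry once pages (or reclaimable pt pages) are available
 * }
 */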
3232
3233 #if XNU_MONITOR
3234 static pmap_paddr_t
3235 pmap_alloc_page_for_kern(unsigned int options)
3236 {
3237 pmap_paddr_t paddr;
3238 vm_page_t m;
3239
3240 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
3241 if (options & PMAP_PAGES_ALLOCATE_NOWAIT) {
3242 return 0;
3243 }
3244 VM_PAGE_WAIT();
3245 }
3246
3247 vm_page_lock_queues();
3248 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
3249 vm_page_unlock_queues();
3250
3251 paddr = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
3252
3253 if (__improbable(paddr == 0)) {
3254 panic("%s: paddr is 0", __func__);
3255 }
3256
3257 pmap_enqueue_pages(m);
3258
3259 OSAddAtomic(1, &inuse_pmap_pages_count);
3260 OSAddAtomic64(1, &alloc_pmap_pages_count);
3261
3262 return paddr;
3263 }
3264
3265 static void
3266 pmap_alloc_page_for_ppl(unsigned int options)
3267 {
3268 thread_t self = current_thread();
3269 // We qualify to allocate reserved memory
3270 uint16_t thread_options = self->options;
3271 self->options |= TH_OPT_VMPRIV;
3272 pmap_paddr_t paddr = pmap_alloc_page_for_kern(options);
3273 self->options = thread_options;
3274 if (paddr != 0) {
3275 pmap_mark_page_as_ppl_page(paddr);
3276 }
3277 }
3278
3279 static pmap_t
3280 pmap_alloc_pmap(void)
3281 {
3282 pmap_t pmap = PMAP_NULL;
3283
3284 pmap_simple_lock(&pmap_free_list_lock);
3285
3286 if (pmap_free_list != PMAP_NULL) {
3287 pmap = pmap_free_list;
3288 pmap_free_list = *((pmap_t *)pmap);
3289
3290 if (!PMAP_PTR_IS_VALID(pmap)) {
3291 panic("%s: allocated pmap is not valid, pmap=%p",
3292 __FUNCTION__, pmap);
3293 }
3294 }
3295
3296 pmap_simple_unlock(&pmap_free_list_lock);
3297
3298 return pmap;
3299 }
3300
3301 static void
3302 pmap_free_pmap(pmap_t pmap)
3303 {
3304 if (!PMAP_PTR_IS_VALID(pmap)) {
3305 panic("%s: pmap is not valid, "
3306 "pmap=%p",
3307 __FUNCTION__,
3308 pmap);
3309 }
3310
3311 pmap_simple_lock(&pmap_free_list_lock);
3312 *((pmap_t *)pmap) = pmap_free_list;
3313 pmap_free_list = pmap;
3314 pmap_simple_unlock(&pmap_free_list_lock);
3315 }
3316
3317 static void
3318 pmap_bootstrap_pmap_free_list(void)
3319 {
3320 pmap_t cur_head = PMAP_NULL;
3321 unsigned long i = 0;
3322
3323 simple_lock_init(&pmap_free_list_lock, 0);
3324
3325 for (i = 0; i < pmap_array_count; i++) {
3326 *((pmap_t *)(&pmap_array[i])) = cur_head;
3327 cur_head = &pmap_array[i];
3328 }
3329
3330 pmap_free_list = cur_head;
3331 }
3332 #endif
3333
3334 static void
3335 pmap_pages_free(
3336 pmap_paddr_t pa,
3337 unsigned size)
3338 {
3339 if (__improbable(pmap_pages_request_count != 0)) {
3340 page_free_entry_t *page_entry;
3341
3342 pmap_simple_lock(&pmap_pages_lock);
3343
3344 if (pmap_pages_request_count != 0) {
3345 pmap_pages_request_count--;
3346 page_entry = (page_free_entry_t *)phystokv(pa);
3347 page_entry->next = pmap_pages_reclaim_list;
3348 pmap_pages_reclaim_list = page_entry;
3349 pmap_simple_unlock(&pmap_pages_lock);
3350 return;
3351 }
3352
3353 pmap_simple_unlock(&pmap_pages_lock);
3354 }
3355
3356 #if XNU_MONITOR
3357 (void)size;
3358
3359 pmap_give_free_ppl_page(pa);
3360 #else
3361 vm_page_t m;
3362 pmap_paddr_t pa_max;
3363
3364 OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);
3365
3366 for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
3367 vm_object_lock(pmap_object);
3368 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
3369 assert(m != VM_PAGE_NULL);
3370 assert(VM_PAGE_WIRED(m));
3371 vm_page_lock_queues();
3372 vm_page_free(m);
3373 vm_page_unlock_queues();
3374 vm_object_unlock(pmap_object);
3375 }
3376 #endif
3377 }
3378
3379 static inline void
3380 PMAP_ZINFO_PALLOC(
3381 pmap_t pmap, int bytes)
3382 {
3383 pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
3384 }
3385
3386 static inline void
3387 PMAP_ZINFO_PFREE(
3388 pmap_t pmap,
3389 int bytes)
3390 {
3391 pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
3392 }
3393
3394 static inline void
3395 pmap_tt_ledger_credit(
3396 pmap_t pmap,
3397 vm_size_t size)
3398 {
3399 if (pmap != kernel_pmap) {
3400 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
3401 pmap_ledger_credit(pmap, task_ledgers.page_table, size);
3402 }
3403 }
3404
3405 static inline void
3406 pmap_tt_ledger_debit(
3407 pmap_t pmap,
3408 vm_size_t size)
3409 {
3410 if (pmap != kernel_pmap) {
3411 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
3412 pmap_ledger_debit(pmap, task_ledgers.page_table, size);
3413 }
3414 }
3415
3416 static inline void
3417 pmap_update_plru(uint16_t asid_index)
3418 {
3419 if (__probable(pmap_asid_plru)) {
3420 unsigned plru_index = asid_index >> 6;
3421 if (__improbable(os_atomic_andnot(&asid_plru_bitmap[plru_index], (1ULL << (asid_index & 63)), relaxed) == 0)) {
3422 asid_plru_generation[plru_index] = ++asid_plru_gencount;
3423 asid_plru_bitmap[plru_index] = ((plru_index == (MAX_HW_ASIDS >> 6)) ? ~(1ULL << 63) : UINT64_MAX);
3424 }
3425 }
3426 }
3427
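/*
 * Note on the ASID scheme below: a virtual ASID decomposes as
 * hw_asid = vasid % asid_chunk_size and sw_asid = vasid / asid_chunk_size.
 * Purely as a hypothetical illustration, with an asid_chunk_size of 256,
 * vasid 515 would yield hw_asid 3 and sw_asid 2. A vasid whose hardware
 * component lands on MAX_HW_ASIDS cannot be used directly, so it is re-homed
 * to the reserved sw_asid UINT8_MAX and given a hardware ASID from the high
 * end of the range instead.
 */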
3428 static bool
3429 alloc_asid(pmap_t pmap)
3430 {
3431 int vasid = -1;
3432 uint16_t hw_asid;
3433
3434 pmap_simple_lock(&asid_lock);
3435
3436 if (__probable(pmap_asid_plru)) {
3437 unsigned plru_index = 0;
3438 uint64_t lowest_gen = asid_plru_generation[0];
3439 uint64_t lowest_gen_bitmap = asid_plru_bitmap[0];
3440 for (unsigned i = 1; i < (sizeof(asid_plru_generation) / sizeof(asid_plru_generation[0])); ++i) {
3441 if (asid_plru_generation[i] < lowest_gen) {
3442 plru_index = i;
3443 lowest_gen = asid_plru_generation[i];
3444 lowest_gen_bitmap = asid_plru_bitmap[i];
3445 }
3446 }
3447
3448 for (; plru_index < BITMAP_LEN(pmap_max_asids); plru_index += ((MAX_HW_ASIDS + 1) >> 6)) {
3449 uint64_t temp_plru = lowest_gen_bitmap & asid_bitmap[plru_index];
3450 if (temp_plru) {
3451 vasid = (plru_index << 6) + lsb_first(temp_plru);
3452 #if DEVELOPMENT || DEBUG
3453 ++pmap_asid_hits;
3454 #endif
3455 break;
3456 }
3457 }
3458 }
3459 if (__improbable(vasid < 0)) {
3460 // bitmap_first() returns highest-order bits first, but a 0-based scheme works
3461 // slightly better with the collision detection scheme used by pmap_switch_internal().
3462 vasid = bitmap_lsb_first(&asid_bitmap[0], pmap_max_asids);
3463 #if DEVELOPMENT || DEBUG
3464 ++pmap_asid_misses;
3465 #endif
3466 }
3467 if (__improbable(vasid < 0)) {
3468 pmap_simple_unlock(&asid_lock);
3469 return false;
3470 }
3471 assert((uint32_t)vasid < pmap_max_asids);
3472 assert(bitmap_test(&asid_bitmap[0], (unsigned int)vasid));
3473 bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
3474 pmap_simple_unlock(&asid_lock);
3475 hw_asid = vasid % asid_chunk_size;
3476 pmap->sw_asid = (uint8_t)(vasid / asid_chunk_size);
3477 if (__improbable(hw_asid == MAX_HW_ASIDS)) {
3478 /* If we took a PLRU "miss" and ended up with a hardware ASID we can't actually support,
3479 * reassign to a reserved VASID. */
3480 assert(pmap->sw_asid < UINT8_MAX);
3481 pmap->sw_asid = UINT8_MAX;
3482 /* Allocate from the high end of the hardware ASID range to reduce the likelihood of
3483 * aliasing with vital system processes, which are likely to have lower ASIDs. */
3484 hw_asid = MAX_HW_ASIDS - 1 - (uint16_t)(vasid / asid_chunk_size);
3485 assert(hw_asid < MAX_HW_ASIDS);
3486 }
3487 pmap_update_plru(hw_asid);
3488 hw_asid += 1; // Account for ASID 0, which is reserved for the kernel
3489 #if __ARM_KERNEL_PROTECT__
3490 hw_asid <<= 1; // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
3491 #endif
3492 pmap->hw_asid = hw_asid;
3493 return true;
3494 }
3495
3496 static void
3497 free_asid(pmap_t pmap)
3498 {
3499 unsigned int vasid;
3500 uint16_t hw_asid = os_atomic_xchg(&pmap->hw_asid, 0, relaxed);
3501 if (__improbable(hw_asid == 0)) {
3502 return;
3503 }
3504
3505 #if __ARM_KERNEL_PROTECT__
3506 hw_asid >>= 1;
3507 #endif
3508 hw_asid -= 1;
3509
3510 if (__improbable(pmap->sw_asid == UINT8_MAX)) {
3511 vasid = ((MAX_HW_ASIDS - 1 - hw_asid) * asid_chunk_size) + MAX_HW_ASIDS;
3512 } else {
3513 vasid = ((unsigned int)pmap->sw_asid * asid_chunk_size) + hw_asid;
3514 }
3515
3516 if (__probable(pmap_asid_plru)) {
3517 os_atomic_or(&asid_plru_bitmap[hw_asid >> 6], (1ULL << (hw_asid & 63)), relaxed);
3518 }
3519 pmap_simple_lock(&asid_lock);
3520 assert(!bitmap_test(&asid_bitmap[0], vasid));
3521 bitmap_set(&asid_bitmap[0], vasid);
3522 pmap_simple_unlock(&asid_lock);
3523 }
3524
3525
3526 #if XNU_MONITOR
3527
3528 /*
3529 * Increase the padding for PPL devices to accommodate increased
3530 * mapping pressure from IOMMUs. This isn't strictly necessary, but
3531 * will reduce the need to retry mappings due to PV allocation failure.
3532 */
3533
3534 #define PV_LOW_WATER_MARK_DEFAULT (0x400)
3535 #define PV_KERN_LOW_WATER_MARK_DEFAULT (0x400)
3536 #define PV_ALLOC_CHUNK_INITIAL (0x400)
3537 #define PV_KERN_ALLOC_CHUNK_INITIAL (0x400)
3538 #define PV_CPU_MIN (0x80)
3539 #define PV_CPU_MAX (0x400)
3540
3541 #else
3542
3543 #define PV_LOW_WATER_MARK_DEFAULT (0x200)
3544 #define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200)
3545 #define PV_ALLOC_CHUNK_INITIAL (0x200)
3546 #define PV_KERN_ALLOC_CHUNK_INITIAL (0x200)
3547 #define PV_CPU_MIN (0x40)
3548 #define PV_CPU_MAX (0x200)
3549
3550 #endif
3551
3552 #define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
3553 #define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
3554
3555 uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
3556
3557 uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA = PV_KERN_LOW_WATER_MARK_DEFAULT;
3558 uint32_t pv_alloc_initial_target MARK_AS_PMAP_DATA = PV_ALLOC_INITIAL_TARGET;
3559 uint32_t pv_kern_alloc_initial_target MARK_AS_PMAP_DATA = PV_KERN_ALLOC_INITIAL_TARGET;
3560
3561 unsigned pmap_reserve_replenish_stat MARK_AS_PMAP_DATA;
3562 unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
3563
3564 static inline void pv_list_alloc(pv_entry_t **pv_ep);
3565 static inline void pv_list_kern_alloc(pv_entry_t **pv_e);
3566 static inline void pv_list_free(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target);
3567
3568 static pv_alloc_return_t
3569 pv_alloc(
3570 pmap_t pmap,
3571 unsigned int pai,
3572 pv_entry_t **pvepp)
3573 {
3574 if (pmap != NULL) {
3575 pmap_assert_locked_w(pmap);
3576 }
3577 ASSERT_PVH_LOCKED(pai);
3578 pv_list_alloc(pvepp);
3579 if (PV_ENTRY_NULL != *pvepp) {
3580 return PV_ALLOC_SUCCESS;
3581 }
3582 #if XNU_MONITOR
3583 unsigned alloc_flags = PMAP_PAGES_ALLOCATE_NOWAIT;
3584 #else
3585 unsigned alloc_flags = 0;
3586 #endif
3587 if ((pmap == NULL) || (kernel_pmap == pmap)) {
3588 pv_list_kern_alloc(pvepp);
3589
3590 if (PV_ENTRY_NULL != *pvepp) {
3591 return PV_ALLOC_SUCCESS;
3592 }
3593 alloc_flags = PMAP_PAGES_ALLOCATE_NOWAIT | PMAP_PAGES_RECLAIM_NOWAIT;
3594 }
3595 pv_entry_t *pv_e;
3596 pv_entry_t *pv_eh;
3597 pv_entry_t *pv_et;
3598 int pv_cnt;
3599 pmap_paddr_t pa;
3600 kern_return_t ret;
3601 pv_alloc_return_t pv_status = PV_ALLOC_RETRY;
3602
3603 UNLOCK_PVH(pai);
3604 if (pmap != NULL) {
3605 pmap_unlock(pmap);
3606 }
3607
3608 ret = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, alloc_flags);
3609
3610 if (ret != KERN_SUCCESS) {
3611 pv_status = PV_ALLOC_FAIL;
3612 goto pv_alloc_cleanup;
3613 }
3614
3615 pv_page_count++;
3616
3617 pv_e = (pv_entry_t *)phystokv(pa);
3618 *pvepp = pv_e;
3619 pv_cnt = (PAGE_SIZE / sizeof(pv_entry_t)) - 1;
3620 pv_eh = pv_e + 1;
3621 pv_et = &pv_e[pv_cnt];
3622
3623 pv_list_free(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);
3624 pv_alloc_cleanup:
3625 if (pmap != NULL) {
3626 pmap_lock(pmap);
3627 }
3628 LOCK_PVH(pai);
3629 return pv_status;
3630 }
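/*
 * Rough caller pattern, for illustration only (the surrounding steps are
 * hypothetical): pv_alloc() drops and re-takes both the pmap lock and the PVH
 * lock around pmap_pages_alloc_zeroed(), so a PV_ALLOC_RETRY return means any
 * PTE/PV state observed before the call may be stale.
 *
 *     pv_entry_t *pve_p = PV_ENTRY_NULL;
 *     pv_alloc_return_t pvr = pv_alloc(pmap, pai, &pve_p);
 *     if (pvr == PV_ALLOC_FAIL) {
 *         // out of memory: back out and report the shortage
 *     } else if (pvr == PV_ALLOC_RETRY) {
 *         // locks were dropped: re-walk and re-validate the mapping; a fresh
 *         // entry may already be stored in pve_p for use (or release) once
 *         // the state has been re-checked
 *     }
 *     // PV_ALLOC_SUCCESS: pve_p came from a per-CPU or global cache and the
 *     // locks were never dropped
 */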
3631
3632 static inline void
3633 pv_free_entry(
3634 pv_entry_t *pvep)
3635 {
3636 pv_list_free(pvep, pvep, 1, pv_kern_low_water_mark);
3637 }
3638
3639 static inline void
3640 pv_free_list_alloc(pv_free_list_t *free_list, pv_entry_t **pv_ep)
3641 {
3642 assert(((free_list->list != NULL) && (free_list->count > 0)) ||
3643 ((free_list->list == NULL) && (free_list->count == 0)));
3644
3645 if ((*pv_ep = free_list->list) != NULL) {
3646 pv_entry_t *pv_e = *pv_ep;
3647 if ((pv_e->pve_next == NULL) && (free_list->count > 1)) {
3648 free_list->list = pv_e + 1;
3649 } else {
3650 free_list->list = pv_e->pve_next;
3651 pv_e->pve_next = PV_ENTRY_NULL;
3652 }
3653 free_list->count--;
3654 }
3655 }
3656
3657 static inline void
3658 pv_list_alloc(pv_entry_t **pv_ep)
3659 {
3660 assert(*pv_ep == PV_ENTRY_NULL);
3661 #if !XNU_MONITOR
3662 mp_disable_preemption();
3663 #endif
3664 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
3665 pv_free_list_alloc(&pmap_cpu_data->pv_free, pv_ep);
3666 #if !XNU_MONITOR
3667 mp_enable_preemption();
3668 #endif
3669 if (*pv_ep != PV_ENTRY_NULL) {
3670 return;
3671 }
3672 #if !XNU_MONITOR
3673 if (pv_kern_free.count < pv_kern_low_water_mark) {
3674 /*
3675 * If the kernel reserved pool is low, let non-kernel mappings wait for a page
3676 * from the VM.
3677 */
3678 return;
3679 }
3680 #endif
3681 pmap_simple_lock(&pv_free_list_lock);
3682 pv_free_list_alloc(&pv_free, pv_ep);
3683 pmap_simple_unlock(&pv_free_list_lock);
3684 }
3685
3686 static inline void
3687 pv_list_free(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target)
3688 {
3689 if (pv_cnt == 1) {
3690 bool limit_exceeded = false;
3691 #if !XNU_MONITOR
3692 mp_disable_preemption();
3693 #endif
3694 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
3695 pv_et->pve_next = pmap_cpu_data->pv_free.list;
3696 pmap_cpu_data->pv_free.list = pv_eh;
3697 if (pmap_cpu_data->pv_free.count == PV_CPU_MIN) {
3698 pmap_cpu_data->pv_free_tail = pv_et;
3699 }
3700 pmap_cpu_data->pv_free.count += pv_cnt;
3701 if (__improbable(pmap_cpu_data->pv_free.count > PV_CPU_MAX)) {
3702 pv_et = pmap_cpu_data->pv_free_tail;
3703 pv_cnt = pmap_cpu_data->pv_free.count - PV_CPU_MIN;
3704 pmap_cpu_data->pv_free.list = pmap_cpu_data->pv_free_tail->pve_next;
3705 pmap_cpu_data->pv_free.count = PV_CPU_MIN;
3706 limit_exceeded = true;
3707 }
3708 #if !XNU_MONITOR
3709 mp_enable_preemption();
3710 #endif
3711 if (__probable(!limit_exceeded)) {
3712 return;
3713 }
3714 }
3715 if (__improbable(pv_kern_free.count < kern_target)) {
3716 pmap_simple_lock(&pv_kern_free_list_lock);
3717 pv_et->pve_next = pv_kern_free.list;
3718 pv_kern_free.list = pv_eh;
3719 pv_kern_free.count += pv_cnt;
3720 pmap_simple_unlock(&pv_kern_free_list_lock);
3721 } else {
3722 pmap_simple_lock(&pv_free_list_lock);
3723 pv_et->pve_next = pv_free.list;
3724 pv_free.list = pv_eh;
3725 pv_free.count += pv_cnt;
3726 pmap_simple_unlock(&pv_free_list_lock);
3727 }
3728 }
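/*
 * Worked example of the spill policy above, using the non-XNU_MONITOR defaults
 * purely for illustration: with PV_CPU_MIN == 0x40 and PV_CPU_MAX == 0x200, a
 * single-entry free that pushes the per-CPU cache from 0x200 to 0x201 entries
 * detaches the newest 0x1C1 entries (the chunk from the current head down to
 * pv_free_tail, recorded when the cache last grew past PV_CPU_MIN), leaving
 * the per-CPU cache holding exactly PV_CPU_MIN entries. The detached chunk is
 * pushed onto pv_kern_free if that list sits below kern_target, and onto the
 * global pv_free list otherwise.
 */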
3729
3730 static inline void
3731 pv_list_kern_alloc(pv_entry_t **pv_ep)
3732 {
3733 assert(*pv_ep == PV_ENTRY_NULL);
3734 pmap_simple_lock(&pv_kern_free_list_lock);
3735 if (pv_kern_free.count > 0) {
3736 pmap_kern_reserve_alloc_stat++;
3737 }
3738 pv_free_list_alloc(&pv_kern_free, pv_ep);
3739 pmap_simple_unlock(&pv_kern_free_list_lock);
3740 }
3741
3742 void
3743 mapping_adjust(void)
3744 {
3745 // Not implemented for arm/arm64
3746 }
3747
3748 /*
3749 * Fills the kernel and general PV free lists back up to their low watermarks.
3750 */
3751 MARK_AS_PMAP_TEXT static kern_return_t
3752 mapping_replenish_internal(uint32_t kern_target_count, uint32_t user_target_count)
3753 {
3754 pv_entry_t *pv_eh;
3755 pv_entry_t *pv_et;
3756 int pv_cnt;
3757 pmap_paddr_t pa;
3758 kern_return_t ret = KERN_SUCCESS;
3759
3760 while ((pv_free.count < user_target_count) || (pv_kern_free.count < kern_target_count)) {
3761 #if XNU_MONITOR
3762 if ((ret = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT)) != KERN_SUCCESS) {
3763 return ret;
3764 }
3765 #else
3766 ret = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, 0);
3767 assert(ret == KERN_SUCCESS);
3768 #endif
3769
3770 pv_page_count++;
3771
3772 pv_eh = (pv_entry_t *)phystokv(pa);
3773 pv_cnt = PAGE_SIZE / sizeof(pv_entry_t);
3774 pv_et = &pv_eh[pv_cnt - 1];
3775
3776 pmap_reserve_replenish_stat += pv_cnt;
3777 pv_list_free(pv_eh, pv_et, pv_cnt, kern_target_count);
3778 }
3779
3780 return ret;
3781 }
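/*
 * For scale, an illustrative figure only (sizeof(pv_entry_t) depends on the
 * configuration): each iteration above converts one page into
 * PAGE_SIZE / sizeof(pv_entry_t) entries, so a 16K page of 16-byte entries
 * would contribute 1024 pv_entry_t objects per call to
 * pmap_pages_alloc_zeroed(), routed to the kernel reserve first whenever it
 * sits below kern_target_count.
 */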
3782
3783 /*
3784 * Creates a target number of free pv_entry_t objects for the kernel free list
3785 * and the general free list.
3786 */
3787 MARK_AS_PMAP_TEXT static kern_return_t
3788 mapping_free_prime_internal(void)
3789 {
3790 return mapping_replenish_internal(pv_kern_alloc_initial_target, pv_alloc_initial_target);
3791 }
3792
3793 void
3794 mapping_free_prime(void)
3795 {
3796 kern_return_t kr = KERN_FAILURE;
3797
3798 #if XNU_MONITOR
3799 unsigned int i = 0;
3800
3801 /*
3802 * Allocate the needed PPL pages up front, to minimize the chance that
3803 * we will need to call into the PPL multiple times.
3804 */
3805 for (i = 0; i < pv_alloc_initial_target; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
3806 pmap_alloc_page_for_ppl(0);
3807 }
3808
3809 for (i = 0; i < pv_kern_alloc_initial_target; i += (PAGE_SIZE / sizeof(pv_entry_t))) {
3810 pmap_alloc_page_for_ppl(0);
3811 }
3812
3813 while ((kr = mapping_free_prime_ppl()) == KERN_RESOURCE_SHORTAGE) {
3814 pmap_alloc_page_for_ppl(0);
3815 }
3816 #else
3817 kr = mapping_free_prime_internal();
3818 #endif
3819
3820 if (kr != KERN_SUCCESS) {
3821 panic("%s: failed, kr=%d",
3822 __FUNCTION__, kr);
3823 }
3824 }
3825
3826 static void
3827 ptd_bootstrap(
3828 pt_desc_t *ptdp,
3829 unsigned int ptd_cnt)
3830 {
3831 simple_lock_init(&ptd_free_list_lock, 0);
3832 // Region represented by ptdp should be cleared by pmap_bootstrap()
3833 *((void**)(&ptdp[ptd_cnt - 1])) = (void*)ptd_free_list;
3834 ptd_free_list = ptdp;
3835 ptd_free_count += ptd_cnt;
3836 ptd_preboot = FALSE;
3837 }
3838
3839 static pt_desc_t*
3840 ptd_alloc_unlinked(void)
3841 {
3842 pt_desc_t *ptdp;
3843 unsigned i;
3844
3845 if (!ptd_preboot) {
3846 pmap_simple_lock(&ptd_free_list_lock);
3847 }
3848
3849 assert(((ptd_free_list != NULL) && (ptd_free_count > 0)) ||
3850 ((ptd_free_list == NULL) && (ptd_free_count == 0)));
3851
3852 if (ptd_free_count == 0) {
3853 unsigned int ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
3854
3855 if (ptd_preboot) {
3856 ptdp = (pt_desc_t *)avail_start;
3857 avail_start += PAGE_SIZE;
3858 bzero(ptdp, PAGE_SIZE);
3859 } else {
3860 pmap_paddr_t pa;
3861
3862 pmap_simple_unlock(&ptd_free_list_lock);
3863
3864 if (pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
3865 return NULL;
3866 }
3867 ptdp = (pt_desc_t *)phystokv(pa);
3868
3869 pmap_simple_lock(&ptd_free_list_lock);
3870 }
3871
3872 *((void**)(&ptdp[ptd_cnt - 1])) = (void*)ptd_free_list;
3873 ptd_free_list = ptdp;
3874 ptd_free_count += ptd_cnt;
3875 }
3876
3877 if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
3878 ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
3879 if ((ptd_free_list == NULL) && (ptd_free_count > 1)) {
3880 ptd_free_list = ptdp + 1;
3881 }
3882 ptd_free_count--;
3883 } else {
3884 panic("%s: out of ptd entry",
3885 __FUNCTION__);
3886 }
3887
3888 if (!ptd_preboot) {
3889 pmap_simple_unlock(&ptd_free_list_lock);
3890 }
3891
3892 ptdp->pt_page.next = NULL;
3893 ptdp->pt_page.prev = NULL;
3894 ptdp->pmap = NULL;
3895
3896 for (i = 0; i < PT_INDEX_MAX; i++) {
3897 ptdp->ptd_info[i].va = (vm_offset_t)-1;
3898 ptdp->ptd_info[i].refcnt = 0;
3899 ptdp->ptd_info[i].wiredcnt = 0;
3900 }
3901
3902 return ptdp;
3903 }
3904
3905 static inline pt_desc_t*
3906 ptd_alloc(pmap_t pmap)
3907 {
3908 pt_desc_t *ptdp = ptd_alloc_unlinked();
3909
3910 if (ptdp == NULL) {
3911 return NULL;
3912 }
3913
3914 ptdp->pmap = pmap;
3915 if (pmap != kernel_pmap) {
3916 /* We should never try to reclaim kernel pagetable pages in
3917 * pmap_pages_reclaim(), so don't enter them into the list. */
3918 pmap_simple_lock(&pt_pages_lock);
3919 queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
3920 pmap_simple_unlock(&pt_pages_lock);
3921 }
3922
3923 pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
3924 return ptdp;
3925 }
3926
3927 static void
3928 ptd_deallocate(pt_desc_t *ptdp)
3929 {
3930 pmap_t pmap = ptdp->pmap;
3931
3932 if (ptd_preboot) {
3933 panic("%s: early boot, "
3934 "ptdp=%p",
3935 __FUNCTION__,
3936 ptdp);
3937 }
3938
3939 if (ptdp->pt_page.next != NULL) {
3940 pmap_simple_lock(&pt_pages_lock);
3941 queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
3942 pmap_simple_unlock(&pt_pages_lock);
3943 }
3944 pmap_simple_lock(&ptd_free_list_lock);
3945 (*(void **)ptdp) = (void *)ptd_free_list;
3946 ptd_free_list = (pt_desc_t *)ptdp;
3947 ptd_free_count++;
3948 pmap_simple_unlock(&ptd_free_list_lock);
3949 if (pmap != NULL) {
3950 pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
3951 }
3952 }
3953
3954 static void
3955 ptd_init(
3956 pt_desc_t *ptdp,
3957 pmap_t pmap,
3958 vm_map_address_t va,
3959 unsigned int level,
3960 pt_entry_t *pte_p)
3961 {
3962 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3963
3964 if (ptdp->pmap != pmap) {
3965 panic("%s: pmap mismatch, "
3966 "ptdp=%p, pmap=%p, va=%p, level=%u, pte_p=%p",
3967 __FUNCTION__,
3968 ptdp, pmap, (void*)va, level, pte_p);
3969 }
3970
3971 assert(level > pt_attr_root_level(pt_attr));
3972 ptd_info_t *ptd_info = ptd_get_info(ptdp, pte_p);
3973 ptd_info->va = (vm_offset_t) va & ~pt_attr_ln_pt_offmask(pt_attr, level - 1);
3974
3975 if (level < pt_attr_leaf_level(pt_attr)) {
3976 ptd_info->refcnt = PT_DESC_REFCOUNT;
3977 }
3978 }
3979
3980
3981 boolean_t
3982 pmap_valid_address(
3983 pmap_paddr_t addr)
3984 {
3985 return pa_valid(addr);
3986 }
3987
3988 #if (__ARM_VMSA__ == 7)
3989
3990 /*
3991 * Given an offset and a map, compute the address of the
3992 * corresponding translation table entry.
3993 */
3994 static inline tt_entry_t *
3995 pmap_tte(pmap_t pmap,
3996 vm_map_address_t addr)
3997 {
3998 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3999
4000 if (!(tte_index(pmap, pt_attr, addr) < pmap->tte_index_max)) {
4001 return (tt_entry_t *)NULL;
4002 }
4003 return &pmap->tte[tte_index(pmap, pt_attr, addr)];
4004 }
4005
4006
4007 /*
4008 * Given an offset and a map, compute the address of the
4009 * pte. If the address is invalid with respect to the map
4010 * then PT_ENTRY_NULL is returned (and the map may need to grow).
4011 *
4012 * This is only used internally.
4013 */
4014 static inline pt_entry_t *
4015 pmap_pte(
4016 pmap_t pmap,
4017 vm_map_address_t addr)
4018 {
4019 pt_entry_t *ptp;
4020 tt_entry_t *ttp;
4021 tt_entry_t tte;
4022
4023 ttp = pmap_tte(pmap, addr);
4024 if (ttp == (tt_entry_t *)NULL) {
4025 return PT_ENTRY_NULL;
4026 }
4027 tte = *ttp;
4028 #if MACH_ASSERT
4029 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
4030 panic("%s: Attempt to demote L1 block, tte=0x%lx, "
4031 "pmap=%p, addr=%p",
4032 __FUNCTION__, (unsigned long)tte,
4033 pmap, (void*)addr);
4034 }
4035 #endif
4036 if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
4037 return PT_ENTRY_NULL;
4038 }
4039 ptp = (pt_entry_t *) ttetokv(tte) + pte_index(pmap, pt_attr, addr);
4040 return ptp;
4041 }
4042
4043 __unused static inline tt_entry_t *
4044 pmap_ttne(pmap_t pmap,
4045 unsigned int target_level,
4046 vm_map_address_t addr)
4047 {
4048 tt_entry_t * ret_ttep = NULL;
4049
4050 switch (target_level) {
4051 case 1:
4052 ret_ttep = pmap_tte(pmap, addr);
4053 break;
4054 case 2:
4055 ret_ttep = (tt_entry_t *)pmap_pte(pmap, addr);
4056 break;
4057 default:
4058 panic("%s: bad level, "
4059 "pmap=%p, target_level=%u, addr=%p",
4060 __FUNCTION__,
4061 pmap, target_level, (void *)addr);
4062 }
4063
4064 return ret_ttep;
4065 }
4066
4067 #else
4068
4069 static inline tt_entry_t *
4070 pmap_ttne(pmap_t pmap,
4071 unsigned int target_level,
4072 vm_map_address_t addr)
4073 {
4074 tt_entry_t * ttp = NULL;
4075 tt_entry_t * ttep = NULL;
4076 tt_entry_t tte = ARM_TTE_EMPTY;
4077 unsigned int cur_level;
4078
4079 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4080
4081 ttp = pmap->tte;
4082
4083 assert(target_level <= pt_attr->pta_max_level);
4084
4085 for (cur_level = pt_attr->pta_root_level; cur_level <= target_level; cur_level++) {
4086 ttep = &ttp[ttn_index(pmap, pt_attr, addr, cur_level)];
4087
4088 if (cur_level == target_level) {
4089 break;
4090 }
4091
4092 tte = *ttep;
4093
4094 #if MACH_ASSERT
4095 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
4096 panic("%s: Attempt to demote L%u block, tte=0x%llx, "
4097 "pmap=%p, target_level=%u, addr=%p",
4098 __FUNCTION__, cur_level, tte,
4099 pmap, target_level, (void*)addr);
4100 }
4101 #endif
4102 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
4103 return TT_ENTRY_NULL;
4104 }
4105
4106 ttp = (tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
4107 }
4108
4109 return ttep;
4110 }
4111
4112 /*
4113 * Given an offset and a map, compute the address of the level 1 translation table entry.
4114 * If the translation is invalid then TT_ENTRY_NULL is returned.
4115 */
4116 static inline tt_entry_t *
4117 pmap_tt1e(pmap_t pmap,
4118 vm_map_address_t addr)
4119 {
4120 return pmap_ttne(pmap, PMAP_TT_L1_LEVEL, addr);
4121 }
4122
4123 /*
4124 * Given an offset and a map, compute the address of the level 2 translation table entry.
4125 * If the translation is invalid then TT_ENTRY_NULL is returned.
4126 */
4127 static inline tt_entry_t *
4128 pmap_tt2e(pmap_t pmap,
4129 vm_map_address_t addr)
4130 {
4131 return pmap_ttne(pmap, PMAP_TT_L2_LEVEL, addr);
4132 }
4133
4134
4135 /*
4136 * Given an offset and a map, compute the address of the level 3 translation table entry.
4137 * If the translation is invalid then PT_ENTRY_NULL is returned.
4138 */
4139 static inline pt_entry_t *
4140 pmap_tt3e(
4141 pmap_t pmap,
4142 vm_map_address_t addr)
4143 {
4144 return (pt_entry_t*)pmap_ttne(pmap, PMAP_TT_L3_LEVEL, addr);
4145 }
4146
4147 static inline tt_entry_t *
4148 pmap_tte(
4149 pmap_t pmap,
4150 vm_map_address_t addr)
4151 {
4152 return pmap_tt2e(pmap, addr);
4153 }
4154
4155 static inline pt_entry_t *
4156 pmap_pte(
4157 pmap_t pmap,
4158 vm_map_address_t addr)
4159 {
4160 return pmap_tt3e(pmap, addr);
4161 }
4162
4163 #endif
4164
4165
4166
4167
4168
4169
4170 /*
4171 * Map memory at initialization. The physical addresses being
4172 * mapped are not managed and are never unmapped.
4173 *
4174 * For now, VM is already on; we only need to map the
4175 * specified memory.
4176 */
4177 vm_map_address_t
4178 pmap_map(
4179 vm_map_address_t virt,
4180 vm_offset_t start,
4181 vm_offset_t end,
4182 vm_prot_t prot,
4183 unsigned int flags)
4184 {
4185 kern_return_t kr;
4186 vm_size_t ps;
4187
4188 ps = PAGE_SIZE;
4189 while (start < end) {
4190 kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
4191 prot, VM_PROT_NONE, flags, FALSE);
4192
4193 if (kr != KERN_SUCCESS) {
4194 panic("%s: failed pmap_enter, "
4195 "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
4196 __FUNCTION__,
4197 (void *) virt, (void *) start, (void *) end, prot, flags);
4198 }
4199
4200 virt += ps;
4201 start += ps;
4202 }
4203 return virt;
4204 }
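/*
 * Hypothetical usage sketch (the addresses and protections are made up): map a
 * physical range one page at a time into the kernel pmap during bringup. The
 * return value is the caller's virtual cursor advanced past the new mappings.
 *
 *     vm_map_address_t va = virtual_space_start;
 *     va = pmap_map(va, phys_start, phys_end,
 *         VM_PROT_READ | VM_PROT_WRITE, 0);
 */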
4205
4206 vm_map_address_t
4207 pmap_map_bd_with_options(
4208 vm_map_address_t virt,
4209 vm_offset_t start,
4210 vm_offset_t end,
4211 vm_prot_t prot,
4212 int32_t options)
4213 {
4214 pt_entry_t tmplate;
4215 pt_entry_t *ptep;
4216 vm_map_address_t vaddr;
4217 vm_offset_t paddr;
4218 pt_entry_t mem_attr;
4219
4220 switch (options & PMAP_MAP_BD_MASK) {
4221 case PMAP_MAP_BD_WCOMB:
4222 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
4223 #if (__ARM_VMSA__ > 7)
4224 mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
4225 #else
4226 mem_attr |= ARM_PTE_SH;
4227 #endif
4228 break;
4229 case PMAP_MAP_BD_POSTED:
4230 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
4231 break;
4232 case PMAP_MAP_BD_POSTED_REORDERED:
4233 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
4234 break;
4235 case PMAP_MAP_BD_POSTED_COMBINED_REORDERED:
4236 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
4237 break;
4238 default:
4239 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
4240 break;
4241 }
4242
4243 tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
4244 mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
4245 #if __ARM_KERNEL_PROTECT__
4246 tmplate |= ARM_PTE_NG;
4247 #endif /* __ARM_KERNEL_PROTECT__ */
4248
4249 vaddr = virt;
4250 paddr = start;
4251 while (paddr < end) {
4252 ptep = pmap_pte(kernel_pmap, vaddr);
4253 if (ptep == PT_ENTRY_NULL) {
4254 panic("%s: no PTE for vaddr=%p, "
4255 "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
4256 __FUNCTION__, (void*)vaddr,
4257 (void*)virt, (void*)start, (void*)end, prot, options);
4258 }
4259
4260 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
4261 WRITE_PTE_STRONG(ptep, tmplate);
4262
4263 pte_increment_pa(tmplate);
4264 vaddr += PAGE_SIZE;
4265 paddr += PAGE_SIZE;
4266 }
4267
4268 if (end >= start) {
4269 flush_mmu_tlb_region(virt, (unsigned)(end - start));
4270 }
4271
4272 return vaddr;
4273 }
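/*
 * Hypothetical usage sketch: map a device register window write-combined. The
 * PMAP_MAP_BD_* option only selects the memory-attribute index written into
 * the PTE template above; the access permission is derived from prot, and the
 * mapping is always kernel-only and non-executable.
 *
 *     pmap_map_bd_with_options(io_va, io_pa, io_pa + io_len,
 *         VM_PROT_READ | VM_PROT_WRITE, PMAP_MAP_BD_WCOMB);
 */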
4274
4275 /*
4276 * Back-door routine for mapping kernel VM at initialization.
4277 * Useful for mapping memory outside the range
4278 * [vm_first_phys, vm_last_phys] (i.e., devices).
4279 * Otherwise like pmap_map.
4280 */
4281 vm_map_address_t
4282 pmap_map_bd(
4283 vm_map_address_t virt,
4284 vm_offset_t start,
4285 vm_offset_t end,
4286 vm_prot_t prot)
4287 {
4288 pt_entry_t tmplate;
4289 pt_entry_t *ptep;
4290 vm_map_address_t vaddr;
4291 vm_offset_t paddr;
4292
4293 /* not cacheable and not buffered */
4294 tmplate = pa_to_pte(start)
4295 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
4296 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
4297 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
4298 #if __ARM_KERNEL_PROTECT__
4299 tmplate |= ARM_PTE_NG;
4300 #endif /* __ARM_KERNEL_PROTECT__ */
4301
4302 vaddr = virt;
4303 paddr = start;
4304 while (paddr < end) {
4305 ptep = pmap_pte(kernel_pmap, vaddr);
4306 if (ptep == PT_ENTRY_NULL) {
4307 panic("pmap_map_bd");
4308 }
4309 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
4310 WRITE_PTE_STRONG(ptep, tmplate);
4311
4312 pte_increment_pa(tmplate);
4313 vaddr += PAGE_SIZE;
4314 paddr += PAGE_SIZE;
4315 }
4316
4317 if (end >= start) {
4318 flush_mmu_tlb_region(virt, (unsigned)(end - start));
4319 }
4320
4321 return vaddr;
4322 }
4323
4324 /*
4325 * Back-door routine for mapping kernel VM at initialization.
4326 * Useful for mapping memory specific physical addresses in early
4327 * boot (i.e., before kernel_map is initialized).
4328 *
4329 * Maps are in the VM_HIGH_KERNEL_WINDOW area.
4330 */
4331
4332 vm_map_address_t
4333 pmap_map_high_window_bd(
4334 vm_offset_t pa_start,
4335 vm_size_t len,
4336 vm_prot_t prot)
4337 {
4338 pt_entry_t *ptep, pte;
4339 #if (__ARM_VMSA__ == 7)
4340 vm_map_address_t va_start = VM_HIGH_KERNEL_WINDOW;
4341 vm_map_address_t va_max = VM_MAX_KERNEL_ADDRESS;
4342 #else
4343 vm_map_address_t va_start = VREGION1_START;
4344 vm_map_address_t va_max = VREGION1_START + VREGION1_SIZE;
4345 #endif
4346 vm_map_address_t va_end;
4347 vm_map_address_t va;
4348 vm_size_t offset;
4349
4350 offset = pa_start & PAGE_MASK;
4351 pa_start -= offset;
4352 len += offset;
4353
4354 if (len > (va_max - va_start)) {
4355 panic("%s: area too large, "
4356 "pa_start=%p, len=%p, prot=0x%x",
4357 __FUNCTION__,
4358 (void*)pa_start, (void*)len, prot);
4359 }
4360
4361 scan:
4362 for (; va_start < va_max; va_start += PAGE_SIZE) {
4363 ptep = pmap_pte(kernel_pmap, va_start);
4364 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
4365 if (*ptep == ARM_PTE_TYPE_FAULT) {
4366 break;
4367 }
4368 }
4369 if (va_start >= va_max) {
4370 panic("%s: insufficient pages, "
4371 "pa_start=%p, len=%p, prot=0x%x",
4372 __FUNCTION__,
4373 (void*)pa_start, (void*)len, prot);
4374 }
4375
4376 for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
4377 ptep = pmap_pte(kernel_pmap, va_end);
4378 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
4379 if (*ptep != ARM_PTE_TYPE_FAULT) {
4380 va_start = va_end + PAGE_SIZE;
4381 goto scan;
4382 }
4383 }
4384
4385 for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
4386 ptep = pmap_pte(kernel_pmap, va);
4387 pte = pa_to_pte(pa_start)
4388 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
4389 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
4390 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
4391 #if (__ARM_VMSA__ > 7)
4392 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
4393 #else
4394 pte |= ARM_PTE_SH;
4395 #endif
4396 #if __ARM_KERNEL_PROTECT__
4397 pte |= ARM_PTE_NG;
4398 #endif /* __ARM_KERNEL_PROTECT__ */
4399 WRITE_PTE_STRONG(ptep, pte);
4400 }
4401 PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len, false);
4402 #if KASAN
4403 kasan_notify_address(va_start, len);
4404 #endif
4405 return va_start;
4406 }
4407
4408 #define PMAP_ALIGN(addr, align) (((addr) + ((align) - 1)) & ~((align) - 1))
4409
4410 static void
4411 pmap_compute_pv_targets(void)
4412 {
4413 DTEntry entry;
4414 void const *prop = NULL;
4415 int err;
4416 unsigned int prop_size;
4417
4418 err = SecureDTLookupEntry(NULL, "/defaults", &entry);
4419 assert(err == kSuccess);
4420
4421 if (kSuccess == SecureDTGetProperty(entry, "pmap-pv-count", &prop, &prop_size)) {
4422 if (prop_size != sizeof(pv_alloc_initial_target)) {
4423 panic("pmap-pv-count property is not a 32-bit integer");
4424 }
4425 pv_alloc_initial_target = *((uint32_t const *)prop);
4426 }
4427
4428 if (kSuccess == SecureDTGetProperty(entry, "pmap-kern-pv-count", &prop, &prop_size)) {
4429 if (prop_size != sizeof(pv_kern_alloc_initial_target)) {
4430 panic("pmap-kern-pv-count property is not a 32-bit integer");
4431 }
4432 pv_kern_alloc_initial_target = *((uint32_t const *)prop);
4433 }
4434
4435 if (kSuccess == SecureDTGetProperty(entry, "pmap-kern-pv-min", &prop, &prop_size)) {
4436 if (prop_size != sizeof(pv_kern_low_water_mark)) {
4437 panic("pmap-kern-pv-min property is not a 32-bit integer");
4438 }
4439 pv_kern_low_water_mark = *((uint32_t const *)prop);
4440 }
4441 }
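/*
 * Illustration of the device tree overrides consumed above (values are
 * hypothetical and shown in .dts-style notation purely for readability): a
 * /defaults node carrying 32-bit cells such as
 *
 *     pmap-pv-count      = <0x00002000>;    // general PV target
 *     pmap-kern-pv-count = <0x00000800>;    // kernel PV target
 *     pmap-kern-pv-min   = <0x00000400>;    // kernel reserve low-water mark
 *
 * replaces pv_alloc_initial_target, pv_kern_alloc_initial_target and
 * pv_kern_low_water_mark respectively; a property that is not exactly four
 * bytes wide panics.
 */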
4442
4443
4444 static uint32_t
4445 pmap_compute_max_asids(void)
4446 {
4447 DTEntry entry;
4448 void const *prop = NULL;
4449 uint32_t max_asids;
4450 int err;
4451 unsigned int prop_size;
4452
4453 err = SecureDTLookupEntry(NULL, "/defaults", &entry);
4454 assert(err == kSuccess);
4455
4456 if (kSuccess != SecureDTGetProperty(entry, "pmap-max-asids", &prop, &prop_size)) {
4457 /* TODO: consider allowing maxproc limits to be scaled earlier so that
4458 * we can choose a more flexible default value here. */
4459 return MAX_ASIDS;
4460 }
4461
4462 if (prop_size != sizeof(max_asids)) {
4463 panic("pmap-max-asids property is not a 32-bit integer");
4464 }
4465
4466 max_asids = *((uint32_t const *)prop);
4467 /* Round up to the nearest 64 to make things a bit easier for the Pseudo-LRU allocator. */
4468 max_asids = (max_asids + 63) & ~63UL;
4469
4470 if (((max_asids + MAX_HW_ASIDS) / (MAX_HW_ASIDS + 1)) > MIN(MAX_HW_ASIDS, UINT8_MAX)) {
4471 /* currently capped by size of pmap->sw_asid */
4472 panic("pmap-max-asids too large");
4473 }
4474 if (max_asids == 0) {
4475 panic("pmap-max-asids cannot be zero");
4476 }
4477 return max_asids;
4478 }
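/*
 * Worked example of the rounding and cap check above (a MAX_HW_ASIDS of 255 is
 * assumed purely for illustration): a pmap-max-asids value of 1000 rounds up
 * to 1024, and the guard evaluates
 *
 *     (1024 + 255) / (255 + 1) == 4
 *
 * i.e. ceil(1024 / 256) == 4 software-ASID chunks, comfortably below
 * MIN(MAX_HW_ASIDS, UINT8_MAX), so 1024 is accepted. A value needing more
 * chunks than fit in pmap->sw_asid panics instead.
 */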
4479
4480
4481 static vm_size_t
4482 pmap_compute_io_rgns(void)
4483 {
4484 DTEntry entry;
4485 pmap_io_range_t const *ranges;
4486 uint64_t rgn_end;
4487 void const *prop = NULL;
4488 int err;
4489 unsigned int prop_size;
4490
4491 err = SecureDTLookupEntry(NULL, "/defaults", &entry);
4492 assert(err == kSuccess);
4493
4494 if (kSuccess != SecureDTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
4495 return 0;
4496 }
4497
4498 ranges = prop;
4499 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
4500 if (ranges[i].addr & PAGE_MASK) {
4501 panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
4502 }
4503 if (ranges[i].len & PAGE_MASK) {
4504 panic("pmap I/O region %u length 0x%llx is not page-aligned", i, ranges[i].len);
4505 }
4506 if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end)) {
4507 panic("pmap I/O region %u addr 0x%llx length 0x%llx wraps around", i, ranges[i].addr, ranges[i].len);
4508 }
4509 if (((ranges[i].addr <= gPhysBase) && (rgn_end > gPhysBase)) ||
4510 ((ranges[i].addr < avail_end) && (rgn_end >= avail_end)) ||
4511 ((ranges[i].addr > gPhysBase) && (rgn_end < avail_end))) {
4512 panic("pmap I/O region %u addr 0x%llx length 0x%llx overlaps physical memory", i, ranges[i].addr, ranges[i].len);
4513 }
4514
4515 ++num_io_rgns;
4516 }
4517
4518 return num_io_rgns * sizeof(*ranges);
4519 }
4520
4521 /*
4522 * return < 0 for a < b
4523 * 0 for a == b
4524 * > 0 for a > b
4525 */
4526 typedef int (*cmpfunc_t)(const void *a, const void *b);
4527
4528 extern void
4529 qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
4530
4531 static int
4532 cmp_io_rgns(const void *a, const void *b)
4533 {
4534 const pmap_io_range_t *range_a = a;
4535 const pmap_io_range_t *range_b = b;
4536 if ((range_b->addr + range_b->len) <= range_a->addr) {
4537 return 1;
4538 } else if ((range_a->addr + range_a->len) <= range_b->addr) {
4539 return -1;
4540 } else {
4541 return 0;
4542 }
4543 }
4544
4545 static void
4546 pmap_load_io_rgns(void)
4547 {
4548 DTEntry entry;
4549 pmap_io_range_t const *ranges;
4550 void const *prop = NULL;
4551 int err;
4552 unsigned int prop_size;
4553
4554 if (num_io_rgns == 0) {
4555 return;
4556 }
4557
4558 err = SecureDTLookupEntry(NULL, "/defaults", &entry);
4559 assert(err == kSuccess);
4560
4561 err = SecureDTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
4562 assert(err == kSuccess);
4563
4564 ranges = prop;
4565 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
4566 io_attr_table[i] = ranges[i];
4567 }
4568
4569 qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
4570 }
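/*
 * For illustration: three page-aligned, non-overlapping ranges declared with
 * base addresses 0x200000, 0x100000 and 0x300000 end up in io_attr_table as
 * { 0x100000, 0x200000, 0x300000 } after the qsort() above, since
 * cmp_io_rgns() orders disjoint ranges by address and treats overlapping ones
 * as equal. Later lookups can therefore assume a sorted, disjoint table.
 */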
4571
4572 #if __arm64__
4573 /*
4574 * pmap_get_arm64_prot
4575 *
4576 * return effective armv8 VMSA block protections including
4577 * table AP/PXN/XN overrides of a pmap entry
4578 *
4579 */
4580
4581 uint64_t
4582 pmap_get_arm64_prot(
4583 pmap_t pmap,
4584 vm_offset_t addr)
4585 {
4586 tt_entry_t tte = 0;
4587 unsigned int level = 0;
4588 uint64_t tte_type = 0;
4589 uint64_t effective_prot_bits = 0;
4590 uint64_t aggregate_tte = 0;
4591 uint64_t table_ap_bits = 0, table_xn = 0, table_pxn = 0;
4592 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4593
4594 for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
4595 tte = *pmap_ttne(pmap, level, addr);
4596
4597 if (!(tte & ARM_TTE_VALID)) {
4598 return 0;
4599 }
4600
4601 tte_type = tte & ARM_TTE_TYPE_MASK;
4602
4603 if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
4604 (level == pt_attr->pta_max_level)) {
4605 /* Block or page mapping; both have the same protection bit layout. */
4606 break;
4607 } else if (tte_type == ARM_TTE_TYPE_TABLE) {
4608 /* All of the table bits we care about are overrides, so just OR them together. */
4609 aggregate_tte |= tte;
4610 }
4611 }
4612
4613 table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
4614 table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
4615 table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);
4616
4617 /* Start with the PTE bits. */
4618 effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);
4619
4620 /* Table AP bits mask out block/page AP bits */
4621 effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));
4622
4623 /* XN/PXN bits can be OR'd in. */
4624 effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
4625 effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);
4626
4627 return effective_prot_bits;
4628 }
4629 #endif /* __arm64__ */
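/*
 * Example of how the hierarchical overrides combine in pmap_get_arm64_prot(),
 * for illustration: if the walk ends at a leaf PTE that is writable and
 * executable but some intermediate table entry carried ARM_TTE_TABLE_XN, the
 * aggregated table bits OR ARM_PTE_NX into the result even though the leaf
 * did not set it; likewise any APTable restriction collected on the way down
 * strips the corresponding AP bits from the leaf's AP field. The value
 * returned is therefore the effective protection of the mapping, not the raw
 * leaf bits.
 */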
4630
4631
4632 /*
4633 * Bootstrap the system enough to run with virtual memory.
4634 *
4635 * The early VM initialization code has already allocated
4636 * the first CPU's translation table and made entries for
4637 * all the one-to-one mappings to be found there.
4638 *
4639 * We must set up the kernel pmap structures, the
4640 * physical-to-virtual translation lookup tables for the
4641 * physical memory to be managed (between avail_start and
4642 * avail_end).
4643 *
4644 * Map the kernel's code and data, and allocate the system page table.
4645 * Page_size must already be set.
4646 *
4647 * Parameters:
4648 * first_avail first available physical page -
4649 * after kernel page tables
4650 * avail_start PA of first managed physical page
4651 * avail_end PA of last managed physical page
4652 */
4653
4654 void
4655 pmap_bootstrap(
4656 vm_offset_t vstart)
4657 {
4658 pmap_paddr_t pmap_struct_start;
4659 vm_size_t pv_head_size;
4660 vm_size_t ptd_root_table_size;
4661 vm_size_t pp_attr_table_size;
4662 vm_size_t io_attr_table_size;
4663 vm_size_t asid_table_size;
4664 unsigned int npages;
4665 vm_map_offset_t maxoffset;
4666
4667 lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);
4668
4669 #if XNU_MONITOR
4670
4671 #if DEVELOPMENT || DEBUG
4672 PE_parse_boot_argn("-unsafe_kernel_text", &pmap_ppl_disable, sizeof(pmap_ppl_disable));
4673 #endif
4674
4675 #if CONFIG_CSR_FROM_DT
4676 if (csr_unsafe_kernel_text) {
4677 pmap_ppl_disable = true;
4678 }
4679 #endif /* CONFIG_CSR_FROM_DT */
4680
4681 #endif /* XNU_MONITOR */
4682
4683 #if DEVELOPMENT || DEBUG
4684 if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
4685 kprintf("Kernel traces for pmap operations enabled\n");
4686 }
4687 #endif
4688
4689 /*
4690 * Initialize the kernel pmap.
4691 */
4692 pmap_stamp = 1;
4693 #if ARM_PARAMETERIZED_PMAP
4694 kernel_pmap->pmap_pt_attr = native_pt_attr;
4695 #endif /* ARM_PARAMETERIZED_PMAP */
4696 #if HAS_APPLE_PAC
4697 kernel_pmap->disable_jop = 0;
4698 #endif /* HAS_APPLE_PAC */
4699 kernel_pmap->tte = cpu_tte;
4700 kernel_pmap->ttep = cpu_ttep;
4701 #if (__ARM_VMSA__ > 7)
4702 kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
4703 #else
4704 kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
4705 #endif
4706 kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
4707 os_atomic_init(&kernel_pmap->ref_count, 1);
4708 kernel_pmap->gc_status = 0;
4709 kernel_pmap->nx_enabled = TRUE;
4710 #ifdef __arm64__
4711 kernel_pmap->is_64bit = TRUE;
4712 #else
4713 kernel_pmap->is_64bit = FALSE;
4714 #endif
4715 kernel_pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);
4716
4717 #if ARM_PARAMETERIZED_PMAP
4718 kernel_pmap->pmap_pt_attr = native_pt_attr;
4719 #endif /* ARM_PARAMETERIZED_PMAP */
4720
4721 kernel_pmap->nested_region_addr = 0x0ULL;
4722 kernel_pmap->nested_region_size = 0x0ULL;
4723 kernel_pmap->nested_region_asid_bitmap = NULL;
4724 kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;
4725
4726 #if (__ARM_VMSA__ == 7)
4727 kernel_pmap->tte_index_max = 4 * NTTES;
4728 #endif
4729 kernel_pmap->hw_asid = 0;
4730 kernel_pmap->sw_asid = 0;
4731
4732 pmap_lock_init(kernel_pmap);
4733 memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));
4734
4735 /* allocate space for and initialize the bookkeeping structures */
4736 io_attr_table_size = pmap_compute_io_rgns();
4737 npages = (unsigned int)atop(mem_size);
4738 pp_attr_table_size = npages * sizeof(pp_attr_t);
4739 pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
4740 // allocate enough initial PTDs to map twice the available physical memory
4741 ptd_root_table_size = sizeof(pt_desc_t) * (mem_size / ((PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES)) * 2;
4742 pmap_max_asids = pmap_compute_max_asids();
4743 pmap_asid_plru = (pmap_max_asids > MAX_HW_ASIDS);
4744 PE_parse_boot_argn("pmap_asid_plru", &pmap_asid_plru, sizeof(pmap_asid_plru));
4745 /* Align the range of available hardware ASIDs to a multiple of 64 to enable the
4746 * masking used by the PLRU scheme. This means we must handle the case in which
4747 * the returned hardware ASID is MAX_HW_ASIDS, which we do in alloc_asid() and free_asid(). */
4748 _Static_assert(sizeof(asid_plru_bitmap[0]) == sizeof(uint64_t), "bitmap_t is not a 64-bit integer");
4749 _Static_assert(((MAX_HW_ASIDS + 1) % 64) == 0, "MAX_HW_ASIDS + 1 is not divisible by 64");
4750 asid_chunk_size = (pmap_asid_plru ? (MAX_HW_ASIDS + 1) : MAX_HW_ASIDS);
4751
4752 asid_table_size = sizeof(*asid_bitmap) * BITMAP_LEN(pmap_max_asids);
4753
4754 pmap_compute_pv_targets();
4755
4756 pmap_struct_start = avail_start;
4757
4758 pp_attr_table = (pp_attr_t *) phystokv(avail_start);
4759 avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
4760 io_attr_table = (pmap_io_range_t *) phystokv(avail_start);
4761 avail_start = PMAP_ALIGN(avail_start + io_attr_table_size, __alignof(pv_entry_t*));
4762 pv_head_table = (pv_entry_t **) phystokv(avail_start);
4763 avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
4764 ptd_root_table = (pt_desc_t *)phystokv(avail_start);
4765 avail_start = PMAP_ALIGN(avail_start + ptd_root_table_size, __alignof(bitmap_t));
4766 asid_bitmap = (bitmap_t*)phystokv(avail_start);
4767 avail_start = round_page(avail_start + asid_table_size);
4768
4769 memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);
4770
4771 pmap_load_io_rgns();
4772 ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));
4773
4774 #if XNU_MONITOR
4775 pmap_array_begin = (void *)phystokv(avail_start);
4776 pmap_array = pmap_array_begin;
4777 avail_start += round_page(PMAP_ARRAY_SIZE * sizeof(struct pmap));
4778 pmap_array_end = (void *)phystokv(avail_start);
4779
4780 pmap_array_count = ((pmap_array_end - pmap_array_begin) / sizeof(struct pmap));
4781
4782 pmap_bootstrap_pmap_free_list();
4783
4784 pmap_ledger_ptr_array_begin = (void *)phystokv(avail_start);
4785 pmap_ledger_ptr_array = pmap_ledger_ptr_array_begin;
4786 avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(void*));
4787 pmap_ledger_ptr_array_end = (void *)phystokv(avail_start);
4788
4789 pmap_ledger_refcnt_begin = (void *)phystokv(avail_start);
4790 pmap_ledger_refcnt = pmap_ledger_refcnt_begin;
4791 avail_start += round_page(MAX_PMAP_LEDGERS * sizeof(os_refcnt_t));
4792 pmap_ledger_refcnt_end = (void *)phystokv(avail_start);
4793 #endif
4794 pmap_cpu_data_array_init();
4795
4796 vm_first_phys = gPhysBase;
4797 vm_last_phys = trunc_page(avail_end);
4798
4799 queue_init(&map_pmap_list);
4800 queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
4801 free_page_size_tt_list = TT_FREE_ENTRY_NULL;
4802 free_page_size_tt_count = 0;
4803 free_page_size_tt_max = 0;
4804 free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
4805 free_two_page_size_tt_count = 0;
4806 free_two_page_size_tt_max = 0;
4807 free_tt_list = TT_FREE_ENTRY_NULL;
4808 free_tt_count = 0;
4809 free_tt_max = 0;
4810
4811 queue_init(&pt_page_list);
4812
4813 pmap_pages_request_count = 0;
4814 pmap_pages_request_acum = 0;
4815 pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;
4816
4817 virtual_space_start = vstart;
4818 virtual_space_end = VM_MAX_KERNEL_ADDRESS;
4819
4820 bitmap_full(&asid_bitmap[0], pmap_max_asids);
4821 bitmap_full(&asid_plru_bitmap[0], MAX_HW_ASIDS);
4822 // Clear the highest-order bit, which corresponds to MAX_HW_ASIDS + 1
4823 asid_plru_bitmap[MAX_HW_ASIDS >> 6] = ~(1ULL << 63);
4824
4825
4826
4827 if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
4828 maxoffset = trunc_page(maxoffset);
4829 if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
4830 && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
4831 arm_pmap_max_offset_default = maxoffset;
4832 }
4833 }
4834 #if defined(__arm64__)
4835 if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
4836 maxoffset = trunc_page(maxoffset);
4837 if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
4838 && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
4839 arm64_pmap_max_offset_default = maxoffset;
4840 }
4841 }
4842 #endif
4843
4844 PE_parse_boot_argn("pmap_panic_dev_wimg_on_managed", &pmap_panic_dev_wimg_on_managed, sizeof(pmap_panic_dev_wimg_on_managed));
4845
4846
4847 #if MACH_ASSERT
4848 PE_parse_boot_argn("pmap_stats_assert",
4849 &pmap_stats_assert,
4850 sizeof(pmap_stats_assert));
4851 PE_parse_boot_argn("vm_footprint_suspend_allowed",
4852 &vm_footprint_suspend_allowed,
4853 sizeof(vm_footprint_suspend_allowed));
4854 #endif /* MACH_ASSERT */
4855
4856 #if KASAN
4857 /* Shadow the CPU copy windows, as they fall outside of the physical aperture */
4858 kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
4859 #endif /* KASAN */
4860 }
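/*
 * Rough picture of the bootstrap carve-out performed above (the order and the
 * size formulas are those computed in pmap_bootstrap(); actual byte counts
 * depend on mem_size and the device tree):
 *
 *     old avail_start
 *       pp_attr_table    npages * sizeof(pp_attr_t)
 *       io_attr_table    num_io_rgns * sizeof(pmap_io_range_t)
 *       pv_head_table    round_page(npages * sizeof(pv_entry_t *))
 *       ptd_root_table   enough pt_desc_t to cover twice mem_size
 *       asid_bitmap      BITMAP_LEN(pmap_max_asids) words
 *     new avail_start (rounded to a page; the whole region is zero-filled,
 *     and under XNU_MONITOR the pmap array and ledger arrays follow it)
 */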
4861
4862 #if XNU_MONITOR
4863
4864 static inline void
4865 pa_set_range_monitor(pmap_paddr_t start_pa, pmap_paddr_t end_pa)
4866 {
4867 pmap_paddr_t cur_pa;
4868 for (cur_pa = start_pa; cur_pa < end_pa; cur_pa += ARM_PGBYTES) {
4869 assert(pa_valid(cur_pa));
4870 pa_set_monitor(cur_pa);
4871 }
4872 }
4873
4874 static void
4875 pa_set_range_xprr_perm(pmap_paddr_t start_pa,
4876 pmap_paddr_t end_pa,
4877 unsigned int expected_perm,
4878 unsigned int new_perm)
4879 {
4880 vm_offset_t start_va = phystokv(start_pa);
4881 vm_offset_t end_va = start_va + (end_pa - start_pa);
4882
4883 pa_set_range_monitor(start_pa, end_pa);
4884 pmap_set_range_xprr_perm(start_va, end_va, expected_perm, new_perm);
4885 }
4886
4887 static void
4888 pmap_lockdown_kc(void)
4889 {
4890 extern vm_offset_t vm_kernelcache_base;
4891 extern vm_offset_t vm_kernelcache_top;
4892 pmap_paddr_t start_pa = kvtophys(vm_kernelcache_base);
4893 pmap_paddr_t end_pa = start_pa + (vm_kernelcache_top - vm_kernelcache_base);
4894 pmap_paddr_t cur_pa = start_pa;
4895 vm_offset_t cur_va = vm_kernelcache_base;
4896 while (cur_pa < end_pa) {
4897 vm_size_t range_size = end_pa - cur_pa;
4898 vm_offset_t ptov_va = phystokv_range(cur_pa, &range_size);
4899 if (ptov_va != cur_va) {
4900 /*
4901 * If the physical address maps back to a virtual address that is non-linear
4902 * w.r.t. the kernelcache, that means it corresponds to memory that will be
4903 * reclaimed by the OS and should therefore not be locked down.
4904 */
4905 cur_pa += range_size;
4906 cur_va += range_size;
4907 continue;
4908 }
4909 unsigned int pai = (unsigned int)pa_index(cur_pa);
4910 pv_entry_t **pv_h = pai_to_pvh(pai);
4911
4912 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
4913
4914 if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
4915 panic("pai %d already locked down", pai);
4916 }
4917 pvh_set_flags(pv_h, pvh_flags | PVH_FLAG_LOCKDOWN);
4918 cur_pa += ARM_PGBYTES;
4919 cur_va += ARM_PGBYTES;
4920 }
4921 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
4922 extern uint64_t ctrr_ro_test;
4923 extern uint64_t ctrr_nx_test;
4924 pmap_paddr_t exclude_pages[] = {kvtophys((vm_offset_t)&ctrr_ro_test), kvtophys((vm_offset_t)&ctrr_nx_test)};
4925 for (unsigned i = 0; i < (sizeof(exclude_pages) / sizeof(exclude_pages[0])); ++i) {
4926 pv_entry_t **pv_h = pai_to_pvh(pa_index(exclude_pages[i]));
4927 pvh_set_flags(pv_h, pvh_get_flags(pv_h) & ~PVH_FLAG_LOCKDOWN);
4928 }
4929 #endif
4930 }
4931
4932 void
4933 pmap_static_allocations_done(void)
4934 {
4935 pmap_paddr_t monitor_start_pa;
4936 pmap_paddr_t monitor_end_pa;
4937
4938 /*
4939 * Protect the bootstrap (V=P and V->P) page tables.
4940 *
4941 * These bootstrap allocations will be used primarily for page tables.
4942 * If we wish to secure the page tables, we need to start by marking
4943 * these bootstrap allocations as pages that we want to protect.
4944 */
4945 monitor_start_pa = kvtophys((vm_offset_t)&bootstrap_pagetables);
4946 monitor_end_pa = monitor_start_pa + BOOTSTRAP_TABLE_SIZE;
4947
4948 /* The bootstrap page tables are mapped RW at bootstrap. */
4949 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_KERN_RO_PERM);
4950
4951 /*
4952 * We use avail_start as a pointer to the first address that has not
4953 * been reserved for bootstrap, so we know which pages to give to the
4954 * virtual memory layer.
4955 */
4956 monitor_start_pa = BootArgs->topOfKernelData;
4957 monitor_end_pa = avail_start;
4958
4959 /* The other bootstrap allocations are mapped RW at bootstrap. */
4960 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
4961
4962 /*
4963 * The RO page tables are mapped RW in arm_vm_init() and later restricted
4964 * to RO in arm_vm_prot_finalize(), which is called after this function.
4965 * Here we only need to mark the underlying physical pages as PPL-owned to ensure
4966 * they can't be allocated for other uses. We don't need a special xPRR
4967 * protection index, as there is no PPL_RO index, and these pages are ultimately
4968 * protected by KTRR/CTRR. Furthermore, use of PPL_RW for these pages would
4969 * expose us to a functional issue on H11 devices where CTRR shifts the APRR
4970 * lookup table index to USER_XO before APRR is applied, leading the hardware
4971 * to believe we are dealing with an user XO page upon performing a translation.
4972 */
4973 monitor_start_pa = kvtophys((vm_offset_t)&ropagetable_begin);
4974 monitor_end_pa = monitor_start_pa + ((vm_offset_t)&ropagetable_end - (vm_offset_t)&ropagetable_begin);
4975 pa_set_range_monitor(monitor_start_pa, monitor_end_pa);
4976
4977 monitor_start_pa = kvtophys(segPPLDATAB);
4978 monitor_end_pa = monitor_start_pa + segSizePPLDATA;
4979
4980 /* PPL data is RW for the PPL, RO for the kernel. */
4981 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RW_PERM, XPRR_PPL_RW_PERM);
4982
4983 monitor_start_pa = kvtophys(segPPLTEXTB);
4984 monitor_end_pa = monitor_start_pa + segSizePPLTEXT;
4985
4986 /* PPL text is RX for the PPL, RO for the kernel. */
4987 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RX_PERM, XPRR_PPL_RX_PERM);
4988
4989
4990 /*
4991 * In order to support DTrace, the save areas for the PPL must be
4992 * writable. This is due to the fact that DTrace will try to update
4993 * register state.
4994 */
4995 if (pmap_ppl_disable) {
4996 vm_offset_t monitor_start_va = phystokv(ppl_cpu_save_area_start);
4997 vm_offset_t monitor_end_va = monitor_start_va + (ppl_cpu_save_area_end - ppl_cpu_save_area_start);
4998
4999 pmap_set_range_xprr_perm(monitor_start_va, monitor_end_va, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
5000 }
5001
5002
5003 if (segSizePPLDATACONST > 0) {
5004 monitor_start_pa = kvtophys(segPPLDATACONSTB);
5005 monitor_end_pa = monitor_start_pa + segSizePPLDATACONST;
5006
5007 pa_set_range_xprr_perm(monitor_start_pa, monitor_end_pa, XPRR_KERN_RO_PERM, XPRR_KERN_RO_PERM);
5008 }
5009
5010 /*
5011 * Mark the original physical aperture mapping for the PPL stack pages RO as an additional security
5012 * precaution. The real RW mappings are at a different location with guard pages.
5013 */
5014 pa_set_range_xprr_perm(pmap_stacks_start_pa, pmap_stacks_end_pa, XPRR_PPL_RW_PERM, XPRR_KERN_RO_PERM);
5015
5016 /* Prevent remapping of the kernelcache */
5017 pmap_lockdown_kc();
5018 }
5019
5020
5021 void
5022 pmap_lockdown_ppl(void)
5023 {
5024 /* Mark the PPL as being locked down. */
5025
5026 #error "XPRR configuration error"
5027
5028 }
5029 #endif /* XNU_MONITOR */
5030
5031 void
5032 pmap_virtual_space(
5033 vm_offset_t *startp,
5034 vm_offset_t *endp
5035 )
5036 {
5037 *startp = virtual_space_start;
5038 *endp = virtual_space_end;
5039 }
5040
5041
5042 boolean_t
5043 pmap_virtual_region(
5044 unsigned int region_select,
5045 vm_map_offset_t *startp,
5046 vm_map_size_t *size
5047 )
5048 {
5049 boolean_t ret = FALSE;
5050 #if defined(KERNEL_INTEGRITY_KTRR) || defined(KERNEL_INTEGRITY_CTRR)
5051 if (region_select == 0) {
5052 /*
5053 * In this config, the bootstrap mappings should occupy their own L2
5054 * TTs, as they should be immutable after boot. Having the associated
5055 * TTEs and PTEs in their own pages allows us to lock down those pages,
5056 * while allowing the rest of the kernel address range to be remapped.
5057 */
5058 #if (__ARM_VMSA__ > 7)
5059 *startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
5060 #else
5061 #error Unsupported configuration
5062 #endif
5063 #if defined(ARM_LARGE_MEMORY)
5064 *size = ((KERNEL_PMAP_HEAP_RANGE_START - *startp) & ~PAGE_MASK);
5065 #else
5066 *size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
5067 #endif
5068 ret = TRUE;
5069 }
5070 #else
5071 #if (__ARM_VMSA__ > 7)
5072 unsigned long low_global_vr_mask = 0;
5073 vm_map_size_t low_global_vr_size = 0;
5074 #endif
5075
5076 if (region_select == 0) {
5077 #if (__ARM_VMSA__ == 7)
5078 *startp = gVirtBase & 0xFFC00000;
5079 *size = ((virtual_space_start - (gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
5080 #else
5081 /* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
5082 if (!TEST_PAGE_SIZE_4K) {
5083 *startp = gVirtBase & 0xFFFFFFFFFE000000;
5084 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
5085 } else {
5086 *startp = gVirtBase & 0xFFFFFFFFFF800000;
5087 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
5088 }
5089 #endif
5090 ret = TRUE;
5091 }
5092 if (region_select == 1) {
5093 *startp = VREGION1_START;
5094 *size = VREGION1_SIZE;
5095 ret = TRUE;
5096 }
5097 #if (__ARM_VMSA__ > 7)
5098 /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
5099 if (!TEST_PAGE_SIZE_4K) {
5100 low_global_vr_mask = 0xFFFFFFFFFE000000;
5101 low_global_vr_size = 0x2000000;
5102 } else {
5103 low_global_vr_mask = 0xFFFFFFFFFF800000;
5104 low_global_vr_size = 0x800000;
5105 }
5106
5107 if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
5108 *startp = LOW_GLOBAL_BASE_ADDRESS;
5109 *size = low_global_vr_size;
5110 ret = TRUE;
5111 }
5112
5113 if (region_select == 3) {
5114 /* In this config, we allow the bootstrap mappings to occupy the same
5115 * page table pages as the heap.
5116 */
5117 *startp = VM_MIN_KERNEL_ADDRESS;
5118 *size = LOW_GLOBAL_BASE_ADDRESS - *startp;
5119 ret = TRUE;
5120 }
5121 #endif
5122 #endif
5123 return ret;
5124 }
5125
5126 /*
5127 * Routines to track and allocate physical pages during early boot.
5128 * On most systems that memory runs from first_avail through to avail_end
5129 * with no gaps.
5130 *
5131 * However if the system supports ECC and bad_ram_pages_count > 0, we
5132 * need to be careful and skip those pages.
5133 */
5134 static unsigned int avail_page_count = 0;
5135 static bool need_ram_ranges_init = true;
5136
5137 #if defined(__arm64__)
5138 pmap_paddr_t *bad_ram_pages = NULL;
5139 unsigned int bad_ram_pages_count = 0;
5140
5141 /*
5142 * We use this sub-range of bad_ram_pages for pmap_next_page()
5143 */
5144 static pmap_paddr_t *skip_pages;
5145 static unsigned int skip_pages_count = 0;
5146
5147 #define MAX_BAD_RAM_PAGE_COUNT 64
5148 static pmap_paddr_t bad_ram_pages_arr[MAX_BAD_RAM_PAGE_COUNT];
5149
5150 /*
5151 * XXX - temporary code to get the bad pages array from boot-args.
5152 * Expects a comma-separated list of offsets from the start
5153 * of physical memory to be considered bad.
5154 *
5155 * HERE JOE -- will eventually be replaced by data provided by iboot
5156 */
5157 static void
5158 parse_bad_ram_pages_boot_arg(void)
5159 {
5160 char buf[256] = {0};
5161 char *s = buf;
5162 char *end;
5163 int count = 0;
5164 pmap_paddr_t num;
5165 extern uint64_t strtouq(const char *, char **, int);
5166
5167 if (!PE_parse_boot_arg_str("bad_ram_pages", buf, sizeof(buf))) {
5168 goto done;
5169 }
5170
5171 while (*s && count < MAX_BAD_RAM_PAGE_COUNT) {
5172 num = (pmap_paddr_t)strtouq(s, &end, 0);
5173 if (num == 0) {
5174 break;
5175 }
5176 num &= ~PAGE_MASK;
5177
5178 bad_ram_pages_arr[count++] = gDramBase + num;
5179
5180 if (*end != ',') {
5181 break;
5182 }
5183
5184 s = end + 1;
5185 }
5186
5187 done:
5188 bad_ram_pages = bad_ram_pages_arr;
5189 bad_ram_pages_count = count;
5190 }
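/*
 * Example boot-arg accepted by the parser above (the offsets are hypothetical
 * and are taken relative to gDramBase, truncated to page boundaries):
 *
 *     bad_ram_pages=0x4000,0x250000,0x1fe0000
 *
 * Parsing stops at the first zero value, at the first separator that is not a
 * comma, or once MAX_BAD_RAM_PAGE_COUNT (64) entries have been collected.
 */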
5191
5192 /*
5193 * Comparison routine for qsort of array of physical addresses.
5194 */
5195 static int
5196 pmap_paddr_cmp(void *a, void *b)
5197 {
5198 pmap_paddr_t *x = a;
5199 pmap_paddr_t *y = b;
5200 if (*x < *y) {
5201 return -1;
5202 }
5203 return *x > *y;
5204 }
5205 #endif /* defined(__arm64__) */
5206
5207 /*
5208 * Look up ppn in the sorted bad_ram_pages array.
5209 */
5210 bool
5211 pmap_is_bad_ram(__unused ppnum_t ppn)
5212 {
5213 #if defined(__arm64__)
5214 pmap_paddr_t pa = ptoa(ppn);
5215 int low = 0;
5216 int high = bad_ram_pages_count - 1;
5217 int mid;
5218
5219 while (low <= high) {
5220 mid = (low + high) / 2;
5221 if (bad_ram_pages[mid] < pa) {
5222 low = mid + 1;
5223 } else if (bad_ram_pages[mid] > pa) {
5224 high = mid - 1;
5225 } else {
5226 return true;
5227 }
5228 }
5229 #endif /* defined(__arm64__) */
5230 return false;
5231 }
5232
5233 /*
5234 * Initialize the count of available pages. If we have bad_ram_pages, then sort the list of them.
5235 * No lock needed here, as this code is called while kernel boot up is single threaded.
5236 */
5237 static void
5238 initialize_ram_ranges(void)
5239 {
5240 pmap_paddr_t first = first_avail;
5241 pmap_paddr_t end = avail_end;
5242
5243 assert(first <= end);
5244 assert(first == (first & ~PAGE_MASK));
5245 assert(end == (end & ~PAGE_MASK));
5246 avail_page_count = atop(end - first);
5247
5248 #if defined(__arm64__)
5249 /*
5250 * XXX Temporary code for testing, until there is iboot support
5251 *
5252 * Parse a list of known bad pages from a boot-args.
5253 */
5254 parse_bad_ram_pages_boot_arg();
5255
5256 /*
5257 * Sort and filter the bad pages list and adjust avail_page_count.
5258 */
5259 if (bad_ram_pages_count != 0) {
5260 qsort(bad_ram_pages, bad_ram_pages_count, sizeof(*bad_ram_pages), (cmpfunc_t)pmap_paddr_cmp);
5261 skip_pages = bad_ram_pages;
5262 skip_pages_count = bad_ram_pages_count;
5263
5264 /* ignore any pages before first */
5265 while (skip_pages_count > 0 && skip_pages[0] < first) {
5266 --skip_pages_count;
5267 ++skip_pages;
5268 }
5269
5270 /* ignore any pages at or after end */
5271 while (skip_pages_count > 0 && skip_pages[skip_pages_count - 1] >= end) {
5272 --skip_pages_count;
5273 }
5274
5275 avail_page_count -= skip_pages_count;
5276 }
5277 #endif /* defined(__arm64__) */
5278 need_ram_ranges_init = false;
5279 }
5280
5281 unsigned int
5282 pmap_free_pages(
5283 void)
5284 {
5285 if (need_ram_ranges_init) {
5286 initialize_ram_ranges();
5287 }
5288 return avail_page_count;
5289 }
5290
5291 unsigned int
5292 pmap_free_pages_span(
5293 void)
5294 {
5295 if (need_ram_ranges_init) {
5296 initialize_ram_ranges();
5297 }
5298 return (unsigned int)atop(avail_end - first_avail);
5299 }
5300
5301
5302 boolean_t
5303 pmap_next_page_hi(
5304 ppnum_t * pnum,
5305 __unused boolean_t might_free)
5306 {
5307 return pmap_next_page(pnum);
5308 }
5309
5310
5311 boolean_t
5312 pmap_next_page(
5313 ppnum_t *pnum)
5314 {
5315 if (need_ram_ranges_init) {
5316 initialize_ram_ranges();
5317 }
5318
5319 #if defined(__arm64__)
5320 /*
5321 * Skip over any known bad pages.
5322 */
5323 while (skip_pages_count > 0 && first_avail == skip_pages[0]) {
5324 first_avail += PAGE_SIZE;
5325 ++skip_pages;
5326 --skip_pages_count;
5327 }
5328 #endif /* defined(__arm64__) */
5329
5330 if (first_avail != avail_end) {
5331 *pnum = (ppnum_t)atop(first_avail);
5332 first_avail += PAGE_SIZE;
5333 assert(avail_page_count > 0);
5334 --avail_page_count;
5335 return TRUE;
5336 }
5337 assert(avail_page_count == 0);
5338 return FALSE;
5339 }
5340
5341 void
5342 pmap_retire_page(
5343 __unused ppnum_t pnum)
5344 {
5345 /* XXX Justin TBD - mark the page as unusable in pmap data structures */
5346 }
5347
5348
5349 /*
5350 * Initialize the pmap module.
5351 * Called by vm_init, to initialize any structures that the pmap
5352 * system needs to map virtual memory.
5353 */
5354 void
5355 pmap_init(
5356 void)
5357 {
5358 /*
5359 * Protect page zero in the kernel map.
5360 * (can be overruled by permanent translation
5361 * table entries at page zero - see arm_vm_init).
5362 */
5363 vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);
5364
5365 pmap_initialized = TRUE;
5366
5367 /*
5368 * Create the zone of physical maps
5369 * and the physical-to-virtual entries.
5370 */
5371 pmap_zone = zone_create_ext("pmap", sizeof(struct pmap),
5372 ZC_ZFREE_CLEARMEM, ZONE_ID_PMAP, NULL);
5373
5374
5375 /*
5376 * Initialize the pmap object (for tracking the vm_page_t
5377 * structures for pages we allocate to be page tables in
5378 * pmap_expand().
5379 */
5380 _vm_object_allocate(mem_size, pmap_object);
5381 pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
5382
5383 /*
5384 * The values of [hard_]maxproc may have been scaled; make sure
5385 * they are still less than the value of pmap_max_asids.
5386 */
5387 if ((uint32_t)maxproc > pmap_max_asids) {
5388 maxproc = pmap_max_asids;
5389 }
5390 if ((uint32_t)hard_maxproc > pmap_max_asids) {
5391 hard_maxproc = pmap_max_asids;
5392 }
5393
5394 #if CONFIG_PGTRACE
5395 pmap_pgtrace_init();
5396 #endif
5397 }
5398
5399 boolean_t
5400 pmap_verify_free(
5401 ppnum_t ppnum)
5402 {
5403 pv_entry_t **pv_h;
5404 int pai;
5405 pmap_paddr_t phys = ptoa(ppnum);
5406
5407 assert(phys != vm_page_fictitious_addr);
5408
5409 if (!pa_valid(phys)) {
5410 return FALSE;
5411 }
5412
5413 pai = (int)pa_index(phys);
5414 pv_h = pai_to_pvh(pai);
5415
5416 return pvh_test_type(pv_h, PVH_TYPE_NULL);
5417 }
5418
5419 #if MACH_ASSERT
5420 void
5421 pmap_assert_free(ppnum_t ppnum)
5422 {
5423 assertf(pmap_verify_free(ppnum), "page = 0x%x", ppnum);
5424 (void)ppnum;
5425 }
5426 #endif
5427
5428
5429 #if XNU_MONITOR
5430 MARK_AS_PMAP_TEXT static void
5431 pmap_ledger_alloc_init_internal(size_t size)
5432 {
5433 pmap_simple_lock(&pmap_ledger_lock);
5434
5435 if (pmap_ledger_alloc_initialized) {
5436 panic("%s: already initialized, "
5437 "size=%lu",
5438 __func__,
5439 size);
5440 }
5441
5442 if ((size > sizeof(pmap_ledger_data_t)) ||
5443 ((sizeof(pmap_ledger_data_t) - size) % sizeof(struct ledger_entry))) {
5444 panic("%s: size mismatch, expected %lu, "
5445 "size=%lu",
5446 __func__, PMAP_LEDGER_DATA_BYTES,
5447 size);
5448 }
5449
5450 pmap_ledger_alloc_initialized = true;
5451
5452 pmap_simple_unlock(&pmap_ledger_lock);
5453 }
5454
5455 MARK_AS_PMAP_TEXT static ledger_t
5456 pmap_ledger_alloc_internal(void)
5457 {
5458 pmap_paddr_t paddr;
5459 uint64_t vaddr, vstart, vend;
5460 uint64_t index;
5461
5462 ledger_t new_ledger;
5463 uint64_t array_index;
5464
5465 pmap_simple_lock(&pmap_ledger_lock);
5466 if (pmap_ledger_free_list == NULL) {
5467 paddr = pmap_get_free_ppl_page();
5468
5469 if (paddr == 0) {
5470 pmap_simple_unlock(&pmap_ledger_lock);
5471 return NULL;
5472 }
5473
5474 vstart = phystokv(paddr);
5475 vend = vstart + PAGE_SIZE;
5476
5477 for (vaddr = vstart; (vaddr < vend) && ((vaddr + sizeof(pmap_ledger_t)) <= vend); vaddr += sizeof(pmap_ledger_t)) {
5478 pmap_ledger_t *free_ledger;
5479
5480 index = pmap_ledger_ptr_array_free_index++;
5481
5482 if (index >= MAX_PMAP_LEDGERS) {
5483 panic("%s: pmap_ledger_ptr_array is full, index=%llu",
5484 __func__, index);
5485 }
5486
5487 free_ledger = (pmap_ledger_t*)vaddr;
5488
5489 pmap_ledger_ptr_array[index] = free_ledger;
5490 free_ledger->back_ptr = &pmap_ledger_ptr_array[index];
5491
5492 free_ledger->next = pmap_ledger_free_list;
5493 pmap_ledger_free_list = free_ledger;
5494 }
5495
5496 pa_set_range_xprr_perm(paddr, paddr + PAGE_SIZE, XPRR_PPL_RW_PERM, XPRR_KERN_RW_PERM);
5497 }
5498
5499 new_ledger = (ledger_t)pmap_ledger_free_list;
5500 pmap_ledger_free_list = pmap_ledger_free_list->next;
5501
5502 array_index = pmap_ledger_validate(new_ledger);
5503 os_ref_init(&pmap_ledger_refcnt[array_index], NULL);
5504
5505 pmap_simple_unlock(&pmap_ledger_lock);
5506
5507 return new_ledger;
5508 }
5509
5510 MARK_AS_PMAP_TEXT static void
5511 pmap_ledger_free_internal(ledger_t ledger)
5512 {
5513 pmap_ledger_t* free_ledger;
5514
5515 free_ledger = (pmap_ledger_t*)ledger;
5516
5517 pmap_simple_lock(&pmap_ledger_lock);
5518 uint64_t array_index = pmap_ledger_validate(ledger);
5519
5520 if (os_ref_release(&pmap_ledger_refcnt[array_index]) != 0) {
5521 panic("%s: ledger still referenced, "
5522 "ledger=%p",
5523 __func__,
5524 ledger);
5525 }
5526
5527 free_ledger->next = pmap_ledger_free_list;
5528 pmap_ledger_free_list = free_ledger;
5529 pmap_simple_unlock(&pmap_ledger_lock);
5530 }
5531
5532
5533 static void
5534 pmap_ledger_retain(ledger_t ledger)
5535 {
5536 pmap_simple_lock(&pmap_ledger_lock);
5537 uint64_t array_index = pmap_ledger_validate(ledger);
5538 os_ref_retain(&pmap_ledger_refcnt[array_index]);
5539 pmap_simple_unlock(&pmap_ledger_lock);
5540 }
5541
5542 static void
5543 pmap_ledger_release(ledger_t ledger)
5544 {
5545 pmap_simple_lock(&pmap_ledger_lock);
5546 uint64_t array_index = pmap_ledger_validate(ledger);
5547 os_ref_release_live(&pmap_ledger_refcnt[array_index]);
5548 pmap_simple_unlock(&pmap_ledger_lock);
5549 }
5550
5551 void
5552 pmap_ledger_alloc_init(size_t size)
5553 {
5554 pmap_ledger_alloc_init_ppl(size);
5555 }
5556
5557 ledger_t
5558 pmap_ledger_alloc(void)
5559 {
5560 ledger_t retval = NULL;
5561
5562 while ((retval = pmap_ledger_alloc_ppl()) == NULL) {
5563 pmap_alloc_page_for_ppl(0);
5564 }
5565
5566 return retval;
5567 }
5568
5569 void
5570 pmap_ledger_free(ledger_t ledger)
5571 {
5572 pmap_ledger_free_ppl(ledger);
5573 }
5574 #else /* XNU_MONITOR */
5575 __dead2
5576 void
5577 pmap_ledger_alloc_init(size_t size)
5578 {
5579 panic("%s: unsupported, "
5580 "size=%lu",
5581 __func__, size);
5582 }
5583
5584 __dead2
5585 ledger_t
5586 pmap_ledger_alloc(void)
5587 {
5588 panic("%s: unsupported",
5589 __func__);
5590 }
5591
5592 __dead2
5593 void
5594 pmap_ledger_free(ledger_t ledger)
5595 {
5596 panic("%s: unsupported, "
5597 "ledger=%p",
5598 __func__, ledger);
5599 }
5600 #endif /* XNU_MONITOR */
5601
5602 static vm_size_t
5603 pmap_root_alloc_size(pmap_t pmap)
5604 {
5605 #if (__ARM_VMSA__ > 7)
5606 #pragma unused(pmap)
5607 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5608 unsigned int root_level = pt_attr_root_level(pt_attr);
5609 return ((pt_attr_ln_index_mask(pt_attr, root_level) >> pt_attr_ln_shift(pt_attr, root_level)) + 1) * sizeof(tt_entry_t);
5610 #else
5611 (void)pmap;
5612 return PMAP_ROOT_ALLOC_SIZE;
5613 #endif
5614 }
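/*
 * Illustrative arithmetic for the expression above (values are hypothetical):
 * pt_attr_ln_index_mask() >> pt_attr_ln_shift() yields (2^N - 1) for a root
 * level that resolves N index bits, so the function returns 2^N entries times
 * sizeof(tt_entry_t). For example, a root level resolving 9 bits would give
 * (0x1FF + 1) * 8 = 4096 bytes, i.e. one 4K root table (assuming an 8-byte
 * tt_entry_t).
 */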
5615
5616 /*
5617 * Create and return a physical map.
5618 *
5619 * If the size specified for the map
5620 * is zero, the map is an actual physical
5621 * map, and may be referenced by the
5622 * hardware.
5623 *
5624 * If the size specified is non-zero,
5625 * the map will be used in software only, and
5626 * is bounded by that size.
5627 */
5628 MARK_AS_PMAP_TEXT static pmap_t
5629 pmap_create_options_internal(
5630 ledger_t ledger,
5631 vm_map_size_t size,
5632 unsigned int flags,
5633 kern_return_t *kr)
5634 {
5635 unsigned i;
5636 unsigned tte_index_max;
5637 pmap_t p;
5638 bool is_64bit = flags & PMAP_CREATE_64BIT;
5639 #if defined(HAS_APPLE_PAC)
5640 bool disable_jop = flags & PMAP_CREATE_DISABLE_JOP;
5641 #endif /* defined(HAS_APPLE_PAC) */
5642 kern_return_t local_kr = KERN_SUCCESS;
5643
5644 /*
5645 * A software use-only map doesn't even need a pmap.
5646 */
5647 if (size != 0) {
5648 return PMAP_NULL;
5649 }
5650
5651 if (0 != (flags & ~PMAP_CREATE_KNOWN_FLAGS)) {
5652 return PMAP_NULL;
5653 }
5654
5655 #if XNU_MONITOR
5656 if ((p = pmap_alloc_pmap()) == PMAP_NULL) {
5657 local_kr = KERN_NO_SPACE;
5658 goto pmap_create_fail;
5659 }
5660
5661 if (ledger) {
5662 pmap_ledger_validate(ledger);
5663 pmap_ledger_retain(ledger);
5664 }
5665 #else
5666 /*
5667 * Allocate a pmap struct from the pmap_zone. Then allocate
5668 * the translation table of the right size for the pmap.
5669 */
5670 if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
5671 local_kr = KERN_RESOURCE_SHORTAGE;
5672 goto pmap_create_fail;
5673 }
5674 #endif
5675
5676 p->ledger = ledger;
5677
5678
5679 p->pmap_vm_map_cs_enforced = false;
5680
5681 if (flags & PMAP_CREATE_64BIT) {
5682 p->min = MACH_VM_MIN_ADDRESS;
5683 p->max = MACH_VM_MAX_ADDRESS;
5684 } else {
5685 p->min = VM_MIN_ADDRESS;
5686 p->max = VM_MAX_ADDRESS;
5687 }
5688 #if defined(HAS_APPLE_PAC)
5689 p->disable_jop = disable_jop;
5690 #endif /* defined(HAS_APPLE_PAC) */
5691
5692 p->nested_region_true_start = 0;
5693 p->nested_region_true_end = ~0;
5694
5695 os_atomic_init(&p->ref_count, 1);
5696 p->gc_status = 0;
5697 p->stamp = os_atomic_inc(&pmap_stamp, relaxed);
5698 p->nx_enabled = TRUE;
5699 p->is_64bit = is_64bit;
5700 p->nested = FALSE;
5701 p->nested_pmap = PMAP_NULL;
5702
5703 #if ARM_PARAMETERIZED_PMAP
5704 /* Default to the native pt_attr */
5705 p->pmap_pt_attr = native_pt_attr;
5706 #endif /* ARM_PARAMETERIZED_PMAP */
5707 #if __ARM_MIXED_PAGE_SIZE__
5708 if (flags & PMAP_CREATE_FORCE_4K_PAGES) {
5709 p->pmap_pt_attr = &pmap_pt_attr_4k;
5710 }
5711 #endif /* __ARM_MIXED_PAGE_SIZE__ */
5712
5713 if (!pmap_get_pt_ops(p)->alloc_id(p)) {
5714 local_kr = KERN_NO_SPACE;
5715 goto id_alloc_fail;
5716 }
5717
5718 pmap_lock_init(p);
5719 memset((void *) &p->stats, 0, sizeof(p->stats));
5720
5721 p->tt_entry_free = (tt_entry_t *)0;
5722 tte_index_max = ((unsigned)pmap_root_alloc_size(p) / sizeof(tt_entry_t));
5723
5724 #if (__ARM_VMSA__ == 7)
5725 p->tte_index_max = tte_index_max;
5726 #endif
5727
5728 #if XNU_MONITOR
5729 p->tte = pmap_tt1_allocate(p, pmap_root_alloc_size(p), PMAP_TT_ALLOCATE_NOWAIT);
5730 #else
5731 p->tte = pmap_tt1_allocate(p, pmap_root_alloc_size(p), 0);
5732 #endif
5733 if (!(p->tte)) {
5734 local_kr = KERN_RESOURCE_SHORTAGE;
5735 goto tt1_alloc_fail;
5736 }
5737
5738 p->ttep = ml_static_vtop((vm_offset_t)p->tte);
5739 PMAP_TRACE(4, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);
5740
5741 /* nullify the translation table */
5742 for (i = 0; i < tte_index_max; i++) {
5743 p->tte[i] = ARM_TTE_TYPE_FAULT;
5744 }
5745
5746 FLUSH_PTE_RANGE(p->tte, p->tte + tte_index_max);
5747
5748 /*
5749 * initialize the rest of the structure
5750 */
5751 p->nested_region_addr = 0x0ULL;
5752 p->nested_region_size = 0x0ULL;
5753 p->nested_region_asid_bitmap = NULL;
5754 p->nested_region_asid_bitmap_size = 0x0UL;
5755
5756 p->nested_has_no_bounds_ref = false;
5757 p->nested_no_bounds_refcnt = 0;
5758 p->nested_bounds_set = false;
5759
5760
5761 #if MACH_ASSERT
5762 p->pmap_stats_assert = TRUE;
5763 p->pmap_pid = 0;
5764 strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
5765 #endif /* MACH_ASSERT */
5766 #if DEVELOPMENT || DEBUG
5767 p->footprint_was_suspended = FALSE;
5768 #endif /* DEVELOPMENT || DEBUG */
5769
5770 pmap_simple_lock(&pmaps_lock);
5771 queue_enter(&map_pmap_list, p, pmap_t, pmaps);
5772 pmap_simple_unlock(&pmaps_lock);
5773
5774 return p;
5775
5776 tt1_alloc_fail:
5777 pmap_get_pt_ops(p)->free_id(p);
5778 id_alloc_fail:
5779 #if XNU_MONITOR
5780 pmap_free_pmap(p);
5781
5782 if (ledger) {
5783 pmap_ledger_release(ledger);
5784 }
5785 #else
5786 zfree(pmap_zone, p);
5787 #endif
5788 pmap_create_fail:
5789 #if XNU_MONITOR
5790 pmap_pin_kernel_pages((vm_offset_t)kr, sizeof(*kr));
5791 #endif
5792 *kr = local_kr;
5793 #if XNU_MONITOR
5794 pmap_unpin_kernel_pages((vm_offset_t)kr, sizeof(*kr));
5795 #endif
5796 return PMAP_NULL;
5797 }
5798
5799 pmap_t
5800 pmap_create_options(
5801 ledger_t ledger,
5802 vm_map_size_t size,
5803 unsigned int flags)
5804 {
5805 pmap_t pmap;
5806 kern_return_t kr = KERN_SUCCESS;
5807
5808 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);
5809
5810 ledger_reference(ledger);
5811
5812 #if XNU_MONITOR
5813 for (;;) {
5814 pmap = pmap_create_options_ppl(ledger, size, flags, &kr);
5815 if (kr != KERN_RESOURCE_SHORTAGE) {
5816 break;
5817 }
5818 assert(pmap == PMAP_NULL);
5819 pmap_alloc_page_for_ppl(0);
5820 kr = KERN_SUCCESS;
5821 }
5822 #else
5823 pmap = pmap_create_options_internal(ledger, size, flags, &kr);
5824 #endif
5825
5826 if (pmap == PMAP_NULL) {
5827 ledger_dereference(ledger);
5828 }
5829
5830 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
5831
5832 return pmap;
5833 }
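/*
 * Illustrative sketch only: a minimal user-pmap lifecycle as a VM-layer caller
 * might drive it. `task_ledger` is a hypothetical caller-supplied ledger_t and
 * `pmap_lifecycle_sketch` is not a real XNU routine; error handling is elided.
 */
#if 0
static void
pmap_lifecycle_sketch(ledger_t task_ledger)
{
	/* size must be 0 for a hardware-backed map; request a 64-bit address space */
	pmap_t p = pmap_create_options(task_ledger, 0, PMAP_CREATE_64BIT);

	if (p == PMAP_NULL) {
		return;
	}
	pmap_switch(p);   /* activate the new address space on this CPU */
	/* ... pmap_enter()/pmap_remove() mappings as needed ... */
	pmap_destroy(p);  /* drop the creation reference; the pmap is freed once ref_count hits zero */
}
#endif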
5834
5835 #if XNU_MONITOR
5836 /*
5837 * This symbol remains in place when the PPL is enabled so that the dispatch
5838 * table does not change from development to release configurations.
5839 */
5840 #endif
5841 #if MACH_ASSERT || XNU_MONITOR
5842 MARK_AS_PMAP_TEXT static void
5843 pmap_set_process_internal(
5844 __unused pmap_t pmap,
5845 __unused int pid,
5846 __unused char *procname)
5847 {
5848 #if MACH_ASSERT
5849 if (pmap == NULL) {
5850 return;
5851 }
5852
5853 VALIDATE_PMAP(pmap);
5854
5855 pmap->pmap_pid = pid;
5856 strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
5857 if (pmap_ledgers_panic_leeway) {
5858 /*
5859 * XXX FBDP
5860 * Some processes somehow trigger some issues that make
5861 * the pmap stats and ledgers go off track, causing
5862 * some assertion failures and ledger panics.
5863 * Turn off the sanity checks if we allow some ledger leeway
5864 * because of that. We'll still do a final check in
5865 * pmap_check_ledgers() for discrepancies larger than the
5866 * allowed leeway after the address space has been fully
5867 * cleaned up.
5868 */
5869 pmap->pmap_stats_assert = FALSE;
5870 ledger_disable_panic_on_negative(pmap->ledger,
5871 task_ledgers.phys_footprint);
5872 ledger_disable_panic_on_negative(pmap->ledger,
5873 task_ledgers.internal);
5874 ledger_disable_panic_on_negative(pmap->ledger,
5875 task_ledgers.internal_compressed);
5876 ledger_disable_panic_on_negative(pmap->ledger,
5877 task_ledgers.iokit_mapped);
5878 ledger_disable_panic_on_negative(pmap->ledger,
5879 task_ledgers.alternate_accounting);
5880 ledger_disable_panic_on_negative(pmap->ledger,
5881 task_ledgers.alternate_accounting_compressed);
5882 }
5883 #endif /* MACH_ASSERT */
5884 }
5885 #endif /* MACH_ASSERT || XNU_MONITOR */
5886
5887 #if MACH_ASSERT
5888 void
5889 pmap_set_process(
5890 pmap_t pmap,
5891 int pid,
5892 char *procname)
5893 {
5894 #if XNU_MONITOR
5895 pmap_set_process_ppl(pmap, pid, procname);
5896 #else
5897 pmap_set_process_internal(pmap, pid, procname);
5898 #endif
5899 }
5900 #endif /* MACH_ASSERT */
5901
5902 #if (__ARM_VMSA__ > 7)
5903 /*
5904 * pmap_deallocate_all_leaf_tts:
5905 *
5906 * Recursive function for deallocating all leaf TTEs. Walks the given TT,
5907 * removing and deallocating all TTEs.
5908 */
5909 MARK_AS_PMAP_TEXT static void
5910 pmap_deallocate_all_leaf_tts(pmap_t pmap, tt_entry_t * first_ttep, unsigned level)
5911 {
5912 tt_entry_t tte = ARM_TTE_EMPTY;
5913 tt_entry_t * ttep = NULL;
5914 tt_entry_t * last_ttep = NULL;
5915
5916 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5917
5918 assert(level < pt_attr_leaf_level(pt_attr));
5919
5920 last_ttep = &first_ttep[ttn_index(pmap, pt_attr, ~0, level)];
5921
5922 for (ttep = first_ttep; ttep <= last_ttep; ttep++) {
5923 tte = *ttep;
5924
5925 if (!(tte & ARM_TTE_VALID)) {
5926 continue;
5927 }
5928
5929 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
5930 panic("%s: found block mapping, ttep=%p, tte=%p, "
5931 "pmap=%p, first_ttep=%p, level=%u",
5932 __FUNCTION__, ttep, (void *)tte,
5933 pmap, first_ttep, level);
5934 }
5935
5936 /* Must be valid, type table */
5937 if (level < pt_attr_twig_level(pt_attr)) {
5938 /* If we haven't reached the twig level, recurse to the next level. */
5939 pmap_deallocate_all_leaf_tts(pmap, (tt_entry_t *)phystokv((tte) & ARM_TTE_TABLE_MASK), level + 1);
5940 }
5941
5942 /* Remove the TTE. */
5943 pmap_lock(pmap);
5944 pmap_tte_deallocate(pmap, 0, 0, false, ttep, level);
5945 pmap_unlock(pmap);
5946 }
5947 }
5948 #endif /* (__ARM_VMSA__ > 7) */
5949
5950 /*
5951 * We maintain stats and ledgers so that a task's physical footprint is:
5952 * phys_footprint = ((internal - alternate_accounting)
5953 * + (internal_compressed - alternate_accounting_compressed)
5954 * + iokit_mapped
5955 * + purgeable_nonvolatile
5956 * + purgeable_nonvolatile_compressed
5957 * + page_table)
5958 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
5959 */
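/*
 * Worked example with hypothetical page counts: internal=100,
 * alternate_accounting=10, internal_compressed=40,
 * alternate_accounting_compressed=5, iokit_mapped=2, purgeable_nonvolatile=3,
 * purgeable_nonvolatile_compressed=1, page_table=4 gives
 * phys_footprint = (100-10) + (40-5) + 2 + 3 + 1 + 4 = 135 pages.
 */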
5960
5961 /*
5962 * Retire the given physical map from service.
5963 * Should only be called if the map contains
5964 * no valid mappings.
5965 */
5966 MARK_AS_PMAP_TEXT static void
5967 pmap_destroy_internal(
5968 pmap_t pmap)
5969 {
5970 if (pmap == PMAP_NULL) {
5971 return;
5972 }
5973
5974 VALIDATE_PMAP(pmap);
5975
5976 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5977
5978 int32_t ref_count = os_atomic_dec(&pmap->ref_count, relaxed);
5979 if (ref_count > 0) {
5980 return;
5981 } else if (ref_count < 0) {
5982 panic("pmap %p: refcount underflow", pmap);
5983 } else if (pmap == kernel_pmap) {
5984 panic("pmap %p: attempt to destroy kernel pmap", pmap);
5985 }
5986
5987 #if (__ARM_VMSA__ > 7)
5988 pmap_unmap_sharedpage(pmap);
5989 #endif /* (__ARM_VMSA__ > 7) */
5990
5991 pmap_simple_lock(&pmaps_lock);
5992 while (pmap->gc_status & PMAP_GC_INFLIGHT) {
5993 pmap->gc_status |= PMAP_GC_WAIT;
5994 assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
5995 pmap_simple_unlock(&pmaps_lock);
5996 (void) thread_block(THREAD_CONTINUE_NULL);
5997 pmap_simple_lock(&pmaps_lock);
5998 }
5999 queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
6000 pmap_simple_unlock(&pmaps_lock);
6001
6002 pmap_trim_self(pmap);
6003
6004 /*
6005 * Free the memory maps, then the
6006 * pmap structure.
6007 */
6008 #if (__ARM_VMSA__ == 7)
6009 unsigned int i = 0;
6010 pt_entry_t *ttep;
6011
6012 pmap_lock(pmap);
6013 for (i = 0; i < pmap->tte_index_max; i++) {
6014 ttep = &pmap->tte[i];
6015 if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
6016 pmap_tte_deallocate(pmap, 0, 0, false, ttep, PMAP_TT_L1_LEVEL);
6017 }
6018 }
6019 pmap_unlock(pmap);
6020 #else /* (__ARM_VMSA__ == 7) */
6021 pmap_deallocate_all_leaf_tts(pmap, pmap->tte, pt_attr_root_level(pt_attr));
6022 #endif /* (__ARM_VMSA__ == 7) */
6023
6024
6025
6026 if (pmap->tte) {
6027 #if (__ARM_VMSA__ == 7)
6028 pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
6029 pmap->tte_index_max = 0;
6030 #else /* (__ARM_VMSA__ == 7) */
6031 pmap_tt1_deallocate(pmap, pmap->tte, pmap_root_alloc_size(pmap), 0);
6032 #endif /* (__ARM_VMSA__ == 7) */
6033 pmap->tte = (tt_entry_t *) NULL;
6034 pmap->ttep = 0;
6035 }
6036
6037 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
6038
6039 if (__improbable(pmap->nested)) {
6040 pmap_get_pt_ops(pmap)->flush_tlb_region_async(pmap->nested_region_addr, pmap->nested_region_size, pmap);
6041 sync_tlb_flush();
6042 } else {
6043 pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
6044 sync_tlb_flush();
6045 /* return its asid to the pool */
6046 pmap_get_pt_ops(pmap)->free_id(pmap);
6047 /* release the reference we hold on the nested pmap */
6048 pmap_destroy_internal(pmap->nested_pmap);
6049 }
6050
6051 pmap_check_ledgers(pmap);
6052
6053 if (pmap->nested_region_asid_bitmap) {
6054 #if XNU_MONITOR
6055 pmap_pages_free(kvtophys((vm_offset_t)(pmap->nested_region_asid_bitmap)), PAGE_SIZE);
6056 #else
6057 kheap_free(KHEAP_DATA_BUFFERS, pmap->nested_region_asid_bitmap,
6058 pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
6059 #endif
6060 }
6061
6062 #if XNU_MONITOR
6063 if (pmap->ledger) {
6064 pmap_ledger_release(pmap->ledger);
6065 }
6066
6067 pmap_lock_destroy(pmap);
6068 pmap_free_pmap(pmap);
6069 #else
6070 pmap_lock_destroy(pmap);
6071 zfree(pmap_zone, pmap);
6072 #endif
6073 }
6074
6075 void
6076 pmap_destroy(
6077 pmap_t pmap)
6078 {
6079 ledger_t ledger;
6080
6081 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
6082
6083 ledger = pmap->ledger;
6084
6085 #if XNU_MONITOR
6086 pmap_destroy_ppl(pmap);
6087
6088 pmap_check_ledger_fields(ledger);
6089 #else
6090 pmap_destroy_internal(pmap);
6091 #endif
6092
6093 ledger_dereference(ledger);
6094
6095 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
6096 }
6097
6098
6099 /*
6100 * Add a reference to the specified pmap.
6101 */
6102 MARK_AS_PMAP_TEXT static void
6103 pmap_reference_internal(
6104 pmap_t pmap)
6105 {
6106 if (pmap != PMAP_NULL) {
6107 VALIDATE_PMAP(pmap);
6108 os_atomic_inc(&pmap->ref_count, relaxed);
6109 }
6110 }
6111
6112 void
6113 pmap_reference(
6114 pmap_t pmap)
6115 {
6116 #if XNU_MONITOR
6117 pmap_reference_ppl(pmap);
6118 #else
6119 pmap_reference_internal(pmap);
6120 #endif
6121 }
6122
6123 static tt_entry_t *
6124 pmap_tt1_allocate(
6125 pmap_t pmap,
6126 vm_size_t size,
6127 unsigned option)
6128 {
6129 tt_entry_t *tt1 = NULL;
6130 tt_free_entry_t *tt1_free;
6131 pmap_paddr_t pa;
6132 vm_address_t va;
6133 vm_address_t va_end;
6134 kern_return_t ret;
6135
6136 if ((size < PAGE_SIZE) && (size != PMAP_ROOT_ALLOC_SIZE)) {
6137 size = PAGE_SIZE;
6138 }
6139
6140 pmap_simple_lock(&tt1_lock);
6141 if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
6142 free_page_size_tt_count--;
6143 tt1 = (tt_entry_t *)free_page_size_tt_list;
6144 free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
6145 } else if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
6146 free_two_page_size_tt_count--;
6147 tt1 = (tt_entry_t *)free_two_page_size_tt_list;
6148 free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
6149 } else if ((size < PAGE_SIZE) && (free_tt_count != 0)) {
6150 free_tt_count--;
6151 tt1 = (tt_entry_t *)free_tt_list;
6152 free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
6153 }
6154
6155 pmap_simple_unlock(&tt1_lock);
6156
6157 if (tt1 != NULL) {
6158 pmap_tt_ledger_credit(pmap, size);
6159 return (tt_entry_t *)tt1;
6160 }
6161
6162 ret = pmap_pages_alloc_zeroed(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));
6163
6164 if (ret == KERN_RESOURCE_SHORTAGE) {
6165 return (tt_entry_t *)0;
6166 }
6167
6168 #if XNU_MONITOR
6169 assert(pa);
6170 #endif
6171
6172 if (size < PAGE_SIZE) {
6173 va = phystokv(pa) + size;
6174 tt_free_entry_t *local_free_list = (tt_free_entry_t*)va;
6175 tt_free_entry_t *next_free = NULL;
6176 for (va_end = phystokv(pa) + PAGE_SIZE; va < va_end; va = va + size) {
6177 tt1_free = (tt_free_entry_t *)va;
6178 tt1_free->next = next_free;
6179 next_free = tt1_free;
6180 }
6181 pmap_simple_lock(&tt1_lock);
6182 local_free_list->next = free_tt_list;
6183 free_tt_list = next_free;
6184 free_tt_count += ((PAGE_SIZE / size) - 1);
6185 if (free_tt_count > free_tt_max) {
6186 free_tt_max = free_tt_count;
6187 }
6188 pmap_simple_unlock(&tt1_lock);
6189 }
6190
6191 /* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained via the sysctl arm_pt_root_size.
6192 * Depending on the device, this can vary between 512 bytes and 16K. */
6193 OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
6194 OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
6195 pmap_tt_ledger_credit(pmap, size);
6196
6197 return (tt_entry_t *) phystokv(pa);
6198 }
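/*
 * Illustrative arithmetic for the accounting above (sizes are hypothetical):
 * the tteroot counters advance in units of PMAP_ROOT_ALLOC_SIZE, so if
 * PMAP_ROOT_ALLOC_SIZE were 4K and a full 16K page were allocated here, both
 * inuse_*_tteroot_count and alloc_tteroot_count would advance by 4.
 */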
6199
6200 static void
6201 pmap_tt1_deallocate(
6202 pmap_t pmap,
6203 tt_entry_t *tt,
6204 vm_size_t size,
6205 unsigned option)
6206 {
6207 tt_free_entry_t *tt_entry;
6208
6209 if ((size < PAGE_SIZE) && (size != PMAP_ROOT_ALLOC_SIZE)) {
6210 size = PAGE_SIZE;
6211 }
6212
6213 tt_entry = (tt_free_entry_t *)tt;
6214 assert(not_in_kdp);
6215 pmap_simple_lock(&tt1_lock);
6216
6217 if (size < PAGE_SIZE) {
6218 free_tt_count++;
6219 if (free_tt_count > free_tt_max) {
6220 free_tt_max = free_tt_count;
6221 }
6222 tt_entry->next = free_tt_list;
6223 free_tt_list = tt_entry;
6224 }
6225
6226 if (size == PAGE_SIZE) {
6227 free_page_size_tt_count++;
6228 if (free_page_size_tt_count > free_page_size_tt_max) {
6229 free_page_size_tt_max = free_page_size_tt_count;
6230 }
6231 tt_entry->next = free_page_size_tt_list;
6232 free_page_size_tt_list = tt_entry;
6233 }
6234
6235 if (size == 2 * PAGE_SIZE) {
6236 free_two_page_size_tt_count++;
6237 if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
6238 free_two_page_size_tt_max = free_two_page_size_tt_count;
6239 }
6240 tt_entry->next = free_two_page_size_tt_list;
6241 free_two_page_size_tt_list = tt_entry;
6242 }
6243
6244 if (option & PMAP_TT_DEALLOCATE_NOBLOCK) {
6245 pmap_simple_unlock(&tt1_lock);
6246 pmap_tt_ledger_debit(pmap, size);
6247 return;
6248 }
6249
6250 while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
6251 free_page_size_tt_count--;
6252 tt = (tt_entry_t *)free_page_size_tt_list;
6253 free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
6254
6255 pmap_simple_unlock(&tt1_lock);
6256
6257 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);
6258
6259 OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
6260
6261 pmap_simple_lock(&tt1_lock);
6262 }
6263
6264 while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
6265 free_two_page_size_tt_count--;
6266 tt = (tt_entry_t *)free_two_page_size_tt_list;
6267 free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
6268
6269 pmap_simple_unlock(&tt1_lock);
6270
6271 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);
6272
6273 OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
6274
6275 pmap_simple_lock(&tt1_lock);
6276 }
6277 pmap_simple_unlock(&tt1_lock);
6278 pmap_tt_ledger_debit(pmap, size);
6279 }
6280
6281 static kern_return_t
6282 pmap_tt_allocate(
6283 pmap_t pmap,
6284 tt_entry_t **ttp,
6285 unsigned int level,
6286 unsigned int options)
6287 {
6288 pmap_paddr_t pa;
6289 *ttp = NULL;
6290
6291 pmap_lock(pmap);
6292 if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
6293 tt_free_entry_t *tt_free_cur, *tt_free_next;
6294
6295 tt_free_cur = ((tt_free_entry_t *)pmap->tt_entry_free);
6296 tt_free_next = tt_free_cur->next;
6297 tt_free_cur->next = NULL;
6298 *ttp = (tt_entry_t *)tt_free_cur;
6299 pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
6300 }
6301 pmap_unlock(pmap);
6302
6303 if (*ttp == NULL) {
6304 pt_desc_t *ptdp;
6305
6306 /*
6307 * Allocate a VM page for the level x page table entries.
6308 */
6309 while (pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6310 if (options & PMAP_OPTIONS_NOWAIT) {
6311 return KERN_RESOURCE_SHORTAGE;
6312 }
6313 VM_PAGE_WAIT();
6314 }
6315
6316 while ((ptdp = ptd_alloc(pmap)) == NULL) {
6317 if (options & PMAP_OPTIONS_NOWAIT) {
6318 pmap_pages_free(pa, PAGE_SIZE);
6319 return KERN_RESOURCE_SHORTAGE;
6320 }
6321 VM_PAGE_WAIT();
6322 }
6323
6324 if (level < pt_attr_leaf_level(pmap_get_pt_attr(pmap))) {
6325 OSAddAtomic64(1, &alloc_ttepages_count);
6326 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
6327 } else {
6328 OSAddAtomic64(1, &alloc_ptepages_count);
6329 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
6330 }
6331
6332 pmap_tt_ledger_credit(pmap, PAGE_SIZE);
6333
6334 PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);
6335
6336 pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);
6337
6338 uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));
6339 if (PAGE_SIZE > pmap_page_size) {
6340 vm_address_t va;
6341 vm_address_t va_end;
6342
6343 pmap_lock(pmap);
6344
6345 for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + pmap_page_size; va < va_end; va = va + pmap_page_size) {
6346 ((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
6347 pmap->tt_entry_free = (tt_entry_t *)va;
6348 }
6349 pmap_unlock(pmap);
6350 }
6351
6352 *ttp = (tt_entry_t *)phystokv(pa);
6353 }
6354
6355 #if XNU_MONITOR
6356 assert(*ttp);
6357 #endif
6358
6359 return KERN_SUCCESS;
6360 }
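/*
 * Illustrative note on the mixed-page-size path above (sizes are
 * hypothetical): if the kernel PAGE_SIZE is 16K but the pmap's page size is
 * 4K, the freshly allocated page yields the returned table plus three spare
 * 4K tables, which the loop above strings onto pmap->tt_entry_free for later
 * pmap_tt_allocate() calls to reuse.
 */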
6361
6362
6363 static void
6364 pmap_tt_deallocate(
6365 pmap_t pmap,
6366 tt_entry_t *ttp,
6367 unsigned int level)
6368 {
6369 pt_desc_t *ptdp;
6370 ptd_info_t *ptd_info;
6371 unsigned pt_acc_cnt;
6372 unsigned i;
6373 vm_offset_t free_page = 0;
6374 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6375 unsigned max_pt_index = PAGE_SIZE / pt_attr_page_size(pt_attr);
6376
6377 pmap_lock(pmap);
6378
6379 ptdp = ptep_get_ptd((vm_offset_t)ttp);
6380 ptd_info = ptd_get_info(ptdp, ttp);
6381
6382 ptd_info->va = (vm_offset_t)-1;
6383
6384 if ((level < pt_attr_leaf_level(pt_attr)) && (ptd_info->refcnt == PT_DESC_REFCOUNT)) {
6385 ptd_info->refcnt = 0;
6386 }
6387
6388 if (ptd_info->refcnt != 0) {
6389 panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptd_info->refcnt);
6390 }
6391
6392 ptd_info->refcnt = 0;
6393
6394 for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
6395 pt_acc_cnt += ptdp->ptd_info[i].refcnt;
6396 }
6397
6398 if (pt_acc_cnt == 0) {
6399 tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
6400 unsigned pt_free_entry_cnt = 1;
6401
6402 while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
6403 tt_free_entry_t *tt_free_list_next;
6404
6405 tt_free_list_next = tt_free_list->next;
6406 if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
6407 pt_free_entry_cnt++;
6408 }
6409 tt_free_list = tt_free_list_next;
6410 }
6411 if (pt_free_entry_cnt == max_pt_index) {
6412 tt_free_entry_t *tt_free_list_cur;
6413
6414 free_page = (vm_offset_t)ttp & ~PAGE_MASK;
6415 tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
6416 tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;
6417
6418 while (tt_free_list_cur) {
6419 tt_free_entry_t *tt_free_list_next;
6420
6421 tt_free_list_next = tt_free_list_cur->next;
6422 if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
6423 tt_free_list->next = tt_free_list_next->next;
6424 } else {
6425 tt_free_list = tt_free_list_next;
6426 }
6427 tt_free_list_cur = tt_free_list_next;
6428 }
6429 } else {
6430 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
6431 pmap->tt_entry_free = ttp;
6432 }
6433 } else {
6434 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
6435 pmap->tt_entry_free = ttp;
6436 }
6437
6438 pmap_unlock(pmap);
6439
6440 if (free_page != 0) {
6441 ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
6442 *(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
6443 pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
6444 if (level < pt_attr_leaf_level(pt_attr)) {
6445 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
6446 } else {
6447 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
6448 }
6449 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
6450 pmap_tt_ledger_debit(pmap, PAGE_SIZE);
6451 }
6452 }
6453
6454 /**
6455 * Safely clear out a translation table entry.
6456 *
6457 * @note If the TTE to clear out points to a leaf table, then that leaf table
6458 * must have a refcnt of zero before the TTE can be removed.
6459 *
6460 * @param pmap The pmap containing the page table whose TTE is being removed.
6461 * @param va_start Beginning of the VA range mapped by the table being removed, for TLB maintenance
6462 * @param va_end Non-inclusive end of the VA range mapped by the table being removed, for TLB maintenance
6463 * @param need_strong_sync Indicates whether strong DSB should be used to synchronize TLB maintenance
6464 * @param ttep Pointer to the TTE that should be cleared out.
6465 * @param level The level of the page table that contains the TTE to be removed.
6466 */
6467 static void
6468 pmap_tte_remove(
6469 pmap_t pmap,
6470 vm_offset_t va_start,
6471 vm_offset_t va_end,
6472 bool need_strong_sync,
6473 tt_entry_t *ttep,
6474 unsigned int level)
6475 {
6476 tt_entry_t tte = *ttep;
6477
6478 if (__improbable(tte == 0)) {
6479 panic("%s: null tt_entry ttep==%p", __func__, ttep);
6480 }
6481
6482 if (__improbable((level == pt_attr_twig_level(pmap_get_pt_attr(pmap))) &&
6483 (ptep_get_info((pt_entry_t*)ttetokv(tte))->refcnt != 0))) {
6484 panic("%s: non-zero pagetable refcount: pmap=%p ttep=%p ptd=%p refcnt=0x%x", __func__,
6485 pmap, ttep, tte_get_ptd(tte), ptep_get_info((pt_entry_t*)ttetokv(tte))->refcnt);
6486 }
6487
6488 #if (__ARM_VMSA__ == 7)
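/*
 * Illustrative note for the ARMv7 case (typical configuration assumed): each
 * L1 entry maps 1MB and coarse L2 tables are 1K, so four L2 tables share one
 * 4K page. The block below therefore clears the whole 16-byte-aligned group
 * of four L1 entries covering that 4MB span, so the single page of L2 tables
 * backing it can later be freed as a whole by pmap_tte_deallocate().
 */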
6489 {
6490 tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
6491 unsigned i;
6492
6493 for (i = 0; i < 4; i++, ttep_4M++) {
6494 *ttep_4M = (tt_entry_t) 0;
6495 }
6496 FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
6497 }
6498 #else
6499 *ttep = (tt_entry_t) 0;
6500 FLUSH_PTE_STRONG(ttep);
6501 #endif /* (__ARM_VMSA__ == 7) */
6502 // If given a VA range, we're being asked to flush the TLB before the table in ttep is freed.
6503 if (va_end > va_start) {
6504 #if (__ARM_VMSA__ == 7)
6505 // Ensure intermediate translations are flushed for each 1MB block
6506 flush_mmu_tlb_entry_async((va_start & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
6507 flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
6508 flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
6509 flush_mmu_tlb_entry_async(((va_start & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
6510 #endif
6511 PMAP_UPDATE_TLBS(pmap, va_start, va_end, need_strong_sync);
6512 }
6513 }
6514
6515 /**
6516 * Given a pointer to an entry within a `level` page table, delete the
6517 * page table at `level` + 1 that is represented by that entry. For instance,
6518 * to delete an unused L3 table, `ttep` would be a pointer to the L2 entry that
6519 * contains the PA of the L3 table, and `level` would be "2".
6520 *
6521 * @note If the table getting deallocated is a leaf table, then that leaf table
6522 * must have a refcnt of zero before getting deallocated. All other levels
6523 * must have a refcnt of PT_DESC_REFCOUNT in their page table descriptor.
6524 *
6525 * @param pmap The pmap that owns the page table to be deallocated.
6526 * @param va_start Beginning of the VA range mapped by the table being removed, for TLB maintenance
6527 * @param va_end Non-inclusive end of the VA range mapped by the table being removed, for TLB maintenance
6528 * @param need_strong_sync Indicates whether strong DSB should be used to synchronize TLB maintenance
6529 * @param ttep Pointer to the `level` TTE to remove.
6530 * @param level The level of the table that contains an entry pointing to the
6531 * table to be removed. The deallocated page table will be a
6532 * `level` + 1 table (so if `level` is 2, then an L3 table will be
6533 * deleted).
6534 */
6535 static void
6536 pmap_tte_deallocate(
6537 pmap_t pmap,
6538 vm_offset_t va_start,
6539 vm_offset_t va_end,
6540 bool need_strong_sync,
6541 tt_entry_t *ttep,
6542 unsigned int level)
6543 {
6544 pmap_paddr_t pa;
6545 tt_entry_t tte;
6546
6547 pmap_assert_locked_w(pmap);
6548
6549 tte = *ttep;
6550
6551 #if MACH_ASSERT
6552 if (tte_get_ptd(tte)->pmap != pmap) {
6553 panic("%s: Passed in pmap doesn't own the page table to be deleted ptd=%p ptd->pmap=%p pmap=%p",
6554 __func__, tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
6555 }
6556 #endif /* MACH_ASSERT */
6557
6558 pmap_tte_remove(pmap, va_start, va_end, need_strong_sync, ttep, level);
6559
6560 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
6561 uint64_t pmap_page_size = pt_attr_page_size(pmap_get_pt_attr(pmap));
6562 #if MACH_ASSERT
6563 pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~(pmap_page_size - 1)));
6564
6565 for (unsigned i = 0; i < (pmap_page_size / sizeof(*pte_p)); i++, pte_p++) {
6566 if (__improbable(ARM_PTE_IS_COMPRESSED(*pte_p, pte_p))) {
6567 panic_plain("%s: Found compressed mapping in soon to be deleted "
6568 "L%d table tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx",
6569 __func__, level + 1, (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
6570 } else if (__improbable(((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT)) {
6571 panic_plain("%s: Found valid mapping in soon to be deleted L%d "
6572 "table tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx",
6573 __func__, level + 1, (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
6574 }
6575 }
6576 #endif /* MACH_ASSERT */
6577 pmap_unlock(pmap);
6578
6579 /* Clear any page offset: we mean to free the whole page, but armv7 TTEs may be
6580 * aligned only on 1K boundaries. We clear the surrounding "chunk" of 4 TTEs above. */
6581 pa = tte_to_pa(tte) & ~(pmap_page_size - 1);
6582 pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
6583 pmap_lock(pmap);
6584 }
6585 }
6586
6587 /*
6588 * Remove a range of hardware page-table entries.
6589 * The entries given are the first (inclusive)
6590 * and last (exclusive) entries for the VM pages.
6591 * The virtual address is the va for the first pte.
6592 *
6593 * The pmap must be locked.
6594 * If the pmap is not the kernel pmap, the range must lie
6595 * entirely within one pte-page. This is NOT checked.
6596 * Assumes that the pte-page exists.
6597 *
6598 * Returns the number of PTEs changed
6599 */
6600 static int
6601 pmap_remove_range(
6602 pmap_t pmap,
6603 vm_map_address_t va,
6604 pt_entry_t *bpte,
6605 pt_entry_t *epte)
6606 {
6607 bool need_strong_sync = false;
6608 int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, NULL,
6609 &need_strong_sync, PMAP_OPTIONS_REMOVE);
6610 if (num_changed > 0) {
6611 PMAP_UPDATE_TLBS(pmap, va,
6612 va + (pt_attr_page_size(pmap_get_pt_attr(pmap)) * (epte - bpte)), need_strong_sync);
6613 }
6614 return num_changed;
6615 }
6616
6617
6618 #ifdef PVH_FLAG_EXEC
6619
6620 /*
6621 * Update the access protection bits of the physical aperture mapping for a page.
6622 * This is useful, for example, in guaranteeing that a verified executable page
6623 * has no writable mappings anywhere in the system, including the physical
6624 * aperture. flush_tlb_async can be set to true to avoid unnecessary TLB
6625 * synchronization overhead in cases where the call to this function is
6626 * guaranteed to be followed by other TLB operations.
6627 */
6628 static void
6629 pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
6630 {
6631 #if __ARM_PTE_PHYSMAP__
6632 ASSERT_PVH_LOCKED(pai);
6633 vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
6634 pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
6635
6636 pt_entry_t tmplate = *pte_p;
6637 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
6638 return;
6639 }
6640 tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
6641 #if (__ARM_VMSA__ > 7)
6642 if (tmplate & ARM_PTE_HINT_MASK) {
6643 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
6644 __func__, pte_p, (void *)kva, tmplate);
6645 }
6646 #endif
6647 WRITE_PTE_STRONG(pte_p, tmplate);
6648 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
6649 if (!flush_tlb_async) {
6650 sync_tlb_flush();
6651 }
6652 #endif
6653 }
6654
6655 #endif /* defined(PVH_FLAG_EXEC) */
6656
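/*
 * Detach the mapping recorded by `cpte` from the PV list of the physical page
 * with index `pai`, and classify it for the caller's accounting: pages marked
 * ALTACCT count as internal and alt_internal, other internal pages count as
 * reusable or internal, and everything else counts as external. The caller
 * must hold the PVH lock for `pai`; it is left held on return.
 */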
6657 static void
6658 pmap_remove_pv(
6659 pmap_t pmap,
6660 pt_entry_t *cpte,
6661 int pai,
6662 int *num_internal,
6663 int *num_alt_internal,
6664 int *num_reusable,
6665 int *num_external)
6666 {
6667 pv_entry_t **pv_h, **pve_pp;
6668 pv_entry_t *pve_p;
6669
6670 ASSERT_NOT_HIBERNATING();
6671 ASSERT_PVH_LOCKED(pai);
6672 pv_h = pai_to_pvh(pai);
6673 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
6674
6675 #if XNU_MONITOR
6676 if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
6677 panic("%d is locked down (%#lx), cannot remove", pai, pvh_flags);
6678 }
6679 #endif
6680
6681 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
6682 if (__improbable((cpte != pvh_ptep(pv_h)))) {
6683 panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
6684 }
6685 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
6686 assert(IS_INTERNAL_PAGE(pai));
6687 (*num_internal)++;
6688 (*num_alt_internal)++;
6689 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
6690 } else if (IS_INTERNAL_PAGE(pai)) {
6691 if (IS_REUSABLE_PAGE(pai)) {
6692 (*num_reusable)++;
6693 } else {
6694 (*num_internal)++;
6695 }
6696 } else {
6697 (*num_external)++;
6698 }
6699 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
6700 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
6701 pve_pp = pv_h;
6702 pve_p = pvh_list(pv_h);
6703
6704 while (pve_p != PV_ENTRY_NULL &&
6705 (pve_get_ptep(pve_p) != cpte)) {
6706 pve_pp = pve_link_field(pve_p);
6707 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
6708 }
6709
6710 if (__improbable((pve_p == PV_ENTRY_NULL))) {
6711 panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
6712 }
6713
6714 #if MACH_ASSERT
6715 if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
6716 pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
6717 while (check_pve_p != PV_ENTRY_NULL) {
6718 if (pve_get_ptep(check_pve_p) == cpte) {
6719 panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
6720 __func__, cpte, pmap, pv_h, pve_p, pai);
6721 }
6722 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
6723 }
6724 }
6725 #endif
6726
6727 if (IS_ALTACCT_PAGE(pai, pve_p)) {
6728 assert(IS_INTERNAL_PAGE(pai));
6729 (*num_internal)++;
6730 (*num_alt_internal)++;
6731 CLR_ALTACCT_PAGE(pai, pve_p);
6732 } else if (IS_INTERNAL_PAGE(pai)) {
6733 if (IS_REUSABLE_PAGE(pai)) {
6734 (*num_reusable)++;
6735 } else {
6736 (*num_internal)++;
6737 }
6738 } else {
6739 (*num_external)++;
6740 }
6741
6742 pvh_remove(pv_h, pve_pp, pve_p);
6743 pv_free_entry(pve_p);
6744 if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
6745 pvh_set_flags(pv_h, pvh_flags);
6746 }
6747 } else {
6748 panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
6749 __func__, *pv_h, cpte, pmap, pv_h, pai);
6750 }
6751
6752 #ifdef PVH_FLAG_EXEC
6753 if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
6754 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
6755 }
6756 #endif
6757 }
6758
6759 static int
6760 pmap_remove_range_options(
6761 pmap_t pmap,
6762 vm_map_address_t va,
6763 pt_entry_t *bpte,
6764 pt_entry_t *epte,
6765 vm_map_address_t *eva,
6766 bool *need_strong_sync __unused,
6767 int options)
6768 {
6769 pt_entry_t *cpte;
6770 size_t npages = 0;
6771 int num_removed, num_unwired;
6772 int num_pte_changed;
6773 int pai = 0;
6774 pmap_paddr_t pa;
6775 int num_external, num_internal, num_reusable;
6776 int num_alt_internal;
6777 uint64_t num_compressed, num_alt_compressed;
6778 int16_t refcnt = 0;
6779
6780 pmap_assert_locked_w(pmap);
6781
6782 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6783 uint64_t pmap_page_size = PAGE_RATIO * pt_attr_page_size(pt_attr);
6784
6785 if (__improbable((uintptr_t)epte > (((uintptr_t)bpte + pmap_page_size) & ~(pmap_page_size - 1)))) {
6786 panic("%s: PTE range [%p, %p) in pmap %p crosses page table boundary", __func__, bpte, epte, pmap);
6787 }
6788
6789 num_removed = 0;
6790 num_unwired = 0;
6791 num_pte_changed = 0;
6792 num_external = 0;
6793 num_internal = 0;
6794 num_reusable = 0;
6795 num_compressed = 0;
6796 num_alt_internal = 0;
6797 num_alt_compressed = 0;
6798
6799 for (cpte = bpte; cpte < epte;
6800 cpte += PAGE_RATIO, va += pmap_page_size) {
6801 pt_entry_t spte;
6802 boolean_t managed = FALSE;
6803
6804 /*
6805 * Check for pending preemption on every iteration: the PV list may be arbitrarily long,
6806 * so we need to check for preemption as aggressively as we can.
6807 */
6808 if (__improbable((eva != NULL) && npages++ && pmap_pending_preemption())) {
6809 *eva = va;
6810 break;
6811 }
6812 spte = *((volatile pt_entry_t*)cpte);
6813
6814 #if CONFIG_PGTRACE
6815 if (pgtrace_enabled) {
6816 pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
6817 }
6818 #endif
6819
6820 while (!managed) {
6821 if (pmap != kernel_pmap &&
6822 (options & PMAP_OPTIONS_REMOVE) &&
6823 (ARM_PTE_IS_COMPRESSED(spte, cpte))) {
6824 /*
6825 * "pmap" must be locked at this point,
6826 * so this should not race with another
6827 * pmap_remove_range() or pmap_enter().
6828 */
6829
6830 /* one less "compressed"... */
6831 num_compressed++;
6832 if (spte & ARM_PTE_COMPRESSED_ALT) {
6833 /* ... but it used to be "ALTACCT" */
6834 num_alt_compressed++;
6835 }
6836
6837 /* clear marker */
6838 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
6839 /*
6840 * "refcnt" also accounts for
6841 * our "compressed" markers,
6842 * so let's update it here.
6843 */
6844 --refcnt;
6845 spte = *((volatile pt_entry_t*)cpte);
6846 }
6847 /*
6848 * It may be possible for the pte to transition from managed
6849 * to unmanaged in this timeframe; for now, elide the assert.
6850 * We should break out as a consequence of checking pa_valid.
6851 */
6852 //assert(!ARM_PTE_IS_COMPRESSED(spte));
6853 pa = pte_to_pa(spte);
6854 if (!pa_valid(pa)) {
6855 #if XNU_MONITOR
6856 unsigned int cacheattr = pmap_cache_attributes((ppnum_t)atop(pa));
6857 #endif
6858 #if XNU_MONITOR
6859 if (__improbable((cacheattr & PP_ATTR_MONITOR) &&
6860 (pte_to_xprr_perm(spte) != XPRR_KERN_RO_PERM) && !pmap_ppl_disable)) {
6861 panic("%s: attempt to remove mapping of writable PPL-protected I/O address 0x%llx",
6862 __func__, (uint64_t)pa);
6863 }
6864 #endif
6865 break;
6866 }
6867 pai = (int)pa_index(pa);
6868 LOCK_PVH(pai);
6869 spte = *((volatile pt_entry_t*)cpte);
6870 pa = pte_to_pa(spte);
6871 if (pai == (int)pa_index(pa)) {
6872 managed = TRUE;
6873 break; // Leave pai locked as we will unlock it after we free the PV entry
6874 }
6875 UNLOCK_PVH(pai);
6876 }
6877
6878 if (ARM_PTE_IS_COMPRESSED(*cpte, cpte)) {
6879 /*
6880 * There used to be a valid mapping here but it
6881 * has already been removed when the page was
6882 * sent to the VM compressor, so nothing left to
6883 * remove now...
6884 */
6885 continue;
6886 }
6887
6888 /* remove the translation, do not flush the TLB */
6889 if (*cpte != ARM_PTE_TYPE_FAULT) {
6890 assertf(!ARM_PTE_IS_COMPRESSED(*cpte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
6891 assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
6892 #if MACH_ASSERT
6893 if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
6894 panic("pmap_remove_range_options(): VA mismatch: cpte=%p ptd=%p pte=0x%llx va=0x%llx, cpte va=0x%llx",
6895 cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va, (uint64_t)ptep_get_va(cpte));
6896 }
6897 #endif
6898 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
6899 num_pte_changed++;
6900 }
6901
6902 if ((spte != ARM_PTE_TYPE_FAULT) &&
6903 (pmap != kernel_pmap)) {
6904 assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
6905 assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
6906 --refcnt;
6907 }
6908
6909 if (pte_is_wired(spte)) {
6910 pte_set_wired(pmap, cpte, 0);
6911 num_unwired++;
6912 }
6913 /*
6914 * if not managed, we're done
6915 */
6916 if (!managed) {
6917 continue;
6918 }
6919 /*
6920 * find and remove the mapping from the chain for this
6921 * physical address.
6922 */
6923
6924 pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);
6925
6926 UNLOCK_PVH(pai);
6927 num_removed++;
6928 }
6929
6930 /*
6931 * Update the counts
6932 */
6933 OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
6934 pmap_ledger_debit(pmap, task_ledgers.phys_mem, num_removed * pmap_page_size);
6935
6936 if (pmap != kernel_pmap) {
6937 if ((refcnt != 0) && (OSAddAtomic16(refcnt, (SInt16 *) &(ptep_get_info(bpte)->refcnt)) <= 0)) {
6938 panic("pmap_remove_range_options: over-release of ptdp %p for pte [%p, %p)", ptep_get_ptd(bpte), bpte, epte);
6939 }
6940 /* update pmap stats... */
6941 OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
6942 if (num_external) {
6943 __assert_only int32_t orig_external = OSAddAtomic(-num_external, &pmap->stats.external);
6944 PMAP_STATS_ASSERTF(orig_external >= num_external,
6945 pmap,
6946 "pmap=%p bpte=%p epte=%p num_external=%d stats.external=%d",
6947 pmap, bpte, epte, num_external, orig_external);
6948 }
6949 if (num_internal) {
6950 __assert_only int32_t orig_internal = OSAddAtomic(-num_internal, &pmap->stats.internal);
6951 PMAP_STATS_ASSERTF(orig_internal >= num_internal,
6952 pmap,
6953 "pmap=%p bpte=%p epte=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
6954 pmap, bpte, epte,
6955 num_internal, orig_internal,
6956 num_reusable, pmap->stats.reusable);
6957 }
6958 if (num_reusable) {
6959 __assert_only int32_t orig_reusable = OSAddAtomic(-num_reusable, &pmap->stats.reusable);
6960 PMAP_STATS_ASSERTF(orig_reusable >= num_reusable,
6961 pmap,
6962 "pmap=%p bpte=%p epte=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
6963 pmap, bpte, epte,
6964 num_internal, pmap->stats.internal,
6965 num_reusable, orig_reusable);
6966 }
6967 if (num_compressed) {
6968 __assert_only uint64_t orig_compressed = OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
6969 PMAP_STATS_ASSERTF(orig_compressed >= num_compressed,
6970 pmap,
6971 "pmap=%p bpte=%p epte=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
6972 pmap, bpte, epte, num_compressed, num_alt_compressed,
6973 orig_compressed);
6974 }
6975 /* ... and ledgers */
6976 pmap_ledger_debit(pmap, task_ledgers.wired_mem, (num_unwired) * pmap_page_size);
6977 pmap_ledger_debit(pmap, task_ledgers.internal, (num_internal) * pmap_page_size);
6978 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, (num_alt_internal) * pmap_page_size);
6979 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, (num_alt_compressed) * pmap_page_size);
6980 pmap_ledger_debit(pmap, task_ledgers.internal_compressed, (num_compressed) * pmap_page_size);
6981 /* make needed adjustments to phys_footprint */
6982 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
6983 ((num_internal -
6984 num_alt_internal) +
6985 (num_compressed -
6986 num_alt_compressed)) * pmap_page_size);
6987 }
6988
6989 /* flush the ptable entries we have written */
6990 if (num_pte_changed > 0) {
6991 FLUSH_PTE_RANGE_STRONG(bpte, epte);
6992 }
6993
6994 return num_pte_changed;
6995 }
6996
6997
6998 /*
6999 * Remove the given range of addresses
7000 * from the specified map.
7001 *
7002 * It is assumed that the start and end are properly
7003 * rounded to the hardware page size.
7004 */
7005 void
7006 pmap_remove(
7007 pmap_t pmap,
7008 vm_map_address_t start,
7009 vm_map_address_t end)
7010 {
7011 pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
7012 }
7013
7014 MARK_AS_PMAP_TEXT static vm_map_address_t
7015 pmap_remove_options_internal(
7016 pmap_t pmap,
7017 vm_map_address_t start,
7018 vm_map_address_t end,
7019 int options)
7020 {
7021 vm_map_address_t eva = end;
7022 pt_entry_t *bpte, *epte;
7023 pt_entry_t *pte_p;
7024 tt_entry_t *tte_p;
7025 int remove_count = 0;
7026 bool need_strong_sync = false;
7027
7028 if (__improbable(end < start)) {
7029 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
7030 }
7031
7032 VALIDATE_PMAP(pmap);
7033
7034 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7035
7036 pmap_lock(pmap);
7037
7038 tte_p = pmap_tte(pmap, start);
7039
7040 if (tte_p == (tt_entry_t *) NULL) {
7041 goto done;
7042 }
7043
7044 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
7045 pte_p = (pt_entry_t *) ttetokv(*tte_p);
7046 bpte = &pte_p[pte_index(pmap, pt_attr, start)];
7047 epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));
7048
7049 remove_count = pmap_remove_range_options(pmap, start, bpte, epte, &eva,
7050 &need_strong_sync, options);
7051
7052 if ((pmap != kernel_pmap) && (pmap->nested == FALSE) && (ptep_get_info(pte_p)->refcnt == 0)) {
7053 pmap_tte_deallocate(pmap, start, eva, need_strong_sync, tte_p, pt_attr_twig_level(pt_attr));
7054 remove_count = 0; // pmap_tte_deallocate has flushed the TLB for us
7055 }
7056 }
7057
7058 done:
7059 pmap_unlock(pmap);
7060
7061 if (remove_count > 0) {
7062 PMAP_UPDATE_TLBS(pmap, start, eva, need_strong_sync);
7063 }
7064 return eva;
7065 }
7066
7067 void
7068 pmap_remove_options(
7069 pmap_t pmap,
7070 vm_map_address_t start,
7071 vm_map_address_t end,
7072 int options)
7073 {
7074 vm_map_address_t va;
7075
7076 if (pmap == PMAP_NULL) {
7077 return;
7078 }
7079
7080 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7081
7082 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
7083 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
7084 VM_KERNEL_ADDRHIDE(end));
7085
7086 #if MACH_ASSERT
7087 if ((start | end) & pt_attr_leaf_offmask(pt_attr)) {
7088 panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
7089 pmap, (uint64_t)start, (uint64_t)end);
7090 }
7091 if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
7092 panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
7093 pmap, (uint64_t)start, (uint64_t)end);
7094 }
7095 #endif
7096 assert(get_preemption_level() == 0);
7097
7098 /*
7099 * Invalidate the translation buffer first
7100 */
7101 va = start;
7102 while (va < end) {
7103 vm_map_address_t l;
7104
7105 l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
7106 if (l > end) {
7107 l = end;
7108 }
7109
7110 #if XNU_MONITOR
7111 va = pmap_remove_options_ppl(pmap, va, l, options);
7112
7113 pmap_ledger_check_balance(pmap);
7114 #else
7115 va = pmap_remove_options_internal(pmap, va, l, options);
7116 #endif
7117 }
7118
7119 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
7120 }
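/*
 * Worked example of the chunking loop above (twig size is hypothetical): with
 * a 2MB twig, removing [0x100000, 0x500000) is issued as three calls covering
 * [0x100000, 0x200000), [0x200000, 0x400000) and [0x400000, 0x500000), so no
 * single (possibly PPL-entered) call crosses a twig boundary. If a call
 * returns early because of pending preemption, the loop simply resumes at the
 * returned address.
 */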
7121
7122
7123 /*
7124 * Remove phys addr if mapped in specified map
7125 */
7126 void
7127 pmap_remove_some_phys(
7128 __unused pmap_t map,
7129 __unused ppnum_t pn)
7130 {
7131 /* Implement to support working set code */
7132 }
7133
7134 void
7135 pmap_set_pmap(
7136 pmap_t pmap,
7137 #if !__ARM_USER_PROTECT__
7138 __unused
7139 #endif
7140 thread_t thread)
7141 {
7142 pmap_switch(pmap);
7143 #if __ARM_USER_PROTECT__
7144 thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
7145 thread->machine.asid = pmap->hw_asid;
7146 #endif
7147 }
7148
7149 static void
7150 pmap_flush_core_tlb_asid_async(pmap_t pmap)
7151 {
7152 #if (__ARM_VMSA__ == 7)
7153 flush_core_tlb_asid_async(pmap->hw_asid);
7154 #else
7155 flush_core_tlb_asid_async(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
7156 #endif
7157 }
7158
7159 static inline bool
7160 pmap_user_ttb_is_clear(void)
7161 {
7162 #if (__ARM_VMSA__ > 7)
7163 return get_mmu_ttb() == (invalid_ttep & TTBR_BADDR_MASK);
7164 #else
7165 return get_mmu_ttb() == kernel_pmap->ttep;
7166 #endif
7167 }
7168
7169 MARK_AS_PMAP_TEXT static void
7170 pmap_switch_internal(
7171 pmap_t pmap)
7172 {
7173 VALIDATE_PMAP(pmap);
7174 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
7175 uint16_t asid_index = pmap->hw_asid;
7176 bool do_asid_flush = false;
7177
7178 if (__improbable((asid_index == 0) && (pmap != kernel_pmap))) {
7179 panic("%s: attempt to activate pmap with invalid ASID %p", __func__, pmap);
7180 }
7181 #if __ARM_KERNEL_PROTECT__
7182 asid_index >>= 1;
7183 #endif
7184
7185 #if (__ARM_VMSA__ > 7)
7186 pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
7187 __unused const pt_attr_t *last_nested_pmap_attr = cpu_data_ptr->cpu_nested_pmap_attr;
7188 __unused vm_map_address_t last_nested_region_addr = cpu_data_ptr->cpu_nested_region_addr;
7189 __unused vm_map_offset_t last_nested_region_size = cpu_data_ptr->cpu_nested_region_size;
7190 bool do_shared_region_flush = ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap));
7191 bool break_before_make = do_shared_region_flush;
7192 #else
7193 bool do_shared_region_flush = false;
7194 bool break_before_make = false;
7195 #endif
7196
7197 if ((pmap_max_asids > MAX_HW_ASIDS) && (asid_index > 0)) {
7198 asid_index -= 1;
7199 pmap_update_plru(asid_index);
7200
7201 /* Paranoia. */
7202 assert(asid_index < (sizeof(cpu_data_ptr->cpu_sw_asids) / sizeof(*cpu_data_ptr->cpu_sw_asids)));
7203
7204 /* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
7205 uint8_t new_sw_asid = pmap->sw_asid;
7206 uint8_t last_sw_asid = cpu_data_ptr->cpu_sw_asids[asid_index];
7207
7208 if (new_sw_asid != last_sw_asid) {
7209 /*
7210 * If the virtual ASID of the new pmap does not match the virtual ASID
7211 * last seen on this CPU for the physical ASID (that was a mouthful),
7212 * then this switch runs the risk of aliasing. We need to flush the
7213 * TLB for this physical ASID in this case.
7214 */
7215 cpu_data_ptr->cpu_sw_asids[asid_index] = new_sw_asid;
7216 do_asid_flush = true;
7217 break_before_make = true;
7218 }
7219 }
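/*
 * Illustrative example (counts are hypothetical): if the hardware exposes 256
 * ASIDs but pmap_max_asids is 1024, four software ASIDs share each hardware
 * slot. cpu_sw_asids[] remembers which software ASID last used the slot on
 * this CPU, so the mismatch check above flushes the hardware ASID only when a
 * different software ASID moves onto it, avoiding stale translations without
 * flushing on every switch.
 */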
7220
7221 #if __ARM_MIXED_PAGE_SIZE__
7222 if (pmap_get_pt_attr(pmap)->pta_tcr_value != get_tcr()) {
7223 break_before_make = true;
7224 }
7225 #endif
7226 if (__improbable(break_before_make && !pmap_user_ttb_is_clear())) {
7227 PMAP_TRACE(1, PMAP_CODE(PMAP__CLEAR_USER_TTB), VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
7228 pmap_clear_user_ttb_internal();
7229 }
7230
7231 #if (__ARM_VMSA__ > 7)
7232 /* If we're switching to a different nested pmap (i.e. shared region), we'll need
7233 * to flush the userspace mappings for that region. Those mappings are global
7234 * and will not be protected by the ASID. It should also be cheaper to flush the
7235 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
7236 if (__improbable(do_shared_region_flush)) {
7237 #if __ARM_RANGE_TLBI__
7238 uint64_t page_shift_prev = pt_attr_leaf_shift(last_nested_pmap_attr);
7239 vm_map_offset_t npages_prev = last_nested_region_size >> page_shift_prev;
7240
7241 /* NOTE: here we flush the global TLB entries for the previous nested region only.
7242 * There may still be non-global entries that overlap with the incoming pmap's
7243 * nested region. On Apple SoCs at least, this is acceptable. Those non-global entries
7244 * must necessarily belong to a different ASID than the incoming pmap, or they would
7245 * be flushed in the do_asid_flush case below. This will prevent them from conflicting
7246 * with the incoming pmap's nested region. However, the ARMv8 ARM is not crystal clear
7247 * on whether such a global/inactive-nonglobal overlap is acceptable, so we may need
7248 * to consider additional invalidation here in the future. */
7249 if (npages_prev <= ARM64_TLB_RANGE_PAGES) {
7250 flush_core_tlb_allrange_async(generate_rtlbi_param((ppnum_t)npages_prev, 0, last_nested_region_addr, page_shift_prev));
7251 } else {
7252 do_asid_flush = false;
7253 flush_core_tlb_async();
7254 }
7255 #else
7256 do_asid_flush = false;
7257 flush_core_tlb_async();
7258 #endif // __ARM_RANGE_TLBI__
7259 }
7260 #endif // (__ARM_VMSA__ > 7)
7261 if (__improbable(do_asid_flush)) {
7262 pmap_flush_core_tlb_asid_async(pmap);
7263 #if DEVELOPMENT || DEBUG
7264 os_atomic_inc(&pmap_asid_flushes, relaxed);
7265 #endif
7266 }
7267 if (__improbable(do_asid_flush || do_shared_region_flush)) {
7268 sync_tlb_flush();
7269 }
7270
7271 pmap_switch_user_ttb_internal(pmap);
7272 }
7273
7274 void
7275 pmap_switch(
7276 pmap_t pmap)
7277 {
7278 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
7279 #if XNU_MONITOR
7280 pmap_switch_ppl(pmap);
7281 #else
7282 pmap_switch_internal(pmap);
7283 #endif
7284 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
7285 }
7286
7287 void
7288 pmap_require(pmap_t pmap)
7289 {
7290 #if XNU_MONITOR
7291 VALIDATE_PMAP(pmap);
7292 #else
7293 if (pmap != kernel_pmap) {
7294 zone_id_require(ZONE_ID_PMAP, sizeof(struct pmap), pmap);
7295 }
7296 #endif
7297 }
7298
7299 void
7300 pmap_page_protect(
7301 ppnum_t ppnum,
7302 vm_prot_t prot)
7303 {
7304 pmap_page_protect_options(ppnum, prot, 0, NULL);
7305 }
7306
7307 /*
7308 * Routine: pmap_page_protect_options
7309 *
7310 * Function:
7311 * Lower the permission for all mappings to a given
7312 * page.
7313 */
7314 MARK_AS_PMAP_TEXT static void
7315 pmap_page_protect_options_with_flush_range(
7316 ppnum_t ppnum,
7317 vm_prot_t prot,
7318 unsigned int options,
7319 pmap_tlb_flush_range_t *flush_range)
7320 {
7321 pmap_paddr_t phys = ptoa(ppnum);
7322 pv_entry_t **pv_h;
7323 pv_entry_t **pve_pp;
7324 pv_entry_t *pve_p;
7325 pv_entry_t *pveh_p;
7326 pv_entry_t *pvet_p;
7327 pt_entry_t *pte_p;
7328 pv_entry_t *new_pve_p;
7329 pt_entry_t *new_pte_p;
7330 vm_offset_t pvh_flags;
7331 int pai;
7332 boolean_t remove;
7333 boolean_t set_NX;
7334 boolean_t tlb_flush_needed = FALSE;
7335 unsigned int pvh_cnt = 0;
7336
7337 assert(ppnum != vm_page_fictitious_addr);
7338
7339 /* Only work with managed pages. */
7340 if (!pa_valid(phys)) {
7341 return;
7342 }
7343
7344 /*
7345 * Determine the new protection.
7346 */
7347 switch (prot) {
7348 case VM_PROT_ALL:
7349 return; /* nothing to do */
7350 case VM_PROT_READ:
7351 case VM_PROT_READ | VM_PROT_EXECUTE:
7352 remove = FALSE;
7353 break;
7354 default:
7355 /* The PPL security model requires that we flush TLBs before exiting if the page may be recycled. */
7356 options = options & ~PMAP_OPTIONS_NOFLUSH;
7357 remove = TRUE;
7358 break;
7359 }
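	/*
	 * Descriptive note: 'remove' now selects the behavior applied to every mapping
	 * of this page. Any requested protection other than read or read/execute
	 * results in the mappings being torn down entirely; read and read/execute
	 * requests merely downgrade the mappings in place.
	 */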
7360
7361 pai = (int)pa_index(phys);
7362 LOCK_PVH(pai);
7363 pv_h = pai_to_pvh(pai);
7364 pvh_flags = pvh_get_flags(pv_h);
7365
7366 #if XNU_MONITOR
7367 if (__improbable(remove && (pvh_flags & PVH_FLAG_LOCKDOWN))) {
7368 panic("%d is locked down (%#llx), cannot remove", pai, pvh_get_flags(pv_h));
7369 }
7370 #endif
7371
7372 pte_p = PT_ENTRY_NULL;
7373 pve_p = PV_ENTRY_NULL;
7374 pve_pp = pv_h;
7375 pveh_p = PV_ENTRY_NULL;
7376 pvet_p = PV_ENTRY_NULL;
7377 new_pve_p = PV_ENTRY_NULL;
7378 new_pte_p = PT_ENTRY_NULL;
7379 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7380 pte_p = pvh_ptep(pv_h);
7381 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7382 pve_p = pvh_list(pv_h);
7383 pveh_p = pve_p;
7384 }
7385
7386 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
7387 vm_map_address_t va = 0;
7388 pmap_t pmap = NULL;
7389 pt_entry_t tmplate = ARM_PTE_TYPE_FAULT;
7390 boolean_t update = FALSE;
7391
7392 if (pve_p != PV_ENTRY_NULL) {
7393 pte_p = pve_get_ptep(pve_p);
7394 }
7395
7396 #ifdef PVH_FLAG_IOMMU
7397 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
7398 #if XNU_MONITOR
7399 if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
7400 panic("pmap_page_protect: ppnum 0x%x locked down, cannot be owned by iommu 0x%llx, pve_p=%p",
7401 ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
7402 }
7403 #endif
7404 if (remove) {
7405 if (options & PMAP_OPTIONS_COMPRESSOR) {
7406 panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
7407 ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
7408 }
7409 if (pve_p != PV_ENTRY_NULL) {
7410 pv_entry_t *temp_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
7411 pvh_remove(pv_h, pve_pp, pve_p);
7412 pveh_p = pvh_list(pv_h);
7413 pve_next(pve_p) = new_pve_p;
7414 new_pve_p = pve_p;
7415 pve_p = temp_pve_p;
7416 continue;
7417 } else {
7418 new_pte_p = pte_p;
7419 break;
7420 }
7421 }
7422 goto protect_skip_pve;
7423 }
7424 #endif
7425 pmap = ptep_get_pmap(pte_p);
7426 va = ptep_get_va(pte_p);
7427
7428 if (pte_p == PT_ENTRY_NULL) {
7429 panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
7430 pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
7431 } else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
7432 #if MACH_ASSERT
7433 if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
7434 pv_entry_t *check_pve_p = pveh_p;
7435 while (check_pve_p != PV_ENTRY_NULL) {
7436 if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
7437 panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
7438 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
7439 }
7440 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
7441 }
7442 }
7443 #endif
7444 panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
7445 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
7446 }
7447
7448 #if DEVELOPMENT || DEBUG
7449 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
7450 #else
7451 if ((prot & VM_PROT_EXECUTE))
7452 #endif
7453 {
7454 set_NX = FALSE;
7455 } else {
7456 set_NX = TRUE;
7457 }
7458
7459 /* Remove the mapping if new protection is NONE */
7460 if (remove) {
7461 boolean_t is_altacct = FALSE;
7462 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7463 pt_entry_t spte = *pte_p;
7464
7465 if (IS_ALTACCT_PAGE(pai, pve_p)) {
7466 is_altacct = TRUE;
7467 } else {
7468 is_altacct = FALSE;
7469 }
7470
7471 if (pte_is_wired(spte)) {
7472 pte_set_wired(pmap, pte_p, 0);
7473 spte = *pte_p;
7474 if (pmap != kernel_pmap) {
7475 pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7476 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
7477 }
7478 }
7479
7480 if (spte != ARM_PTE_TYPE_FAULT &&
7481 pmap != kernel_pmap &&
7482 (options & PMAP_OPTIONS_COMPRESSOR) &&
7483 IS_INTERNAL_PAGE(pai)) {
7484 assert(!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p));
7485 /* mark this PTE as having been "compressed" */
7486 tmplate = ARM_PTE_COMPRESSED;
7487 if (is_altacct) {
7488 tmplate |= ARM_PTE_COMPRESSED_ALT;
7489 is_altacct = TRUE;
7490 }
7491 } else {
7492 tmplate = ARM_PTE_TYPE_FAULT;
7493 }
7494
7495 /**
7496 * The entry must be written before the refcnt is decremented to
7497 * prevent use-after-free races with code paths that deallocate page
7498 * tables based on a zero refcnt.
7499 */
7500 if (spte != tmplate) {
7501 WRITE_PTE_STRONG(pte_p, tmplate);
7502 update = TRUE;
7503 }
7504
7505 if ((spte != ARM_PTE_TYPE_FAULT) &&
7506 (tmplate == ARM_PTE_TYPE_FAULT) &&
7507 (pmap != kernel_pmap)) {
7508 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_info(pte_p)->refcnt)) <= 0) {
7509 panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
7510 }
7511 }
7512
7513 pvh_cnt++;
7514 pmap_ledger_debit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7515 OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
7516
7517 #if MACH_ASSERT
7518 /*
7519 * We only ever compress internal pages.
7520 */
7521 if (options & PMAP_OPTIONS_COMPRESSOR) {
7522 assert(IS_INTERNAL_PAGE(pai));
7523 }
7524 #endif
7525
7526 if (pmap != kernel_pmap) {
7527 if (IS_REUSABLE_PAGE(pai) &&
7528 IS_INTERNAL_PAGE(pai) &&
7529 !is_altacct) {
7530 __assert_only int32_t orig_reusable = OSAddAtomic(-1, &pmap->stats.reusable);
7531 PMAP_STATS_ASSERTF(orig_reusable > 0, pmap, "stats.reusable %d", orig_reusable);
7532 } else if (IS_INTERNAL_PAGE(pai)) {
7533 __assert_only int32_t orig_internal = OSAddAtomic(-1, &pmap->stats.internal);
7534 PMAP_STATS_ASSERTF(orig_internal > 0, pmap, "stats.internal %d", orig_internal);
7535 } else {
7536 __assert_only int32_t orig_external = OSAddAtomic(-1, &pmap->stats.external);
7537 PMAP_STATS_ASSERTF(orig_external > 0, pmap, "stats.external %d", orig_external);
7538 }
7539 if ((options & PMAP_OPTIONS_COMPRESSOR) &&
7540 IS_INTERNAL_PAGE(pai)) {
7541 /* adjust "compressed" stats */
7542 OSAddAtomic64(+1, &pmap->stats.compressed);
7543 PMAP_STATS_PEAK(pmap->stats.compressed);
7544 pmap->stats.compressed_lifetime++;
7545 }
7546
7547 if (IS_ALTACCT_PAGE(pai, pve_p)) {
7548 assert(IS_INTERNAL_PAGE(pai));
7549 pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7550 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7551 if (options & PMAP_OPTIONS_COMPRESSOR) {
7552 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7553 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7554 }
7555
7556 /*
7557 * Clean up our marker before
7558 * we free this pv_entry.
7559 */
7560 CLR_ALTACCT_PAGE(pai, pve_p);
7561 } else if (IS_REUSABLE_PAGE(pai)) {
7562 assert(IS_INTERNAL_PAGE(pai));
7563 if (options & PMAP_OPTIONS_COMPRESSOR) {
7564 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7565 /* was not in footprint, but is now */
7566 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7567 }
7568 } else if (IS_INTERNAL_PAGE(pai)) {
7569 pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7570
7571 /*
7572 * Update all stats related to physical footprint, which only
7573 * deals with internal pages.
7574 */
7575 if (options & PMAP_OPTIONS_COMPRESSOR) {
7576 /*
7577 * This removal is only being done so we can send this page to
7578 * the compressor; therefore it mustn't affect total task footprint.
7579 */
7580 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7581 } else {
7582 /*
7583 * This internal page isn't going to the compressor, so adjust stats to keep
7584 * phys_footprint up to date.
7585 */
7586 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
7587 }
7588 } else {
7589 /* external page: no impact on ledgers */
7590 }
7591 }
7592
7593 if (pve_p != PV_ENTRY_NULL) {
7594 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
7595 }
7596 } else {
7597 pt_entry_t spte;
7598 const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
7599
7600 spte = *pte_p;
7601
7602 if (pmap == kernel_pmap) {
7603 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
7604 } else {
7605 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
7606 }
7607
7608 pte_set_was_writeable(tmplate, false);
7609 /*
7610 * While the naive implementation of this would serve to add execute
7611 * permission, this is not how the VM uses this interface, nor how
7612 * x86_64 implements it. So ignore requests to add execute permissions.
7613 */
7614 if (set_NX) {
7615 tmplate |= pt_attr_leaf_xn(pt_attr);
7616 }
7617
7618
7619 if (*pte_p != ARM_PTE_TYPE_FAULT &&
7620 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p) &&
7621 *pte_p != tmplate) {
7622 if (options & PMAP_OPTIONS_NOFLUSH) {
7623 WRITE_PTE_FAST(pte_p, tmplate);
7624 } else {
7625 WRITE_PTE_STRONG(pte_p, tmplate);
7626 }
7627 update = TRUE;
7628 }
7629 }
7630
7631 /* Invalidate TLBs for all CPUs using it */
7632 if (update && !(options & PMAP_OPTIONS_NOFLUSH)) {
7633 if (remove || !flush_range ||
7634 ((flush_range->ptfr_pmap != pmap) || va >= flush_range->ptfr_end || va < flush_range->ptfr_start)) {
7635 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
7636 pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap);
7637 }
7638 tlb_flush_needed = TRUE;
7639 }
7640
7641 #ifdef PVH_FLAG_IOMMU
7642 protect_skip_pve:
7643 #endif
7644 pte_p = PT_ENTRY_NULL;
7645 pvet_p = pve_p;
7646 if (pve_p != PV_ENTRY_NULL) {
7647 if (remove) {
7648 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
7649 }
7650 pve_pp = pve_link_field(pve_p);
7651 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
7652 }
7653 }
7654
7655 #ifdef PVH_FLAG_EXEC
7656 if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
7657 pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
7658 }
7659 #endif
7660 /* if we removed a bunch of entries, take care of them now */
7661 if (remove) {
7662 if (new_pve_p != PV_ENTRY_NULL) {
7663 pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
7664 pvh_set_flags(pv_h, pvh_flags);
7665 } else if (new_pte_p != PT_ENTRY_NULL) {
7666 pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
7667 pvh_set_flags(pv_h, pvh_flags);
7668 } else {
7669 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
7670 }
7671 }
7672
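	/*
	 * Descriptive note: if the caller supplied a flush_range and we only downgraded
	 * permissions (no mappings were removed), defer the TLB maintenance to the
	 * caller's batched flush rather than issuing it here.
	 */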
7673 if (flush_range && tlb_flush_needed) {
7674 if (!remove) {
7675 flush_range->ptfr_flush_needed = true;
7676 tlb_flush_needed = FALSE;
7677 }
7678 }
7679
7680 /*
7681 * If we removed PV entries, ensure prior TLB flushes are complete before we drop the PVH
7682 * lock to allow the backing pages to be repurposed. This is a security precaution, aimed
7683 * primarily at XNU_MONITOR configurations, to reduce the likelihood of an attacker causing
7684 * a page to be repurposed while it is still live in the TLBs.
7685 */
7686 if (remove && tlb_flush_needed) {
7687 sync_tlb_flush();
7688 }
7689
7690 UNLOCK_PVH(pai);
7691
7692 if (!remove && tlb_flush_needed) {
7693 sync_tlb_flush();
7694 }
7695
7696 if (remove && (pvet_p != PV_ENTRY_NULL)) {
7697 pv_list_free(pveh_p, pvet_p, pvh_cnt, pv_kern_low_water_mark);
7698 }
7699 }
7700
7701 MARK_AS_PMAP_TEXT static void
7702 pmap_page_protect_options_internal(
7703 ppnum_t ppnum,
7704 vm_prot_t prot,
7705 unsigned int options,
7706 void *arg)
7707 {
7708 if (arg != NULL) {
7709 /*
7710 * If the argument is non-NULL, the VM layer is conveying its intention that the TLBs should
7711 * ultimately be flushed. The nature of ARM TLB maintenance is such that we can flush the
7712 * TLBs much more precisely if we do so inline with the pagetable updates, and the PPL
7713 * security model requires that we not exit the PPL without performing the required TLB flushes anyway.
7714 * In that case, force the flush to take place.
7715 */
7716 options &= ~PMAP_OPTIONS_NOFLUSH;
7717 }
7718 pmap_page_protect_options_with_flush_range(ppnum, prot, options, NULL);
7719 }
7720
7721 void
7722 pmap_page_protect_options(
7723 ppnum_t ppnum,
7724 vm_prot_t prot,
7725 unsigned int options,
7726 void *arg)
7727 {
7728 pmap_paddr_t phys = ptoa(ppnum);
7729
7730 assert(ppnum != vm_page_fictitious_addr);
7731
7732 /* Only work with managed pages. */
7733 if (!pa_valid(phys)) {
7734 return;
7735 }
7736
7737 /*
7738 * Determine the new protection.
7739 */
7740 if (prot == VM_PROT_ALL) {
7741 return; /* nothing to do */
7742 }
7743
7744 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
7745
7746 #if XNU_MONITOR
7747 pmap_page_protect_options_ppl(ppnum, prot, options, arg);
7748 #else
7749 pmap_page_protect_options_internal(ppnum, prot, options, arg);
7750 #endif
7751
7752 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
7753 }
7754
7755
7756 #if __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX)
7757 MARK_AS_PMAP_TEXT void
7758 pmap_disable_user_jop_internal(pmap_t pmap)
7759 {
7760 if (pmap == kernel_pmap) {
7761 panic("%s: called with kernel_pmap\n", __func__);
7762 }
7763 VALIDATE_PMAP(pmap);
7764 pmap->disable_jop = true;
7765 }
7766
7767 void
7768 pmap_disable_user_jop(pmap_t pmap)
7769 {
7770 #if XNU_MONITOR
7771 pmap_disable_user_jop_ppl(pmap);
7772 #else
7773 pmap_disable_user_jop_internal(pmap);
7774 #endif
7775 }
7776 #endif /* __has_feature(ptrauth_calls) && defined(XNU_TARGET_OS_OSX) */
7777
7778 /*
7779 * Indicates whether the pmap layer enforces some additional restrictions on the
7780 * given set of protections.
7781 */
7782 bool
7783 pmap_has_prot_policy(__unused pmap_t pmap, __unused bool translated_allow_execute, __unused vm_prot_t prot)
7784 {
7785 return false;
7786 }
7787
7788 /*
7789 * Set the physical protection on the
7790 * specified range of this map as requested.
7791 * VERY IMPORTANT: Will not increase permissions.
7792 * VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
7793 */
7794 void
7795 pmap_protect(
7796 pmap_t pmap,
7797 vm_map_address_t b,
7798 vm_map_address_t e,
7799 vm_prot_t prot)
7800 {
7801 pmap_protect_options(pmap, b, e, prot, 0, NULL);
7802 }
7803
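/*
 * Descriptive note: this helper applies the requested protection to a range that
 * must not cross a twig-level page table boundary, periodically checking for
 * pending preemption. It returns the VA at which processing stopped (possibly
 * short of 'end'); the caller, pmap_protect_options(), restarts from that
 * address on its next iteration, so a preempted operation simply resumes.
 */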
7804 MARK_AS_PMAP_TEXT static vm_map_address_t
7805 pmap_protect_options_internal(
7806 pmap_t pmap,
7807 vm_map_address_t start,
7808 vm_map_address_t end,
7809 vm_prot_t prot,
7810 unsigned int options,
7811 __unused void *args)
7812 {
7813 const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
7814 tt_entry_t *tte_p;
7815 pt_entry_t *bpte_p, *epte_p;
7816 pt_entry_t *pte_p;
7817 boolean_t set_NX = TRUE;
7818 #if (__ARM_VMSA__ > 7)
7819 boolean_t set_XO = FALSE;
7820 #endif
7821 boolean_t should_have_removed = FALSE;
7822 bool need_strong_sync = false;
7823
7824 if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
7825 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
7826 }
7827
7828 #if DEVELOPMENT || DEBUG
7829 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
7830 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
7831 should_have_removed = TRUE;
7832 }
7833 } else
7834 #endif
7835 {
7836 /* Determine the new protection. */
7837 switch (prot) {
7838 #if (__ARM_VMSA__ > 7)
7839 case VM_PROT_EXECUTE:
7840 set_XO = TRUE;
7841 OS_FALLTHROUGH;
7842 #endif
7843 case VM_PROT_READ:
7844 case VM_PROT_READ | VM_PROT_EXECUTE:
7845 break;
7846 case VM_PROT_READ | VM_PROT_WRITE:
7847 case VM_PROT_ALL:
7848 return end; /* nothing to do */
7849 default:
7850 should_have_removed = TRUE;
7851 }
7852 }
7853
7854 if (should_have_removed) {
7855 panic("%s: should have been a remove operation, "
7856 "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
7857 __FUNCTION__,
7858 pmap, (void *)start, (void *)end, prot, options, args);
7859 }
7860
7861 #if DEVELOPMENT || DEBUG
7862 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
7863 #else
7864 if ((prot & VM_PROT_EXECUTE))
7865 #endif
7866 {
7867 set_NX = FALSE;
7868 } else {
7869 set_NX = TRUE;
7870 }
7871
7872 const uint64_t pmap_page_size = PAGE_RATIO * pt_attr_page_size(pt_attr);
7873 vm_map_address_t va = start;
7874 unsigned int npages = 0;
7875
7876 VALIDATE_PMAP(pmap);
7877 pmap_lock(pmap);
7878
7879 tte_p = pmap_tte(pmap, start);
7880
7881 if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
7882 bpte_p = (pt_entry_t *) ttetokv(*tte_p);
7883 bpte_p = &bpte_p[pte_index(pmap, pt_attr, start)];
7884 epte_p = bpte_p + ((end - start) >> pt_attr_leaf_shift(pt_attr));
7885 pte_p = bpte_p;
7886
7887 for (pte_p = bpte_p;
7888 pte_p < epte_p;
7889 pte_p += PAGE_RATIO, va += pmap_page_size) {
7890 ++npages;
7891 if (__improbable(!(npages % PMAP_DEFAULT_PREEMPTION_CHECK_PAGE_INTERVAL) &&
7892 pmap_pending_preemption())) {
7893 break;
7894 }
7895 pt_entry_t spte;
7896 #if DEVELOPMENT || DEBUG
7897 boolean_t force_write = FALSE;
7898 #endif
7899
7900 spte = *((volatile pt_entry_t*)pte_p);
7901
7902 if ((spte == ARM_PTE_TYPE_FAULT) ||
7903 ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
7904 continue;
7905 }
7906
7907 pmap_paddr_t pa;
7908 int pai = 0;
7909 boolean_t managed = FALSE;
7910
7911 while (!managed) {
7912 /*
7913 * It may be possible for the pte to transition from managed
7914 * to unmanaged in this timeframe; for now, elide the assert.
7915 * We should break out as a consequence of checking pa_valid.
7916 */
7917 // assert(!ARM_PTE_IS_COMPRESSED(spte));
7918 pa = pte_to_pa(spte);
7919 if (!pa_valid(pa)) {
7920 break;
7921 }
7922 pai = (int)pa_index(pa);
7923 LOCK_PVH(pai);
7924 spte = *((volatile pt_entry_t*)pte_p);
7925 pa = pte_to_pa(spte);
7926 if (pai == (int)pa_index(pa)) {
7927 managed = TRUE;
7928 break; // Leave the PVH locked; we will unlock it after we finish updating the PTE
7929 }
7930 UNLOCK_PVH(pai);
7931 }
7932
7933 if ((spte == ARM_PTE_TYPE_FAULT) ||
7934 ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
7935 continue;
7936 }
7937
7938 pt_entry_t tmplate;
7939
7940 if (pmap == kernel_pmap) {
7941 #if DEVELOPMENT || DEBUG
7942 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
7943 force_write = TRUE;
7944 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
7945 } else
7946 #endif
7947 {
7948 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
7949 }
7950 } else {
7951 #if DEVELOPMENT || DEBUG
7952 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
7953 force_write = TRUE;
7954 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pt_attr));
7955 } else
7956 #endif
7957 {
7958 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
7959 }
7960 }
7961
7962 /*
7963 * XXX Removing "NX" would
7964 * grant "execute" access
7965 * immediately, bypassing any
7966 * checks VM might want to do
7967 * in its soft fault path.
7968 * pmap_protect() and co. are
7969 * not allowed to increase
7970 * access permissions.
7971 */
7972 if (set_NX) {
7973 tmplate |= pt_attr_leaf_xn(pt_attr);
7974 } else {
7975 #if (__ARM_VMSA__ > 7)
7976 if (pmap == kernel_pmap) {
7977 /* do NOT clear "PNX"! */
7978 tmplate |= ARM_PTE_NX;
7979 } else {
7980 /* do NOT clear "NX"! */
7981 tmplate |= pt_attr_leaf_x(pt_attr);
7982 if (set_XO) {
7983 tmplate &= ~ARM_PTE_APMASK;
7984 tmplate |= pt_attr_leaf_rona(pt_attr);
7985 }
7986 }
7987 #endif
7988 }
7989
7990 #if DEVELOPMENT || DEBUG
7991 if (force_write) {
7992 /*
7993 * TODO: Run CS/Monitor checks here.
7994 */
7995 if (managed) {
7996 /*
7997 * We are marking the page as writable,
7998 * so we consider it to be modified and
7999 * referenced.
8000 */
8001 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
8002 tmplate |= ARM_PTE_AF;
8003
8004 if (IS_REFFAULT_PAGE(pai)) {
8005 CLR_REFFAULT_PAGE(pai);
8006 }
8007
8008 if (IS_MODFAULT_PAGE(pai)) {
8009 CLR_MODFAULT_PAGE(pai);
8010 }
8011 }
8012 } else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
8013 /*
8014 * An immediate request for anything other than
8015 * write should still mark the page as
8016 * referenced if managed.
8017 */
8018 if (managed) {
8019 pa_set_bits(pa, PP_ATTR_REFERENCED);
8020 tmplate |= ARM_PTE_AF;
8021
8022 if (IS_REFFAULT_PAGE(pai)) {
8023 CLR_REFFAULT_PAGE(pai);
8024 }
8025 }
8026 }
8027 #endif
8028
8029 /* We do not expect to take a write fast-fault on this entry. */
8030 pte_set_was_writeable(tmplate, false);
8031
8032 WRITE_PTE_FAST(pte_p, tmplate);
8033
8034 if (managed) {
8035 ASSERT_PVH_LOCKED(pai);
8036 UNLOCK_PVH(pai);
8037 }
8038 }
8039 FLUSH_PTE_RANGE_STRONG(bpte_p, pte_p);
8040 PMAP_UPDATE_TLBS(pmap, start, va, need_strong_sync);
8041 } else {
8042 va = end;
8043 }
8044
8045 pmap_unlock(pmap);
8046 return va;
8047 }
8048
8049 void
8050 pmap_protect_options(
8051 pmap_t pmap,
8052 vm_map_address_t b,
8053 vm_map_address_t e,
8054 vm_prot_t prot,
8055 unsigned int options,
8056 __unused void *args)
8057 {
8058 vm_map_address_t l, beg;
8059
8060 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
8061
8062 if ((b | e) & pt_attr_leaf_offmask(pt_attr)) {
8063 panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
8064 pmap, (uint64_t)b, (uint64_t)e);
8065 }
8066
8067 assert(get_preemption_level() == 0);
8068
8069 #if DEVELOPMENT || DEBUG
8070 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
8071 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
8072 pmap_remove_options(pmap, b, e, options);
8073 return;
8074 }
8075 } else
8076 #endif
8077 {
8078 /* Determine the new protection. */
8079 switch (prot) {
8080 case VM_PROT_EXECUTE:
8081 case VM_PROT_READ:
8082 case VM_PROT_READ | VM_PROT_EXECUTE:
8083 break;
8084 case VM_PROT_READ | VM_PROT_WRITE:
8085 case VM_PROT_ALL:
8086 return; /* nothing to do */
8087 default:
8088 pmap_remove_options(pmap, b, e, options);
8089 return;
8090 }
8091 }
8092
8093 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
8094 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
8095 VM_KERNEL_ADDRHIDE(e));
8096
8097 beg = b;
8098
8099 while (beg < e) {
8100 l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
8101
8102 if (l > e) {
8103 l = e;
8104 }
8105
8106 #if XNU_MONITOR
8107 beg = pmap_protect_options_ppl(pmap, beg, l, prot, options, args);
8108 #else
8109 beg = pmap_protect_options_internal(pmap, beg, l, prot, options, args);
8110 #endif
8111 }
8112
8113 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
8114 }
8115
8116 /* Map a (possibly) autogenned block */
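/*
 * Descriptive note: establishes 'size' consecutive page mappings starting at
 * (va, pa) by calling pmap_enter() once per page. Note that 'size' is a page
 * count, not a byte count, and the mappings are entered wired.
 */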
8117 kern_return_t
8118 pmap_map_block(
8119 pmap_t pmap,
8120 addr64_t va,
8121 ppnum_t pa,
8122 uint32_t size,
8123 vm_prot_t prot,
8124 int attr,
8125 __unused unsigned int flags)
8126 {
8127 kern_return_t kr;
8128 addr64_t original_va = va;
8129 uint32_t page;
8130
8131 for (page = 0; page < size; page++) {
8132 kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
8133
8134 if (kr != KERN_SUCCESS) {
8135 /*
8136 * This will panic for now, as it is unclear that
8137 * removing the mappings is correct.
8138 */
8139 panic("%s: failed pmap_enter, "
8140 "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
8141 __FUNCTION__,
8142 pmap, va, pa, size, prot, flags);
8143
8144 pmap_remove(pmap, original_va, va - original_va);
8145 return kr;
8146 }
8147
8148 va += PAGE_SIZE;
8149 pa++;
8150 }
8151
8152 return KERN_SUCCESS;
8153 }
8154
8155 kern_return_t
8156 pmap_enter_addr(
8157 pmap_t pmap,
8158 vm_map_address_t v,
8159 pmap_paddr_t pa,
8160 vm_prot_t prot,
8161 vm_prot_t fault_type,
8162 unsigned int flags,
8163 boolean_t wired)
8164 {
8165 return pmap_enter_options_addr(pmap, v, pa, prot, fault_type, flags, wired, 0, NULL);
8166 }
8167
8168 /*
8169 * Insert the given physical page (p) at
8170 * the specified virtual address (v) in the
8171 * target physical map with the protection requested.
8172 *
8173 * If specified, the page will be wired down, meaning
8174 * that the related pte cannot be reclaimed.
8175 *
8176 * NB: This is the only routine which MAY NOT lazy-evaluate
8177 * or lose information. That is, this routine must actually
8178 * insert this page into the given map eventually (it must make
8179 * forward progress eventually).
8180 */
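/*
 * Illustrative sketch (not taken from any real caller): wiring a single
 * read/write, non-executable page into a user pmap might look like
 *
 *	kr = pmap_enter(user_pmap, user_va, pn,
 *	                VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE,
 *	                0, TRUE);
 *
 * where 'user_pmap', 'user_va' and 'pn' are hypothetical placeholders.
 */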
8181 kern_return_t
8182 pmap_enter(
8183 pmap_t pmap,
8184 vm_map_address_t v,
8185 ppnum_t pn,
8186 vm_prot_t prot,
8187 vm_prot_t fault_type,
8188 unsigned int flags,
8189 boolean_t wired)
8190 {
8191 return pmap_enter_addr(pmap, v, ((pmap_paddr_t)pn) << PAGE_SHIFT, prot, fault_type, flags, wired);
8192 }
8193
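/*
 * Descriptive note: installs 'pte' at 'pte_p', updating wired accounting if the
 * wired bit is changing. If a valid (non-fault, non-compressed) entry is being
 * replaced, the write is followed by a TLB invalidation for the affected VA
 * range; otherwise a plain write plus ISB suffices, since the architecture does
 * not cache translations for invalid entries.
 */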
8194 static inline void
8195 pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
8196 {
8197 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
8198
8199 if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
8200 SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_info(pte_p)->wiredcnt);
8201 if (pte & ARM_PTE_WIRED) {
8202 OSAddAtomic16(1, ptd_wiredcnt_ptr);
8203 pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8204 OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
8205 } else {
8206 OSAddAtomic16(-1, ptd_wiredcnt_ptr);
8207 pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8208 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
8209 }
8210 }
8211 if (*pte_p != ARM_PTE_TYPE_FAULT &&
8212 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
8213 WRITE_PTE_STRONG(pte_p, pte);
8214 PMAP_UPDATE_TLBS(pmap, v, v + (pt_attr_page_size(pt_attr) * PAGE_RATIO), false);
8215 } else {
8216 WRITE_PTE(pte_p, pte);
8217 __builtin_arm_isb(ISB_SY);
8218 }
8219
8220 PMAP_TRACE(4 + pt_attr_leaf_level(pt_attr), PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap),
8221 VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + (pt_attr_page_size(pt_attr) * PAGE_RATIO)), pte);
8222 }
8223
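/*
 * Descriptive note: translates VM_WIMG_* cache attribute encodings into the
 * corresponding ARM PTE memory-attribute index and shareability bits.
 * Device-like attributes (IO, RT, the POSTED variants) and write-combined
 * mappings additionally force NX/PNX so that such memory can never be executed.
 */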
8224 MARK_AS_PMAP_TEXT static pt_entry_t
8225 wimg_to_pte(unsigned int wimg)
8226 {
8227 pt_entry_t pte;
8228
8229 switch (wimg & (VM_WIMG_MASK)) {
8230 case VM_WIMG_IO:
8231 case VM_WIMG_RT:
8232 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
8233 pte |= ARM_PTE_NX | ARM_PTE_PNX;
8234 break;
8235 case VM_WIMG_POSTED:
8236 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
8237 pte |= ARM_PTE_NX | ARM_PTE_PNX;
8238 break;
8239 case VM_WIMG_POSTED_REORDERED:
8240 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
8241 pte |= ARM_PTE_NX | ARM_PTE_PNX;
8242 break;
8243 case VM_WIMG_POSTED_COMBINED_REORDERED:
8244 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
8245 pte |= ARM_PTE_NX | ARM_PTE_PNX;
8246 break;
8247 case VM_WIMG_WCOMB:
8248 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
8249 pte |= ARM_PTE_NX | ARM_PTE_PNX;
8250 break;
8251 case VM_WIMG_WTHRU:
8252 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
8253 #if (__ARM_VMSA__ > 7)
8254 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
8255 #else
8256 pte |= ARM_PTE_SH;
8257 #endif
8258 break;
8259 case VM_WIMG_COPYBACK:
8260 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
8261 #if (__ARM_VMSA__ > 7)
8262 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
8263 #else
8264 pte |= ARM_PTE_SH;
8265 #endif
8266 break;
8267 case VM_WIMG_INNERWBACK:
8268 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
8269 #if (__ARM_VMSA__ > 7)
8270 pte |= ARM_PTE_SH(SH_INNER_MEMORY);
8271 #else
8272 pte |= ARM_PTE_SH;
8273 #endif
8274 break;
8275 default:
8276 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
8277 #if (__ARM_VMSA__ > 7)
8278 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
8279 #else
8280 pte |= ARM_PTE_SH;
8281 #endif
8282 }
8283
8284 return pte;
8285 }
8286
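/*
 * Descriptive note: records a new CPU mapping of the physical page at index
 * 'pai' in that page's physical-to-virtual (PV) list. A page with a single CPU
 * mapping stores the PTE pointer directly in the PV head (PVH_TYPE_PTEP); on
 * the second mapping the head is converted into a linked list of pv_entry
 * structures (PVH_TYPE_PVEP). A PV_ALLOC_RETRY return presumably indicates the
 * allocation path had to drop locks; the caller revalidates and retries (see
 * the Pmap_enter_loop label in pmap_enter_options_internal).
 */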
8287 static pv_alloc_return_t
8288 pmap_enter_pv(
8289 pmap_t pmap,
8290 pt_entry_t *pte_p,
8291 int pai,
8292 unsigned int options,
8293 pv_entry_t **pve_p,
8294 boolean_t *is_altacct)
8295 {
8296 pv_entry_t **pv_h;
8297 pv_h = pai_to_pvh(pai);
8298 boolean_t first_cpu_mapping;
8299
8300 ASSERT_NOT_HIBERNATING();
8301 ASSERT_PVH_LOCKED(pai);
8302
8303 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
8304
8305 #if XNU_MONITOR
8306 if (__improbable(pvh_flags & PVH_FLAG_LOCKDOWN)) {
8307 panic("%d is locked down (%#lx), cannot enter", pai, pvh_flags);
8308 }
8309 #endif
8310
8311 #ifdef PVH_FLAG_CPU
8312 /* An IOMMU mapping may already be present for a page that hasn't yet
8313 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
8314 * if this is the first CPU mapping. We base internal/reusable
8315 * accounting on the options specified for the first CPU mapping.
8316 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
8317 * as there are *any* mappings of the page. The accounting for a
8318 * page should not need to change until the page is recycled by the
8319 * VM layer, and we assert that there are no mappings when a page
8320 * is recycled. An IOMMU mapping of a freed/recycled page is
8321 * considered a security violation and a potential DMA corruption path. */
8322 first_cpu_mapping = ((pmap != NULL) && !(pvh_flags & PVH_FLAG_CPU));
8323 if (first_cpu_mapping) {
8324 pvh_flags |= PVH_FLAG_CPU;
8325 }
8326 #else
8327 first_cpu_mapping = pvh_test_type(pv_h, PVH_TYPE_NULL);
8328 #endif
8329
8330 if (first_cpu_mapping) {
8331 if (options & PMAP_OPTIONS_INTERNAL) {
8332 SET_INTERNAL_PAGE(pai);
8333 } else {
8334 CLR_INTERNAL_PAGE(pai);
8335 }
8336 if ((options & PMAP_OPTIONS_INTERNAL) &&
8337 (options & PMAP_OPTIONS_REUSABLE)) {
8338 SET_REUSABLE_PAGE(pai);
8339 } else {
8340 CLR_REUSABLE_PAGE(pai);
8341 }
8342 }
8343 if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
8344 pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
8345 if (pmap != NULL && pmap != kernel_pmap &&
8346 ((options & PMAP_OPTIONS_ALT_ACCT) ||
8347 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
8348 IS_INTERNAL_PAGE(pai)) {
8349 /*
8350 * Make a note to ourselves that this mapping is using alternative
8351 * accounting. We'll need this in order to know which ledger to
8352 * debit when the mapping is removed.
8353 *
8354 * The altacct bit must be set while the pv head is locked. Defer
8355 * the ledger accounting until after we've dropped the lock.
8356 */
8357 SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
8358 *is_altacct = TRUE;
8359 } else {
8360 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
8361 }
8362 } else {
8363 pv_alloc_return_t ret;
8364 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
8365 pt_entry_t *pte1_p;
8366
8367 /*
8368 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
8369 */
8370 pte1_p = pvh_ptep(pv_h);
8371 pvh_set_flags(pv_h, pvh_flags);
8372 if ((*pve_p == PV_ENTRY_NULL) && ((ret = pv_alloc(pmap, pai, pve_p)) != PV_ALLOC_SUCCESS)) {
8373 return ret;
8374 }
8375
8376 pve_set_ptep(*pve_p, pte1_p);
8377 (*pve_p)->pve_next = PV_ENTRY_NULL;
8378
8379 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
8380 /*
8381 * transfer "altacct" from
8382 * pp_attr to this pve
8383 */
8384 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
8385 SET_ALTACCT_PAGE(pai, *pve_p);
8386 }
8387 pvh_update_head(pv_h, *pve_p, PVH_TYPE_PVEP);
8388 *pve_p = PV_ENTRY_NULL;
8389 } else if (!pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
8390 panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
8391 __func__, *pv_h, pte_p, pmap, pv_h);
8392 }
8393 /*
8394 * Set up pv_entry for this new mapping and then
8395 * add it to the list for this physical page.
8396 */
8397 pvh_set_flags(pv_h, pvh_flags);
8398 if ((*pve_p == PV_ENTRY_NULL) && ((ret = pv_alloc(pmap, pai, pve_p)) != PV_ALLOC_SUCCESS)) {
8399 return ret;
8400 }
8401
8402 pve_set_ptep(*pve_p, pte_p);
8403 (*pve_p)->pve_next = PV_ENTRY_NULL;
8404
8405 pvh_add(pv_h, *pve_p);
8406
8407 if (pmap != NULL && pmap != kernel_pmap &&
8408 ((options & PMAP_OPTIONS_ALT_ACCT) ||
8409 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
8410 IS_INTERNAL_PAGE(pai)) {
8411 /*
8412 * Make a note to ourselves that this
8413 * mapping is using alternative
8414 * accounting. We'll need this in order
8415 * to know which ledger to debit when
8416 * the mapping is removed.
8417 *
8418 * The altacct bit must be set while
8419 * the pv head is locked. Defer the
8420 * ledger accounting until after we've
8421 * dropped the lock.
8422 */
8423 SET_ALTACCT_PAGE(pai, *pve_p);
8424 *is_altacct = TRUE;
8425 }
8426
8427 *pve_p = PV_ENTRY_NULL;
8428 }
8429
8430 pvh_set_flags(pv_h, pvh_flags);
8431
8432 return PV_ALLOC_SUCCESS;
8433 }
8434
8435 MARK_AS_PMAP_TEXT static kern_return_t
8436 pmap_enter_options_internal(
8437 pmap_t pmap,
8438 vm_map_address_t v,
8439 pmap_paddr_t pa,
8440 vm_prot_t prot,
8441 vm_prot_t fault_type,
8442 unsigned int flags,
8443 boolean_t wired,
8444 unsigned int options)
8445 {
8446 ppnum_t pn = (ppnum_t)atop(pa);
8447 pt_entry_t pte;
8448 pt_entry_t spte;
8449 pt_entry_t *pte_p;
8450 pv_entry_t *pve_p;
8451 boolean_t set_NX;
8452 boolean_t set_XO = FALSE;
8453 boolean_t refcnt_updated;
8454 boolean_t wiredcnt_updated;
8455 unsigned int wimg_bits;
8456 boolean_t was_compressed, was_alt_compressed;
8457 kern_return_t kr = KERN_SUCCESS;
8458
8459 VALIDATE_PMAP(pmap);
8460
8461 #if XNU_MONITOR
8462 if (__improbable((options & PMAP_OPTIONS_NOWAIT) == 0)) {
8463 panic("pmap_enter_options() called without PMAP_OPTIONS_NOWAIT set");
8464 }
8465 #endif
8466
8467 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
8468
8469 if ((v) & pt_attr_leaf_offmask(pt_attr)) {
8470 panic("pmap_enter_options() pmap %p v 0x%llx\n",
8471 pmap, (uint64_t)v);
8472 }
8473
8474 if ((pa) & pt_attr_leaf_offmask(pt_attr)) {
8475 panic("pmap_enter_options() pmap %p pa 0x%llx\n",
8476 pmap, (uint64_t)pa);
8477 }
8478
8479 if ((prot & VM_PROT_EXECUTE) && (pmap == kernel_pmap)) {
8480 #if defined(KERNEL_INTEGRITY_CTRR) && defined(CONFIG_XNUPOST)
8481 extern vm_offset_t ctrr_test_page;
8482 if (__probable(v != ctrr_test_page))
8483 #endif
8484 panic("pmap_enter_options(): attempt to add executable mapping to kernel_pmap");
8485 }
8486
8487 #if DEVELOPMENT || DEBUG
8488 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
8489 #else
8490 if ((prot & VM_PROT_EXECUTE))
8491 #endif
8492 {
8493 set_NX = FALSE;
8494 } else {
8495 set_NX = TRUE;
8496 }
8497
8498 #if (__ARM_VMSA__ > 7)
8499 if (prot == VM_PROT_EXECUTE) {
8500 set_XO = TRUE;
8501 }
8502 #endif
8503
8504 assert(pn != vm_page_fictitious_addr);
8505
8506 refcnt_updated = FALSE;
8507 wiredcnt_updated = FALSE;
8508 pve_p = PV_ENTRY_NULL;
8509 was_compressed = FALSE;
8510 was_alt_compressed = FALSE;
8511
8512 pmap_lock(pmap);
8513
8514 /*
8515 * Expand pmap to include this pte. Assume that
8516 * pmap is always expanded to include enough hardware
8517 * pages to map one VM page.
8518 */
8519 while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
8520 /* Must unlock to expand the pmap. */
8521 pmap_unlock(pmap);
8522
8523 kr = pmap_expand(pmap, v, options, pt_attr_leaf_level(pt_attr));
8524
8525 if (kr != KERN_SUCCESS) {
8526 return kr;
8527 }
8528
8529 pmap_lock(pmap);
8530 }
8531
8532 if (options & PMAP_OPTIONS_NOENTER) {
8533 pmap_unlock(pmap);
8534 return KERN_SUCCESS;
8535 }
8536
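	/*
	 * Descriptive note on the retry labels: Pmap_enter_retry re-reads the PTE and
	 * redoes the whole evaluation after another thread has modified the slot;
	 * Pmap_enter_loop (further below) re-derives the cache attributes and retries
	 * the PV insertion after the PVH lock may have been dropped.
	 */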
8537 Pmap_enter_retry:
8538
8539 spte = *pte_p;
8540
8541 if (ARM_PTE_IS_COMPRESSED(spte, pte_p) && !refcnt_updated) {
8542 /*
8543 * "pmap" should be locked at this point, so this should
8544 * not race with another pmap_enter() or pmap_remove_range().
8545 */
8546 assert(pmap != kernel_pmap);
8547
8548 /* one less "compressed" */
8549 OSAddAtomic64(-1, &pmap->stats.compressed);
8550 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
8551 pt_attr_page_size(pt_attr) * PAGE_RATIO);
8552
8553 was_compressed = TRUE;
8554 if (spte & ARM_PTE_COMPRESSED_ALT) {
8555 was_alt_compressed = TRUE;
8556 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8557 } else {
8558 /* was part of the footprint */
8559 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8560 }
8561
8562 /* clear "compressed" marker */
8563 /* XXX is it necessary, since we're about to overwrite it? */
8564 WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
8565 spte = ARM_PTE_TYPE_FAULT;
8566
8567 /*
8568 * We're replacing a "compressed" marker with a valid PTE,
8569 * so no change for "refcnt".
8570 */
8571 refcnt_updated = TRUE;
8572 }
8573
8574 if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
8575 pmap_remove_range(pmap, v, pte_p, pte_p + PAGE_RATIO);
8576 }
8577
8578 pte = pa_to_pte(pa) | ARM_PTE_TYPE;
8579
8580 if (wired) {
8581 pte |= ARM_PTE_WIRED;
8582 }
8583
8584 if (set_NX) {
8585 pte |= pt_attr_leaf_xn(pt_attr);
8586 } else {
8587 #if (__ARM_VMSA__ > 7)
8588 if (pmap == kernel_pmap) {
8589 pte |= ARM_PTE_NX;
8590 } else {
8591 pte |= pt_attr_leaf_x(pt_attr);
8592 }
8593 #endif
8594 }
8595
8596 if (pmap == kernel_pmap) {
8597 #if __ARM_KERNEL_PROTECT__
8598 pte |= ARM_PTE_NG;
8599 #endif /* __ARM_KERNEL_PROTECT__ */
8600 if (prot & VM_PROT_WRITE) {
8601 pte |= ARM_PTE_AP(AP_RWNA);
8602 pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
8603 } else {
8604 pte |= ARM_PTE_AP(AP_RONA);
8605 pa_set_bits(pa, PP_ATTR_REFERENCED);
8606 }
8607 #if (__ARM_VMSA__ == 7)
8608 if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH)) {
8609 pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
8610 }
8611 #endif
8612 } else {
8613 if (!pmap->nested) {
8614 pte |= ARM_PTE_NG;
8615 } else if ((pmap->nested_region_asid_bitmap)
8616 && (v >= pmap->nested_region_addr)
8617 && (v < (pmap->nested_region_addr + pmap->nested_region_size))) {
8618 unsigned int index = (unsigned int)((v - pmap->nested_region_addr) >> pt_attr_twig_shift(pt_attr));
8619
8620 if ((pmap->nested_region_asid_bitmap)
8621 && testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
8622 pte |= ARM_PTE_NG;
8623 }
8624 }
8625 #if MACH_ASSERT
8626 if (pmap->nested_pmap != NULL) {
8627 vm_map_address_t nest_vaddr;
8628 pt_entry_t *nest_pte_p;
8629
8630 nest_vaddr = v;
8631
8632 if ((nest_vaddr >= pmap->nested_region_addr)
8633 && (nest_vaddr < (pmap->nested_region_addr + pmap->nested_region_size))
8634 && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
8635 && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
8636 && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p, nest_pte_p))
8637 && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
8638 unsigned int index = (unsigned int)((v - pmap->nested_region_addr) >> pt_attr_twig_shift(pt_attr));
8639
8640 if ((pmap->nested_pmap->nested_region_asid_bitmap)
8641 && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
8642 panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
8643 nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
8644 }
8645 }
8646 }
8647 #endif
8648 if (prot & VM_PROT_WRITE) {
8649 if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
8650 assert(!pmap->nested); /* no write access in a nested pmap */
8651 if (fault_type & VM_PROT_WRITE) {
8652 if (set_XO) {
8653 pte |= pt_attr_leaf_rwna(pt_attr);
8654 } else {
8655 pte |= pt_attr_leaf_rw(pt_attr);
8656 }
8657 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
8658 } else {
8659 if (set_XO) {
8660 pte |= pt_attr_leaf_rona(pt_attr);
8661 } else {
8662 pte |= pt_attr_leaf_ro(pt_attr);
8663 }
8664 /*
8665 * Mark the page as MODFAULT so that a subsequent write
8666 * may be handled through arm_fast_fault().
8667 */
8668 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODFAULT);
8669 pte_set_was_writeable(pte, true);
8670 }
8671 } else {
8672 if (set_XO) {
8673 pte |= pt_attr_leaf_rwna(pt_attr);
8674 } else {
8675 pte |= pt_attr_leaf_rw(pt_attr);
8676 }
8677 pa_set_bits(pa, PP_ATTR_REFERENCED);
8678 }
8679 } else {
8680 if (set_XO) {
8681 pte |= pt_attr_leaf_rona(pt_attr);
8682 } else {
8683 pte |= pt_attr_leaf_ro(pt_attr);
8684 }
8685 pa_set_bits(pa, PP_ATTR_REFERENCED);
8686 }
8687 }
8688
8689 pte |= ARM_PTE_AF;
8690
8691 volatile uint16_t *refcnt = NULL;
8692 volatile uint16_t *wiredcnt = NULL;
8693 if (pmap != kernel_pmap) {
8694 ptd_info_t *ptd_info = ptep_get_info(pte_p);
8695 refcnt = &ptd_info->refcnt;
8696 wiredcnt = &ptd_info->wiredcnt;
8697 /* Bump the wired count to keep the PTE page from being reclaimed. We need this because
8698 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
8699 * a new PV entry. */
8700 if (!wiredcnt_updated) {
8701 OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
8702 wiredcnt_updated = TRUE;
8703 }
8704 if (!refcnt_updated) {
8705 OSAddAtomic16(1, (volatile int16_t*)refcnt);
8706 refcnt_updated = TRUE;
8707 }
8708 }
8709
8710 if (pa_valid(pa)) {
8711 int pai;
8712 boolean_t is_altacct, is_internal;
8713
8714 is_internal = FALSE;
8715 is_altacct = FALSE;
8716
8717 pai = (int)pa_index(pa);
8718
8719 LOCK_PVH(pai);
8720
8721 Pmap_enter_loop:
8722 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
8723 wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
8724 } else {
8725 wimg_bits = pmap_cache_attributes(pn);
8726 }
8727
8728 /* We may be retrying this operation after dropping the PVH lock.
8729 * Cache attributes for the physical page may have changed while the lock
8730 * was dropped, so clear any cache attributes we may have previously set
8731 * in the PTE template. */
8732 pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
8733 pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
8734
8735 #if XNU_MONITOR
8736 /* The regular old kernel is not allowed to remap PPL pages. */
8737 if (__improbable(pa_test_monitor(pa))) {
8738 panic("%s: page belongs to PPL, "
8739 "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
8740 __FUNCTION__,
8741 pmap, v, (void*)pa, prot, fault_type, flags, wired, options);
8742 }
8743
8744 if (__improbable(pvh_get_flags(pai_to_pvh(pai)) & PVH_FLAG_LOCKDOWN)) {
8745 panic("%s: page locked down, "
8746 "pmap=%p, v=0x%llx, pa=%p, prot=0x%x, fault_type=0x%x, flags=0x%x, wired=%u, options=0x%x",
8747 __FUNCTION__,
8748 pmap, v, (void *)pa, prot, fault_type, flags, wired, options);
8749 }
8750 #endif
8751
8752
8753 if (pte == *pte_p) {
8754 /*
8755 * This pmap_enter operation has been completed by another thread;
8756 * undo the refcnt on the page table and return.
8757 */
8758 UNLOCK_PVH(pai);
8759 goto Pmap_enter_cleanup;
8760 } else if (pte_to_pa(*pte_p) == pa) {
8761 pmap_enter_pte(pmap, pte_p, pte, v);
8762 UNLOCK_PVH(pai);
8763 goto Pmap_enter_cleanup;
8764 } else if (*pte_p != ARM_PTE_TYPE_FAULT) {
8765 /*
8766 * The pte has been modified by another thread;
8767 * hold the refcnt on the page table and retry the pmap_enter operation.
8768 */
8769 UNLOCK_PVH(pai);
8770 goto Pmap_enter_retry;
8771 }
8772 pv_alloc_return_t pv_status = pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct);
8773 if (pv_status == PV_ALLOC_RETRY) {
8774 goto Pmap_enter_loop;
8775 } else if (pv_status == PV_ALLOC_FAIL) {
8776 UNLOCK_PVH(pai);
8777 kr = KERN_RESOURCE_SHORTAGE;
8778 goto Pmap_enter_cleanup;
8779 }
8780
8781 pmap_enter_pte(pmap, pte_p, pte, v);
8782
8783 if (pmap != kernel_pmap) {
8784 if (IS_REUSABLE_PAGE(pai) &&
8785 !is_altacct) {
8786 assert(IS_INTERNAL_PAGE(pai));
8787 OSAddAtomic(+1, &pmap->stats.reusable);
8788 PMAP_STATS_PEAK(pmap->stats.reusable);
8789 } else if (IS_INTERNAL_PAGE(pai)) {
8790 OSAddAtomic(+1, &pmap->stats.internal);
8791 PMAP_STATS_PEAK(pmap->stats.internal);
8792 is_internal = TRUE;
8793 } else {
8794 OSAddAtomic(+1, &pmap->stats.external);
8795 PMAP_STATS_PEAK(pmap->stats.external);
8796 }
8797 }
8798
8799 UNLOCK_PVH(pai);
8800
8801 if (pmap != kernel_pmap) {
8802 pmap_ledger_credit(pmap, task_ledgers.phys_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8803
8804 if (is_internal) {
8805 /*
8806 * Make corresponding adjustments to
8807 * phys_footprint statistics.
8808 */
8809 pmap_ledger_credit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8810 if (is_altacct) {
8811 /*
8812 * If this page is internal and
8813 * in an IOKit region, credit
8814 * the task's total count of
8815 * dirty, internal IOKit pages.
8816 * It should *not* count towards
8817 * the task's total physical
8818 * memory footprint, because
8819 * this entire region was
8820 * already billed to the task
8821 * at the time the mapping was
8822 * created.
8823 *
8824 * Put another way, this is
8825 * internal++ and
8826 * alternate_accounting++, so
8827 * net effect on phys_footprint
8828 * is 0. That means: don't
8829 * touch phys_footprint here.
8830 */
8831 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8832 } else {
8833 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
8834 }
8835 }
8836 }
8837
8838 OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
8839 if (pmap->stats.resident_count > pmap->stats.resident_max) {
8840 pmap->stats.resident_max = pmap->stats.resident_count;
8841 }
8842 } else {
8843 if (prot & VM_PROT_EXECUTE) {
8844 kr = KERN_FAILURE;
8845 goto Pmap_enter_cleanup;
8846 }
8847
8848 wimg_bits = pmap_cache_attributes(pn);
8849 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
8850 wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
8851 }
8852
8853 pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
8854
8855 #if XNU_MONITOR
8856 if ((wimg_bits & PP_ATTR_MONITOR) && !pmap_ppl_disable) {
8857 uint64_t xprr_perm = pte_to_xprr_perm(pte);
8858 switch (xprr_perm) {
8859 case XPRR_KERN_RO_PERM:
8860 break;
8861 case XPRR_KERN_RW_PERM:
8862 pte &= ~ARM_PTE_XPRR_MASK;
8863 pte |= xprr_perm_to_pte(XPRR_PPL_RW_PERM);
8864 break;
8865 default:
8866 panic("Unsupported xPRR perm %llu for pte 0x%llx", xprr_perm, (uint64_t)pte);
8867 }
8868 }
8869 #endif
8870 pmap_enter_pte(pmap, pte_p, pte, v);
8871 }
8872
8873 goto Pmap_enter_return;
8874
8875 Pmap_enter_cleanup:
8876
8877 if (refcnt != NULL) {
8878 assert(refcnt_updated);
8879 if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
8880 panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
8881 }
8882 }
8883
8884 Pmap_enter_return:
8885
8886 #if CONFIG_PGTRACE
8887 if (pgtrace_enabled) {
8888 // Clone and invalidate original mapping if eligible
8889 pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES, 0, 0);
8890 }
8891 #endif /* CONFIG_PGTRACE */
8892
8893 if (pve_p != PV_ENTRY_NULL) {
8894 pv_free_entry(pve_p);
8895 }
8896
8897 if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
8898 panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
8899 }
8900
8901 pmap_unlock(pmap);
8902
8903 return kr;
8904 }
8905
8906 kern_return_t
8907 pmap_enter_options_addr(
8908 pmap_t pmap,
8909 vm_map_address_t v,
8910 pmap_paddr_t pa,
8911 vm_prot_t prot,
8912 vm_prot_t fault_type,
8913 unsigned int flags,
8914 boolean_t wired,
8915 unsigned int options,
8916 __unused void *arg)
8917 {
8918 kern_return_t kr = KERN_FAILURE;
8919
8920
8921 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
8922 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pa, prot);
8923
8924
8925 #if XNU_MONITOR
8926 /*
8927 * If NOWAIT was not requested, loop until the enter does not
8928 * fail due to lack of resources.
8929 */
8930 while ((kr = pmap_enter_options_ppl(pmap, v, pa, prot, fault_type, flags, wired, options | PMAP_OPTIONS_NOWAIT)) == KERN_RESOURCE_SHORTAGE) {
8931 pmap_alloc_page_for_ppl((options & PMAP_OPTIONS_NOWAIT) ? PMAP_PAGES_ALLOCATE_NOWAIT : 0);
8932 if (options & PMAP_OPTIONS_NOWAIT) {
8933 break;
8934 }
8935 }
8936
8937 pmap_ledger_check_balance(pmap);
8938 #else
8939 kr = pmap_enter_options_internal(pmap, v, pa, prot, fault_type, flags, wired, options);
8940 #endif
8941
8942 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
8943
8944 return kr;
8945 }
8946
8947 kern_return_t
8948 pmap_enter_options(
8949 pmap_t pmap,
8950 vm_map_address_t v,
8951 ppnum_t pn,
8952 vm_prot_t prot,
8953 vm_prot_t fault_type,
8954 unsigned int flags,
8955 boolean_t wired,
8956 unsigned int options,
8957 __unused void *arg)
8958 {
8959 return pmap_enter_options_addr(pmap, v, ((pmap_paddr_t)pn) << PAGE_SHIFT, prot, fault_type, flags, wired, options, arg);
8960 }
8961
8962 /*
8963 * Routine: pmap_change_wiring
8964 * Function: Change the wiring attribute for a map/virtual-address
8965 * pair.
8966 * In/out conditions:
8967 * The mapping must already exist in the pmap.
8968 */
8969 MARK_AS_PMAP_TEXT static void
8970 pmap_change_wiring_internal(
8971 pmap_t pmap,
8972 vm_map_address_t v,
8973 boolean_t wired)
8974 {
8975 pt_entry_t *pte_p;
8976 pmap_paddr_t pa;
8977
8978 VALIDATE_PMAP(pmap);
8979
8980 pmap_lock(pmap);
8981
8982 const pt_attr_t * pt_attr = pmap_get_pt_attr(pmap);
8983
8984 pte_p = pmap_pte(pmap, v);
8985 if (pte_p == PT_ENTRY_NULL) {
8986 if (!wired) {
8987 /*
8988 * The PTE may have already been cleared by a disconnect/remove operation, and the L3 table
8989 * may have been freed by a remove operation.
8990 */
8991 goto pmap_change_wiring_return;
8992 } else {
8993 panic("%s: Attempt to wire nonexistent PTE for pmap %p", __func__, pmap);
8994 }
8995 }
8996 /*
8997 * Use volatile loads to prevent the compiler from collapsing references to 'pa' back to loads of pte_p
8998 * until we've grabbed the final PVH lock; PTE contents may change during this time.
8999 */
9000 pa = pte_to_pa(*((volatile pt_entry_t*)pte_p));
9001
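	/*
	 * Descriptive note: lock-then-revalidate. Take the PVH lock for the physical
	 * page we observed, then re-read the PTE; if the mapping changed underneath
	 * us, drop the lock and retry with the newly observed physical address.
	 */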
9002 while (pa_valid(pa)) {
9003 pmap_paddr_t new_pa;
9004
9005 LOCK_PVH((int)pa_index(pa));
9006 new_pa = pte_to_pa(*((volatile pt_entry_t*)pte_p));
9007
9008 if (pa == new_pa) {
9009 break;
9010 }
9011
9012 UNLOCK_PVH((int)pa_index(pa));
9013 pa = new_pa;
9014 }
9015
9016 /* PTE checks must be performed after acquiring the PVH lock (if applicable for the PA) */
9017 if ((*pte_p == ARM_PTE_EMPTY) || (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p))) {
9018 if (!wired) {
9019 /* PTE cleared by prior remove/disconnect operation */
9020 goto pmap_change_wiring_cleanup;
9021 } else {
9022 panic("%s: Attempt to wire empty/compressed PTE %p (=0x%llx) for pmap %p",
9023 __func__, pte_p, (uint64_t)*pte_p, pmap);
9024 }
9025 }
9026
9027 assertf((*pte_p & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", pte_p, (uint64_t)*pte_p);
9028 if (wired != pte_is_wired(*pte_p)) {
9029 pte_set_wired(pmap, pte_p, wired);
9030 if (pmap != kernel_pmap) {
9031 if (wired) {
9032 OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
9033 pmap_ledger_credit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
9034 } else if (!wired) {
9035 __assert_only int32_t orig_wired = OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
9036 PMAP_STATS_ASSERTF(orig_wired > 0, pmap, "stats.wired_count %d", orig_wired);
9037 pmap_ledger_debit(pmap, task_ledgers.wired_mem, pt_attr_page_size(pt_attr) * PAGE_RATIO);
9038 }
9039 }
9040 }
9041
9042 pmap_change_wiring_cleanup:
9043 if (pa_valid(pa)) {
9044 UNLOCK_PVH((int)pa_index(pa));
9045 }
9046
9047 pmap_change_wiring_return:
9048 pmap_unlock(pmap);
9049 }
9050
9051 void
9052 pmap_change_wiring(
9053 pmap_t pmap,
9054 vm_map_address_t v,
9055 boolean_t wired)
9056 {
9057 #if XNU_MONITOR
9058 pmap_change_wiring_ppl(pmap, v, wired);
9059
9060 pmap_ledger_check_balance(pmap);
9061 #else
9062 pmap_change_wiring_internal(pmap, v, wired);
9063 #endif
9064 }
9065
9066 MARK_AS_PMAP_TEXT static pmap_paddr_t
9067 pmap_find_pa_internal(
9068 pmap_t pmap,
9069 addr64_t va)
9070 {
9071 pmap_paddr_t pa = 0;
9072
9073 VALIDATE_PMAP(pmap);
9074
9075 if (pmap != kernel_pmap) {
9076 pmap_lock_ro(pmap);
9077 }
9078
9079 pa = pmap_vtophys(pmap, va);
9080
9081 if (pmap != kernel_pmap) {
9082 pmap_unlock_ro(pmap);
9083 }
9084
9085 return pa;
9086 }
9087
9088 pmap_paddr_t
9089 pmap_find_pa_nofault(pmap_t pmap, addr64_t va)
9090 {
9091 pmap_paddr_t pa = 0;
9092
9093 if (pmap == kernel_pmap) {
9094 pa = mmu_kvtop(va);
9095 } else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
9096 /*
9097 * Note that this doesn't account for PAN: mmu_uvtop() may return a valid
9098 * translation even if PAN would prevent kernel access through the translation.
9099 * It's therefore assumed the UVA will be accessed in a PAN-disabled context.
9100 */
9101 pa = mmu_uvtop(va);
9102 }
9103 return pa;
9104 }
9105
9106 pmap_paddr_t
9107 pmap_find_pa(
9108 pmap_t pmap,
9109 addr64_t va)
9110 {
9111 pmap_paddr_t pa = pmap_find_pa_nofault(pmap, va);
9112
9113 if (pa != 0) {
9114 return pa;
9115 }
9116
9117 if (not_in_kdp) {
9118 #if XNU_MONITOR
9119 return pmap_find_pa_ppl(pmap, va);
9120 #else
9121 return pmap_find_pa_internal(pmap, va);
9122 #endif
9123 } else {
9124 return pmap_vtophys(pmap, va);
9125 }
9126 }
9127
9128 ppnum_t
9129 pmap_find_phys_nofault(
9130 pmap_t pmap,
9131 addr64_t va)
9132 {
9133 ppnum_t ppn;
9134 ppn = atop(pmap_find_pa_nofault(pmap, va));
9135 return ppn;
9136 }
9137
9138 ppnum_t
9139 pmap_find_phys(
9140 pmap_t pmap,
9141 addr64_t va)
9142 {
9143 ppnum_t ppn;
9144 ppn = atop(pmap_find_pa(pmap, va));
9145 return ppn;
9146 }
9147
9148
9149 pmap_paddr_t
9150 kvtophys(
9151 vm_offset_t va)
9152 {
9153 pmap_paddr_t pa;
9154
9155 pa = mmu_kvtop(va);
9156 if (pa) {
9157 return pa;
9158 }
9159 return pmap_vtophys(kernel_pmap, va);
9160 }
9161
9162 pmap_paddr_t
9163 pmap_vtophys(
9164 pmap_t pmap,
9165 addr64_t va)
9166 {
9167 if ((va < pmap->min) || (va >= pmap->max)) {
9168 return 0;
9169 }
9170
9171 #if (__ARM_VMSA__ == 7)
9172 tt_entry_t *tte_p, tte;
9173 pt_entry_t *pte_p;
9174 pmap_paddr_t pa;
9175
9176 tte_p = pmap_tte(pmap, va);
9177 if (tte_p == (tt_entry_t *) NULL) {
9178 return (pmap_paddr_t) 0;
9179 }
9180
9181 tte = *tte_p;
9182 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
9183 pte_p = (pt_entry_t *) ttetokv(tte) + pte_index(pmap, pt_attr, va);
9184 pa = pte_to_pa(*pte_p) | (va & ARM_PGMASK);
9185 //LIONEL ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
9186 #if DEVELOPMENT || DEBUG
9187 if (atop(pa) != 0 &&
9188 ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
9189 panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
9190 pmap, va, pte_p, (uint64_t) (*pte_p), atop(pa));
9191 }
9192 #endif /* DEVELOPMENT || DEBUG */
9193 } else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
9194 if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER) {
9195 pa = suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK);
9196 } else {
9197 pa = sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK);
9198 }
9199 } else {
9200 pa = 0;
9201 }
9202 #else
9203 tt_entry_t * ttp = NULL;
9204 tt_entry_t * ttep = NULL;
9205 tt_entry_t tte = ARM_TTE_EMPTY;
9206 pmap_paddr_t pa = 0;
9207 unsigned int cur_level;
9208
9209 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
9210
9211 ttp = pmap->tte;
9212
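	/*
	 * Descriptive note: walk the translation tables from the root level down to
	 * the leaf level. The walk stops at the first block or page descriptor, and
	 * the physical address is formed from the descriptor's output address plus
	 * the VA's offset within that block/page.
	 */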
9213 for (cur_level = pt_attr_root_level(pt_attr); cur_level <= pt_attr_leaf_level(pt_attr); cur_level++) {
9214 ttep = &ttp[ttn_index(pmap, pt_attr, va, cur_level)];
9215
9216 tte = *ttep;
9217
9218 const uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
9219 const uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
9220 const uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
9221 const uint64_t offmask = pt_attr->pta_level_info[cur_level].offmask;
9222
9223 if ((tte & valid_mask) != valid_mask) {
9224 return (pmap_paddr_t) 0;
9225 }
9226
9227 /* This detects both leaf entries and intermediate block mappings. */
9228 if ((tte & type_mask) == type_block) {
9229 pa = ((tte & ARM_TTE_PA_MASK & ~offmask) | (va & offmask));
9230 break;
9231 }
9232
9233 ttp = (tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
9234 }
9235 #endif
9236
9237 return pa;
9238 }
9239
9240 /*
9241 * pmap_init_pte_page - Initialize a page table page.
9242 */
9243 void
9244 pmap_init_pte_page(
9245 pmap_t pmap,
9246 pt_entry_t *pte_p,
9247 vm_offset_t va,
9248 unsigned int ttlevel,
9249 boolean_t alloc_ptd)
9250 {
9251 pt_desc_t *ptdp = NULL;
9252 vm_offset_t *pvh;
9253
9254 pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));
9255
9256 if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
9257 if (alloc_ptd) {
9258 /*
9259 * This path should only be invoked from arm_vm_init. If we are emulating 16KB pages
9260 * on 4KB hardware, we may already have allocated a page table descriptor for a
9261 * bootstrap request, so we check for an existing PTD here.
9262 */
9263 ptdp = ptd_alloc(pmap);
9264 if (ptdp == NULL) {
9265 panic("%s: unable to allocate PTD", __func__);
9266 }
9267 pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
9268 } else {
9269 panic("pmap_init_pte_page(): pte_p %p", pte_p);
9270 }
9271 } else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
9272 ptdp = (pt_desc_t*)(pvh_list(pvh));
9273 } else {
9274 panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
9275 }
9276
9277 // The barrier below ensures that previous updates to the page are visible to the
9278 // page table walker (PTW) before the page is linked into the parent-level table entry.
9279 __builtin_arm_dmb(DMB_ISHST);
9280 ptd_init(ptdp, pmap, va, ttlevel, pte_p);
9281 }
9282
9283 /*
9284 * Routine: pmap_expand
9285 *
9286 * Expands a pmap to be able to map the specified virtual address.
9287 *
9288 * Allocates new memory for the default (COARSE) translation table
9289 * entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
9290 * also allocates space for the corresponding pv entries.
9291 *
9292 * Nothing should be locked.
9293 */
9294 static kern_return_t
9295 pmap_expand(
9296 pmap_t pmap,
9297 vm_map_address_t v,
9298 unsigned int options,
9299 unsigned int level)
9300 {
9301 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
9302
9303 #if (__ARM_VMSA__ == 7)
9304 vm_offset_t pa;
9305 tt_entry_t *tte_p;
9306 tt_entry_t *tt_p;
9307 unsigned int i;
9308
9309 #if DEVELOPMENT || DEBUG
9310 /*
9311 * We no longer support root level expansion; panic in case something
9312 * still attempts to trigger it.
9313 */
9314 i = tte_index(pmap, pt_attr, v);
9315
9316 if (i >= pmap->tte_index_max) {
9317 panic("%s: index out of range, index=%u, max=%u, "
9318 "pmap=%p, addr=%p, options=%u, level=%u",
9319 __func__, i, pmap->tte_index_max,
9320 pmap, (void *)v, options, level);
9321 }
9322 #endif /* DEVELOPMENT || DEBUG */
9323
9324 if (level == 1) {
9325 return KERN_SUCCESS;
9326 }
9327
9328 {
9329 tt_entry_t *tte_next_p;
9330
9331 pmap_lock(pmap);
9332 pa = 0;
9333 if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
9334 pmap_unlock(pmap);
9335 return KERN_SUCCESS;
9336 }
9337 tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
9338 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
9339 if (tte_to_pa(*tte_next_p)) {
9340 pa = tte_to_pa(*tte_next_p);
9341 break;
9342 }
9343 tte_next_p++;
9344 }
9345 pa = pa & ~PAGE_MASK;
9346 if (pa) {
9347 tte_p = &pmap->tte[ttenum(v)];
9348 *tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
9349 FLUSH_PTE(tte_p);
9350 PMAP_TRACE(5, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
9351 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
9352 pmap_unlock(pmap);
9353 return KERN_SUCCESS;
9354 }
9355 pmap_unlock(pmap);
9356 }
9357 v = v & ~ARM_TT_L1_PT_OFFMASK;
9358
9359
9360 while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
9361 /*
9362 * Allocate a VM page for the level 2 page table entries.
9363 */
9364 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
9365 if (options & PMAP_OPTIONS_NOWAIT) {
9366 return KERN_RESOURCE_SHORTAGE;
9367 }
9368 VM_PAGE_WAIT();
9369 }
9370
9371 pmap_lock(pmap);
9372 /*
9373 * See if someone else expanded us first
9374 */
9375 if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
9376 tt_entry_t *tte_next_p;
9377
9378 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE);
9379 pa = kvtophys((vm_offset_t)tt_p);
9380 tte_p = &pmap->tte[ttenum(v)];
9381 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
9382 *tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
9383 PMAP_TRACE(5, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
9384 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
9385 tte_next_p++;
9386 pa = pa + 0x400;
9387 }
9388 FLUSH_PTE_RANGE(tte_p, tte_p + 4);
9389
9390 pa = 0x0ULL;
9391 tt_p = (tt_entry_t *)NULL;
9392 }
9393 pmap_unlock(pmap);
9394 if (tt_p != (tt_entry_t *)NULL) {
9395 pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
9396 tt_p = (tt_entry_t *)NULL;
9397 }
9398 }
9399 return KERN_SUCCESS;
9400 #else
9401 pmap_paddr_t pa;
9402 unsigned int ttlevel = pt_attr_root_level(pt_attr);
9403 tt_entry_t *tte_p;
9404 tt_entry_t *tt_p;
9405
9406 pa = 0x0ULL;
9407 tt_p = (tt_entry_t *)NULL;
9408
9409 for (; ttlevel < level; ttlevel++) {
9410 pmap_lock_ro(pmap);
9411
9412 if (pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL) {
9413 pmap_unlock_ro(pmap);
9414 while (pmap_tt_allocate(pmap, &tt_p, ttlevel + 1, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
9415 if (options & PMAP_OPTIONS_NOWAIT) {
9416 return KERN_RESOURCE_SHORTAGE;
9417 }
9418 #if XNU_MONITOR
9419 panic("%s: failed to allocate tt, "
9420 "pmap=%p, v=%p, options=0x%x, level=%u",
9421 __FUNCTION__,
9422 pmap, (void *)v, options, level);
9423 #else
9424 VM_PAGE_WAIT();
9425 #endif
9426 }
9427 pmap_lock(pmap);
9428 if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
9429 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE);
9430 pa = kvtophys((vm_offset_t)tt_p);
9431 tte_p = pmap_ttne(pmap, ttlevel, v);
9432 *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
9433 PMAP_TRACE(4 + ttlevel, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
9434 VM_KERNEL_ADDRHIDE((v & ~pt_attr_ln_offmask(pt_attr, ttlevel)) + pt_attr_ln_size(pt_attr, ttlevel)), *tte_p);
9435 pa = 0x0ULL;
9436 tt_p = (tt_entry_t *)NULL;
9437 }
9438 pmap_unlock(pmap);
9439 } else {
9440 pmap_unlock_ro(pmap);
9441 }
9442
9443 if (tt_p != (tt_entry_t *)NULL) {
9444 pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
9445 tt_p = (tt_entry_t *)NULL;
9446 }
9447 }
9448
9449 return KERN_SUCCESS;
9450 #endif
9451 }
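
/*
 * A minimal illustrative sketch (hypothetical; not a caller that exists in
 * this file): how the PMAP_OPTIONS_NOWAIT contract of pmap_expand() is meant
 * to be used. Without the flag, pmap_expand() blocks in VM_PAGE_WAIT() until
 * a table page becomes available; with it, the shortage is reported to the
 * caller instead. The 'level' argument is whatever translation level the
 * caller needs populated (the leaf level when installing a PTE).
 */
static kern_return_t __unused
example_expand_nowait(pmap_t pmap, vm_map_address_t v, unsigned int level)
{
	kern_return_t kr = pmap_expand(pmap, v, PMAP_OPTIONS_NOWAIT, level);

	if (kr == KERN_RESOURCE_SHORTAGE) {
		/*
		 * A table page could not be allocated without blocking. A real
		 * caller would drop its locks, let the VM make progress, and
		 * retry the whole operation later.
		 */
		return kr;
	}
	assert(kr == KERN_SUCCESS);
	return kr;
}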
9452
9453 /*
9454 * Routine: pmap_collect
9455 * Function:
9456 * Garbage collects the physical map system for
9457 * pages which are no longer used.
9458 * Success need not be guaranteed -- that is, some
9459 * unreferenced pages may be left uncollected while
9460 * others are reclaimed.
9461 */
9462 void
9463 pmap_collect(pmap_t pmap)
9464 {
9465 if (pmap == PMAP_NULL) {
9466 return;
9467 }
9468
9469 #if 0
9470 pmap_lock(pmap);
9471 if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
9472 /* TODO: Scan for vm page assigned to top level page tables with no reference */
9473 }
9474 pmap_unlock(pmap);
9475 #endif
9476
9477 return;
9478 }
9479
9480 /*
9481 * Routine: pmap_gc
9482 * Function:
9483 * Pmap garbage collection
9484 * Called by the pageout daemon when pages are scarce.
9485 *
9486 */
9487 void
9488 pmap_gc(
9489 void)
9490 {
9491 #if XNU_MONITOR
9492 /*
9493 * We cannot invoke the scheduler from the PPL, so for now we elide the
9494 * GC logic if the PPL is enabled.
9495 */
9496 #endif
9497 #if !XNU_MONITOR
9498 pmap_t pmap, pmap_next;
9499 boolean_t gc_wait;
9500
9501 if (pmap_gc_allowed &&
9502 (pmap_gc_allowed_by_time_throttle ||
9503 pmap_gc_forced)) {
9504 pmap_gc_forced = FALSE;
9505 pmap_gc_allowed_by_time_throttle = FALSE;
9506 pmap_simple_lock(&pmaps_lock);
9507 pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
9508 while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
9509 if (!(pmap->gc_status & PMAP_GC_INFLIGHT)) {
9510 pmap->gc_status |= PMAP_GC_INFLIGHT;
9511 }
9512 pmap_simple_unlock(&pmaps_lock);
9513
9514 pmap_collect(pmap);
9515
9516 pmap_simple_lock(&pmaps_lock);
9517 gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
9518 pmap->gc_status &= ~(PMAP_GC_INFLIGHT | PMAP_GC_WAIT);
9519 pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
9520 if (gc_wait) {
9521 if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next)) {
9522 pmap_next->gc_status |= PMAP_GC_INFLIGHT;
9523 }
9524 pmap_simple_unlock(&pmaps_lock);
9525 thread_wakeup((event_t) &pmap->gc_status);
9526 pmap_simple_lock(&pmaps_lock);
9527 }
9528 pmap = pmap_next;
9529 }
9530 pmap_simple_unlock(&pmaps_lock);
9531 }
9532 #endif
9533 }
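
/*
 * A minimal illustrative sketch (hypothetical helper; the pmap teardown path
 * uses the same handshake): how the PMAP_GC_INFLIGHT / PMAP_GC_WAIT protocol
 * above is intended to be used by a thread that must not proceed while a
 * garbage collection pass is still touching its pmap. Assumes the usual
 * assert_wait()/thread_block() primitives are visible here.
 */
static void __unused
example_wait_for_pmap_gc(pmap_t pmap)
{
	pmap_simple_lock(&pmaps_lock);
	if (pmap->gc_status & PMAP_GC_INFLIGHT) {
		/* Ask pmap_gc() to issue a wakeup once it is done with this pmap. */
		pmap->gc_status |= PMAP_GC_WAIT;
		assert_wait((event_t)&pmap->gc_status, THREAD_UNINT);
		pmap_simple_unlock(&pmaps_lock);
		(void) thread_block(THREAD_CONTINUE_NULL);
	} else {
		pmap_simple_unlock(&pmaps_lock);
	}
}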
9534
9535 /*
9536 * Called by the VM to reclaim pages that we can release quickly and cheaply.
9537 */
9538 uint64_t
9539 pmap_release_pages_fast(void)
9540 {
9541 #if XNU_MONITOR
9542 return pmap_release_ppl_pages_to_kernel();
9543 #else /* XNU_MONITOR */
9544 return 0;
9545 #endif
9546 }
9547
9548 /*
9549 * By default, don't attempt pmap GC more frequently
9550 * than once per minute.
9551 */
9552
9553 void
9554 compute_pmap_gc_throttle(
9555 void *arg __unused)
9556 {
9557 pmap_gc_allowed_by_time_throttle = TRUE;
9558 }
9559
9560 /*
9561 * pmap_attribute_cache_sync(ppnum_t pp, vm_size_t size, ...)
9562 *
9563 * Invalidates all of the instruction cache on a physical page and
9564 * pushes any dirty data from the data cache for the same physical page
9565 */
9566
9567 kern_return_t
9568 pmap_attribute_cache_sync(
9569 ppnum_t pp,
9570 vm_size_t size,
9571 __unused vm_machine_attribute_t attribute,
9572 __unused vm_machine_attribute_val_t * value)
9573 {
9574 if (size > PAGE_SIZE) {
9575 panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
9576 } else {
9577 cache_sync_page(pp);
9578 }
9579
9580 return KERN_SUCCESS;
9581 }
9582
9583 /*
9584 * pmap_sync_page_data_phys(ppnum_t pp)
9585 *
9586 * Invalidates all of the instruction cache on a physical page and
9587 * pushes any dirty data from the data cache for the same physical page
9588 */
9589 void
9590 pmap_sync_page_data_phys(
9591 ppnum_t pp)
9592 {
9593 cache_sync_page(pp);
9594 }
9595
9596 /*
9597 * pmap_sync_page_attributes_phys(ppnum_t pp)
9598 *
9599 * Write back and invalidate all cachelines on a physical page.
9600 */
9601 void
9602 pmap_sync_page_attributes_phys(
9603 ppnum_t pp)
9604 {
9605 flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
9606 }
9607
9608 #if CONFIG_COREDUMP
9609 /* temporary workaround */
9610 boolean_t
9611 coredumpok(
9612 vm_map_t map,
9613 mach_vm_offset_t va)
9614 {
9615 pt_entry_t *pte_p;
9616 pt_entry_t spte;
9617
9618 pte_p = pmap_pte(map->pmap, va);
9619 if (0 == pte_p) {
9620 return FALSE;
9621 }
9622 spte = *pte_p;
9623 return (spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
9624 }
9625 #endif
9626
9627 void
9628 fillPage(
9629 ppnum_t pn,
9630 unsigned int fill)
9631 {
9632 unsigned int *addr;
9633 int count;
9634
9635 addr = (unsigned int *) phystokv(ptoa(pn));
9636 count = PAGE_SIZE / sizeof(unsigned int);
9637 while (count--) {
9638 *addr++ = fill;
9639 }
9640 }
9641
9642 extern void mapping_set_mod(ppnum_t pn);
9643
9644 void
9645 mapping_set_mod(
9646 ppnum_t pn)
9647 {
9648 pmap_set_modify(pn);
9649 }
9650
9651 extern void mapping_set_ref(ppnum_t pn);
9652
9653 void
9654 mapping_set_ref(
9655 ppnum_t pn)
9656 {
9657 pmap_set_reference(pn);
9658 }
9659
9660 /*
9661 * Clear specified attribute bits.
9662 *
9663 * Try to force an arm_fast_fault() for all mappings of
9664 * the page - to force attributes to be set again at fault time.
9665 * If the forcing succeeds, clear the cached bits at the head.
9666 * Otherwise, something must have been wired, so leave the cached
9667 * attributes alone.
9668 */
9669 MARK_AS_PMAP_TEXT static void
9670 phys_attribute_clear_with_flush_range(
9671 ppnum_t pn,
9672 unsigned int bits,
9673 int options,
9674 void *arg,
9675 pmap_tlb_flush_range_t *flush_range)
9676 {
9677 pmap_paddr_t pa = ptoa(pn);
9678 vm_prot_t allow_mode = VM_PROT_ALL;
9679
9680 #if XNU_MONITOR
9681 if (__improbable(bits & PP_ATTR_PPL_OWNED_BITS)) {
9682 panic("%s: illegal request, "
9683 "pn=%u, bits=%#x, options=%#x, arg=%p, flush_range=%p",
9684 __FUNCTION__,
9685 pn, bits, options, arg, flush_range);
9686 }
9687 #endif
9688 if ((arg != NULL) || (flush_range != NULL)) {
9689 options = options & ~PMAP_OPTIONS_NOFLUSH;
9690 }
9691
9692 if (__improbable((bits & PP_ATTR_MODIFIED) &&
9693 (options & PMAP_OPTIONS_NOFLUSH))) {
9694 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p,%p): "
9695 "should not clear 'modified' without flushing TLBs\n",
9696 pn, bits, options, arg, flush_range);
9697 }
9698
9699 assert(pn != vm_page_fictitious_addr);
9700
9701 if (options & PMAP_OPTIONS_CLEAR_WRITE) {
9702 assert(bits == PP_ATTR_MODIFIED);
9703
9704 pmap_page_protect_options_with_flush_range(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), options, flush_range);
9705 /*
9706 * We short circuit this case; it should not need to
9707 * invoke arm_force_fast_fault, so just clear the modified bit.
9708 * pmap_page_protect has taken care of resetting
9709 * the state so that we'll see the next write as a fault to
9710 * the VM (i.e. we don't want a fast fault).
9711 */
9712 pa_clear_bits(pa, bits);
9713 return;
9714 }
9715 if (bits & PP_ATTR_REFERENCED) {
9716 allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
9717 }
9718 if (bits & PP_ATTR_MODIFIED) {
9719 allow_mode &= ~VM_PROT_WRITE;
9720 }
9721
9722 if (bits == PP_ATTR_NOENCRYPT) {
9723 /*
9724 * We short circuit this case; it should not need to
9725 * invoke arm_force_fast_fault, so just clear and
9726 * return. On ARM, this bit is just a debugging aid.
9727 */
9728 pa_clear_bits(pa, bits);
9729 return;
9730 }
9731
9732 if (arm_force_fast_fault_with_flush_range(pn, allow_mode, options, flush_range)) {
9733 pa_clear_bits(pa, bits);
9734 }
9735 }
9736
9737 MARK_AS_PMAP_TEXT static void
9738 phys_attribute_clear_internal(
9739 ppnum_t pn,
9740 unsigned int bits,
9741 int options,
9742 void *arg)
9743 {
9744 phys_attribute_clear_with_flush_range(pn, bits, options, arg, NULL);
9745 }
9746
9747 #if __ARM_RANGE_TLBI__
9748 MARK_AS_PMAP_TEXT static vm_map_address_t
9749 phys_attribute_clear_twig_internal(
9750 pmap_t pmap,
9751 vm_map_address_t start,
9752 vm_map_address_t end,
9753 unsigned int bits,
9754 unsigned int options,
9755 pmap_tlb_flush_range_t *flush_range)
9756 {
9757 pmap_assert_locked_r(pmap);
9758 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
9759 assert(end >= start);
9760 assert((end - start) <= pt_attr_twig_size(pt_attr));
9761 const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
9762 vm_map_address_t va = start;
9763 pt_entry_t *pte_p, *start_pte_p, *end_pte_p, *curr_pte_p;
9764 tt_entry_t *tte_p;
9765 tte_p = pmap_tte(pmap, start);
9766 unsigned int npages = 0;
9767
9768 if (tte_p == (tt_entry_t *) NULL) {
9769 return end;
9770 }
9771
9772 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
9773 pte_p = (pt_entry_t *) ttetokv(*tte_p);
9774
9775 start_pte_p = &pte_p[pte_index(pmap, pt_attr, start)];
9776 end_pte_p = start_pte_p + ((end - start) >> pt_attr_leaf_shift(pt_attr));
9777 assert(end_pte_p >= start_pte_p);
9778 for (curr_pte_p = start_pte_p; curr_pte_p < end_pte_p; curr_pte_p++, va += pmap_page_size) {
9779 if (__improbable(npages++ && pmap_pending_preemption())) {
9780 return va;
9781 }
9782 pmap_paddr_t pa = pte_to_pa(*((volatile pt_entry_t*)curr_pte_p));
9783 if (pa_valid(pa)) {
9784 ppnum_t pn = (ppnum_t) atop(pa);
9785 phys_attribute_clear_with_flush_range(pn, bits, options, NULL, flush_range);
9786 }
9787 }
9788 }
9789 return end;
9790 }
9791
9792 MARK_AS_PMAP_TEXT static vm_map_address_t
9793 phys_attribute_clear_range_internal(
9794 pmap_t pmap,
9795 vm_map_address_t start,
9796 vm_map_address_t end,
9797 unsigned int bits,
9798 unsigned int options)
9799 {
9800 if (__improbable(end < start)) {
9801 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
9802 }
9803 VALIDATE_PMAP(pmap);
9804
9805 vm_map_address_t va = start;
9806 pmap_tlb_flush_range_t flush_range = {
9807 .ptfr_pmap = pmap,
9808 .ptfr_start = start,
9809 .ptfr_end = end,
9810 .ptfr_flush_needed = false
9811 };
9812
9813 pmap_lock_ro(pmap);
9814 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
9815
9816 while (va < end) {
9817 vm_map_address_t curr_end;
9818
9819 curr_end = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
9820 if (curr_end > end) {
9821 curr_end = end;
9822 }
9823
9824 va = phys_attribute_clear_twig_internal(pmap, va, curr_end, bits, options, &flush_range);
9825 if ((va < curr_end) || pmap_pending_preemption()) {
9826 break;
9827 }
9828 }
9829 pmap_unlock_ro(pmap);
9830 if (flush_range.ptfr_flush_needed) {
9831 flush_range.ptfr_end = va;
9832 pmap_get_pt_ops(pmap)->flush_tlb_region_async(
9833 flush_range.ptfr_start,
9834 flush_range.ptfr_end - flush_range.ptfr_start,
9835 flush_range.ptfr_pmap);
9836 sync_tlb_flush();
9837 }
9838 return va;
9839 }
9840
9841 static void
9842 phys_attribute_clear_range(
9843 pmap_t pmap,
9844 vm_map_address_t start,
9845 vm_map_address_t end,
9846 unsigned int bits,
9847 unsigned int options)
9848 {
9849 assert(get_preemption_level() == 0);
9850
9851 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE) | DBG_FUNC_START, bits);
9852
9853 while (start < end) {
9854 #if XNU_MONITOR
9855 start = phys_attribute_clear_range_ppl(pmap, start, end, bits, options);
9856 #else
9857 start = phys_attribute_clear_range_internal(pmap, start, end, bits, options);
9858 #endif
9859 }
9860
9861 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR_RANGE) | DBG_FUNC_END);
9862 }
9863 #endif /* __ARM_RANGE_TLBI__ */
9864
9865 static void
9866 phys_attribute_clear(
9867 ppnum_t pn,
9868 unsigned int bits,
9869 int options,
9870 void *arg)
9871 {
9872 /*
9873 * Do we really want this tracepoint? It will be extremely chatty.
9874 * Also, should we have a corresponding trace point for the set path?
9875 */
9876 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
9877
9878 #if XNU_MONITOR
9879 phys_attribute_clear_ppl(pn, bits, options, arg);
9880 #else
9881 phys_attribute_clear_internal(pn, bits, options, arg);
9882 #endif
9883
9884 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
9885 }
9886
9887 /*
9888 * Set specified attribute bits.
9889 *
9890 * Set cached value in the pv head because we have
9891 * no per-mapping hardware support for referenced and
9892 * modify bits.
9893 */
9894 MARK_AS_PMAP_TEXT static void
9895 phys_attribute_set_internal(
9896 ppnum_t pn,
9897 unsigned int bits)
9898 {
9899 pmap_paddr_t pa = ptoa(pn);
9900 assert(pn != vm_page_fictitious_addr);
9901
9902 #if XNU_MONITOR
9903 if (bits & PP_ATTR_PPL_OWNED_BITS) {
9904 panic("%s: illegal request, "
9905 "pn=%u, bits=%#x",
9906 __FUNCTION__,
9907 pn, bits);
9908 }
9909 #endif
9910
9911 pa_set_bits(pa, (uint16_t)bits);
9912
9913 return;
9914 }
9915
9916 static void
9917 phys_attribute_set(
9918 ppnum_t pn,
9919 unsigned int bits)
9920 {
9921 #if XNU_MONITOR
9922 phys_attribute_set_ppl(pn, bits);
9923 #else
9924 phys_attribute_set_internal(pn, bits);
9925 #endif
9926 }
9927
9928
9929 /*
9930 * Check specified attribute bits.
9931 *
9932 * Use the software-cached bits (since there is no hardware support).
9933 */
9934 static boolean_t
9935 phys_attribute_test(
9936 ppnum_t pn,
9937 unsigned int bits)
9938 {
9939 pmap_paddr_t pa = ptoa(pn);
9940 assert(pn != vm_page_fictitious_addr);
9941 return pa_test_bits(pa, bits);
9942 }
9943
9944
9945 /*
9946 * Set the modify/reference bits on the specified physical page.
9947 */
9948 void
9949 pmap_set_modify(ppnum_t pn)
9950 {
9951 phys_attribute_set(pn, PP_ATTR_MODIFIED);
9952 }
9953
9954
9955 /*
9956 * Clear the modify bits on the specified physical page.
9957 */
9958 void
9959 pmap_clear_modify(
9960 ppnum_t pn)
9961 {
9962 phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
9963 }
9964
9965
9966 /*
9967 * pmap_is_modified:
9968 *
9969 * Return whether or not the specified physical page is modified
9970 * by any physical maps.
9971 */
9972 boolean_t
9973 pmap_is_modified(
9974 ppnum_t pn)
9975 {
9976 return phys_attribute_test(pn, PP_ATTR_MODIFIED);
9977 }
9978
9979
9980 /*
9981 * Set the reference bit on the specified physical page.
9982 */
9983 static void
9984 pmap_set_reference(
9985 ppnum_t pn)
9986 {
9987 phys_attribute_set(pn, PP_ATTR_REFERENCED);
9988 }
9989
9990 /*
9991 * Clear the reference bits on the specified physical page.
9992 */
9993 void
9994 pmap_clear_reference(
9995 ppnum_t pn)
9996 {
9997 phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
9998 }
9999
10000
10001 /*
10002 * pmap_is_referenced:
10003 *
10004 * Return whether or not the specified physical page is referenced
10005 * by any physical maps.
10006 */
10007 boolean_t
10008 pmap_is_referenced(
10009 ppnum_t pn)
10010 {
10011 return phys_attribute_test(pn, PP_ATTR_REFERENCED);
10012 }
10013
10014 /*
10015 * pmap_get_refmod(phys)
10016 * returns the referenced and modified bits of the specified
10017 * physical page.
10018 */
10019 unsigned int
10020 pmap_get_refmod(
10021 ppnum_t pn)
10022 {
10023 return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
10024 | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
10025 }
10026
10027 static inline unsigned int
10028 pmap_clear_refmod_mask_to_modified_bits(const unsigned int mask)
10029 {
10030 return ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
10031 ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
10032 }
10033
10034 /*
10035 * pmap_clear_refmod(phys, mask)
10036 * clears the referenced and modified bits as specified by the mask
10037 * of the specified physical page.
10038 */
10039 void
10040 pmap_clear_refmod_options(
10041 ppnum_t pn,
10042 unsigned int mask,
10043 unsigned int options,
10044 void *arg)
10045 {
10046 unsigned int bits;
10047
10048 bits = pmap_clear_refmod_mask_to_modified_bits(mask);
10049 phys_attribute_clear(pn, bits, options, arg);
10050 }
10051
10052 /*
10053 * Perform pmap_clear_refmod_options on a virtual address range.
10054 * The operation will be performed in bulk, and TLB flushes will be coalesced
10055 * if possible.
10056 *
10057 * Returns true if the operation is supported on this platform.
10058 * If this function returns false, the operation is not supported and
10059 * nothing has been modified in the pmap.
10060 */
10061 bool
10062 pmap_clear_refmod_range_options(
10063 pmap_t pmap __unused,
10064 vm_map_address_t start __unused,
10065 vm_map_address_t end __unused,
10066 unsigned int mask __unused,
10067 unsigned int options __unused)
10068 {
10069 #if __ARM_RANGE_TLBI__
10070 unsigned int bits;
10071 bits = pmap_clear_refmod_mask_to_modified_bits(mask);
10072 phys_attribute_clear_range(pmap, start, end, bits, options);
10073 return true;
10074 #else /* __ARM_RANGE_TLBI__ */
10075 #pragma unused(pmap, start, end, mask, options)
10076 /*
10077 * This operation allows the VM to bulk modify refmod bits on a virtually
10078 * contiguous range of addresses. This is a large performance improvement on
10079 * platforms that support ranged TLBI instructions. But on older platforms,
10080 * we can only flush per-page or the entire ASID, so we currently
10081 * only support this operation on platforms that support ranged TLBI
10082 * instructions. On other platforms, we require that
10083 * the VM modify the bits on a per-page basis.
10084 */
10085 return false;
10086 #endif /* __ARM_RANGE_TLBI__ */
10087 }
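
/*
 * A minimal illustrative sketch (hypothetical helper; the real caller is the
 * VM pageout machinery): honoring the contract documented above. The ranged
 * call is attempted first; if it returns false, nothing was modified and the
 * caller clears the bits page by page instead. 'pages' is assumed to hold the
 * physical pages backing [start, end) in order.
 */
static void __unused
example_clear_refmod_bulk(pmap_t pmap, vm_map_address_t start, vm_map_address_t end,
    const ppnum_t *pages, unsigned int page_count)
{
	const unsigned int mask = VM_MEM_MODIFIED | VM_MEM_REFERENCED;

	if (pmap_clear_refmod_range_options(pmap, start, end, mask, 0)) {
		/* Ranged-TLBI path handled the whole span with coalesced flushes. */
		return;
	}

	/* No ranged TLBI on this platform: fall back to per-page clearing. */
	for (unsigned int i = 0; i < page_count; i++) {
		pmap_clear_refmod(pages[i], mask);
	}
}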
10088
10089 void
10090 pmap_clear_refmod(
10091 ppnum_t pn,
10092 unsigned int mask)
10093 {
10094 pmap_clear_refmod_options(pn, mask, 0, NULL);
10095 }
10096
10097 unsigned int
10098 pmap_disconnect_options(
10099 ppnum_t pn,
10100 unsigned int options,
10101 void *arg)
10102 {
10103 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
10104 /*
10105 * On ARM, the "modified" bit is managed by software, so
10106 * we know up-front if the physical page is "modified",
10107 * without having to scan all the PTEs pointing to it.
10108 * The caller should have made the VM page "busy" so no one
10109 * should be able to establish any new mapping and "modify"
10110 * the page behind us.
10111 */
10112 if (pmap_is_modified(pn)) {
10113 /*
10114 * The page has been modified and will be sent to
10115 * the VM compressor.
10116 */
10117 options |= PMAP_OPTIONS_COMPRESSOR;
10118 } else {
10119 /*
10120 * The page hasn't been modified and will be freed
10121 * instead of compressed.
10122 */
10123 }
10124 }
10125
10126 /* disconnect the page */
10127 pmap_page_protect_options(pn, 0, options, arg);
10128
10129 /* return ref/chg status */
10130 return pmap_get_refmod(pn);
10131 }
10132
10133 /*
10134 * Routine:
10135 * pmap_disconnect
10136 *
10137 * Function:
10138 * Disconnect all mappings for this page and return reference and change status
10139 * in generic format.
10140 *
10141 */
10142 unsigned int
10143 pmap_disconnect(
10144 ppnum_t pn)
10145 {
10146 pmap_page_protect(pn, 0); /* disconnect the page */
10147 return pmap_get_refmod(pn); /* return ref/chg status */
10148 }
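
/*
 * A minimal illustrative sketch (hypothetical helper): the intended use of
 * PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED described above. The page is
 * disconnected from every pmap, compressor accounting is applied only if the
 * page was actually dirty, and the caller learns the ref/mod state from the
 * generic VM_MEM_* bits that are returned.
 */
static boolean_t __unused
example_disconnect_for_compressor(ppnum_t pn)
{
	unsigned int refmod = pmap_disconnect_options(pn,
	    PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED, NULL);

	/* TRUE means the page was modified and should be sent to the compressor. */
	return (refmod & VM_MEM_MODIFIED) ? TRUE : FALSE;
}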
10149
10150 boolean_t
10151 pmap_has_managed_page(ppnum_t first, ppnum_t last)
10152 {
10153 if (ptoa(first) >= vm_last_phys) {
10154 return FALSE;
10155 }
10156 if (ptoa(last) < vm_first_phys) {
10157 return FALSE;
10158 }
10159
10160 return TRUE;
10161 }
10162
10163 /*
10164 * The state maintained by the noencrypt functions is used as a
10165 * debugging aid on ARM. This incurs some overhead on the part
10166 * of the caller. A special case check in phys_attribute_clear
10167 * (the most expensive path) currently minimizes this overhead,
10168 * but stubbing these functions out on RELEASE kernels yields
10169 * further wins.
10170 */
10171 boolean_t
10172 pmap_is_noencrypt(
10173 ppnum_t pn)
10174 {
10175 #if DEVELOPMENT || DEBUG
10176 boolean_t result = FALSE;
10177
10178 if (!pa_valid(ptoa(pn))) {
10179 return FALSE;
10180 }
10181
10182 result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));
10183
10184 return result;
10185 #else
10186 #pragma unused(pn)
10187 return FALSE;
10188 #endif
10189 }
10190
10191 void
10192 pmap_set_noencrypt(
10193 ppnum_t pn)
10194 {
10195 #if DEVELOPMENT || DEBUG
10196 if (!pa_valid(ptoa(pn))) {
10197 return;
10198 }
10199
10200 phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
10201 #else
10202 #pragma unused(pn)
10203 #endif
10204 }
10205
10206 void
10207 pmap_clear_noencrypt(
10208 ppnum_t pn)
10209 {
10210 #if DEVELOPMENT || DEBUG
10211 if (!pa_valid(ptoa(pn))) {
10212 return;
10213 }
10214
10215 phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
10216 #else
10217 #pragma unused(pn)
10218 #endif
10219 }
10220
10221 #if XNU_MONITOR
10222 boolean_t
10223 pmap_is_monitor(ppnum_t pn)
10224 {
10225 assert(pa_valid(ptoa(pn)));
10226 return phys_attribute_test(pn, PP_ATTR_MONITOR);
10227 }
10228 #endif
10229
10230 void
10231 pmap_lock_phys_page(ppnum_t pn)
10232 {
10233 #if !XNU_MONITOR
10234 int pai;
10235 pmap_paddr_t phys = ptoa(pn);
10236
10237 if (pa_valid(phys)) {
10238 pai = (int)pa_index(phys);
10239 LOCK_PVH(pai);
10240 } else
10241 #else
10242 (void)pn;
10243 #endif
10244 { simple_lock(&phys_backup_lock, LCK_GRP_NULL);}
10245 }
10246
10247
10248 void
10249 pmap_unlock_phys_page(ppnum_t pn)
10250 {
10251 #if !XNU_MONITOR
10252 int pai;
10253 pmap_paddr_t phys = ptoa(pn);
10254
10255 if (pa_valid(phys)) {
10256 pai = (int)pa_index(phys);
10257 UNLOCK_PVH(pai);
10258 } else
10259 #else
10260 (void)pn;
10261 #endif
10262 { simple_unlock(&phys_backup_lock);}
10263 }
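
/*
 * A minimal illustrative sketch (hypothetical helper): pmap_lock_phys_page()
 * and pmap_unlock_phys_page() are meant to bracket work that must not race
 * with pmap operations on the page's PV list; for unmanaged pages they fall
 * back to the global phys_backup_lock.
 */
static boolean_t __unused
example_test_modified_stable(ppnum_t pn)
{
	boolean_t modified;

	pmap_lock_phys_page(pn);
	modified = pmap_is_modified(pn);
	pmap_unlock_phys_page(pn);

	return modified;
}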
10264
10265 MARK_AS_PMAP_TEXT static void
10266 pmap_switch_user_ttb_internal(
10267 pmap_t pmap)
10268 {
10269 VALIDATE_PMAP(pmap);
10270 pmap_cpu_data_t *cpu_data_ptr;
10271 cpu_data_ptr = pmap_get_cpu_data();
10272
10273 #if (__ARM_VMSA__ == 7)
10274 cpu_data_ptr->cpu_user_pmap = pmap;
10275 cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
10276
10277 #if MACH_ASSERT && __ARM_USER_PROTECT__
10278 {
10279 unsigned int ttbr0_val, ttbr1_val;
10280 __asm__ volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
10281 __asm__ volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
10282 if (ttbr0_val != ttbr1_val) {
10283 panic("Misaligned ttbr0 %08X\n", ttbr0_val);
10284 }
10285 if (pmap->ttep & 0x1000) {
10286 panic("Misaligned ttbr0 %08X\n", pmap->ttep);
10287 }
10288 }
10289 #endif
10290 #if !__ARM_USER_PROTECT__
10291 set_mmu_ttb(pmap->ttep);
10292 set_context_id(pmap->hw_asid);
10293 #endif
10294
10295 #else /* (__ARM_VMSA__ == 7) */
10296
10297 if (pmap != kernel_pmap) {
10298 cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
10299 cpu_data_ptr->cpu_nested_pmap_attr = (cpu_data_ptr->cpu_nested_pmap == NULL) ?
10300 NULL : pmap_get_pt_attr(cpu_data_ptr->cpu_nested_pmap);
10301 cpu_data_ptr->cpu_nested_region_addr = pmap->nested_region_addr;
10302 cpu_data_ptr->cpu_nested_region_size = pmap->nested_region_size;
10303 }
10304
10305
10306 #if __ARM_MIXED_PAGE_SIZE__
10307 if ((pmap != kernel_pmap) && (pmap_get_pt_attr(pmap)->pta_tcr_value != get_tcr())) {
10308 set_tcr(pmap_get_pt_attr(pmap)->pta_tcr_value);
10309 }
10310 #endif /* __ARM_MIXED_PAGE_SIZE__ */
10311
10312 if (pmap != kernel_pmap) {
10313 set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->hw_asid) << TTBR_ASID_SHIFT));
10314 } else if (!pmap_user_ttb_is_clear()) {
10315 pmap_clear_user_ttb_internal();
10316 }
10317
10318 #endif /* (__ARM_VMSA__ == 7) */
10319 }
10320
10321 void
10322 pmap_switch_user_ttb(
10323 pmap_t pmap)
10324 {
10325 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
10326 #if XNU_MONITOR
10327 pmap_switch_user_ttb_ppl(pmap);
10328 #else
10329 pmap_switch_user_ttb_internal(pmap);
10330 #endif
10331 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
10332 }
10333
10334 MARK_AS_PMAP_TEXT static void
10335 pmap_clear_user_ttb_internal(void)
10336 {
10337 #if (__ARM_VMSA__ > 7)
10338 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
10339 #else
10340 set_mmu_ttb(kernel_pmap->ttep);
10341 #endif
10342 }
10343
10344 void
10345 pmap_clear_user_ttb(void)
10346 {
10347 PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB) | DBG_FUNC_START, NULL, 0, 0);
10348 #if XNU_MONITOR
10349 pmap_clear_user_ttb_ppl();
10350 #else
10351 pmap_clear_user_ttb_internal();
10352 #endif
10353 PMAP_TRACE(3, PMAP_CODE(PMAP__CLEAR_USER_TTB) | DBG_FUNC_END);
10354 }
10355
10356 MARK_AS_PMAP_TEXT static boolean_t
10357 arm_force_fast_fault_with_flush_range(
10358 ppnum_t ppnum,
10359 vm_prot_t allow_mode,
10360 int options,
10361 pmap_tlb_flush_range_t *flush_range)
10362 {
10363 pmap_paddr_t phys = ptoa(ppnum);
10364 pv_entry_t *pve_p;
10365 pt_entry_t *pte_p;
10366 int pai;
10367 boolean_t result;
10368 pv_entry_t **pv_h;
10369 boolean_t is_reusable, is_internal;
10370 boolean_t tlb_flush_needed = FALSE;
10371 boolean_t ref_fault;
10372 boolean_t mod_fault;
10373 boolean_t clear_write_fault = FALSE;
10374 boolean_t ref_aliases_mod = FALSE;
10375 bool mustsynch = ((options & PMAP_OPTIONS_FF_LOCKED) == 0);
10376
10377 assert(ppnum != vm_page_fictitious_addr);
10378
10379 if (!pa_valid(phys)) {
10380 return FALSE; /* Not a managed page. */
10381 }
10382
10383 result = TRUE;
10384 ref_fault = FALSE;
10385 mod_fault = FALSE;
10386 pai = (int)pa_index(phys);
10387 if (__probable(mustsynch)) {
10388 LOCK_PVH(pai);
10389 }
10390 pv_h = pai_to_pvh(pai);
10391
10392 pte_p = PT_ENTRY_NULL;
10393 pve_p = PV_ENTRY_NULL;
10394 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
10395 pte_p = pvh_ptep(pv_h);
10396 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
10397 pve_p = pvh_list(pv_h);
10398 }
10399
10400 is_reusable = IS_REUSABLE_PAGE(pai);
10401 is_internal = IS_INTERNAL_PAGE(pai);
10402
10403 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
10404 vm_map_address_t va;
10405 pt_entry_t spte;
10406 pt_entry_t tmplate;
10407 pmap_t pmap;
10408 boolean_t update_pte;
10409
10410 if (pve_p != PV_ENTRY_NULL) {
10411 pte_p = pve_get_ptep(pve_p);
10412 }
10413
10414 if (pte_p == PT_ENTRY_NULL) {
10415 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
10416 }
10417 #ifdef PVH_FLAG_IOMMU
10418 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
10419 goto fff_skip_pve;
10420 }
10421 #endif
10422 if (*pte_p == ARM_PTE_EMPTY) {
10423 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
10424 }
10425 if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
10426 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
10427 }
10428
10429 pmap = ptep_get_pmap(pte_p);
10430 const pt_attr_t * pt_attr = pmap_get_pt_attr(pmap);
10431 va = ptep_get_va(pte_p);
10432
10433 assert(va >= pmap->min && va < pmap->max);
10434
10435 /* update pmap stats and ledgers */
10436 if (IS_ALTACCT_PAGE(pai, pve_p)) {
10437 /*
10438 * We do not track "reusable" status for
10439 * "alternate accounting" mappings.
10440 */
10441 } else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
10442 is_reusable &&
10443 is_internal &&
10444 pmap != kernel_pmap) {
10445 /* one less "reusable" */
10446 __assert_only int32_t orig_reusable = OSAddAtomic(-1, &pmap->stats.reusable);
10447 PMAP_STATS_ASSERTF(orig_reusable > 0, pmap, "stats.reusable %d", orig_reusable);
10448 /* one more "internal" */
10449 __assert_only int32_t orig_internal = OSAddAtomic(+1, &pmap->stats.internal);
10450 PMAP_STATS_PEAK(pmap->stats.internal);
10451 PMAP_STATS_ASSERTF(orig_internal >= 0, pmap, "stats.internal %d", orig_internal);
10452 pmap_ledger_credit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
10453 assert(!IS_ALTACCT_PAGE(pai, pve_p));
10454 assert(IS_INTERNAL_PAGE(pai));
10455 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
10456
10457 /*
10458 * Since the page is being marked non-reusable, we assume that it will be
10459 * modified soon. Avoid the cost of another trap to handle the fast
10460 * fault when we next write to this page.
10461 */
10462 clear_write_fault = TRUE;
10463 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
10464 !is_reusable &&
10465 is_internal &&
10466 pmap != kernel_pmap) {
10467 /* one more "reusable" */
10468 __assert_only int32_t orig_reusable = OSAddAtomic(+1, &pmap->stats.reusable);
10469 PMAP_STATS_PEAK(pmap->stats.reusable);
10470 PMAP_STATS_ASSERTF(orig_reusable >= 0, pmap, "stats.reusable %d", orig_reusable);
10471 /* one less "internal" */
10472 __assert_only int32_t orig_internal = OSAddAtomic(-1, &pmap->stats.internal);
10473 PMAP_STATS_ASSERTF(orig_internal > 0, pmap, "stats.internal %d", orig_internal);
10474 pmap_ledger_debit(pmap, task_ledgers.internal, pt_attr_page_size(pt_attr) * PAGE_RATIO);
10475 assert(!IS_ALTACCT_PAGE(pai, pve_p));
10476 assert(IS_INTERNAL_PAGE(pai));
10477 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, pt_attr_page_size(pt_attr) * PAGE_RATIO);
10478 }
10479
10480 bool wiredskip = pte_is_wired(*pte_p) &&
10481 ((options & PMAP_OPTIONS_FF_WIRED) == 0);
10482
10483 if (wiredskip) {
10484 result = FALSE;
10485 goto fff_skip_pve;
10486 }
10487
10488 spte = *pte_p;
10489 tmplate = spte;
10490 update_pte = FALSE;
10491
10492 if ((allow_mode & VM_PROT_READ) != VM_PROT_READ) {
10493 /* read protection sets the pte to fault */
10494 tmplate = tmplate & ~ARM_PTE_AF;
10495 update_pte = TRUE;
10496 ref_fault = TRUE;
10497 }
10498 if ((allow_mode & VM_PROT_WRITE) != VM_PROT_WRITE) {
10499 /* take away write permission if set */
10500 if (pmap == kernel_pmap) {
10501 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) {
10502 tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
10503 pte_set_was_writeable(tmplate, true);
10504 update_pte = TRUE;
10505 mod_fault = TRUE;
10506 }
10507 } else {
10508 if ((tmplate & ARM_PTE_APMASK) == pt_attr_leaf_rw(pt_attr)) {
10509 tmplate = ((tmplate & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
10510 pte_set_was_writeable(tmplate, true);
10511 update_pte = TRUE;
10512 mod_fault = TRUE;
10513 }
10514 }
10515 }
10516
10517 #if MACH_ASSERT && XNU_MONITOR
10518 if (is_pte_xprr_protected(pmap, spte)) {
10519 if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
10520 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
10521 "ppnum=0x%x, options=0x%x, allow_mode=0x%x",
10522 __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
10523 ppnum, options, allow_mode);
10524 }
10525 }
10526 #endif /* MACH_ASSERT && XNU_MONITOR */
10527
10528 if (result && update_pte) {
10529 if (options & PMAP_OPTIONS_NOFLUSH) {
10530 WRITE_PTE_FAST(pte_p, tmplate);
10531 } else {
10532 WRITE_PTE_STRONG(pte_p, tmplate);
10533 if (!flush_range ||
10534 ((flush_range->ptfr_pmap != pmap) || va >= flush_range->ptfr_end || va < flush_range->ptfr_start)) {
10535 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
10536 pt_attr_page_size(pt_attr) * PAGE_RATIO, pmap);
10537 }
10538 tlb_flush_needed = TRUE;
10539 }
10540 }
10541
10542 fff_skip_pve:
10543 pte_p = PT_ENTRY_NULL;
10544 if (pve_p != PV_ENTRY_NULL) {
10545 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
10546 }
10547 }
10548
10549 /*
10550 * If we are using the same approach for ref and mod
10551 * faults on this PTE, do not clear the write fault;
10552 * this would cause both ref and mod to be set on the
10553 * page again, and prevent us from taking ANY read/write
10554 * fault on the mapping.
10555 */
10556 if (clear_write_fault && !ref_aliases_mod) {
10557 arm_clear_fast_fault(ppnum, VM_PROT_WRITE);
10558 }
10559 if (tlb_flush_needed) {
10560 if (flush_range) {
10561 /* Delayed flush. Signal to the caller that the flush is needed. */
10562 flush_range->ptfr_flush_needed = true;
10563 } else {
10564 sync_tlb_flush();
10565 }
10566 }
10567
10568 /* update global "reusable" status for this page */
10569 if (is_internal) {
10570 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
10571 is_reusable) {
10572 CLR_REUSABLE_PAGE(pai);
10573 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
10574 !is_reusable) {
10575 SET_REUSABLE_PAGE(pai);
10576 }
10577 }
10578
10579 if (mod_fault) {
10580 SET_MODFAULT_PAGE(pai);
10581 }
10582 if (ref_fault) {
10583 SET_REFFAULT_PAGE(pai);
10584 }
10585 if (__probable(mustsynch)) {
10586 UNLOCK_PVH(pai);
10587 }
10588 return result;
10589 }
10590
10591 MARK_AS_PMAP_TEXT static boolean_t
10592 arm_force_fast_fault_internal(
10593 ppnum_t ppnum,
10594 vm_prot_t allow_mode,
10595 int options)
10596 {
10597 if (__improbable((options & (PMAP_OPTIONS_FF_LOCKED | PMAP_OPTIONS_NOFLUSH)) != 0)) {
10598 panic("arm_force_fast_fault(0x%x, 0x%x, 0x%x): invalid options", ppnum, allow_mode, options);
10599 }
10600 return arm_force_fast_fault_with_flush_range(ppnum, allow_mode, options, NULL);
10601 }
10602
10603 /*
10604 * Routine: arm_force_fast_fault
10605 *
10606 * Function:
10607 * Force all mappings for this page to fault according
10608 * to the access modes allowed, so we can gather ref/modify
10609 * bits again.
10610 */
10611
10612 boolean_t
10613 arm_force_fast_fault(
10614 ppnum_t ppnum,
10615 vm_prot_t allow_mode,
10616 int options,
10617 __unused void *arg)
10618 {
10619 pmap_paddr_t phys = ptoa(ppnum);
10620
10621 assert(ppnum != vm_page_fictitious_addr);
10622
10623 if (!pa_valid(phys)) {
10624 return FALSE; /* Not a managed page. */
10625 }
10626
10627 #if XNU_MONITOR
10628 return arm_force_fast_fault_ppl(ppnum, allow_mode, options);
10629 #else
10630 return arm_force_fast_fault_internal(ppnum, allow_mode, options);
10631 #endif
10632 }
10633
10634 /*
10635 * Routine: arm_clear_fast_fault
10636 *
10637 * Function:
10638 * Clear pending force fault for all mappings for this page based on
10639 * the observed fault type, update ref/modify bits.
10640 */
10641 MARK_AS_PMAP_TEXT static boolean_t
10642 arm_clear_fast_fault(
10643 ppnum_t ppnum,
10644 vm_prot_t fault_type)
10645 {
10646 pmap_paddr_t pa = ptoa(ppnum);
10647 pv_entry_t *pve_p;
10648 pt_entry_t *pte_p;
10649 int pai;
10650 boolean_t result;
10651 boolean_t tlb_flush_needed = FALSE;
10652 pv_entry_t **pv_h;
10653
10654 assert(ppnum != vm_page_fictitious_addr);
10655
10656 if (!pa_valid(pa)) {
10657 return FALSE; /* Not a managed page. */
10658 }
10659
10660 result = FALSE;
10661 pai = (int)pa_index(pa);
10662 ASSERT_PVH_LOCKED(pai);
10663 pv_h = pai_to_pvh(pai);
10664
10665 pte_p = PT_ENTRY_NULL;
10666 pve_p = PV_ENTRY_NULL;
10667 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
10668 pte_p = pvh_ptep(pv_h);
10669 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
10670 pve_p = pvh_list(pv_h);
10671 }
10672
10673 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
10674 vm_map_address_t va;
10675 pt_entry_t spte;
10676 pt_entry_t tmplate;
10677 pmap_t pmap;
10678
10679 if (pve_p != PV_ENTRY_NULL) {
10680 pte_p = pve_get_ptep(pve_p);
10681 }
10682
10683 if (pte_p == PT_ENTRY_NULL) {
10684 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
10685 }
10686 #ifdef PVH_FLAG_IOMMU
10687 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
10688 goto cff_skip_pve;
10689 }
10690 #endif
10691 if (*pte_p == ARM_PTE_EMPTY) {
10692 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
10693 }
10694
10695 pmap = ptep_get_pmap(pte_p);
10696 va = ptep_get_va(pte_p);
10697
10698 assert(va >= pmap->min && va < pmap->max);
10699
10700 spte = *pte_p;
10701 tmplate = spte;
10702
10703 if ((fault_type & VM_PROT_WRITE) && (pte_was_writeable(spte))) {
10704 {
10705 if (pmap == kernel_pmap) {
10706 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
10707 } else {
10708 assert(!pmap->nested); /* no write access in a nested pmap */
10709 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap)));
10710 }
10711 }
10712
10713 tmplate |= ARM_PTE_AF;
10714
10715 pte_set_was_writeable(tmplate, false);
10716 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
10717 } else if ((fault_type & VM_PROT_READ) && ((spte & ARM_PTE_AF) != ARM_PTE_AF)) {
10718 tmplate = spte | ARM_PTE_AF;
10719
10720 {
10721 pa_set_bits(pa, PP_ATTR_REFERENCED);
10722 }
10723 }
10724
10725 #if MACH_ASSERT && XNU_MONITOR
10726 if (is_pte_xprr_protected(pmap, spte)) {
10727 if (pte_to_xprr_perm(spte) != pte_to_xprr_perm(tmplate)) {
10728 panic("%s: attempted to mutate an xPRR mapping pte_p=%p, pmap=%p, pv_h=%p, pve_p=%p, pte=0x%llx, tmplate=0x%llx, va=0x%llx, "
10729 "ppnum=0x%x, fault_type=0x%x",
10730 __FUNCTION__, pte_p, pmap, pv_h, pve_p, (unsigned long long)spte, (unsigned long long)tmplate, (unsigned long long)va,
10731 ppnum, fault_type);
10732 }
10733 }
10734 #endif /* MACH_ASSERT && XNU_MONITOR */
10735
10736 if (spte != tmplate) {
10737 if (spte != ARM_PTE_TYPE_FAULT) {
10738 WRITE_PTE_STRONG(pte_p, tmplate);
10739 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
10740 pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap);
10741 tlb_flush_needed = TRUE;
10742 } else {
10743 WRITE_PTE(pte_p, tmplate);
10744 __builtin_arm_isb(ISB_SY);
10745 }
10746 result = TRUE;
10747 }
10748
10749 #ifdef PVH_FLAG_IOMMU
10750 cff_skip_pve:
10751 #endif
10752 pte_p = PT_ENTRY_NULL;
10753 if (pve_p != PV_ENTRY_NULL) {
10754 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
10755 }
10756 }
10757 if (tlb_flush_needed) {
10758 sync_tlb_flush();
10759 }
10760 return result;
10761 }
10762
10763 /*
10764 * Determine if the fault was induced by software tracking of
10765 * modify/reference bits. If so, re-enable the mapping (and set
10766 * the appropriate bits).
10767 *
10768 * Returns KERN_SUCCESS if the fault was induced and was
10769 * successfully handled.
10770 *
10771 * Returns KERN_FAILURE if the fault was not induced and
10772 * the function was unable to deal with it.
10773 *
10774 * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
10775 * disallows this type of access.
10776 */
10777 MARK_AS_PMAP_TEXT static kern_return_t
10778 arm_fast_fault_internal(
10779 pmap_t pmap,
10780 vm_map_address_t va,
10781 vm_prot_t fault_type,
10782 __unused bool was_af_fault,
10783 __unused bool from_user)
10784 {
10785 kern_return_t result = KERN_FAILURE;
10786 pt_entry_t *ptep;
10787 pt_entry_t spte = ARM_PTE_TYPE_FAULT;
10788 int pai;
10789 pmap_paddr_t pa;
10790 VALIDATE_PMAP(pmap);
10791
10792 pmap_lock_ro(pmap);
10793
10794 /*
10795 * If the entry doesn't exist, is completely invalid, or is already
10796 * valid, we can't fix it here.
10797 */
10798
10799 ptep = pmap_pte(pmap, va);
10800 if (ptep != PT_ENTRY_NULL) {
10801 while (true) {
10802 spte = *((volatile pt_entry_t*)ptep);
10803
10804 pa = pte_to_pa(spte);
10805
10806 if ((spte == ARM_PTE_TYPE_FAULT) ||
10807 ARM_PTE_IS_COMPRESSED(spte, ptep)) {
10808 pmap_unlock_ro(pmap);
10809 return result;
10810 }
10811
10812 if (!pa_valid(pa)) {
10813 pmap_unlock_ro(pmap);
10814 #if XNU_MONITOR
10815 if (pmap_cache_attributes((ppnum_t)atop(pa)) & PP_ATTR_MONITOR) {
10816 return KERN_PROTECTION_FAILURE;
10817 } else
10818 #endif
10819 return result;
10820 }
10821 pai = (int)pa_index(pa);
10822 LOCK_PVH(pai);
10823 break;
10824 }
10825 } else {
10826 pmap_unlock_ro(pmap);
10827 return result;
10828 }
10829
10830
10831 if ((result != KERN_SUCCESS) &&
10832 ((IS_REFFAULT_PAGE(pai)) || ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)))) {
10833 /*
10834 * An attempted access will always clear ref/mod fault state, as
10835 * appropriate for the fault type. arm_clear_fast_fault will
10836 * update the associated PTEs for the page as appropriate; if
10837 * any PTEs are updated, we redrive the access. If the mapping
10838 * does not actually allow for the attempted access, the
10839 * following fault will (hopefully) fail to update any PTEs, and
10840 * thus cause arm_fast_fault to decide that it failed to handle
10841 * the fault.
10842 */
10843 if (IS_REFFAULT_PAGE(pai)) {
10844 CLR_REFFAULT_PAGE(pai);
10845 }
10846 if ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)) {
10847 CLR_MODFAULT_PAGE(pai);
10848 }
10849
10850 if (arm_clear_fast_fault((ppnum_t)atop(pa), fault_type)) {
10851 /*
10852 * Should this preserve KERN_PROTECTION_FAILURE? The
10853 * cost of not doing so is another fault in a case
10854 * that should already result in an exception.
10855 */
10856 result = KERN_SUCCESS;
10857 }
10858 }
10859
10860 /*
10861 * If the PTE already has sufficient permissions, we can report the fault as handled.
10862 * This may happen, for example, if multiple threads trigger roughly simultaneous faults
10863 * on mappings of the same page.
10864 */
10865 if ((result == KERN_FAILURE) && (spte & ARM_PTE_AF)) {
10866 uintptr_t ap_ro, ap_rw, ap_x;
10867 if (pmap == kernel_pmap) {
10868 ap_ro = ARM_PTE_AP(AP_RONA);
10869 ap_rw = ARM_PTE_AP(AP_RWNA);
10870 ap_x = ARM_PTE_NX;
10871 } else {
10872 ap_ro = pt_attr_leaf_ro(pmap_get_pt_attr(pmap));
10873 ap_rw = pt_attr_leaf_rw(pmap_get_pt_attr(pmap));
10874 ap_x = pt_attr_leaf_x(pmap_get_pt_attr(pmap));
10875 }
10876 /*
10877 * NOTE: this doesn't currently handle user-XO mappings. Depending upon the
10878 * hardware they may be xPRR-protected, in which case they'll be handled
10879 * by the is_pte_xprr_protected() case above. Additionally, the exception
10880 * handling path currently does not call arm_fast_fault() without at least
10881 * VM_PROT_READ in fault_type.
10882 */
10883 if (((spte & ARM_PTE_APMASK) == ap_rw) ||
10884 (!(fault_type & VM_PROT_WRITE) && ((spte & ARM_PTE_APMASK) == ap_ro))) {
10885 if (!(fault_type & VM_PROT_EXECUTE) || ((spte & ARM_PTE_XMASK) == ap_x)) {
10886 result = KERN_SUCCESS;
10887 }
10888 }
10889 }
10890
10891 UNLOCK_PVH(pai);
10892 pmap_unlock_ro(pmap);
10893 return result;
10894 }
10895
10896 kern_return_t
10897 arm_fast_fault(
10898 pmap_t pmap,
10899 vm_map_address_t va,
10900 vm_prot_t fault_type,
10901 bool was_af_fault,
10902 __unused bool from_user)
10903 {
10904 kern_return_t result = KERN_FAILURE;
10905
10906 if (va < pmap->min || va >= pmap->max) {
10907 return result;
10908 }
10909
10910 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
10911 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
10912 from_user);
10913
10914 #if (__ARM_VMSA__ == 7)
10915 if (pmap != kernel_pmap) {
10916 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
10917 pmap_t cur_pmap;
10918 pmap_t cur_user_pmap;
10919
10920 cur_pmap = current_pmap();
10921 cur_user_pmap = cpu_data_ptr->cpu_user_pmap;
10922
10923 if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
10924 if (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp) {
10925 pmap_set_pmap(pmap, current_thread());
10926 result = KERN_SUCCESS;
10927 goto done;
10928 }
10929 }
10930 }
10931 #endif
10932
10933 #if XNU_MONITOR
10934 result = arm_fast_fault_ppl(pmap, va, fault_type, was_af_fault, from_user);
10935 #else
10936 result = arm_fast_fault_internal(pmap, va, fault_type, was_af_fault, from_user);
10937 #endif
10938
10939 #if (__ARM_VMSA__ == 7)
10940 done:
10941 #endif
10942
10943 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);
10944
10945 return result;
10946 }
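
/*
 * A minimal illustrative sketch (hypothetical; the real callers are the
 * machine-dependent abort handlers in the trap code): how arm_fast_fault()
 * is intended to sit in front of the full VM fault path. 'false' is passed
 * for was_af_fault here purely for illustration.
 */
static kern_return_t __unused
example_handle_data_abort(pmap_t pmap, vm_map_address_t fault_va, bool is_write, bool from_user)
{
	vm_prot_t fault_type = VM_PROT_READ;

	if (is_write) {
		fault_type |= VM_PROT_WRITE;
	}

	/*
	 * Cheap path first: if the fault was induced only by the software
	 * ref/mod tracking, arm_fast_fault() repairs the PTE(s), returns
	 * KERN_SUCCESS, and the faulting access can simply be retried.
	 */
	if (arm_fast_fault(pmap, fault_va, fault_type, false, from_user) == KERN_SUCCESS) {
		return KERN_SUCCESS;
	}

	/* Otherwise the fault must go to the VM layer (vm_fault() in the real handler). */
	return KERN_FAILURE;
}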
10947
10948 void
10949 pmap_copy_page(
10950 ppnum_t psrc,
10951 ppnum_t pdst)
10952 {
10953 bcopy_phys((addr64_t) (ptoa(psrc)),
10954 (addr64_t) (ptoa(pdst)),
10955 PAGE_SIZE);
10956 }
10957
10958
10959 /*
10960 * pmap_copy_part_page copies part of the specified (machine independent) page.
10961 */
10962 void
10963 pmap_copy_part_page(
10964 ppnum_t psrc,
10965 vm_offset_t src_offset,
10966 ppnum_t pdst,
10967 vm_offset_t dst_offset,
10968 vm_size_t len)
10969 {
10970 bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
10971 (addr64_t) (ptoa(pdst) + dst_offset),
10972 len);
10973 }
10974
10975
10976 /*
10977 * pmap_zero_page zeros the specified (machine independent) page.
10978 */
10979 void
10980 pmap_zero_page(
10981 ppnum_t pn)
10982 {
10983 assert(pn != vm_page_fictitious_addr);
10984 bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
10985 }
10986
10987 /*
10988 * pmap_zero_part_page
10989 * zeros the specified (machine independent) part of a page.
10990 */
10991 void
10992 pmap_zero_part_page(
10993 ppnum_t pn,
10994 vm_offset_t offset,
10995 vm_size_t len)
10996 {
10997 assert(pn != vm_page_fictitious_addr);
10998 assert(offset + len <= PAGE_SIZE);
10999 bzero_phys((addr64_t) (ptoa(pn) + offset), len);
11000 }
11001
11002 void
11003 pmap_map_globals(
11004 void)
11005 {
11006 pt_entry_t *ptep, pte;
11007
11008 ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
11009 assert(ptep != PT_ENTRY_NULL);
11010 assert(*ptep == ARM_PTE_EMPTY);
11011
11012 pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
11013 #if __ARM_KERNEL_PROTECT__
11014 pte |= ARM_PTE_NG;
11015 #endif /* __ARM_KERNEL_PROTECT__ */
11016 pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
11017 #if (__ARM_VMSA__ > 7)
11018 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
11019 #else
11020 pte |= ARM_PTE_SH;
11021 #endif
11022 *ptep = pte;
11023 FLUSH_PTE_RANGE(ptep, (ptep + 1));
11024 PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE, false);
11025 }
11026
11027 vm_offset_t
11028 pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
11029 {
11030 if (__improbable(index >= CPUWINDOWS_MAX)) {
11031 panic("%s: invalid index %u", __func__, index);
11032 }
11033 return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
11034 }
11035
11036 MARK_AS_PMAP_TEXT static unsigned int
11037 pmap_map_cpu_windows_copy_internal(
11038 ppnum_t pn,
11039 vm_prot_t prot,
11040 unsigned int wimg_bits)
11041 {
11042 pt_entry_t *ptep = NULL, pte;
11043 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
11044 unsigned int cpu_num;
11045 unsigned int i;
11046 vm_offset_t cpu_copywindow_vaddr = 0;
11047 bool need_strong_sync = false;
11048
11049 #if XNU_MONITOR
11050 unsigned int cacheattr = (!pa_valid(ptoa(pn)) ? pmap_cache_attributes(pn) : 0);
11051 need_strong_sync = ((cacheattr & PMAP_IO_RANGE_STRONG_SYNC) != 0);
11052 #endif
11053
11054 #if XNU_MONITOR
11055 #ifdef __ARM_COHERENT_IO__
11056 if (__improbable(pa_valid(ptoa(pn)) && !pmap_ppl_disable)) {
11057 panic("%s: attempted to map a managed page, "
11058 "pn=%u, prot=0x%x, wimg_bits=0x%x",
11059 __FUNCTION__,
11060 pn, prot, wimg_bits);
11061 }
11062 if (__improbable((cacheattr & PP_ATTR_MONITOR) && (prot != VM_PROT_READ) && !pmap_ppl_disable)) {
11063 panic("%s: attempt to map PPL-protected I/O address 0x%llx as writable", __func__, (uint64_t)ptoa(pn));
11064 }
11065
11066 #else /* __ARM_COHERENT_IO__ */
11067 #error CPU copy windows are not properly supported with both the PPL and incoherent IO
11068 #endif /* __ARM_COHERENT_IO__ */
11069 #endif /* XNU_MONITOR */
11070 cpu_num = pmap_cpu_data->cpu_number;
11071
11072 for (i = 0; i < CPUWINDOWS_MAX; i++) {
11073 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
11074 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
11075 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
11076 if (*ptep == ARM_PTE_TYPE_FAULT) {
11077 break;
11078 }
11079 }
11080 if (i == CPUWINDOWS_MAX) {
11081 panic("pmap_map_cpu_windows_copy: out of window\n");
11082 }
11083
11084 pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
11085 #if __ARM_KERNEL_PROTECT__
11086 pte |= ARM_PTE_NG;
11087 #endif /* __ARM_KERNEL_PROTECT__ */
11088
11089 pte |= wimg_to_pte(wimg_bits);
11090
11091 if (prot & VM_PROT_WRITE) {
11092 pte |= ARM_PTE_AP(AP_RWNA);
11093 } else {
11094 pte |= ARM_PTE_AP(AP_RONA);
11095 }
11096
11097 WRITE_PTE_FAST(ptep, pte);
11098 /*
11099 * Invalidate tlb. Cover nested cpu_copywindow_vaddr usage with the interrupted context
11100 * in pmap_unmap_cpu_windows_copy() after clearing the pte and before tlb invalidate.
11101 */
11102 FLUSH_PTE_STRONG(ptep);
11103 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[i]);
11104 pmap_cpu_data->copywindow_strong_sync[i] = need_strong_sync;
11105
11106 return i;
11107 }
11108
11109 unsigned int
11110 pmap_map_cpu_windows_copy(
11111 ppnum_t pn,
11112 vm_prot_t prot,
11113 unsigned int wimg_bits)
11114 {
11115 #if XNU_MONITOR
11116 return pmap_map_cpu_windows_copy_ppl(pn, prot, wimg_bits);
11117 #else
11118 return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
11119 #endif
11120 }
11121
11122 MARK_AS_PMAP_TEXT static void
11123 pmap_unmap_cpu_windows_copy_internal(
11124 unsigned int index)
11125 {
11126 pt_entry_t *ptep;
11127 unsigned int cpu_num;
11128 vm_offset_t cpu_copywindow_vaddr = 0;
11129 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
11130
11131 cpu_num = pmap_cpu_data->cpu_number;
11132
11133 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
11134 /* Issue full-system DSB to ensure prior operations on the per-CPU window
11135 * (which are likely to have been on I/O memory) are complete before
11136 * tearing down the mapping. */
11137 __builtin_arm_dsb(DSB_SY);
11138 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
11139 WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
11140 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index]);
11141 }
11142
11143 void
11144 pmap_unmap_cpu_windows_copy(
11145 unsigned int index)
11146 {
11147 #if XNU_MONITOR
11148 return pmap_unmap_cpu_windows_copy_ppl(index);
11149 #else
11150 return pmap_unmap_cpu_windows_copy_internal(index);
11151 #endif
11152 }
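
/*
 * A minimal illustrative sketch (hypothetical helper): the usage pattern for
 * the per-CPU copy windows above. The caller supplies the cacheability
 * (wimg_bits) appropriate for the page; on PPL-enabled systems only
 * non-managed (e.g. I/O) pages may be mapped this way, and the window must be
 * mapped and unmapped without migrating to another CPU.
 */
static uint32_t __unused
example_read_phys_word_via_copy_window(ppnum_t pn, vm_offset_t offset, unsigned int wimg_bits)
{
	uint32_t value;

	/* Copy windows are per-CPU, so stay on this CPU while one is mapped. */
	disable_preemption();

	unsigned int index = pmap_map_cpu_windows_copy(pn, VM_PROT_READ, wimg_bits);
	vm_offset_t window_va = pmap_cpu_windows_copy_addr(
	    (int)pmap_get_cpu_data()->cpu_number, index);

	/* 'offset' is assumed to be a word-aligned offset within the page. */
	value = *(volatile uint32_t *)(window_va + offset);

	pmap_unmap_cpu_windows_copy(index);
	enable_preemption();

	return value;
}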
11153
11154 #if XNU_MONITOR
11155
11156 MARK_AS_PMAP_TEXT void
11157 pmap_invoke_with_page(
11158 ppnum_t page_number,
11159 void *ctx,
11160 void (*callback)(void *ctx, ppnum_t page_number, const void *page))
11161 {
11162 #pragma unused(page_number, ctx, callback)
11163 }
11164
11165 /*
11166 * Loop over every pmap_io_range (I/O ranges marked as owned by
11167 * the PPL in the device tree) and conditionally call callback() on each range
11168 * that needs to be included in the hibernation image.
11169 *
11170 * @param ctx Will be passed as-is into the callback method. Use NULL if no
11171 * context is needed in the callback.
11172 * @param callback Callback function invoked on each range (gated by flag).
11173 */
11174 MARK_AS_PMAP_TEXT void
11175 pmap_hibernate_invoke(void *ctx, void (*callback)(void *ctx, uint64_t addr, uint64_t len))
11176 {
11177 for (unsigned int i = 0; i < num_io_rgns; ++i) {
11178 if (io_attr_table[i].wimg & PMAP_IO_RANGE_NEEDS_HIBERNATING) {
11179 callback(ctx, io_attr_table[i].addr, io_attr_table[i].len);
11180 }
11181 }
11182 }
11183
11184 /**
11185 * Set the HASHED pv_head_table flag for the passed in physical page if it's a
11186 * PPL-owned page. Otherwise, do nothing.
11187 *
11188 * @param addr Physical address of the page to set the HASHED flag on.
11189 */
11190 MARK_AS_PMAP_TEXT void
11191 pmap_set_ppl_hashed_flag(const pmap_paddr_t addr)
11192 {
11193 /* Ignore non-managed kernel memory. */
11194 if (!pa_valid(addr)) {
11195 return;
11196 }
11197
11198 const int pai = (int)pa_index(addr);
11199 if (pp_attr_table[pai] & PP_ATTR_MONITOR) {
11200 pv_entry_t **pv_h = pai_to_pvh(pai);
11201
11202 /* Mark that the PPL-owned page has been hashed into the hibernation image. */
11203 LOCK_PVH(pai);
11204 pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_HASHED);
11205 UNLOCK_PVH(pai);
11206 }
11207 }
11208
11209 /**
11210 * Loop through every physical page in the system and clear out the HASHED flag
11211 * on every PPL-owned page. That flag is used to keep track of which pages have
11212 * been hashed into the hibernation image during the hibernation entry process.
11213 *
11214 * The HASHED flag needs to be cleared out between hibernation cycles because the
11215 * pv_head_table and pp_attr_table might have been copied into the hibernation
11216 * image with the HASHED flag set on certain pages. It's important to clear the
11217 * HASHED flag to ensure that the enforcement of all PPL-owned memory being hashed
11218 * into the hibernation image can't be compromised across hibernation cycles.
11219 */
11220 MARK_AS_PMAP_TEXT void
11221 pmap_clear_ppl_hashed_flag_all(void)
11222 {
11223 const int last_index = (int)pa_index(vm_last_phys);
11224 pv_entry_t **pv_h = NULL;
11225
11226 for (int pai = 0; pai < last_index; ++pai) {
11227 pv_h = pai_to_pvh(pai);
11228
11229 /* Test for PPL-owned pages that have the HASHED flag set in their pv_head_table entries. */
11230 if ((pvh_get_flags(pv_h) & PVH_FLAG_HASHED) &&
11231 (pp_attr_table[pai] & PP_ATTR_MONITOR)) {
11232 LOCK_PVH(pai);
11233 pvh_set_flags(pv_h, pvh_get_flags(pv_h) & ~PVH_FLAG_HASHED);
11234 UNLOCK_PVH(pai);
11235 }
11236 }
11237 }
11238
11239 /**
11240 * Enforce that all PPL-owned pages were hashed into the hibernation image. The
11241 * ppl_hib driver will call this after all wired pages have been copied into the
11242 * hibernation image.
11243 */
11244 MARK_AS_PMAP_TEXT void
11245 pmap_check_ppl_hashed_flag_all(void)
11246 {
11247 const int last_index = (int)pa_index(vm_last_phys);
11248 pv_entry_t **pv_h = NULL;
11249
11250 for (int pai = 0; pai < last_index; ++pai) {
11251 pv_h = pai_to_pvh(pai);
11252
11253 /**
11254 * The PMAP stacks are explicitly not saved into the image, so skip checking
11255 * the pages that contain the PMAP stacks.
11256 */
11257 const bool is_pmap_stack = (pai >= (int)pa_index(pmap_stacks_start_pa)) &&
11258 (pai < (int)pa_index(pmap_stacks_end_pa));
11259
11260 if (!is_pmap_stack &&
11261 (pp_attr_table[pai] & PP_ATTR_MONITOR) &&
11262 !(pvh_get_flags(pv_h) & PVH_FLAG_HASHED)) {
11263 panic("Found PPL-owned page that was not hashed into the hibernation image: pai %d", pai);
11264 }
11265 }
11266 }
11267
11268 #endif /* XNU_MONITOR */
11269
11270 /*
11271 * Indicate that a pmap is intended to be used as a nested pmap
11272 * within one or more larger address spaces. This must be set
11273 * before pmap_nest() is called with this pmap as the 'subordinate'.
11274 */
11275 MARK_AS_PMAP_TEXT static void
11276 pmap_set_nested_internal(
11277 pmap_t pmap)
11278 {
11279 VALIDATE_PMAP(pmap);
11280 pmap->nested = TRUE;
11281 pmap_get_pt_ops(pmap)->free_id(pmap);
11282 }
11283
11284 void
11285 pmap_set_nested(
11286 pmap_t pmap)
11287 {
11288 #if XNU_MONITOR
11289 pmap_set_nested_ppl(pmap);
11290 #else
11291 pmap_set_nested_internal(pmap);
11292 #endif
11293 }
11294
11295 /*
11296 * pmap_trim_range(pmap, start, end)
11297 *
11298 * pmap = pmap to operate on
11299 * start = start of the range
11300 * end = end of the range
11301 *
11302 * Attempts to deallocate TTEs for the given range within the nested region.
11303 */
11304 MARK_AS_PMAP_TEXT static void
11305 pmap_trim_range(
11306 pmap_t pmap,
11307 addr64_t start,
11308 addr64_t end)
11309 {
11310 addr64_t cur;
11311 addr64_t nested_region_start;
11312 addr64_t nested_region_end;
11313 addr64_t adjusted_start;
11314 addr64_t adjusted_end;
11315 addr64_t adjust_offmask;
11316 tt_entry_t * tte_p;
11317 pt_entry_t * pte_p;
11318 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
11319
11320 if (__improbable(end < start)) {
11321 panic("%s: invalid address range, "
11322 "pmap=%p, start=%p, end=%p",
11323 __func__,
11324 pmap, (void*)start, (void*)end);
11325 }
11326
11327 nested_region_start = pmap->nested_region_addr;
11328 nested_region_end = nested_region_start + pmap->nested_region_size;
11329
11330 if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
11331 panic("%s: range outside nested region %p-%p, "
11332 "pmap=%p, start=%p, end=%p",
11333 __func__, (void *)nested_region_start, (void *)nested_region_end,
11334 pmap, (void*)start, (void*)end);
11335 }
11336
11337 /* Contract the range to TT page boundaries. */
11338 adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
11339 adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
11340 adjusted_end = end & ~adjust_offmask;
11341
11342 /* Iterate over the range, trying to remove TTEs. */
11343 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
11344 pmap_lock(pmap);
11345
11346 tte_p = pmap_tte(pmap, cur);
11347
11348 if (tte_p == (tt_entry_t *) NULL) {
11349 goto done;
11350 }
11351
11352 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
11353 pte_p = (pt_entry_t *) ttetokv(*tte_p);
11354
11355 if ((ptep_get_info(pte_p)->refcnt == 0) &&
11356 (pmap != kernel_pmap)) {
11357 if (pmap->nested == TRUE) {
11358 /* Deallocate for the nested map. */
11359 pmap_tte_deallocate(pmap, cur, cur + PAGE_SIZE, false, tte_p, pt_attr_twig_level(pt_attr));
11360 } else {
11361 /* Just remove for the parent map. */
11362 pmap_tte_remove(pmap, cur, cur + PAGE_SIZE, false, tte_p, pt_attr_twig_level(pt_attr));
11363 }
11364 }
11365 }
11366
11367 done:
11368 pmap_unlock(pmap);
11369 }
11370
11371 #if (__ARM_VMSA__ > 7)
11372 /* Remove empty L2 TTs. */
11373 adjusted_start = ((start + pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL)) & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL));
11374 adjusted_end = end & ~pt_attr_ln_offmask(pt_attr, PMAP_TT_L1_LEVEL);
11375
11376 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_ln_size(pt_attr, PMAP_TT_L1_LEVEL)) {
11377 /* For each L1 entry in our range... */
11378 pmap_lock(pmap);
11379
11380 bool remove_tt1e = true;
11381 tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
11382 tt_entry_t * tt2e_start;
11383 tt_entry_t * tt2e_end;
11384 tt_entry_t * tt2e_p;
11385 tt_entry_t tt1e;
11386
11387 if (tt1e_p == NULL) {
11388 pmap_unlock(pmap);
11389 continue;
11390 }
11391
11392 tt1e = *tt1e_p;
11393
11394 if (tt1e == ARM_TTE_TYPE_FAULT) {
11395 pmap_unlock(pmap);
11396 continue;
11397 }
11398
11399 tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
11400 tt2e_end = &tt2e_start[pt_attr_page_size(pt_attr) / sizeof(*tt2e_start)];
11401
11402 for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
11403 if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
11404 /*
11405 * If any TTEs are populated, don't remove the
11406 * L1 TT.
11407 */
11408 remove_tt1e = false;
11409 }
11410 }
11411
11412 if (remove_tt1e) {
11413 pmap_tte_deallocate(pmap, cur, cur + PAGE_SIZE, false, tt1e_p, PMAP_TT_L1_LEVEL);
11414 }
11415
11416 pmap_unlock(pmap);
11417 }
11418 #endif /* (__ARM_VMSA__ > 7) */
11419 }
11420
11421 /*
11422 * pmap_trim_internal(grand, subord, vstart, size)
11423 *
11424 * grand = pmap subord is nested in
11425 * subord = nested pmap
11426 * vstart = start of the used range in grand
11427 * size = size of the used range
11428 *
11429 * Attempts to trim the shared region page tables down to only cover the given
11430 * range in subord and grand.
11431 */
11432 MARK_AS_PMAP_TEXT static void
11433 pmap_trim_internal(
11434 pmap_t grand,
11435 pmap_t subord,
11436 addr64_t vstart,
11437 uint64_t size)
11438 {
11439 addr64_t vend;
11440 addr64_t adjust_offmask;
11441
11442 if (__improbable(os_add_overflow(vstart, size, &vend))) {
11443 panic("%s: grand addr wraps around, "
11444 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11445 __func__, grand, subord, (void*)vstart, size);
11446 }
11447
11448 VALIDATE_PMAP(grand);
11449 VALIDATE_PMAP(subord);
11450
11451 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
11452
11453 pmap_lock(subord);
11454
11455 if (__improbable(!subord->nested)) {
11456 panic("%s: subord is not nestable, "
11457 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11458 __func__, grand, subord, (void*)vstart, size);
11459 }
11460
11461 if (__improbable(grand->nested)) {
11462 panic("%s: grand is nestable, "
11463 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11464 __func__, grand, subord, (void*)vstart, size);
11465 }
11466
11467 if (__improbable(grand->nested_pmap != subord)) {
11468 panic("%s: grand->nested != subord, "
11469 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11470 __func__, grand, subord, (void*)vstart, size);
11471 }
11472
11473 if (__improbable((size != 0) &&
11474 ((vstart < grand->nested_region_addr) || (vend > (grand->nested_region_addr + grand->nested_region_size))))) {
11475 panic("%s: grand range not in nested region, "
11476 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11477 __func__, grand, subord, (void*)vstart, size);
11478 }
11479
11480
11481 if (!grand->nested_has_no_bounds_ref) {
11482 assert(subord->nested_bounds_set);
11483
11484 if (!grand->nested_bounds_set) {
11485 /* Inherit the bounds from subord. */
11486 grand->nested_region_true_start = subord->nested_region_true_start;
11487 grand->nested_region_true_end = subord->nested_region_true_end;
11488 grand->nested_bounds_set = true;
11489 }
11490
11491 pmap_unlock(subord);
11492 return;
11493 }
11494
11495 if ((!subord->nested_bounds_set) && size) {
11496 adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
11497
11498 subord->nested_region_true_start = vstart;
11499 subord->nested_region_true_end = vend;
11500 subord->nested_region_true_start &= ~adjust_offmask;
11501
11502 if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
11503 panic("%s: padded true end wraps around, "
11504 "grand=%p, subord=%p, vstart=%p, size=%#llx",
11505 __func__, grand, subord, (void*)vstart, size);
11506 }
11507
11508 subord->nested_region_true_end &= ~adjust_offmask;
11509 subord->nested_bounds_set = true;
11510 }
11511
11512 if (subord->nested_bounds_set) {
11513 /* Inherit the bounds from subord. */
11514 grand->nested_region_true_start = subord->nested_region_true_start;
11515 grand->nested_region_true_end = subord->nested_region_true_end;
11516 grand->nested_bounds_set = true;
11517
11518 /* If we know the bounds, we can trim the pmap. */
11519 grand->nested_has_no_bounds_ref = false;
11520 pmap_unlock(subord);
11521 } else {
11522 /* Don't trim if we don't know the bounds. */
11523 pmap_unlock(subord);
11524 return;
11525 }
11526
11527 /* Trim grand to only cover the given range. */
11528 pmap_trim_range(grand, grand->nested_region_addr, grand->nested_region_true_start);
11529 pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_addr + grand->nested_region_size));
11530
11531 /* Try to trim subord. */
11532 pmap_trim_subord(subord);
11533 }
11534
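/*
 * pmap_trim_self(pmap)
 *
 * Drop the no-bounds reference that 'pmap' holds on its nested pmap and, if
 * the nested region's true bounds are known, trim this pmap down to those
 * bounds.  The nested pmap itself is then trimmed via pmap_trim_subord() in
 * case this was the last outstanding no-bounds reference.
 */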
11535 MARK_AS_PMAP_TEXT static void
11536 pmap_trim_self(pmap_t pmap)
11537 {
11538 if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
11539 /* If we have a no bounds ref, we need to drop it. */
11540 pmap_lock_ro(pmap->nested_pmap);
11541 pmap->nested_has_no_bounds_ref = false;
11542 boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
11543 vm_map_offset_t nested_region_true_start = pmap->nested_pmap->nested_region_true_start;
11544 vm_map_offset_t nested_region_true_end = pmap->nested_pmap->nested_region_true_end;
11545 pmap_unlock_ro(pmap->nested_pmap);
11546
11547 if (nested_bounds_set) {
11548 pmap_trim_range(pmap, pmap->nested_region_addr, nested_region_true_start);
11549 pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_addr + pmap->nested_region_size));
11550 }
11551 /*
11552 * Try trimming the nested pmap, in case we had the
11553 * last reference.
11554 */
11555 pmap_trim_subord(pmap->nested_pmap);
11556 }
11557 }
11558
11559 /*
11560 * pmap_trim_subord(subord)
11561 *
11562 * subord = nested pmap we are attempting to trim
11563 *
11564 * Drops a no-bounds reference on subord and, if that was the last such
11565 * reference and subord's true bounds are known, trims subord to those bounds.
11566 */
11567 MARK_AS_PMAP_TEXT static void
11568 pmap_trim_subord(pmap_t subord)
11569 {
11570 bool contract_subord = false;
11571
11572 pmap_lock(subord);
11573
11574 subord->nested_no_bounds_refcnt--;
11575
11576 if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
11577 /* If this was the last no bounds reference, trim subord. */
11578 contract_subord = true;
11579 }
11580
11581 pmap_unlock(subord);
11582
11583 if (contract_subord) {
11584 pmap_trim_range(subord, subord->nested_region_addr, subord->nested_region_true_start);
11585 pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_addr + subord->nested_region_size);
11586 }
11587 }
11588
11589 void
11590 pmap_trim(
11591 pmap_t grand,
11592 pmap_t subord,
11593 addr64_t vstart,
11594 uint64_t size)
11595 {
11596 #if XNU_MONITOR
11597 pmap_trim_ppl(grand, subord, vstart, size);
11598
11599 pmap_ledger_check_balance(grand);
11600 pmap_ledger_check_balance(subord);
11601 #else
11602 pmap_trim_internal(grand, subord, vstart, size);
11603 #endif
11604 }
11605
11606 #if HAS_APPLE_PAC
11607 static void *
11608 pmap_sign_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
11609 {
11610 void *res = NULL;
11611 uint64_t current_intr_state = pmap_interrupts_disable();
11612
11613 uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
11614 switch (key) {
11615 case ptrauth_key_asia:
11616 res = ptrauth_sign_unauthenticated(value, ptrauth_key_asia, discriminator);
11617 break;
11618 case ptrauth_key_asda:
11619 res = ptrauth_sign_unauthenticated(value, ptrauth_key_asda, discriminator);
11620 break;
11621 default:
11622 panic("attempt to sign user pointer without process independent key");
11623 }
11624 ml_disable_user_jop_key(jop_key, saved_jop_state);
11625
11626 pmap_interrupts_restore(current_intr_state);
11627
11628 return res;
11629 }
11630
11631 void *
11632 pmap_sign_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
11633 {
11634 return pmap_sign_user_ptr_internal(value, key, discriminator, jop_key);
11635 }
11636
11637 static void *
11638 pmap_auth_user_ptr_internal(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
11639 {
11640 if ((key != ptrauth_key_asia) && (key != ptrauth_key_asda)) {
11641 panic("attempt to auth user pointer without process independent key");
11642 }
11643
11644 void *res = NULL;
11645 uint64_t current_intr_state = pmap_interrupts_disable();
11646
11647 uint64_t saved_jop_state = ml_enable_user_jop_key(jop_key);
11648 res = ml_auth_ptr_unchecked(value, key, discriminator);
11649 ml_disable_user_jop_key(jop_key, saved_jop_state);
11650
11651 pmap_interrupts_restore(current_intr_state);
11652
11653 return res;
11654 }
11655
11656 void *
11657 pmap_auth_user_ptr(void *value, ptrauth_key key, uint64_t discriminator, uint64_t jop_key)
11658 {
11659 return pmap_auth_user_ptr_internal(value, key, discriminator, jop_key);
11660 }
11661 #endif /* HAS_APPLE_PAC */
11662
11663 /*
11664 * kern_return_t pmap_nest(grand, subord, vstart, size)
11665 *
11666 * grand = the pmap that we will nest subord into
11667 * subord = the pmap that goes into the grand
11668 * vstart = start of range in pmap to be inserted
11669 * size = Size of nest area (up to 16TB)
11670 *
11671 * Inserts a pmap into another. This is used to implement shared segments.
11672 *
11673 */
11674
11675 MARK_AS_PMAP_TEXT static kern_return_t
11676 pmap_nest_internal(
11677 pmap_t grand,
11678 pmap_t subord,
11679 addr64_t vstart,
11680 uint64_t size)
11681 {
11682 kern_return_t kr = KERN_FAILURE;
11683 vm_map_offset_t vaddr;
11684 tt_entry_t *stte_p;
11685 tt_entry_t *gtte_p;
11686 unsigned int i;
11687 unsigned int num_tte;
11688 unsigned int nested_region_asid_bitmap_size;
11689 unsigned int* nested_region_asid_bitmap;
11690 int expand_options = 0;
11691 bool deref_subord = true;
11692 pmap_t __ptrauth_only subord_addr;
11693
11694 addr64_t vend;
11695 if (__improbable(os_add_overflow(vstart, size, &vend))) {
11696 panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
11697 }
11698
11699 VALIDATE_PMAP(grand);
11700 pmap_reference_internal(subord); // This call will also validate subord
11701
11702 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
11703 assert(pmap_get_pt_attr(subord) == pt_attr);
11704
11705 #if XNU_MONITOR
11706 expand_options |= PMAP_TT_ALLOCATE_NOWAIT;
11707 #endif
11708
11709 if (__improbable(((size | vstart) & (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL)) {
11710 panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx\n", grand, vstart, size);
11711 }
11712
11713 if (__improbable(!subord->nested)) {
11714 panic("%s: subordinate pmap %p is not nestable", __func__, subord);
11715 }
11716
11717 if (subord->nested_region_asid_bitmap == NULL) {
11718 nested_region_asid_bitmap_size = (unsigned int)(size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY);
11719
11720 #if XNU_MONITOR
11721 pmap_paddr_t pa = 0;
11722
11723 if (__improbable((nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE)) {
11724 panic("%s: nested_region_asid_bitmap_size=%u will not fit in a page, "
11725 "grand=%p, subord=%p, vstart=0x%llx, size=%llx",
11726 __FUNCTION__, nested_region_asid_bitmap_size,
11727 grand, subord, vstart, size);
11728 }
11729
11730 kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
11731
11732 if (kr != KERN_SUCCESS) {
11733 goto nest_cleanup;
11734 }
11735
11736 assert(pa);
11737
11738 nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
11739 #else
11740 nested_region_asid_bitmap = kheap_alloc(KHEAP_DATA_BUFFERS,
11741 nested_region_asid_bitmap_size * sizeof(unsigned int),
11742 Z_WAITOK | Z_ZERO);
11743 #endif
11744
11745 pmap_lock(subord);
11746 if (subord->nested_region_asid_bitmap == NULL) {
11747 subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
11748 subord->nested_region_addr = vstart;
11749 subord->nested_region_size = (mach_vm_offset_t) size;
11750
11751 /**
11752 * Ensure that the rest of the subord->nested_region_* fields are
11753 * initialized and visible before setting the nested_region_asid_bitmap
11754 * field (which is used as the flag to say that the rest are initialized).
11755 */
11756 __builtin_arm_dmb(DMB_ISHST);
11757 subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
11758 nested_region_asid_bitmap = NULL;
11759 }
11760 pmap_unlock(subord);
11761 if (nested_region_asid_bitmap != NULL) {
11762 #if XNU_MONITOR
11763 pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
11764 #else
11765 kheap_free(KHEAP_DATA_BUFFERS, nested_region_asid_bitmap,
11766 nested_region_asid_bitmap_size * sizeof(unsigned int));
11767 #endif
11768 }
11769 }
11770
11771 /**
11772 * Ensure subsequent reads of the subord->nested_region_* fields don't get
11773 * speculated before their initialization.
11774 */
11775 __builtin_arm_dmb(DMB_ISHLD);
11776
11777 if ((subord->nested_region_addr + subord->nested_region_size) < vend) {
11778 uint64_t new_size;
11779 unsigned int new_nested_region_asid_bitmap_size;
11780 unsigned int* new_nested_region_asid_bitmap;
11781
11782 nested_region_asid_bitmap = NULL;
11783 nested_region_asid_bitmap_size = 0;
11784 new_size = vend - subord->nested_region_addr;
11785
11786 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
11787 new_nested_region_asid_bitmap_size = (unsigned int)((new_size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY)) + 1;
11788
11789 #if XNU_MONITOR
11790 pmap_paddr_t pa = 0;
11791
11792 if (__improbable((new_nested_region_asid_bitmap_size * sizeof(unsigned int)) > PAGE_SIZE)) {
11793 panic("%s: new_nested_region_asid_bitmap_size=%u will not fit in a page, "
11794 "grand=%p, subord=%p, vstart=0x%llx, new_size=%llx",
11795 __FUNCTION__, new_nested_region_asid_bitmap_size,
11796 grand, subord, vstart, new_size);
11797 }
11798
11799 kr = pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
11800
11801 if (kr != KERN_SUCCESS) {
11802 goto nest_cleanup;
11803 }
11804
11805 assert(pa);
11806
11807 new_nested_region_asid_bitmap = (unsigned int *)phystokv(pa);
11808 #else
11809 new_nested_region_asid_bitmap = kheap_alloc(KHEAP_DATA_BUFFERS,
11810 new_nested_region_asid_bitmap_size * sizeof(unsigned int),
11811 Z_WAITOK | Z_ZERO);
11812 #endif
11813 pmap_lock(subord);
11814 if (subord->nested_region_size < new_size) {
11815 bcopy(subord->nested_region_asid_bitmap,
11816 new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
11817 nested_region_asid_bitmap_size = subord->nested_region_asid_bitmap_size;
11818 nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
11819 subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
11820 subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
11821 subord->nested_region_size = new_size;
11822 new_nested_region_asid_bitmap = NULL;
11823 }
11824 pmap_unlock(subord);
11825 if (nested_region_asid_bitmap != NULL) {
11826 #if XNU_MONITOR
11827 pmap_pages_free(kvtophys((vm_offset_t)nested_region_asid_bitmap), PAGE_SIZE);
11828 #else
11829 kheap_free(KHEAP_DATA_BUFFERS, nested_region_asid_bitmap,
11830 nested_region_asid_bitmap_size * sizeof(unsigned int));
11831 #endif
11832 }
11833 if (new_nested_region_asid_bitmap != NULL) {
11834 #if XNU_MONITOR
11835 pmap_pages_free(kvtophys((vm_offset_t)new_nested_region_asid_bitmap), PAGE_SIZE);
11836 #else
11837 kheap_free(KHEAP_DATA_BUFFERS, new_nested_region_asid_bitmap,
11838 new_nested_region_asid_bitmap_size * sizeof(unsigned int));
11839 #endif
11840 }
11841 }
11842
11843 pmap_lock(subord);
11844
11845 #if __has_feature(ptrauth_calls)
11846 subord_addr = ptrauth_sign_unauthenticated(subord,
11847 ptrauth_key_process_independent_data,
11848 ptrauth_blend_discriminator(&grand->nested_pmap, ptrauth_string_discriminator("pmap.nested_pmap")));
11849 #else
11850 subord_addr = subord;
11851 #endif // __has_feature(ptrauth_calls)
11852
11853 if (os_atomic_cmpxchg(&grand->nested_pmap, PMAP_NULL, subord_addr, relaxed)) {
11854 /*
11855 * If this is grand's first nesting operation, keep the reference on subord.
11856 * It will be released by pmap_destroy_internal() when grand is destroyed.
11857 */
11858 deref_subord = false;
11859
11860 if (!subord->nested_bounds_set) {
11861 /*
11862 * We are nesting without the shared region's bounds
11863 * being known. We'll have to trim the pmap later.
11864 */
11865 grand->nested_has_no_bounds_ref = true;
11866 subord->nested_no_bounds_refcnt++;
11867 }
11868
11869 grand->nested_region_addr = vstart;
11870 grand->nested_region_size = (mach_vm_offset_t) size;
11871 } else {
11872 if (__improbable(grand->nested_pmap != subord)) {
11873 panic("pmap_nest() pmap %p has a nested pmap\n", grand);
11874 } else if (__improbable(grand->nested_region_addr > vstart)) {
11875 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand);
11876 } else if ((grand->nested_region_addr + grand->nested_region_size) < vend) {
11877 grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_addr + size);
11878 }
11879 }
11880
11881 #if (__ARM_VMSA__ == 7)
11882 vaddr = (vm_map_offset_t) vstart;
11883 num_tte = size >> ARM_TT_L1_SHIFT;
11884
11885 for (i = 0; i < num_tte; i++) {
11886 if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
11887 goto expand_next;
11888 }
11889
11890 stte_p = pmap_tte(subord, vaddr);
11891 if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
11892 pmap_unlock(subord);
11893 kr = pmap_expand(subord, vaddr, expand_options, PMAP_TT_L2_LEVEL);
11894
11895 if (kr != KERN_SUCCESS) {
11896 pmap_lock(grand);
11897 goto done;
11898 }
11899
11900 pmap_lock(subord);
11901 }
11902 pmap_unlock(subord);
11903 pmap_lock(grand);
11904 stte_p = pmap_tte(grand, vaddr);
11905 if (stte_p == (tt_entry_t *)NULL) {
11906 pmap_unlock(grand);
11907 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);
11908
11909 if (kr != KERN_SUCCESS) {
11910 pmap_lock(grand);
11911 goto done;
11912 }
11913 } else {
11914 pmap_unlock(grand);
11915 kr = KERN_SUCCESS;
11916 }
11917 pmap_lock(subord);
11918
11919 expand_next:
11920 vaddr += ARM_TT_L1_SIZE;
11921 }
11922
11923 #else
11924 vaddr = (vm_map_offset_t) vstart;
11925 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
11926
11927 for (i = 0; i < num_tte; i++) {
11928 if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
11929 goto expand_next;
11930 }
11931
11932 stte_p = pmap_tte(subord, vaddr);
11933 if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
11934 pmap_unlock(subord);
11935 kr = pmap_expand(subord, vaddr, expand_options, pt_attr_leaf_level(pt_attr));
11936
11937 if (kr != KERN_SUCCESS) {
11938 pmap_lock(grand);
11939 goto done;
11940 }
11941
11942 pmap_lock(subord);
11943 }
11944 expand_next:
11945 vaddr += pt_attr_twig_size(pt_attr);
11946 }
11947 #endif
11948 pmap_unlock(subord);
11949
11950 /*
11951 * copy TTEs from subord pmap into grand pmap
11952 */
11953
11954 pmap_lock(grand);
11955 vaddr = (vm_map_offset_t) vstart;
11956
11957
11958 #if (__ARM_VMSA__ == 7)
11959 for (i = 0; i < num_tte; i++) {
11960 if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
11961 goto nest_next;
11962 }
11963
11964 stte_p = pmap_tte(subord, vaddr);
11965 gtte_p = pmap_tte(grand, vaddr);
11966 *gtte_p = *stte_p;
11967
11968 nest_next:
11969 vaddr += ARM_TT_L1_SIZE;
11970 }
11971 #else
11972 for (i = 0; i < num_tte; i++) {
11973 if (((subord->nested_region_true_start) > vaddr) || ((subord->nested_region_true_end) <= vaddr)) {
11974 goto nest_next;
11975 }
11976
11977 stte_p = pmap_tte(subord, vaddr);
11978 gtte_p = pmap_tte(grand, vaddr);
11979 if (gtte_p == PT_ENTRY_NULL) {
11980 pmap_unlock(grand);
11981 kr = pmap_expand(grand, vaddr, expand_options, pt_attr_twig_level(pt_attr));
11982 pmap_lock(grand);
11983
11984 if (kr != KERN_SUCCESS) {
11985 goto done;
11986 }
11987
11988 gtte_p = pmap_tt2e(grand, vaddr);
11989 }
11990 *gtte_p = *stte_p;
11991
11992 nest_next:
11993 vaddr += pt_attr_twig_size(pt_attr);
11994 }
11995 #endif
11996
11997 kr = KERN_SUCCESS;
11998 done:
11999
12000 stte_p = pmap_tte(grand, vstart);
12001 FLUSH_PTE_RANGE_STRONG(stte_p, stte_p + num_tte);
12002 PMAP_UPDATE_TLBS(grand, vstart, vend, false);
12003
12004 pmap_unlock(grand);
12005 #if XNU_MONITOR
12006 nest_cleanup:
12007 #endif
12008 if (deref_subord) {
12009 pmap_destroy_internal(subord);
12010 }
12011 return kr;
12012 }
12013
12014 kern_return_t
12015 pmap_nest(
12016 pmap_t grand,
12017 pmap_t subord,
12018 addr64_t vstart,
12019 uint64_t size)
12020 {
12021 kern_return_t kr = KERN_FAILURE;
12022
12023 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
12024 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
12025 VM_KERNEL_ADDRHIDE(vstart));
12026
12027 #if XNU_MONITOR
12028 while ((kr = pmap_nest_ppl(grand, subord, vstart, size)) == KERN_RESOURCE_SHORTAGE) {
12029 pmap_alloc_page_for_ppl(0);
12030 }
12031
12032 pmap_ledger_check_balance(grand);
12033 pmap_ledger_check_balance(subord);
12034 #else
12035 kr = pmap_nest_internal(grand, subord, vstart, size);
12036 #endif
12037
12038 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);
12039
12040 return kr;
12041 }
12042
12043 /*
12044 * kern_return_t pmap_unnest(grand, vaddr, size)
12045 *
12046 * grand = the pmap that will have the virtual range unnested
12047 * vaddr = start of range in pmap to be unnested
12048 * size = size of range in pmap to be unnested
12049 *
12050 */
12051
12052 kern_return_t
12053 pmap_unnest(
12054 pmap_t grand,
12055 addr64_t vaddr,
12056 uint64_t size)
12057 {
12058 return pmap_unnest_options(grand, vaddr, size, 0);
12059 }
12060
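/*
 * Unless PMAP_UNNEST_CLEAN is passed, each twig of the range is marked in the
 * nested pmap's ASID bitmap and its leaf PTEs are made non-global
 * (ARM_PTE_NG) so the mappings become per-ASID.  The corresponding TTEs in
 * 'grand' are then invalidated and the TLB is flushed for the range.
 */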
12061 MARK_AS_PMAP_TEXT static kern_return_t
12062 pmap_unnest_options_internal(
12063 pmap_t grand,
12064 addr64_t vaddr,
12065 uint64_t size,
12066 unsigned int option)
12067 {
12068 vm_map_offset_t start;
12069 vm_map_offset_t addr;
12070 tt_entry_t *tte_p;
12071 unsigned int current_index;
12072 unsigned int start_index;
12073 unsigned int max_index;
12074 unsigned int num_tte;
12075 unsigned int i;
12076
12077 addr64_t vend;
12078 if (__improbable(os_add_overflow(vaddr, size, &vend))) {
12079 panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__, grand, vaddr, size);
12080 }
12081
12082 VALIDATE_PMAP(grand);
12083
12084 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
12085
12086 if (((size | vaddr) & pt_attr_twig_offmask(pt_attr)) != 0x0ULL) {
12087 panic("pmap_unnest(): unaligned request");
12088 }
12089
12090 if ((option & PMAP_UNNEST_CLEAN) == 0) {
12091 if (grand->nested_pmap == NULL) {
12092 panic("%s: %p has no nested pmap", __func__, grand);
12093 }
12094
12095 if ((vaddr < grand->nested_region_addr) || (vend > (grand->nested_region_addr + grand->nested_region_size))) {
12096 panic("%s: %p: unnest request for not-fully-nested region [%p, %p)", __func__, grand, (void*)vaddr, (void*)vend);
12097 }
12098
12099 pmap_lock(grand->nested_pmap);
12100
12101 start = vaddr;
12102 start_index = (unsigned int)((vaddr - grand->nested_region_addr) >> pt_attr_twig_shift(pt_attr));
12103 max_index = (unsigned int)(start_index + (size >> pt_attr_twig_shift(pt_attr)));
12104 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
12105
12106 for (current_index = start_index, addr = start; current_index < max_index; current_index++, addr += pt_attr_twig_size(pt_attr)) {
12107 pt_entry_t *bpte, *epte, *cpte;
12108
12109 if (addr < grand->nested_pmap->nested_region_true_start) {
12110 /* We haven't reached the interesting range. */
12111 continue;
12112 }
12113
12114 if (addr >= grand->nested_pmap->nested_region_true_end) {
12115 /* We're done with the interesting range. */
12116 break;
12117 }
12118
12119 bpte = pmap_pte(grand->nested_pmap, addr);
12120 epte = bpte + (pt_attr_leaf_index_mask(pt_attr) >> pt_attr_leaf_shift(pt_attr));
12121
12122 if (!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap)) {
12123 setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);
12124
12125 for (cpte = bpte; cpte <= epte; cpte++) {
12126 pmap_paddr_t pa;
12127 int pai = 0;
12128 boolean_t managed = FALSE;
12129 pt_entry_t spte;
12130
12131 if ((*cpte != ARM_PTE_TYPE_FAULT)
12132 && (!ARM_PTE_IS_COMPRESSED(*cpte, cpte))) {
12133 spte = *((volatile pt_entry_t*)cpte);
12134 while (!managed) {
12135 pa = pte_to_pa(spte);
12136 if (!pa_valid(pa)) {
12137 break;
12138 }
12139 pai = (int)pa_index(pa);
12140 LOCK_PVH(pai);
12141 spte = *((volatile pt_entry_t*)cpte);
12142 pa = pte_to_pa(spte);
12143 if (pai == (int)pa_index(pa)) {
12144 managed = TRUE;
12145 break; // Leave the PVH locked as we'll unlock it after we update the PTE
12146 }
12147 UNLOCK_PVH(pai);
12148 }
12149
12150 if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
12151 WRITE_PTE_FAST(cpte, (spte | ARM_PTE_NG));
12152 }
12153
12154 if (managed) {
12155 ASSERT_PVH_LOCKED(pai);
12156 UNLOCK_PVH(pai);
12157 }
12158 }
12159 }
12160 }
12161
12162 FLUSH_PTE_RANGE_STRONG(bpte, epte);
12163 }
12164
12165 flush_mmu_tlb_region_asid_async(vaddr, (unsigned)size, grand->nested_pmap);
12166 sync_tlb_flush();
12167
12168 pmap_unlock(grand->nested_pmap);
12169 }
12170
12171 pmap_lock(grand);
12172
12173 /*
12174 * invalidate all TTEs for the segment at vaddr in pmap grand
12175 */
12176 start = vaddr;
12177 addr = vaddr;
12178
12179 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
12180
12181 for (i = 0; i < num_tte; i++, addr += pt_attr_twig_size(pt_attr)) {
12182 if (addr < grand->nested_pmap->nested_region_true_start) {
12183 /* We haven't reached the interesting range. */
12184 continue;
12185 }
12186
12187 if (addr >= grand->nested_pmap->nested_region_true_end) {
12188 /* We're done with the interesting range. */
12189 break;
12190 }
12191
12192 tte_p = pmap_tte(grand, addr);
12193 *tte_p = ARM_TTE_TYPE_FAULT;
12194 }
12195
12196 tte_p = pmap_tte(grand, start);
12197 FLUSH_PTE_RANGE_STRONG(tte_p, tte_p + num_tte);
12198 PMAP_UPDATE_TLBS(grand, start, vend, false);
12199
12200 pmap_unlock(grand);
12201
12202 return KERN_SUCCESS;
12203 }
12204
12205 kern_return_t
12206 pmap_unnest_options(
12207 pmap_t grand,
12208 addr64_t vaddr,
12209 uint64_t size,
12210 unsigned int option)
12211 {
12212 kern_return_t kr = KERN_FAILURE;
12213
12214 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
12215 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
12216
12217 #if XNU_MONITOR
12218 kr = pmap_unnest_options_ppl(grand, vaddr, size, option);
12219 #else
12220 kr = pmap_unnest_options_internal(grand, vaddr, size, option);
12221 #endif
12222
12223 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);
12224
12225 return kr;
12226 }
12227
12228 boolean_t
12229 pmap_adjust_unnest_parameters(
12230 __unused pmap_t p,
12231 __unused vm_map_offset_t *s,
12232 __unused vm_map_offset_t *e)
12233 {
12234 return TRUE; /* to get to log_unnest_badness()... */
12235 }
12236
12237 /*
12238 * disable no-execute capability on
12239 * the specified pmap
12240 */
12241 #if DEVELOPMENT || DEBUG
12242 void
12243 pmap_disable_NX(
12244 pmap_t pmap)
12245 {
12246 pmap->nx_enabled = FALSE;
12247 }
12248 #else
12249 void
12250 pmap_disable_NX(
12251 __unused pmap_t pmap)
12252 {
12253 }
12254 #endif
12255
12256 /*
12257 * flush a range of hardware TLB entries.
12258 * NOTE: assumes the smallest TLB entry in use will be for
12259 * an ARM small page (4K).
12260 */
12261
12262 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
12263
12264 #if __ARM_RANGE_TLBI__
12265 #define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
12266 #define ARM64_FULL_TLB_FLUSH_THRESHOLD ARM64_TLB_RANGE_PAGES
12267 #else
12268 #define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
12269 #endif // __ARM_RANGE_TLBI__
12270
12271 static void
12272 flush_mmu_tlb_region_asid_async(
12273 vm_offset_t va,
12274 size_t length,
12275 pmap_t pmap)
12276 {
12277 #if (__ARM_VMSA__ == 7)
12278 vm_offset_t end = va + length;
12279 uint32_t asid;
12280
12281 asid = pmap->hw_asid;
12282
12283 if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
12284 boolean_t flush_all = FALSE;
12285
12286 if ((asid == 0) || (pmap->nested == TRUE)) {
12287 flush_all = TRUE;
12288 }
12289 if (flush_all) {
12290 flush_mmu_tlb_async();
12291 } else {
12292 flush_mmu_tlb_asid_async(asid);
12293 }
12294
12295 return;
12296 }
12297 if (pmap->nested == TRUE) {
12298 #if !__ARM_MP_EXT__
12299 flush_mmu_tlb();
12300 #else
12301 va = arm_trunc_page(va);
12302 while (va < end) {
12303 flush_mmu_tlb_mva_entries_async(va);
12304 va += ARM_SMALL_PAGE_SIZE;
12305 }
12306 #endif
12307 return;
12308 }
12309 va = arm_trunc_page(va) | (asid & 0xff);
12310 flush_mmu_tlb_entries_async(va, end);
12311
12312 #else
12313 unsigned long pmap_page_shift = pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
12314 const uint64_t pmap_page_size = 1ULL << pmap_page_shift;
12315 ppnum_t npages = (ppnum_t)(length >> pmap_page_shift);
12316 uint32_t asid;
12317
12318 asid = pmap->hw_asid;
12319
12320 if (npages > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
12321 boolean_t flush_all = FALSE;
12322
12323 if ((asid == 0) || (pmap->nested == TRUE)) {
12324 flush_all = TRUE;
12325 }
12326 if (flush_all) {
12327 flush_mmu_tlb_async();
12328 } else {
12329 flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
12330 }
12331 return;
12332 }
12333 #if __ARM_RANGE_TLBI__
12334 if (npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
12335 va = generate_rtlbi_param(npages, asid, va, pmap_page_shift);
12336 if (pmap->nested == TRUE) {
12337 flush_mmu_tlb_allrange_async(va);
12338 } else {
12339 flush_mmu_tlb_range_async(va);
12340 }
12341 return;
12342 }
12343 #endif
12344 vm_offset_t end = tlbi_asid(asid) | tlbi_addr(va + length);
12345 va = tlbi_asid(asid) | tlbi_addr(va);
12346
12347 if (pmap->nested == TRUE) {
12348 flush_mmu_tlb_allentries_async(va, end, pmap_page_size);
12349 } else {
12350 flush_mmu_tlb_entries_async(va, end, pmap_page_size);
12351 }
12352
12353 #endif
12354 }
12355
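/*
 * Queue invalidation of every TLB entry tagged with this pmap's ASID.  As
 * with the other *_async flush routines, the caller is expected to follow up
 * with sync_tlb_flush().
 */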
12356 MARK_AS_PMAP_TEXT static void
12357 flush_mmu_tlb_full_asid_async(pmap_t pmap)
12358 {
12359 #if (__ARM_VMSA__ == 7)
12360 flush_mmu_tlb_asid_async(pmap->hw_asid);
12361 #else /* (__ARM_VMSA__ == 7) */
12362 flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
12363 #endif /* (__ARM_VMSA__ == 7) */
12364 }
12365
12366 void
12367 flush_mmu_tlb_region(
12368 vm_offset_t va,
12369 unsigned length)
12370 {
12371 flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
12372 sync_tlb_flush();
12373 }
12374
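/*
 * Binary-search the (address-sorted) io_attr_table for the I/O range that
 * contains 'paddr'.  Returns NULL if the address is not covered by any
 * registered range.
 */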
12375 static pmap_io_range_t*
12376 pmap_find_io_attr(pmap_paddr_t paddr)
12377 {
12378 pmap_io_range_t find_range = {.addr = paddr & ~PAGE_MASK, .len = PAGE_SIZE};
12379 unsigned int begin = 0, end = num_io_rgns - 1;
12380 if ((num_io_rgns == 0) || (paddr < io_attr_table[begin].addr) ||
12381 (paddr >= (io_attr_table[end].addr + io_attr_table[end].len))) {
12382 return NULL;
12383 }
12384
12385 for (;;) {
12386 unsigned int middle = (begin + end) / 2;
12387 int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
12388 if (cmp == 0) {
12389 return &io_attr_table[middle];
12390 } else if (begin == end) {
12391 break;
12392 } else if (cmp > 0) {
12393 begin = middle + 1;
12394 } else {
12395 end = middle;
12396 }
12397 }
12398
12399 return NULL;
12400 }
12401
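/*
 * Return the VM_WIMG_* cache attributes for a physical page.  Non-managed
 * addresses are looked up in the I/O attribute table (defaulting to
 * VM_WIMG_IO); managed pages come from pp_attr_table (defaulting to
 * VM_WIMG_DEFAULT).
 */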
12402 unsigned int
12403 pmap_cache_attributes(
12404 ppnum_t pn)
12405 {
12406 pmap_paddr_t paddr;
12407 int pai;
12408 unsigned int result;
12409 pp_attr_t pp_attr_current;
12410
12411 paddr = ptoa(pn);
12412
12413 assert(vm_last_phys > vm_first_phys); // Check that pmap has been bootstrapped
12414
12415 if (!pa_valid(paddr)) {
12416 pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
12417 return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
12418 }
12419
12420 result = VM_WIMG_DEFAULT;
12421
12422 pai = (int)pa_index(paddr);
12423
12424 pp_attr_current = pp_attr_table[pai];
12425 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
12426 result = pp_attr_current & PP_ATTR_WIMG_MASK;
12427 }
12428 return result;
12429 }
12430
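/*
 * Perform the cache maintenance implied by changing a page's WIMG attributes
 * from wimg_bits_prev to wimg_bits_new, including a forced data-cache clean
 * when the page transitions into VM_WIMG_RT.
 */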
12431 MARK_AS_PMAP_TEXT static void
12432 pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
12433 {
12434 if ((wimg_bits_prev != wimg_bits_new)
12435 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
12436 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
12437 && (wimg_bits_new != VM_WIMG_COPYBACK))
12438 || ((wimg_bits_prev == VM_WIMG_WTHRU)
12439 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
12440 pmap_sync_page_attributes_phys(pn);
12441 }
12442
12443 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
12444 pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
12445 }
12446 }
12447
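/*
 * Update the cache attributes of a managed (and, under XNU_MONITOR, non-PPL)
 * compressor page under the PVH lock, then perform any cache maintenance the
 * attribute change requires.
 */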
12448 MARK_AS_PMAP_TEXT static __unused void
12449 pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
12450 {
12451 pmap_paddr_t paddr = ptoa(pn);
12452 int pai = (int)pa_index(paddr);
12453
12454 if (__improbable(!pa_valid(paddr))) {
12455 panic("%s called on non-managed page 0x%08x", __func__, pn);
12456 }
12457
12458 LOCK_PVH(pai);
12459
12460 #if XNU_MONITOR
12461 if (__improbable(pa_test_monitor(paddr))) {
12462 panic("%s invoked on PPL page 0x%08x", __func__, pn);
12463 }
12464 #endif
12465
12466 pmap_update_cache_attributes_locked(pn, new_cacheattr);
12467
12468 UNLOCK_PVH(pai);
12469
12470 pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
12471 }
12472
12473 void *
12474 pmap_map_compressor_page(ppnum_t pn)
12475 {
12476 #if __ARM_PTE_PHYSMAP__
12477 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
12478 if (cacheattr != VM_WIMG_DEFAULT) {
12479 #if XNU_MONITOR
12480 pmap_update_compressor_page_ppl(pn, cacheattr, VM_WIMG_DEFAULT);
12481 #else
12482 pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
12483 #endif
12484 }
12485 #endif
12486 return (void*)phystokv(ptoa(pn));
12487 }
12488
12489 void
12490 pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
12491 {
12492 #if __ARM_PTE_PHYSMAP__
12493 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
12494 if (cacheattr != VM_WIMG_DEFAULT) {
12495 #if XNU_MONITOR
12496 pmap_update_compressor_page_ppl(pn, VM_WIMG_DEFAULT, cacheattr);
12497 #else
12498 pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
12499 #endif
12500 }
12501 #endif
12502 }
12503
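/*
 * Helper for pmap_batch_set_cache_attributes().  With doit == FALSE this is a
 * preparatory pass that uses *res and platform_cache_batch_wimg() to decide
 * whether the batch can rely on a single deferred cache flush; with
 * doit == TRUE the page's WIMG bits are committed under the PVH lock and, on
 * the final page of the batch, any deferred platform cache flush is issued.
 */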
12504 MARK_AS_PMAP_TEXT static boolean_t
12505 pmap_batch_set_cache_attributes_internal(
12506 ppnum_t pn,
12507 unsigned int cacheattr,
12508 unsigned int page_cnt,
12509 unsigned int page_index,
12510 boolean_t doit,
12511 unsigned int *res)
12512 {
12513 pmap_paddr_t paddr;
12514 int pai;
12515 pp_attr_t pp_attr_current;
12516 pp_attr_t pp_attr_template;
12517 unsigned int wimg_bits_prev, wimg_bits_new;
12518
12519 if (cacheattr & VM_WIMG_USE_DEFAULT) {
12520 cacheattr = VM_WIMG_DEFAULT;
12521 }
12522
12523 if ((doit == FALSE) && (*res == 0)) {
12524 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
12525 *res = page_cnt;
12526 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
12527 if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt << PAGE_SHIFT) == FALSE) {
12528 return FALSE;
12529 }
12530 }
12531
12532 paddr = ptoa(pn);
12533
12534 if (!pa_valid(paddr)) {
12535 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn);
12536 }
12537
12538 pai = (int)pa_index(paddr);
12539
12540 if (doit) {
12541 LOCK_PVH(pai);
12542 #if XNU_MONITOR
12543 if (pa_test_monitor(paddr)) {
12544 panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
12545 }
12546 #endif
12547 }
12548
12549 do {
12550 pp_attr_current = pp_attr_table[pai];
12551 wimg_bits_prev = VM_WIMG_DEFAULT;
12552 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
12553 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
12554 }
12555
12556 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
12557
12558 if (!doit) {
12559 break;
12560 }
12561
12562 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
12563 * to avoid losing simultaneous updates to other bits like refmod. */
12564 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
12565
12566 wimg_bits_new = VM_WIMG_DEFAULT;
12567 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
12568 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
12569 }
12570
12571 if (doit) {
12572 if (wimg_bits_new != wimg_bits_prev) {
12573 pmap_update_cache_attributes_locked(pn, cacheattr);
12574 }
12575 UNLOCK_PVH(pai);
12576 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
12577 pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
12578 }
12579 } else {
12580 if (wimg_bits_new == VM_WIMG_COPYBACK) {
12581 return FALSE;
12582 }
12583 if (wimg_bits_prev == wimg_bits_new) {
12584 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
12585 *res = *res - 1;
12586 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
12587 if (!platform_cache_batch_wimg(wimg_bits_new, (*res) << PAGE_SHIFT)) {
12588 return FALSE;
12589 }
12590 }
12591 return TRUE;
12592 }
12593
12594 if (page_cnt == (page_index + 1)) {
12595 wimg_bits_prev = VM_WIMG_COPYBACK;
12596 if (((wimg_bits_prev != wimg_bits_new))
12597 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
12598 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
12599 && (wimg_bits_new != VM_WIMG_COPYBACK))
12600 || ((wimg_bits_prev == VM_WIMG_WTHRU)
12601 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
12602 platform_cache_flush_wimg(wimg_bits_new);
12603 }
12604 }
12605
12606 return TRUE;
12607 }
12608
12609 boolean_t
12610 pmap_batch_set_cache_attributes(
12611 ppnum_t pn,
12612 unsigned int cacheattr,
12613 unsigned int page_cnt,
12614 unsigned int page_index,
12615 boolean_t doit,
12616 unsigned int *res)
12617 {
12618 #if XNU_MONITOR
12619 return pmap_batch_set_cache_attributes_ppl(pn, cacheattr, page_cnt, page_index, doit, res);
12620 #else
12621 return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
12622 #endif
12623 }
12624
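/*
 * Common path for setting a single page's WIMG attributes: update
 * pp_attr_table with a CAS loop under the PVH lock, rewrite the page's
 * existing mappings if the attributes changed, and perform the required
 * cache maintenance.  Under XNU_MONITOR, 'external' distinguishes requests
 * from outside the PPL, which must target non-PPL pages.
 */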
12625 MARK_AS_PMAP_TEXT static void
12626 pmap_set_cache_attributes_priv(
12627 ppnum_t pn,
12628 unsigned int cacheattr,
12629 boolean_t external __unused)
12630 {
12631 pmap_paddr_t paddr;
12632 int pai;
12633 pp_attr_t pp_attr_current;
12634 pp_attr_t pp_attr_template;
12635 unsigned int wimg_bits_prev, wimg_bits_new;
12636
12637 paddr = ptoa(pn);
12638
12639 if (!pa_valid(paddr)) {
12640 return; /* Not a managed page. */
12641 }
12642
12643 if (cacheattr & VM_WIMG_USE_DEFAULT) {
12644 cacheattr = VM_WIMG_DEFAULT;
12645 }
12646
12647 pai = (int)pa_index(paddr);
12648
12649 LOCK_PVH(pai);
12650
12651 #if XNU_MONITOR
12652 if (external && pa_test_monitor(paddr)) {
12653 panic("%s invoked on PPL page 0x%llx", __func__, (uint64_t)paddr);
12654 } else if (!external && !pa_test_monitor(paddr)) {
12655 panic("%s invoked on non-PPL page 0x%llx", __func__, (uint64_t)paddr);
12656 }
12657 #endif
12658
12659 do {
12660 pp_attr_current = pp_attr_table[pai];
12661 wimg_bits_prev = VM_WIMG_DEFAULT;
12662 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
12663 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
12664 }
12665
12666 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
12667
12668 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
12669 * to avoid losing simultaneous updates to other bits like refmod. */
12670 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
12671
12672 wimg_bits_new = VM_WIMG_DEFAULT;
12673 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
12674 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
12675 }
12676
12677 if (wimg_bits_new != wimg_bits_prev) {
12678 pmap_update_cache_attributes_locked(pn, cacheattr);
12679 }
12680
12681 UNLOCK_PVH(pai);
12682
12683 pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
12684 }
12685
12686 MARK_AS_PMAP_TEXT static void
12687 pmap_set_cache_attributes_internal(
12688 ppnum_t pn,
12689 unsigned int cacheattr)
12690 {
12691 pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
12692 }
12693
12694 void
12695 pmap_set_cache_attributes(
12696 ppnum_t pn,
12697 unsigned int cacheattr)
12698 {
12699 #if XNU_MONITOR
12700 pmap_set_cache_attributes_ppl(pn, cacheattr);
12701 #else
12702 pmap_set_cache_attributes_internal(pn, cacheattr);
12703 #endif
12704 }
12705
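/*
 * With the PVH lock held, rewrite the physical-aperture mapping (if present)
 * and every mapping of 'ppnum' recorded in its PV list so that the memory
 * attribute and shareability PTE bits reflect 'attributes', then flush the
 * affected TLB entries.
 */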
12706 MARK_AS_PMAP_TEXT void
12707 pmap_update_cache_attributes_locked(
12708 ppnum_t ppnum,
12709 unsigned attributes)
12710 {
12711 pmap_paddr_t phys = ptoa(ppnum);
12712 pv_entry_t *pve_p;
12713 pt_entry_t *pte_p;
12714 pv_entry_t **pv_h;
12715 pt_entry_t tmplate;
12716 unsigned int pai;
12717 boolean_t tlb_flush_needed = FALSE;
12718
12719 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);
12720
12721 if (pmap_panic_dev_wimg_on_managed) {
12722 switch (attributes & VM_WIMG_MASK) {
12723 case VM_WIMG_IO: // nGnRnE
12724 case VM_WIMG_POSTED: // nGnRE
12725 /* supported on DRAM, but slow, so we disallow */
12726
12727 case VM_WIMG_POSTED_REORDERED: // nGRE
12728 case VM_WIMG_POSTED_COMBINED_REORDERED: // GRE
12729 /* unsupported on DRAM */
12730
12731 panic("%s: trying to use unsupported VM_WIMG type for managed page, VM_WIMG=%x, ppnum=%#x",
12732 __FUNCTION__, attributes & VM_WIMG_MASK, ppnum);
12733 break;
12734
12735 default:
12736 /* not device type memory, all good */
12737
12738 break;
12739 }
12740 }
12741
12742 #if __ARM_PTE_PHYSMAP__
12743 vm_offset_t kva = phystokv(phys);
12744 pte_p = pmap_pte(kernel_pmap, kva);
12745
12746 tmplate = *pte_p;
12747 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
12748 #if XNU_MONITOR
12749 tmplate |= (wimg_to_pte(attributes) & ~ARM_PTE_XPRR_MASK);
12750 #else
12751 tmplate |= wimg_to_pte(attributes);
12752 #endif
12753 #if (__ARM_VMSA__ > 7)
12754 if (tmplate & ARM_PTE_HINT_MASK) {
12755 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
12756 __FUNCTION__, pte_p, (void *)kva, tmplate);
12757 }
12758 #endif
12759 WRITE_PTE_STRONG(pte_p, tmplate);
12760 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
12761 tlb_flush_needed = TRUE;
12762 #endif
12763
12764 pai = (unsigned int)pa_index(phys);
12765
12766 pv_h = pai_to_pvh(pai);
12767
12768 pte_p = PT_ENTRY_NULL;
12769 pve_p = PV_ENTRY_NULL;
12770 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
12771 pte_p = pvh_ptep(pv_h);
12772 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
12773 pve_p = pvh_list(pv_h);
12774 pte_p = PT_ENTRY_NULL;
12775 }
12776
12777 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
12778 vm_map_address_t va;
12779 pmap_t pmap;
12780
12781 if (pve_p != PV_ENTRY_NULL) {
12782 pte_p = pve_get_ptep(pve_p);
12783 }
12784 #ifdef PVH_FLAG_IOMMU
12785 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
12786 goto cache_skip_pve;
12787 }
12788 #endif
12789 pmap = ptep_get_pmap(pte_p);
12790 va = ptep_get_va(pte_p);
12791
12792 tmplate = *pte_p;
12793 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
12794 tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes);
12795
12796 WRITE_PTE_STRONG(pte_p, tmplate);
12797 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va,
12798 pt_attr_page_size(pmap_get_pt_attr(pmap)) * PAGE_RATIO, pmap);
12799 tlb_flush_needed = TRUE;
12800
12801 #ifdef PVH_FLAG_IOMMU
12802 cache_skip_pve:
12803 #endif
12804 pte_p = PT_ENTRY_NULL;
12805 if (pve_p != PV_ENTRY_NULL) {
12806 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
12807 }
12808 }
12809 if (tlb_flush_needed) {
12810 pmap_sync_tlb((attributes & VM_WIMG_MASK) == VM_WIMG_RT);
12811 }
12812
12813 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);
12814 }
12815
12816 #if (__ARM_VMSA__ == 7)
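/*
 * 32-bit ARM: allocate the single commpage and map it into the kernel pmap at
 * its fixed address.  There is no separate text commpage (no PFZ), so the
 * kernel text and user commpage addresses are returned as 0.
 */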
12817 void
12818 pmap_create_sharedpages(vm_map_address_t *kernel_data_addr, vm_map_address_t *kernel_text_addr,
12819 vm_map_address_t *user_commpage_addr)
12820 {
12821 pmap_paddr_t pa;
12822 kern_return_t kr;
12823
12824 assert(kernel_data_addr != NULL);
12825 assert(kernel_text_addr != NULL);
12826 assert(user_commpage_addr != NULL);
12827
12828 (void) pmap_pages_alloc_zeroed(&pa, PAGE_SIZE, 0);
12829
12830 kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12831 assert(kr == KERN_SUCCESS);
12832
12833 *kernel_data_addr = phystokv(pa);
12834 // We don't have a PFZ on 32-bit ARM; these are always 0
12835 *kernel_text_addr = 0;
12836 *user_commpage_addr = 0;
12837 }
12838
12839 #else /* __ARM_VMSA__ == 7 */
12840
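/*
 * Rewrite the L3 PTE mapping 'address' in 'pmap', keeping its output address
 * but replacing all other bits with 'template'.  Used below to adjust the
 * attributes of the commpage mappings after they are entered.
 */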
12841 static void
12842 pmap_update_tt3e(
12843 pmap_t pmap,
12844 vm_address_t address,
12845 tt_entry_t template)
12846 {
12847 tt_entry_t *ptep, pte;
12848
12849 ptep = pmap_tt3e(pmap, address);
12850 if (ptep == NULL) {
12851 panic("%s: no ptep?\n", __FUNCTION__);
12852 }
12853
12854 pte = *ptep;
12855 pte = tte_to_pa(pte) | template;
12856 WRITE_PTE_STRONG(ptep, pte);
12857 }
12858
12859 /* Note absence of non-global bit */
12860 #define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
12861 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
12862 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
12863 | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
12864
12865 /* Note absence of non-global bit and no-execute bit. */
12866 #define PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
12867 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
12868 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_PNX \
12869 | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
12870
12871 void
12872 pmap_create_sharedpages(vm_map_address_t *kernel_data_addr, vm_map_address_t *kernel_text_addr,
12873 vm_map_address_t *user_text_addr)
12874 {
12875 kern_return_t kr;
12876 pmap_paddr_t data_pa = 0; // data address
12877 pmap_paddr_t text_pa = 0; // text address
12878
12879 *kernel_data_addr = 0;
12880 *kernel_text_addr = 0;
12881 *user_text_addr = 0;
12882
12883 #if XNU_MONITOR
12884 data_pa = pmap_alloc_page_for_kern(0);
12885 assert(data_pa);
12886 memset((char *) phystokv(data_pa), 0, PAGE_SIZE);
12887 #if CONFIG_ARM_PFZ
12888 text_pa = pmap_alloc_page_for_kern(0);
12889 assert(text_pa);
12890 memset((char *) phystokv(text_pa), 0, PAGE_SIZE);
12891 #endif
12892
12893 #else /* XNU_MONITOR */
12894 (void) pmap_pages_alloc_zeroed(&data_pa, PAGE_SIZE, 0);
12895 #if CONFIG_ARM_PFZ
12896 (void) pmap_pages_alloc_zeroed(&text_pa, PAGE_SIZE, 0);
12897 #endif
12898
12899 #endif /* XNU_MONITOR */
12900
12901 #ifdef CONFIG_XNUPOST
12902 /*
12903 * The kernel pmap maintains a user-accessible mapping of the commpage
12904 * to test PAN.
12905 */
12906 kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(data_pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12907 assert(kr == KERN_SUCCESS);
12908
12909 /*
12910 * This mapping should not be global (as we only expect to reference it
12911 * during testing).
12912 */
12913 pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);
12914
12915 #if KASAN
12916 kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
12917 #endif
12918 #endif /* CONFIG_XNUPOST */
12919
12920 /*
12921 * In order to avoid burning extra pages on mapping the shared page, we
12922 * create a dedicated pmap for the shared page. We forcibly nest the
12923 * translation tables from this pmap into other pmaps. The level we
12924 * will nest at depends on the MMU configuration (page size, TTBR range,
12925 * etc). Typically, this is at L1 for 4K tasks and L2 for 16K tasks.
12926 *
12927 * Note that this is NOT "the nested pmap" (which is used to nest the
12928 * shared cache).
12929 *
12930 * Note that we update parameters of the entry for our unique needs (NG
12931 * entry, etc.).
12932 */
12933 sharedpage_pmap_default = pmap_create_options(NULL, 0x0, 0);
12934 assert(sharedpage_pmap_default != NULL);
12935
12936 /* The user 64-bit mapping... */
12937 kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12938 assert(kr == KERN_SUCCESS);
12939 pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
12940 #if CONFIG_ARM_PFZ
12941 /* User mapping of the commpage text section, for the 64-bit mapping only.
12942 *
12943 * We don't insert it into the 32 bit mapping because we don't want 32 bit
12944 * user processes to get this page mapped in; they should never call into
12945 * this page.
12946 *
12947 * The data comm page is in a pre-reserved L3 VA range and the text commpage
12948 * is slid within the same L3 page table as the data commpage. It is either beyond the
12949 * maximum user VA or is pre-reserved in vm_map_exec(). This means that
12950 * it is reserved and unavailable to mach VM for future mappings.
12951 */
12952 const pt_attr_t * const pt_attr = pmap_get_pt_attr(sharedpage_pmap_default);
12953 int num_ptes = pt_attr_leaf_size(pt_attr) >> PTE_SHIFT;
12954
12955 vm_map_address_t commpage_text_va = 0;
12956
12957 do {
12958 int text_leaf_index = random() % num_ptes;
12959
12960 // Generate a VA for the commpage text with the same root and twig index as the data
12961 // commpage, but with the new leaf index we've just generated.
12962 commpage_text_va = (_COMM_PAGE64_BASE_ADDRESS & ~pt_attr_leaf_index_mask(pt_attr));
12963 commpage_text_va |= (text_leaf_index << pt_attr_leaf_shift(pt_attr));
12964 } while (commpage_text_va == _COMM_PAGE64_BASE_ADDRESS); // Try again if we collide (should be unlikely)
12965
12966 // Assert that this is empty
12967 __assert_only pt_entry_t *ptep = pmap_pte(sharedpage_pmap_default, commpage_text_va);
12968 assert(ptep != PT_ENTRY_NULL);
12969 assert(*ptep == ARM_TTE_EMPTY);
12970
12971 // At this point, we've found the address we want to insert our comm page at
12972 kr = pmap_enter_addr(sharedpage_pmap_default, commpage_text_va, text_pa, VM_PROT_READ | VM_PROT_EXECUTE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12973 assert(kr == KERN_SUCCESS);
12974 // Mark it as a global R/X page so that it doesn't get thrown out on TLB flush
12975 pmap_update_tt3e(sharedpage_pmap_default, commpage_text_va, PMAP_COMM_PAGE_TEXT_PTE_TEMPLATE);
12976
12977 *user_text_addr = commpage_text_va;
12978 #endif
12979
12980 /* ...and the user 32-bit mapping. */
12981 kr = pmap_enter_addr(sharedpage_pmap_default, _COMM_PAGE32_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12982 assert(kr == KERN_SUCCESS);
12983 pmap_update_tt3e(sharedpage_pmap_default, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
12984
12985 #if __ARM_MIXED_PAGE_SIZE__
12986 /**
12987 * To handle 4K tasks a new view/pmap of the shared page is needed. These are a
12988 * new set of page tables that point to the exact same 16K shared page as
12989 * before. Only the first 4K of the 16K shared page is mapped since that's
12990 * the only part that contains relevant data.
12991 */
12992 sharedpage_pmap_4k = pmap_create_options(NULL, 0x0, PMAP_CREATE_FORCE_4K_PAGES);
12993 assert(sharedpage_pmap_4k != NULL);
12994
12995 /* The user 64-bit mapping... */
12996 kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE64_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
12997 assert(kr == KERN_SUCCESS);
12998 pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
12999
13000 /* ...and the user 32-bit mapping. */
13001 kr = pmap_enter_addr(sharedpage_pmap_4k, _COMM_PAGE32_BASE_ADDRESS, data_pa, VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
13002 assert(kr == KERN_SUCCESS);
13003 pmap_update_tt3e(sharedpage_pmap_4k, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
13004
13005 #endif
13006
13007 /* For manipulation in kernel, go straight to physical page */
13008 *kernel_data_addr = phystokv(data_pa);
13009 *kernel_text_addr = (text_pa) ? phystokv(text_pa) : 0;
13010
13011 return;
13012 }
13013
13014
13015 /*
13016 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
13017 * with user controlled TTEs for regions that aren't explicitly reserved by the
13018 * VM (e.g., _COMM_PAGE64_NESTING_START/_COMM_PAGE64_BASE_ADDRESS).
13019 */
13020 #if (ARM_PGSHIFT == 14)
13021 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
13022 #elif (ARM_PGSHIFT == 12)
13023 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
13024 #else
13025 #error Nested shared page mapping is unsupported on this config
13026 #endif
13027
13028 MARK_AS_PMAP_TEXT static kern_return_t
13029 pmap_insert_sharedpage_internal(
13030 pmap_t pmap)
13031 {
13032 kern_return_t kr = KERN_SUCCESS;
13033 vm_offset_t sharedpage_vaddr;
13034 pt_entry_t *ttep, *src_ttep;
13035 int options = 0;
13036 pmap_t sharedpage_pmap = sharedpage_pmap_default;
13037
13038 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
13039 const unsigned int sharedpage_level = pt_attr_sharedpage_level(pt_attr);
13040
13041 #if __ARM_MIXED_PAGE_SIZE__
13042 #if !__ARM_16K_PG__
13043 /* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
13044 #error "pmap_insert_sharedpage_internal requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
13045 #endif /* !__ARM_16K_PG__ */
13046
13047 /* Choose the correct shared page pmap to use. */
13048 const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
13049 if (pmap_page_size == 16384) {
13050 sharedpage_pmap = sharedpage_pmap_default;
13051 } else if (pmap_page_size == 4096) {
13052 sharedpage_pmap = sharedpage_pmap_4k;
13053 } else {
13054 panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size);
13055 }
13056 #endif /* __ARM_MIXED_PAGE_SIZE__ */
13057
13058 VALIDATE_PMAP(pmap);
13059 #if XNU_MONITOR
13060 options |= PMAP_OPTIONS_NOWAIT;
13061 #endif /* XNU_MONITOR */
13062
13063 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
13064 #error We assume a single page.
13065 #endif
13066
13067 if (pmap_is_64bit(pmap)) {
13068 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
13069 } else {
13070 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
13071 }
13072
13073
13074 pmap_lock(pmap);
13075
13076 /*
13077 * For 4KB pages, we either "nest" at the level one page table (1GB) or level
13078 * two (2MB) depending on the address space layout. For 16KB pages, each level
13079 * one entry is 64GB, so we must go to the second level entry (32MB) in order
13080 * to "nest".
13081 *
13082 * Note: This is not "nesting" in the shared cache sense. This definition of
13083 * nesting just means inserting pointers to pre-allocated tables inside of
13084 * the passed in pmap to allow us to share page tables (which map the shared
13085 * page) for every task. This saves at least one page of memory per process
13086 * compared to creating new page tables in every process for mapping the
13087 * shared page.
13088 */
13089
13090 /**
13091 * Allocate the twig page tables if needed, and slam a pointer to the shared
13092 * page's tables into place.
13093 */
13094 while ((ttep = pmap_ttne(pmap, sharedpage_level, sharedpage_vaddr)) == TT_ENTRY_NULL) {
13095 pmap_unlock(pmap);
13096
13097 kr = pmap_expand(pmap, sharedpage_vaddr, options, sharedpage_level);
13098
13099 if (kr != KERN_SUCCESS) {
13100 #if XNU_MONITOR
13101 if (kr == KERN_RESOURCE_SHORTAGE) {
13102 return kr;
13103 } else
13104 #endif
13105 {
13106 panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
13107 }
13108 }
13109
13110 pmap_lock(pmap);
13111 }
13112
13113 if (*ttep != ARM_PTE_EMPTY) {
13114 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
13115 }
13116
13117 src_ttep = pmap_ttne(sharedpage_pmap, sharedpage_level, sharedpage_vaddr);
13118
13119 *ttep = *src_ttep;
13120 FLUSH_PTE_STRONG(ttep);
13121
13122 pmap_unlock(pmap);
13123
13124 return kr;
13125 }
13126
13127 static void
13128 pmap_unmap_sharedpage(
13129 pmap_t pmap)
13130 {
13131 pt_entry_t *ttep;
13132 vm_offset_t sharedpage_vaddr;
13133 pmap_t sharedpage_pmap = sharedpage_pmap_default;
13134
13135 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
13136 const unsigned int sharedpage_level = pt_attr_sharedpage_level(pt_attr);
13137
13138 #if __ARM_MIXED_PAGE_SIZE__
13139 #if !__ARM_16K_PG__
13140 /* The following code assumes that sharedpage_pmap_default is a 16KB pmap. */
13141 #error "pmap_unmap_sharedpage requires a 16KB default kernel page size when __ARM_MIXED_PAGE_SIZE__ is enabled"
13142 #endif /* !__ARM_16K_PG__ */
13143
13144 /* Choose the correct shared page pmap to use. */
13145 const uint64_t pmap_page_size = pt_attr_page_size(pt_attr);
13146 if (pmap_page_size == 16384) {
13147 sharedpage_pmap = sharedpage_pmap_default;
13148 } else if (pmap_page_size == 4096) {
13149 sharedpage_pmap = sharedpage_pmap_4k;
13150 } else {
13151 panic("No shared page pmap exists for the wanted page size: %llu", pmap_page_size);
13152 }
13153 #endif /* __ARM_MIXED_PAGE_SIZE__ */
13154
13155 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
13156 #error We assume a single page.
13157 #endif
13158
13159 if (pmap_is_64bit(pmap)) {
13160 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
13161 } else {
13162 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
13163 }
13164
13165
13166 ttep = pmap_ttne(pmap, sharedpage_level, sharedpage_vaddr);
13167
13168 if (ttep == NULL) {
13169 return;
13170 }
13171
13172 /* It had better be mapped to the shared page. */
13173 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_ttne(sharedpage_pmap, sharedpage_level, sharedpage_vaddr)) {
13174 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
13175 }
13176
13177 *ttep = ARM_TTE_EMPTY;
13178 FLUSH_PTE_STRONG(ttep);
13179
13180 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, pmap);
13181 sync_tlb_flush();
13182 }
13183
13184 void
13185 pmap_insert_sharedpage(
13186 pmap_t pmap)
13187 {
13188 #if XNU_MONITOR
13189 kern_return_t kr = KERN_FAILURE;
13190
13191 while ((kr = pmap_insert_sharedpage_ppl(pmap)) == KERN_RESOURCE_SHORTAGE) {
13192 pmap_alloc_page_for_ppl(0);
13193 }
13194
13195 pmap_ledger_check_balance(pmap);
13196
13197 if (kr != KERN_SUCCESS) {
13198 panic("%s: failed to insert the shared page, kr=%d, "
13199 "pmap=%p",
13200 __FUNCTION__, kr,
13201 pmap);
13202 }
13203 #else
13204 pmap_insert_sharedpage_internal(pmap);
13205 #endif
13206 }
13207
13208 static boolean_t
13209 pmap_is_64bit(
13210 pmap_t pmap)
13211 {
13212 return pmap->is_64bit;
13213 }
13214
13215 bool
13216 pmap_is_exotic(
13217 pmap_t pmap __unused)
13218 {
13219 return false;
13220 }
13221
13222 #endif
13223
13224 /* ARMTODO -- provide an implementation that accounts for
13225 * holes in the physical map, if any.
13226 */
13227 boolean_t
13228 pmap_valid_page(
13229 ppnum_t pn)
13230 {
13231 return pa_valid(ptoa(pn));
13232 }
13233
13234 boolean_t
13235 pmap_bootloader_page(
13236 ppnum_t pn)
13237 {
13238 pmap_paddr_t paddr = ptoa(pn);
13239
13240 if (pa_valid(paddr)) {
13241 return FALSE;
13242 }
13243 pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
13244 return (io_rgn != NULL) && (io_rgn->wimg & PMAP_IO_RANGE_CARVEOUT);
13245 }
13246
13247 MARK_AS_PMAP_TEXT static boolean_t
13248 pmap_is_empty_internal(
13249 pmap_t pmap,
13250 vm_map_offset_t va_start,
13251 vm_map_offset_t va_end)
13252 {
13253 vm_map_offset_t block_start, block_end;
13254 tt_entry_t *tte_p;
13255
13256 if (pmap == NULL) {
13257 return TRUE;
13258 }
13259
13260 VALIDATE_PMAP(pmap);
13261
13262 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
13263 unsigned int initial_not_in_kdp = not_in_kdp;
13264
13265 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
13266 pmap_lock_ro(pmap);
13267 }
13268
13269 #if (__ARM_VMSA__ == 7)
13270 if (tte_index(pmap, pt_attr, va_end) >= pmap->tte_index_max) {
13271 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
13272 pmap_unlock_ro(pmap);
13273 }
13274 return TRUE;
13275 }
13276 #endif
13277
13278 /* TODO: This will be faster if we increment ttep at each level. */
13279 block_start = va_start;
13280
13281 while (block_start < va_end) {
13282 pt_entry_t *bpte_p, *epte_p;
13283 pt_entry_t *pte_p;
13284
13285 block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
13286 if (block_end > va_end) {
13287 block_end = va_end;
13288 }
13289
13290 tte_p = pmap_tte(pmap, block_start);
13291 if ((tte_p != PT_ENTRY_NULL)
13292 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
13293 pte_p = (pt_entry_t *) ttetokv(*tte_p);
13294 bpte_p = &pte_p[pte_index(pmap, pt_attr, block_start)];
13295 epte_p = &pte_p[pte_index(pmap, pt_attr, block_end)];
13296
13297 for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
13298 if (*pte_p != ARM_PTE_EMPTY) {
13299 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
13300 pmap_unlock_ro(pmap);
13301 }
13302 return FALSE;
13303 }
13304 }
13305 }
13306 block_start = block_end;
13307 }
13308
13309 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
13310 pmap_unlock_ro(pmap);
13311 }
13312
13313 return TRUE;
13314 }
13315
13316 boolean_t
13317 pmap_is_empty(
13318 pmap_t pmap,
13319 vm_map_offset_t va_start,
13320 vm_map_offset_t va_end)
13321 {
13322 #if XNU_MONITOR
13323 return pmap_is_empty_ppl(pmap, va_start, va_end);
13324 #else
13325 return pmap_is_empty_internal(pmap, va_start, va_end);
13326 #endif
13327 }
13328
13329 vm_map_offset_t
13330 pmap_max_offset(
13331 boolean_t is64,
13332 unsigned int option)
13333 {
13334 return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
13335 }
13336
13337 vm_map_offset_t
13338 pmap_max_64bit_offset(
13339 __unused unsigned int option)
13340 {
13341 vm_map_offset_t max_offset_ret = 0;
13342
13343 #if defined(__arm64__)
13344 #define ARM64_MIN_MAX_ADDRESS (SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000) // end of shared region + 512MB for various purposes
13345 _Static_assert((ARM64_MIN_MAX_ADDRESS > SHARED_REGION_BASE_ARM64) && (ARM64_MIN_MAX_ADDRESS <= MACH_VM_MAX_ADDRESS),
13346 "Minimum address space size outside allowable range");
13347 const vm_map_offset_t min_max_offset = ARM64_MIN_MAX_ADDRESS; // end of shared region + 512MB for various purposes
13348 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
13349 max_offset_ret = arm64_pmap_max_offset_default;
13350 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
13351 max_offset_ret = min_max_offset;
13352 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
13353 max_offset_ret = MACH_VM_MAX_ADDRESS;
13354 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
13355 if (arm64_pmap_max_offset_default) {
13356 max_offset_ret = arm64_pmap_max_offset_default;
13357 } else if (max_mem > 0xC0000000) {
13358 max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
13359 } else if (max_mem > 0x40000000) {
13360 max_offset_ret = min_max_offset + 0x38000000; // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
13361 } else {
13362 max_offset_ret = min_max_offset;
13363 }
13364 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
13365 if (arm64_pmap_max_offset_default) {
13366 // Allow the boot-arg to override jumbo size
13367 max_offset_ret = arm64_pmap_max_offset_default;
13368 } else {
13369 max_offset_ret = MACH_VM_MAX_ADDRESS; // Max offset is 64GB for pmaps with special "jumbo" blessing
13370 }
13371 } else {
13372 panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
13373 }
13374
13375 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
13376 assert(max_offset_ret >= min_max_offset);
13377 #else
13378 panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
13379 #endif
13380
13381 return max_offset_ret;
13382 }
13383
13384 vm_map_offset_t
13385 pmap_max_32bit_offset(
13386 unsigned int option)
13387 {
13388 vm_map_offset_t max_offset_ret = 0;
13389
13390 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
13391 max_offset_ret = arm_pmap_max_offset_default;
13392 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
13393 max_offset_ret = 0x80000000;
13394 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
13395 max_offset_ret = VM_MAX_ADDRESS;
13396 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
13397 if (arm_pmap_max_offset_default) {
13398 max_offset_ret = arm_pmap_max_offset_default;
13399 } else if (max_mem > 0x20000000) {
13400 max_offset_ret = 0x80000000;
13401 } else {
13402 max_offset_ret = 0x80000000;
13403 }
13404 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
13405 max_offset_ret = 0x80000000;
13406 } else {
13407 panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
13408 }
13409
13410 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
13411 return max_offset_ret;
13412 }
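
#if 0
/*
 * Illustrative sketch (not compiled): pmap_max_offset() simply dispatches on
 * the is64 flag, so the two hypothetical calls below reach
 * pmap_max_64bit_offset() and pmap_max_32bit_offset() with the same option.
 * The 32-bit minimum comes straight from pmap_max_32bit_offset() above.
 */
static void
pmap_max_offset_example(void)
{
	vm_map_offset_t max64 = pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN);
	vm_map_offset_t max32 = pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN);

	assert(max32 == 0x80000000);
	assert(max64 > max32);
}
#endif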
13413
13414 #if CONFIG_DTRACE
13415 /*
13416 * Constrain DTrace copyin/copyout actions
13417 */
13418 extern kern_return_t dtrace_copyio_preflight(addr64_t);
13419 extern kern_return_t dtrace_copyio_postflight(addr64_t);
13420
13421 kern_return_t
13422 dtrace_copyio_preflight(
13423 __unused addr64_t va)
13424 {
13425 if (current_map() == kernel_map) {
13426 return KERN_FAILURE;
13427 } else {
13428 return KERN_SUCCESS;
13429 }
13430 }
13431
13432 kern_return_t
13433 dtrace_copyio_postflight(
13434 __unused addr64_t va)
13435 {
13436 return KERN_SUCCESS;
13437 }
13438 #endif /* CONFIG_DTRACE */
13439
13440
13441 void
13442 pmap_flush_context_init(__unused pmap_flush_context *pfc)
13443 {
13444 }
13445
13446
13447 void
13448 pmap_flush(
13449 __unused pmap_flush_context *cpus_to_flush)
13450 {
13451 /* not implemented yet */
13452 return;
13453 }
13454
13455 #if XNU_MONITOR
13456
13457 /*
13458 * Enforce that the address range described by kva and nbytes is not currently
13459 * PPL-owned, and won't become PPL-owned while pinned. This is to prevent
13460 * unintentionally writing to PPL-owned memory.
13461 */
13462 static void
13463 pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes)
13464 {
13465 vm_offset_t end;
13466 if (os_add_overflow(kva, nbytes, &end)) {
13467 panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
13468 }
13469 for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
13470 pmap_paddr_t pa = kvtophys(ckva);
13471 if (!pa_valid(pa)) {
13472 panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
13473 }
13474 pp_attr_t attr;
13475 unsigned int pai = (unsigned int)pa_index(pa);
13476 if (ckva == phystokv(pa)) {
13477 panic("%s(%p): attempt to pin static mapping for page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
13478 }
13479 do {
13480 attr = pp_attr_table[pai] & ~PP_ATTR_NO_MONITOR;
13481 if (attr & PP_ATTR_MONITOR) {
13482 panic("%s(%p): physical page 0x%llx belongs to PPL", __func__, (void*)kva, (uint64_t)pa);
13483 }
13484 } while (!OSCompareAndSwap16(attr, attr | PP_ATTR_NO_MONITOR, &pp_attr_table[pai]));
13485 }
13486 }
13487
13488 static void
13489 pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes)
13490 {
13491 vm_offset_t end;
13492 if (os_add_overflow(kva, nbytes, &end)) {
13493 panic("%s(%p, 0x%llx): overflow", __func__, (void*)kva, (uint64_t)nbytes);
13494 }
13495 for (vm_offset_t ckva = kva; ckva < end; ckva = round_page(ckva + 1)) {
13496 pmap_paddr_t pa = kvtophys(ckva);
13497 if (!pa_valid(pa)) {
13498 panic("%s(%p): invalid physical page 0x%llx", __func__, (void*)kva, (uint64_t)pa);
13499 }
13500 if (!(pp_attr_table[pa_index(pa)] & PP_ATTR_NO_MONITOR)) {
13501 panic("%s(%p): physical page 0x%llx not pinned", __func__, (void*)kva, (uint64_t)pa);
13502 }
13503 assert(!(pp_attr_table[pa_index(pa)] & PP_ATTR_MONITOR));
13504 pa_clear_no_monitor(pa);
13505 }
13506 }
13507
13508 /*
13509 * Lock down a page, making all mappings read-only, and preventing
13510 * further mappings or removal of this particular kva's mapping.
13511 * Effectively, it makes the page at kva immutable.
13512 */
13513 MARK_AS_PMAP_TEXT static void
13514 pmap_ppl_lockdown_page(vm_address_t kva)
13515 {
13516 pmap_paddr_t pa = kvtophys(kva);
13517 unsigned int pai = (unsigned int)pa_index(pa);
13518 LOCK_PVH(pai);
13519 pv_entry_t **pv_h = pai_to_pvh(pai);
13520
13521 if (__improbable(pa_test_monitor(pa))) {
13522 panic("%#lx: page %llx belongs to PPL", kva, pa);
13523 }
13524
13525 if (__improbable(pvh_get_flags(pv_h) & (PVH_FLAG_LOCKDOWN | PVH_FLAG_EXEC))) {
13526 panic("%#lx: already locked down/executable (%#llx)", kva, pvh_get_flags(pv_h));
13527 }
13528
13529 pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
13530
13531 if (pte_p == PT_ENTRY_NULL) {
13532 panic("%#lx: NULL pte", kva);
13533 }
13534
13535 pt_entry_t tmplate = *pte_p;
13536 if (__improbable((tmplate & ARM_PTE_APMASK) != ARM_PTE_AP(AP_RWNA))) {
13537 panic("%#lx: not a kernel r/w page (%#llx)", kva, tmplate & ARM_PTE_APMASK);
13538 }
13539
13540 pvh_set_flags(pv_h, pvh_get_flags(pv_h) | PVH_FLAG_LOCKDOWN);
13541
13542 pmap_set_ptov_ap(pai, AP_RONA, FALSE);
13543
13544 UNLOCK_PVH(pai);
13545
13546 pmap_page_protect_options_internal((ppnum_t)atop(pa), VM_PROT_READ, 0, NULL);
13547 }
13548
13549 /*
13550 * Release a page from being locked down to the PPL, making it writable
13551 * to the kernel once again.
13552 */
13553 MARK_AS_PMAP_TEXT static void
13554 pmap_ppl_unlockdown_page(vm_address_t kva)
13555 {
13556 pmap_paddr_t pa = kvtophys(kva);
13557 unsigned int pai = (unsigned int)pa_index(pa);
13558 LOCK_PVH(pai);
13559 pv_entry_t **pv_h = pai_to_pvh(pai);
13560
13561 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
13562
13563 if (__improbable(!(pvh_flags & PVH_FLAG_LOCKDOWN))) {
13564 panic("unlockdown attempt on not locked down virtual %#lx/pai %d", kva, pai);
13565 }
13566
13567 pvh_set_flags(pv_h, pvh_flags & ~PVH_FLAG_LOCKDOWN);
13568 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
13569 UNLOCK_PVH(pai);
13570 }
13571
13572 #else /* XNU_MONITOR */
13573
13574 static void __unused
13575 pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
13576 {
13577 }
13578
13579 static void __unused
13580 pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
13581 {
13582 }
13583
13584 #endif /* !XNU_MONITOR */
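
#if 0
/*
 * Illustrative sketch (not compiled): pmap_pin_kernel_pages() and
 * pmap_unpin_kernel_pages() are intended to bracket writes through
 * caller-supplied kernel pointers, as pmap_query_resident_internal() and
 * pmap_query_page_info_internal() do below.  The helper is hypothetical and
 * only demonstrates the calling pattern.
 */
static void
pmap_write_back_result(mach_vm_size_t *result_p, mach_vm_size_t value)
{
	pmap_pin_kernel_pages((vm_offset_t)result_p, sizeof(*result_p));
	*result_p = value;
	pmap_unpin_kernel_pages((vm_offset_t)result_p, sizeof(*result_p));
}
#endif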
13585
13586
13587 #define PMAP_RESIDENT_INVALID ((mach_vm_size_t)-1)
13588
13589 MARK_AS_PMAP_TEXT static mach_vm_size_t
13590 pmap_query_resident_internal(
13591 pmap_t pmap,
13592 vm_map_address_t start,
13593 vm_map_address_t end,
13594 mach_vm_size_t *compressed_bytes_p)
13595 {
13596 mach_vm_size_t resident_bytes = 0;
13597 mach_vm_size_t compressed_bytes = 0;
13598
13599 pt_entry_t *bpte, *epte;
13600 pt_entry_t *pte_p;
13601 tt_entry_t *tte_p;
13602
13603 if (pmap == NULL) {
13604 return PMAP_RESIDENT_INVALID;
13605 }
13606
13607 VALIDATE_PMAP(pmap);
13608
13609 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
13610
13611 /* Ensure that this request is valid, and addresses exactly one TTE. */
13612 if (__improbable((start % pt_attr_page_size(pt_attr)) ||
13613 (end % pt_attr_page_size(pt_attr)))) {
13614 panic("%s: address range %p, %p not page-aligned to 0x%llx", __func__, (void*)start, (void*)end, pt_attr_page_size(pt_attr));
13615 }
13616
13617 if (__improbable((end < start) || (end > ((start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr))))) {
13618 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
13619 }
13620
13621 pmap_lock_ro(pmap);
13622 tte_p = pmap_tte(pmap, start);
13623 if (tte_p == (tt_entry_t *) NULL) {
13624 pmap_unlock_ro(pmap);
13625 return PMAP_RESIDENT_INVALID;
13626 }
13627 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
13628 pte_p = (pt_entry_t *) ttetokv(*tte_p);
13629 bpte = &pte_p[pte_index(pmap, pt_attr, start)];
13630 epte = &pte_p[pte_index(pmap, pt_attr, end)];
13631
13632 for (; bpte < epte; bpte++) {
13633 if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
13634 compressed_bytes += pt_attr_page_size(pt_attr);
13635 } else if (pa_valid(pte_to_pa(*bpte))) {
13636 resident_bytes += pt_attr_page_size(pt_attr);
13637 }
13638 }
13639 }
13640 pmap_unlock_ro(pmap);
13641
13642 if (compressed_bytes_p) {
13643 pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
13644 *compressed_bytes_p += compressed_bytes;
13645 pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
13646 }
13647
13648 return resident_bytes;
13649 }
13650
13651 mach_vm_size_t
13652 pmap_query_resident(
13653 pmap_t pmap,
13654 vm_map_address_t start,
13655 vm_map_address_t end,
13656 mach_vm_size_t *compressed_bytes_p)
13657 {
13658 mach_vm_size_t total_resident_bytes;
13659 mach_vm_size_t compressed_bytes;
13660 vm_map_address_t va;
13661
13662
13663 if (pmap == PMAP_NULL) {
13664 if (compressed_bytes_p) {
13665 *compressed_bytes_p = 0;
13666 }
13667 return 0;
13668 }
13669
13670 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
13671
13672 total_resident_bytes = 0;
13673 compressed_bytes = 0;
13674
13675 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
13676 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
13677 VM_KERNEL_ADDRHIDE(end));
13678
13679 va = start;
13680 while (va < end) {
13681 vm_map_address_t l;
13682 mach_vm_size_t resident_bytes;
13683
13684 l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
13685
13686 if (l > end) {
13687 l = end;
13688 }
13689 #if XNU_MONITOR
13690 resident_bytes = pmap_query_resident_ppl(pmap, va, l, compressed_bytes_p);
13691 #else
13692 resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
13693 #endif
13694 if (resident_bytes == PMAP_RESIDENT_INVALID) {
13695 break;
13696 }
13697
13698 total_resident_bytes += resident_bytes;
13699
13700 va = l;
13701 }
13702
13703 if (compressed_bytes_p) {
13704 *compressed_bytes_p = compressed_bytes;
13705 }
13706
13707 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
13708 total_resident_bytes);
13709
13710 return total_resident_bytes;
13711 }
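
#if 0
/*
 * Illustrative sketch (not compiled): pmap_query_resident() walks the range
 * one twig (next-to-last level table) at a time so that each call into
 * pmap_query_resident_internal() is confined to a single TTE.  The
 * hypothetical helper below restates the chunk-boundary computation used in
 * the loop above.
 */
static vm_map_address_t
pmap_query_resident_chunk_end(pmap_t pmap, vm_map_address_t va, vm_map_address_t end)
{
	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
	vm_map_address_t l = (va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);

	return (l > end) ? end : l;
}
#endif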
13712
13713 #if MACH_ASSERT
13714 static void
13715 pmap_check_ledgers(
13716 pmap_t pmap)
13717 {
13718 int pid;
13719 char *procname;
13720
13721 if (pmap->pmap_pid == 0) {
13722 /*
13723 * This pmap was not or is no longer fully associated
13724 * with a task (e.g. the old pmap after a fork()/exec() or
13725 * spawn()). Its "ledger" still points at a task that is
13726 * now using a different (and active) address space, so
13727 * we can't check that all the pmap ledgers are balanced here.
13728 *
13729 * If the "pid" is set, that means that we went through
13730 * pmap_set_process() in task_terminate_internal(), so
13731 * this task's ledger should not have been re-used and
13732 * all the pmap ledgers should be back to 0.
13733 */
13734 return;
13735 }
13736
13737 pid = pmap->pmap_pid;
13738 procname = pmap->pmap_procname;
13739
13740 vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);
13741
13742 PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
13743 #if 00
13744 PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
13745 #endif
13746 PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
13747 PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
13748 PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
13749 PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
13750 PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
13751 }
13752 #endif /* MACH_ASSERT */
13753
13754 void
13755 pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
13756 {
13757 }
13758
13759
13760 #if CONFIG_PGTRACE
13761 #define PROF_START uint64_t t, nanot;\
13762 t = mach_absolute_time();
13763
13764 #define PROF_END absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
13765 kprintf("%s: took %llu ns\n", __func__, nanot);
13766
13767 #define PMAP_PGTRACE_LOCK(p) \
13768 do { \
13769 *(p) = pmap_interrupts_disable(); \
13770 if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
13771 pmap_interrupts_restore(*(p)); \
13772 } while (true)
13773
13774 #define PMAP_PGTRACE_UNLOCK(p) \
13775 do { \
13776 simple_unlock(&(pmap_pgtrace.lock)); \
13777 pmap_interrupts_restore(*(p)); \
13778 } while (0)
13779
13780 #define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
13781 do { \
13782 *(pte_p) = (pte_entry); \
13783 FLUSH_PTE(pte_p); \
13784 } while (0)
13785
13786 #define PGTRACE_MAX_MAP 16 // maximum number of VAs mapped to the same PA that we support
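
#if 0
/*
 * Illustrative sketch (not compiled): PMAP_PGTRACE_LOCK/UNLOCK expect a
 * pointer to a caller-provided uint64_t holding the saved interrupt state,
 * which is how the pgtrace helpers below use them.
 */
static void
pmap_pgtrace_locked_example(void)
{
	uint64_t ints;

	PMAP_PGTRACE_LOCK(&ints);
	/* ... operate on pmap_pgtrace.pages with the lock held ... */
	PMAP_PGTRACE_UNLOCK(&ints);
}
#endif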
13787
13788 typedef enum {
13789 UNDEFINED,
13790 PA_UNDEFINED,
13791 VA_UNDEFINED,
13792 DEFINED
13793 } pmap_pgtrace_page_state_t;
13794
13795 typedef struct {
13796 queue_chain_t chain;
13797
13798 /*
13799 * pa - physical address being traced
13800 * maps - list of VA mappings to the above PA
13801 * map_pool - pool of unused map entries
13802 * map_waste - discarded map entries
13803 * state - current state of this page info
13804 */
13805 pmap_paddr_t pa;
13806 queue_head_t maps;
13807 queue_head_t map_pool;
13808 queue_head_t map_waste;
13809 pmap_pgtrace_page_state_t state;
13810 } pmap_pgtrace_page_t;
13811
13812 typedef struct {
13813 queue_chain_t chain;
13814 pmap_t pmap;
13815 vm_map_offset_t va;
13816 } pmap_va_t;
13817
13818 static ZONE_VIEW_DEFINE(ZV_PMAP_VA, "pmap va",
13819 KHEAP_ID_DEFAULT, sizeof(pmap_va_t));
13820
13821 static ZONE_VIEW_DEFINE(ZV_PMAP_PGTRACE, "pmap pgtrace",
13822 KHEAP_ID_DEFAULT, sizeof(pmap_pgtrace_page_t));
13823
13824 static struct {
13825 /*
13826 * pages - list of tracing page info
13827 */
13828 queue_head_t pages;
13829 decl_simple_lock_data(, lock);
13830 } pmap_pgtrace = {};
13831
13832 static void
13833 pmap_pgtrace_init(void)
13834 {
13835 queue_init(&(pmap_pgtrace.pages));
13836 simple_lock_init(&(pmap_pgtrace.lock), 0);
13837
13838 boolean_t enabled;
13839
13840 if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
13841 pgtrace_enabled = enabled;
13842 }
13843 }
13844
13845 // find a page with the given pa - the pmap_pgtrace lock must be held
13846 inline static pmap_pgtrace_page_t *
13847 pmap_pgtrace_find_page(pmap_paddr_t pa)
13848 {
13849 queue_head_t *q = &(pmap_pgtrace.pages);
13850 pmap_pgtrace_page_t *p;
13851
13852 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
13853 if (p->state == UNDEFINED) {
13854 continue;
13855 }
13856 if (p->state == PA_UNDEFINED) {
13857 continue;
13858 }
13859 if (p->pa == pa) {
13860 return p;
13861 }
13862 }
13863
13864 return NULL;
13865 }
13866
13867 // enter clone of given pmap, va page and range - pmap should be locked
13868 static bool
13869 pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
13870 {
13871 uint64_t ints;
13872 queue_head_t *q = &(pmap_pgtrace.pages);
13873 pmap_paddr_t pa_page;
13874 pt_entry_t *ptep, *cptep;
13875 pmap_pgtrace_page_t *p;
13876 bool found = false;
13877
13878 pmap_assert_locked_w(pmap);
13879 assert(va_page == arm_trunc_page(va_page));
13880
13881 PMAP_PGTRACE_LOCK(&ints);
13882
13883 ptep = pmap_pte(pmap, va_page);
13884
13885 // target pte should exist
13886 if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
13887 PMAP_PGTRACE_UNLOCK(&ints);
13888 return false;
13889 }
13890
13891 queue_head_t *mapq;
13892 queue_head_t *mappool;
13893 pmap_pgtrace_map_t *map = NULL;
13894
13895 pa_page = pte_to_pa(*ptep);
13896
13897 // find if we have a page info defined for this
13898 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
13899 mapq = &(p->maps);
13900 mappool = &(p->map_pool);
13901
13902 switch (p->state) {
13903 case PA_UNDEFINED:
13904 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
13905 if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
13906 p->pa = pa_page;
13907 map->range.start = start;
13908 map->range.end = end;
13909 found = true;
13910 break;
13911 }
13912 }
13913 break;
13914
13915 case VA_UNDEFINED:
13916 if (p->pa != pa_page) {
13917 break;
13918 }
13919 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
13920 if (map->cloned == false) {
13921 map->pmap = pmap;
13922 map->ova = va_page;
13923 map->range.start = start;
13924 map->range.end = end;
13925 found = true;
13926 break;
13927 }
13928 }
13929 break;
13930
13931 case DEFINED:
13932 if (p->pa != pa_page) {
13933 break;
13934 }
13935 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
13936 if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
13937 kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
13938 break;
13939 } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
13940 kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
13941 break;
13942 } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
13943 // range should be already defined as well
13944 found = true;
13945 break;
13946 }
13947 }
13948 break;
13949
13950 default:
13951 panic("invalid state p->state=%x\n", p->state);
13952 }
13953
13954 if (found == true) {
13955 break;
13956 }
13957 }
13958
13959 // do not clone if no page info found
13960 if (found == false) {
13961 PMAP_PGTRACE_UNLOCK(&ints);
13962 return false;
13963 }
13964
13965 // copy pre, target and post ptes to clone ptes
13966 for (int i = 0; i < 3; i++) {
13967 ptep = pmap_pte(pmap, va_page + (i - 1) * ARM_PGBYTES);
13968 cptep = pmap_pte(kernel_pmap, map->cva[i]);
13969 assert(cptep != NULL);
13970 if (ptep == NULL) {
13971 PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
13972 } else {
13973 PGTRACE_WRITE_PTE(cptep, *ptep);
13974 }
13975 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
13976 }
13977
13978 // get ptes for original and clone
13979 ptep = pmap_pte(pmap, va_page);
13980 cptep = pmap_pte(kernel_pmap, map->cva[1]);
13981
13982 // invalidate original pte and mark it as a pgtrace page
13983 PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
13984 PMAP_UPDATE_TLBS(pmap, map->ova, map->ova + ARM_PGBYTES, false);
13985
13986 map->cloned = true;
13987 p->state = DEFINED;
13988
13989 kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);
13990
13991 PMAP_PGTRACE_UNLOCK(&ints);
13992
13993 return true;
13994 }
13995
13996 // This function removes the trace bit and re-validates the PTE if applicable. The pmap must be locked.
13997 static void
13998 pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
13999 {
14000 uint64_t ints, found = false;
14001 pmap_pgtrace_page_t *p;
14002 pt_entry_t *ptep;
14003
14004 PMAP_PGTRACE_LOCK(&ints);
14005
14006 // we must have this page info
14007 p = pmap_pgtrace_find_page(pa);
14008 if (p == NULL) {
14009 goto unlock_exit;
14010 }
14011
14012 // find matching map
14013 queue_head_t *mapq = &(p->maps);
14014 queue_head_t *mappool = &(p->map_pool);
14015 pmap_pgtrace_map_t *map;
14016
14017 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
14018 if (map->pmap == pmap && map->ova == va) {
14019 found = true;
14020 break;
14021 }
14022 }
14023
14024 if (!found) {
14025 goto unlock_exit;
14026 }
14027
14028 if (map->cloned == true) {
14029 // Restore back the pte to original state
14030 ptep = pmap_pte(pmap, map->ova);
14031 assert(ptep);
14032 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
14033 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);
14034
14035 // revert clone pages
14036 for (int i = 0; i < 3; i++) {
14037 ptep = pmap_pte(kernel_pmap, map->cva[i]);
14038 assert(ptep != NULL);
14039 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
14040 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
14041 }
14042 }
14043
14044 queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
14045 map->pmap = NULL;
14046 map->ova = (vm_map_offset_t)NULL;
14047 map->cloned = false;
14048 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
14049
14050 kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);
14051
14052 unlock_exit:
14053 PMAP_PGTRACE_UNLOCK(&ints);
14054 }
14055
14056 // remove all clones of given pa - pmap must be locked
14057 static void
14058 pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
14059 {
14060 uint64_t ints;
14061 pmap_pgtrace_page_t *p;
14062 pt_entry_t *ptep;
14063
14064 PMAP_PGTRACE_LOCK(&ints);
14065
14066 // we must have this page info
14067 p = pmap_pgtrace_find_page(pa);
14068 if (p == NULL) {
14069 PMAP_PGTRACE_UNLOCK(&ints);
14070 return;
14071 }
14072
14073 queue_head_t *mapq = &(p->maps);
14074 queue_head_t *mappool = &(p->map_pool);
14075 queue_head_t *mapwaste = &(p->map_waste);
14076 pmap_pgtrace_map_t *map;
14077
14078 // move maps to waste
14079 while (!queue_empty(mapq)) {
14080 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
14081 queue_enter_first(mapwaste, map, pmap_pgtrace_map_t*, chain);
14082 }
14083
14084 PMAP_PGTRACE_UNLOCK(&ints);
14085
14086 // sanitize maps in waste
14087 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
14088 if (map->cloned == true) {
14089 pmap_lock(map->pmap);
14090
14091 // restore back original pte
14092 ptep = pmap_pte(map->pmap, map->ova);
14093 assert(ptep);
14094 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
14095 PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova + ARM_PGBYTES, false);
14096
14097 // revert clone ptes
14098 for (int i = 0; i < 3; i++) {
14099 ptep = pmap_pte(kernel_pmap, map->cva[i]);
14100 assert(ptep != NULL);
14101 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
14102 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
14103 }
14104
14105 pmap_unlock(map->pmap);
14106 }
14107
14108 map->pmap = NULL;
14109 map->ova = (vm_map_offset_t)NULL;
14110 map->cloned = false;
14111 }
14112
14113 PMAP_PGTRACE_LOCK(&ints);
14114
14115 // recycle maps back to map_pool
14116 while (!queue_empty(mapwaste)) {
14117 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
14118 queue_enter_first(mappool, map, pmap_pgtrace_map_t*, chain);
14119 }
14120
14121 PMAP_PGTRACE_UNLOCK(&ints);
14122 }
14123
14124 inline static void
14125 pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
14126 {
14127 uint64_t tsz;
14128 vm_map_offset_t end;
14129
14130 if (pmap == kernel_pmap) {
14131 tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
14132 *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64 - tsz)) << (64 - tsz));
14133 *endp = VM_MAX_KERNEL_ADDRESS;
14134 } else {
14135 tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
14136 if (tsz == 64) {
14137 end = 0;
14138 } else {
14139 end = ((uint64_t)1 << (64 - tsz)) - 1;
14140 }
14141
14142 *startp = 0;
14143 *endp = end;
14144 }
14145
14146 assert(*endp > *startp);
14147
14148 return;
14149 }
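
#if 0
/*
 * Illustrative sketch (not compiled): the user search space above is derived
 * from TCR.T0SZ.  For example, with T0SZ == 25 the user region covers
 * 64 - 25 = 39 bits of VA, i.e. [0, 0x7FFFFFFFFFULL].  The hypothetical
 * helper below restates that arithmetic.
 */
static vm_map_offset_t
pmap_pgtrace_user_va_top(uint64_t t0sz)
{
	return (t0sz == 64) ? 0 : (((uint64_t)1 << (64 - t0sz)) - 1);
}
#endif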
14150
14151 // has pa mapped in given pmap? then clone it
14152 static uint64_t
14153 pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
14154 {
14155 uint64_t ret = 0;
14156 vm_map_offset_t min, max;
14157 vm_map_offset_t cur_page, end_page;
14158 pt_entry_t *ptep;
14159 tt_entry_t *ttep;
14160 tt_entry_t tte;
14161 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
14162
14163 pmap_pgtrace_get_search_space(pmap, &min, &max);
14164
14165 cur_page = arm_trunc_page(min);
14166 end_page = arm_trunc_page(max);
14167 while (cur_page <= end_page) {
14168 vm_map_offset_t add = 0;
14169
14170 pmap_lock(pmap);
14171
14172 // skip address space we are not interested in
14173 if (pmap == kernel_pmap &&
14174 ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
14175 (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
14176 add = ARM_PGBYTES;
14177 goto unlock_continue;
14178 }
14179
14180 // check whether we can skip l1
14181 ttep = pmap_tt1e(pmap, cur_page);
14182 assert(ttep);
14183 tte = *ttep;
14184 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
14185 add = ARM_TT_L1_SIZE;
14186 goto unlock_continue;
14187 }
14188
14189 // how about l2
14190 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, cur_page)];
14191
14192 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
14193 add = ARM_TT_L2_SIZE;
14194 goto unlock_continue;
14195 }
14196
14197 // ptep finally
14198 ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, cur_page)]);
14199 if (ptep == PT_ENTRY_NULL) {
14200 add = ARM_TT_L3_SIZE;
14201 goto unlock_continue;
14202 }
14203
14204 if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
14205 if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
14206 ret++;
14207 }
14208 }
14209
14210 add = ARM_PGBYTES;
14211
14212 unlock_continue:
14213 pmap_unlock(pmap);
14214
14215 // overflow
14216 if (cur_page + add < cur_page) {
14217 break;
14218 }
14219
14220 cur_page += add;
14221 }
14222
14223
14224 return ret;
14225 }
14226
14227 // search pv table and clone vas of given pa
14228 static uint64_t
14229 pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
14230 {
14231 uint64_t ret = 0;
14232 unsigned long pai;
14233 pv_entry_t **pvh;
14234 pt_entry_t *ptep;
14235 pmap_t pmap;
14236
14237 queue_head_t pmapvaq;
14238 pmap_va_t *pmapva;
14239
14240 queue_init(&pmapvaq);
14241
14242 pai = pa_index(pa);
14243 LOCK_PVH(pai);
14244 pvh = pai_to_pvh(pai);
14245
14246 // collect pmap/va pair from pvh
14247 if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
14248 ptep = pvh_ptep(pvh);
14249 pmap = ptep_get_pmap(ptep);
14250
14251 pmapva = (pmap_va_t *)zalloc(ZV_PMAP_VA);
14252 pmapva->pmap = pmap;
14253 pmapva->va = ptep_get_va(ptep);
14254
14255 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
14256 } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
14257 pv_entry_t *pvep;
14258
14259 pvep = pvh_list(pvh);
14260 while (pvep) {
14261 ptep = pve_get_ptep(pvep);
14262 pmap = ptep_get_pmap(ptep);
14263
14264 pmapva = (pmap_va_t *)zalloc(ZV_PMAP_VA);
14265 pmapva->pmap = pmap;
14266 pmapva->va = ptep_get_va(ptep);
14267
14268 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
14269
14270 pvep = PVE_NEXT_PTR(pve_next(pvep));
14271 }
14272 }
14273
14274 UNLOCK_PVH(pai);
14275
14276 // clone them while making sure mapping still exists
14277 queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
14278 pmap_lock(pmapva->pmap);
14279 ptep = pmap_pte(pmapva->pmap, pmapva->va);
14280 if (pte_to_pa(*ptep) == pa) {
14281 if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
14282 ret++;
14283 }
14284 }
14285 pmap_unlock(pmapva->pmap);
14286
14287 zfree(ZV_PMAP_VA, pmapva);
14288 }
14289
14290 return ret;
14291 }
14292
14293 // allocate a page info
14294 static pmap_pgtrace_page_t *
14295 pmap_pgtrace_alloc_page(void)
14296 {
14297 pmap_pgtrace_page_t *p;
14298 queue_head_t *mapq;
14299 queue_head_t *mappool;
14300 queue_head_t *mapwaste;
14301 pmap_pgtrace_map_t *map;
14302
14303 p = zalloc(ZV_PMAP_PGTRACE);
14304 assert(p);
14305
14306 p->state = UNDEFINED;
14307
14308 mapq = &(p->maps);
14309 mappool = &(p->map_pool);
14310 mapwaste = &(p->map_waste);
14311 queue_init(mapq);
14312 queue_init(mappool);
14313 queue_init(mapwaste);
14314
14315 for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
14316 vm_map_offset_t newcva;
14317 pt_entry_t *cptep;
14318 kern_return_t kr;
14319 vm_map_entry_t entry;
14320
14321 // get a clone va
14322 vm_object_reference(kernel_object);
14323 kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3 * ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
14324 if (kr != KERN_SUCCESS) {
14325 panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
14326 }
14327 VME_OBJECT_SET(entry, kernel_object);
14328 VME_OFFSET_SET(entry, newcva);
14329 vm_map_unlock(kernel_map);
14330
14331 // fill default clone page info and add to pool
14332 map = zalloc(ZV_PMAP_PGTRACE);
14333 for (int j = 0; j < 3; j++) {
14334 vm_map_offset_t addr = newcva + j * ARM_PGBYTES;
14335
14336 // pre-expand the pmap while preemption is enabled
14337 kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_L3_LEVEL);
14338 if (kr != KERN_SUCCESS) {
14339 panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
14340 }
14341
14342 cptep = pmap_pte(kernel_pmap, addr);
14343 assert(cptep != NULL);
14344
14345 map->cva[j] = addr;
14346 map->cva_spte[j] = *cptep;
14347 }
14348 map->range.start = map->range.end = 0;
14349 map->cloned = false;
14350 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
14351 }
14352
14353 return p;
14354 }
14355
14356 // free a page info
14357 static void
14358 pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
14359 {
14360 queue_head_t *mapq;
14361 queue_head_t *mappool;
14362 queue_head_t *mapwaste;
14363 pmap_pgtrace_map_t *map;
14364
14365 assert(p);
14366
14367 mapq = &(p->maps);
14368 mappool = &(p->map_pool);
14369 mapwaste = &(p->map_waste);
14370
14371 while (!queue_empty(mapq)) {
14372 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
14373 zfree(ZV_PMAP_PGTRACE, map);
14374 }
14375
14376 while (!queue_empty(mappool)) {
14377 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
14378 zfree(ZV_PMAP_PGTRACE, map);
14379 }
14380
14381 while (!queue_empty(mapwaste)) {
14382 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
14383 zfree(ZV_PMAP_PGTRACE, map);
14384 }
14385
14386 zfree(ZV_PMAP_PGTRACE, p);
14387 }
14388
14389 // construct page infos with the given address range
14390 int
14391 pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
14392 {
14393 int ret = 0;
14394 pt_entry_t *ptep;
14395 queue_head_t *q = &(pmap_pgtrace.pages);
14396 uint64_t ints;
14397 vm_map_offset_t cur_page, end_page;
14398
14399 if (start > end) {
14400 kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
14401 return -1;
14402 }
14403
14404 PROF_START
14405
14406 // add each page in given range
14407 cur_page = arm_trunc_page(start);
14408 end_page = arm_trunc_page(end);
14409 while (cur_page <= end_page) {
14410 pmap_paddr_t pa_page = 0;
14411 uint64_t num_cloned = 0;
14412 pmap_pgtrace_page_t *p = NULL, *newp;
14413 bool free_newp = true;
14414 pmap_pgtrace_page_state_t state;
14415
14416 // do all allocations outside of spinlocks
14417 newp = pmap_pgtrace_alloc_page();
14418
14419 // keep lock ordering: pmap, then kernel_pmap, then the pgtrace lock
14420 if (pmap != NULL) {
14421 pmap_lock_ro(pmap);
14422 }
14423 if (pmap != kernel_pmap) {
14424 pmap_lock_ro(kernel_pmap);
14425 }
14426
14427 // addresses are physical if pmap is null
14428 if (pmap == NULL) {
14429 ptep = NULL;
14430 pa_page = cur_page;
14431 state = VA_UNDEFINED;
14432 } else {
14433 ptep = pmap_pte(pmap, cur_page);
14434 if (ptep != NULL) {
14435 pa_page = pte_to_pa(*ptep);
14436 state = DEFINED;
14437 } else {
14438 state = PA_UNDEFINED;
14439 }
14440 }
14441
14442 // search if we have a page info already
14443 PMAP_PGTRACE_LOCK(&ints);
14444 if (state != PA_UNDEFINED) {
14445 p = pmap_pgtrace_find_page(pa_page);
14446 }
14447
14448 // add pre-allocated page info if nothing found
14449 if (p == NULL) {
14450 queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
14451 p = newp;
14452 free_newp = false;
14453 }
14454
14455 // now p points to what we want
14456 p->state = state;
14457
14458 queue_head_t *mapq = &(p->maps);
14459 queue_head_t *mappool = &(p->map_pool);
14460 pmap_pgtrace_map_t *map;
14461 vm_map_offset_t start_offset, end_offset;
14462
14463 // calculate trace offsets in the page
14464 if (cur_page > start) {
14465 start_offset = 0;
14466 } else {
14467 start_offset = start - cur_page;
14468 }
14469 if (cur_page == end_page) {
14470 end_offset = end - end_page;
14471 } else {
14472 end_offset = ARM_PGBYTES - 1;
14473 }
14474
14475 kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);
14476
14477 // fill map info
14478 assert(!queue_empty(mappool));
14479 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
14480 if (p->state == PA_UNDEFINED) {
14481 map->pmap = pmap;
14482 map->ova = cur_page;
14483 map->range.start = start_offset;
14484 map->range.end = end_offset;
14485 } else if (p->state == VA_UNDEFINED) {
14486 p->pa = pa_page;
14487 map->range.start = start_offset;
14488 map->range.end = end_offset;
14489 } else if (p->state == DEFINED) {
14490 p->pa = pa_page;
14491 map->pmap = pmap;
14492 map->ova = cur_page;
14493 map->range.start = start_offset;
14494 map->range.end = end_offset;
14495 } else {
14496 panic("invalid p->state=%d\n", p->state);
14497 }
14498
14499 // not cloned yet
14500 map->cloned = false;
14501 queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);
14502
14503 // unlock locks
14504 PMAP_PGTRACE_UNLOCK(&ints);
14505 if (pmap != kernel_pmap) {
14506 pmap_unlock_ro(kernel_pmap);
14507 }
14508 if (pmap != NULL) {
14509 pmap_unlock_ro(pmap);
14510 }
14511
14512 // now clone it
14513 if (pa_valid(pa_page)) {
14514 num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
14515 }
14516 if (pmap == NULL) {
14517 num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
14518 } else {
14519 num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
14520 }
14521
14522 // free pre-allocations if we didn't add it to the q
14523 if (free_newp) {
14524 pmap_pgtrace_free_page(newp);
14525 }
14526
14527 if (num_cloned == 0) {
14528 kprintf("%s: no mapping found for pa_page=%llx but will be added when a page entered\n", __func__, pa_page);
14529 }
14530
14531 ret += num_cloned;
14532
14533 // overflow
14534 if (cur_page + ARM_PGBYTES < cur_page) {
14535 break;
14536 } else {
14537 cur_page += ARM_PGBYTES;
14538 }
14539 }
14540
14541 PROF_END
14542
14543 return ret;
14544 }
14545
14546 // delete page infos for given address range
14547 int
14548 pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
14549 {
14550 int ret = 0;
14551 uint64_t ints;
14552 queue_head_t *q = &(pmap_pgtrace.pages);
14553 pmap_pgtrace_page_t *p;
14554 vm_map_offset_t cur_page, end_page;
14555
14556 kprintf("%s start=%llx end=%llx\n", __func__, start, end);
14557
14558 PROF_START
14559
14560 pt_entry_t *ptep;
14561 pmap_paddr_t pa_page;
14562
14563 // remove page info from start to end
14564 cur_page = arm_trunc_page(start);
14565 end_page = arm_trunc_page(end);
14566 while (cur_page <= end_page) {
14567 p = NULL;
14568
14569 if (pmap == NULL) {
14570 pa_page = cur_page;
14571 } else {
14572 pmap_lock(pmap);
14573 ptep = pmap_pte(pmap, cur_page);
14574 if (ptep == NULL) {
14575 pmap_unlock(pmap);
14576 goto cont;
14577 }
14578 pa_page = pte_to_pa(*ptep);
14579 pmap_unlock(pmap);
14580 }
14581
14582 // remove all clones and validate
14583 pmap_pgtrace_remove_all_clone(pa_page);
14584
14585 // find page info and delete
14586 PMAP_PGTRACE_LOCK(&ints);
14587 p = pmap_pgtrace_find_page(pa_page);
14588 if (p != NULL) {
14589 queue_remove(q, p, pmap_pgtrace_page_t *, chain);
14590 ret++;
14591 }
14592 PMAP_PGTRACE_UNLOCK(&ints);
14593
14594 // free outside of locks
14595 if (p != NULL) {
14596 pmap_pgtrace_free_page(p);
14597 }
14598
14599 cont:
14600 // overflow
14601 if (cur_page + ARM_PGBYTES < cur_page) {
14602 break;
14603 } else {
14604 cur_page += ARM_PGBYTES;
14605 }
14606 }
14607
14608 PROF_END
14609
14610 return ret;
14611 }
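
#if 0
/*
 * Illustrative sketch (not compiled): tracing one page of a task's pmap and
 * later removing the trace.  As noted above, addresses are treated as
 * physical when pmap is NULL and as virtual otherwise.
 */
static void
pmap_pgtrace_usage_example(pmap_t pmap, vm_map_offset_t va)
{
	(void)pmap_pgtrace_add_page(pmap, va, va + ARM_PGBYTES);
	/* ... run the workload whose accesses should be traced ... */
	(void)pmap_pgtrace_delete_page(pmap, va, va + ARM_PGBYTES);
}
#endif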
14612
14613 kern_return_t
14614 pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
14615 {
14616 pt_entry_t *ptep;
14617 pgtrace_run_result_t res;
14618 pmap_pgtrace_page_t *p;
14619 uint64_t ints, found = false;
14620 pmap_paddr_t pa;
14621
14622 // Quick check if we are interested
14623 ptep = pmap_pte(pmap, va);
14624 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
14625 return KERN_FAILURE;
14626 }
14627
14628 PMAP_PGTRACE_LOCK(&ints);
14629
14630 // Check again since access is serialized
14631 ptep = pmap_pte(pmap, va);
14632 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
14633 PMAP_PGTRACE_UNLOCK(&ints);
14634 return KERN_FAILURE;
14635 } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
14636 // Somehow this CPU's TLB has not been updated yet
14637 kprintf("%s Somehow this cpu's tlb has not updated?\n", __func__);
14638 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);
14639
14640 PMAP_PGTRACE_UNLOCK(&ints);
14641 return KERN_SUCCESS;
14642 }
14643
14644 // Find if this pa is what we are tracing
14645 pa = pte_to_pa(*ptep);
14646
14647 p = pmap_pgtrace_find_page(arm_trunc_page(pa));
14648 if (p == NULL) {
14649 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
14650 }
14651
14652 // find if pmap and va are also matching
14653 queue_head_t *mapq = &(p->maps);
14654 queue_head_t *mapwaste = &(p->map_waste);
14655 pmap_pgtrace_map_t *map;
14656
14657 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
14658 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
14659 found = true;
14660 break;
14661 }
14662 }
14663
14664 // if not found, search the map waste list since its entries are still valid
14665 if (!found) {
14666 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
14667 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
14668 found = true;
14669 break;
14670 }
14671 }
14672 }
14673
14674 if (!found) {
14675 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
14676 }
14677
14678 // Decode and run it on the clone map
14679 bzero(&res, sizeof(res));
14680 pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss), // instruction
14681 va, map->cva, // fault va and clone page vas
14682 ss, &res);
14683
14684 // write a log if in range
14685 vm_map_offset_t offset = va - map->ova;
14686 if (map->range.start <= offset && offset <= map->range.end) {
14687 pgtrace_write_log(res);
14688 }
14689
14690 PMAP_PGTRACE_UNLOCK(&ints);
14691
14692 // Return to next instruction
14693 add_saved_state_pc(ss, sizeof(uint32_t));
14694
14695 return KERN_SUCCESS;
14696 }
14697 #endif
14698
14699 /**
14700 * The minimum shared region nesting size is used by the VM to determine when to
14701 * break up large mappings to nested regions. The smallest size that these
14702 * mappings can be broken into is determined by the page table level at which
14703 * those regions are nested and by the size of the page tables.
14704 *
14705 * For instance, if a nested region is nesting at L2 for a process utilizing
14706 * 16KB page tables, then the minimum nesting size would be 32MB (size of an L2
14707 * block entry).
14708 *
14709 * @param pmap The target pmap, used to determine the block size based on
14710 * whether it's using 16KB or 4KB page tables.
14711 */
14712 uint64_t
14713 pmap_shared_region_size_min(__unused pmap_t pmap)
14714 {
14715 #if (__ARM_VMSA__ > 7)
14716 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
14717
14718 /**
14719 * We always nest the shared region at L2 (32MB for 16KB pages, 2MB for
14720 * 4KB pages). This means that a target pmap will contain L2 entries that
14721 * point to shared L3 page tables in the shared region pmap.
14722 */
14723 return pt_attr_twig_size(pt_attr);
14724
14725 #else
14726 return ARM_NESTING_SIZE_MIN;
14727 #endif
14728 }
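
#if 0
/*
 * Illustrative sketch (not compiled): on __ARM_VMSA__ > 7 configurations the
 * twig (L2) size is 32MB for 16KB-page pmaps and 2MB for 4KB-page pmaps, so
 * those are the values pmap_shared_region_size_min() returns for the two
 * hypothetical pmaps below.
 */
static void
pmap_shared_region_size_min_example(pmap_t pmap_16k, pmap_t pmap_4k)
{
	assert(pmap_shared_region_size_min(pmap_16k) == (32ULL << 20));
	assert(pmap_shared_region_size_min(pmap_4k) == (2ULL << 20));
}
#endif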
14729
14730 /**
14731 * The concept of a nesting size maximum was made to accommodate restrictions in
14732 * place for nesting regions on PowerPC. There are no restrictions on maximum nesting
14733 * sizes on x86/armv7/armv8, so this should get removed.
14734 *
14735 * TODO: <rdar://problem/65247502> Completely remove pmap_nesting_size_max()
14736 */
14737 uint64_t
14738 pmap_nesting_size_max(__unused pmap_t pmap)
14739 {
14740 return ARM_NESTING_SIZE_MAX;
14741 }
14742
14743 boolean_t
14744 pmap_enforces_execute_only(
14745 #if (__ARM_VMSA__ == 7)
14746 __unused
14747 #endif
14748 pmap_t pmap)
14749 {
14750 #if (__ARM_VMSA__ > 7)
14751 return pmap != kernel_pmap;
14752 #else
14753 return FALSE;
14754 #endif
14755 }
14756
14757 MARK_AS_PMAP_TEXT void
14758 pmap_set_vm_map_cs_enforced_internal(
14759 pmap_t pmap,
14760 bool new_value)
14761 {
14762 VALIDATE_PMAP(pmap);
14763 pmap->pmap_vm_map_cs_enforced = new_value;
14764 }
14765
14766 void
14767 pmap_set_vm_map_cs_enforced(
14768 pmap_t pmap,
14769 bool new_value)
14770 {
14771 #if XNU_MONITOR
14772 pmap_set_vm_map_cs_enforced_ppl(pmap, new_value);
14773 #else
14774 pmap_set_vm_map_cs_enforced_internal(pmap, new_value);
14775 #endif
14776 }
14777
14778 extern int cs_process_enforcement_enable;
14779 bool
14780 pmap_get_vm_map_cs_enforced(
14781 pmap_t pmap)
14782 {
14783 if (cs_process_enforcement_enable) {
14784 return true;
14785 }
14786 return pmap->pmap_vm_map_cs_enforced;
14787 }
14788
14789 MARK_AS_PMAP_TEXT void
14790 pmap_set_jit_entitled_internal(
14791 __unused pmap_t pmap)
14792 {
14793 return;
14794 }
14795
14796 void
14797 pmap_set_jit_entitled(
14798 pmap_t pmap)
14799 {
14800 #if XNU_MONITOR
14801 pmap_set_jit_entitled_ppl(pmap);
14802 #else
14803 pmap_set_jit_entitled_internal(pmap);
14804 #endif
14805 }
14806
14807 bool
14808 pmap_get_jit_entitled(
14809 __unused pmap_t pmap)
14810 {
14811 return false;
14812 }
14813
14814 MARK_AS_PMAP_TEXT static kern_return_t
14815 pmap_query_page_info_internal(
14816 pmap_t pmap,
14817 vm_map_offset_t va,
14818 int *disp_p)
14819 {
14820 pmap_paddr_t pa;
14821 int disp;
14822 int pai;
14823 pt_entry_t *pte;
14824 pv_entry_t **pv_h, *pve_p;
14825
14826 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
14827 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
14828 *disp_p = 0;
14829 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
14830 return KERN_INVALID_ARGUMENT;
14831 }
14832
14833 disp = 0;
14834
14835 VALIDATE_PMAP(pmap);
14836 pmap_lock_ro(pmap);
14837
14838 pte = pmap_pte(pmap, va);
14839 if (pte == PT_ENTRY_NULL) {
14840 goto done;
14841 }
14842
14843 pa = pte_to_pa(*((volatile pt_entry_t*)pte));
14844 if (pa == 0) {
14845 if (ARM_PTE_IS_COMPRESSED(*pte, pte)) {
14846 disp |= PMAP_QUERY_PAGE_COMPRESSED;
14847 if (*pte & ARM_PTE_COMPRESSED_ALT) {
14848 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
14849 }
14850 }
14851 } else {
14852 disp |= PMAP_QUERY_PAGE_PRESENT;
14853 pai = (int) pa_index(pa);
14854 if (!pa_valid(pa)) {
14855 goto done;
14856 }
14857 LOCK_PVH(pai);
14858 pv_h = pai_to_pvh(pai);
14859 pve_p = PV_ENTRY_NULL;
14860 if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
14861 pve_p = pvh_list(pv_h);
14862 while (pve_p != PV_ENTRY_NULL &&
14863 pve_get_ptep(pve_p) != pte) {
14864 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
14865 }
14866 }
14867 if (IS_ALTACCT_PAGE(pai, pve_p)) {
14868 disp |= PMAP_QUERY_PAGE_ALTACCT;
14869 } else if (IS_REUSABLE_PAGE(pai)) {
14870 disp |= PMAP_QUERY_PAGE_REUSABLE;
14871 } else if (IS_INTERNAL_PAGE(pai)) {
14872 disp |= PMAP_QUERY_PAGE_INTERNAL;
14873 }
14874 UNLOCK_PVH(pai);
14875 }
14876
14877 done:
14878 pmap_unlock_ro(pmap);
14879 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
14880 *disp_p = disp;
14881 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
14882 return KERN_SUCCESS;
14883 }
14884
14885 kern_return_t
14886 pmap_query_page_info(
14887 pmap_t pmap,
14888 vm_map_offset_t va,
14889 int *disp_p)
14890 {
14891 #if XNU_MONITOR
14892 return pmap_query_page_info_ppl(pmap, va, disp_p);
14893 #else
14894 return pmap_query_page_info_internal(pmap, va, disp_p);
14895 #endif
14896 }
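
#if 0
/*
 * Illustrative sketch (not compiled): callers receive a disposition bitmask,
 * e.g. a resident internal page reports
 * (PMAP_QUERY_PAGE_PRESENT | PMAP_QUERY_PAGE_INTERNAL).  The hypothetical
 * helper below only checks for residency.
 */
static bool
pmap_page_is_resident_example(pmap_t pmap, vm_map_offset_t va)
{
	int disp = 0;

	if (pmap_query_page_info(pmap, va, &disp) != KERN_SUCCESS) {
		return false;
	}
	return (disp & PMAP_QUERY_PAGE_PRESENT) != 0;
}
#endif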
14897
14898 MARK_AS_PMAP_TEXT kern_return_t
14899 pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
14900 {
14901
14902 return KERN_SUCCESS;
14903 }
14904
14905 kern_return_t
14906 pmap_return(boolean_t do_panic, boolean_t do_recurse)
14907 {
14908 #if XNU_MONITOR
14909 return pmap_return_ppl(do_panic, do_recurse);
14910 #else
14911 return pmap_return_internal(do_panic, do_recurse);
14912 #endif
14913 }
14914
14915
14916
14917
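/*
 * Trust-cache loading and PPL page management are not supported in this
 * configuration; the entry points below are stubs that report failure or do
 * nothing.
 */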
14918 kern_return_t
14919 pmap_load_legacy_trust_cache(struct pmap_legacy_trust_cache __unused *trust_cache,
14920 const vm_size_t __unused trust_cache_len)
14921 {
14922 // Unsupported
14923 return KERN_NOT_SUPPORTED;
14924 }
14925
14926 pmap_tc_ret_t
14927 pmap_load_image4_trust_cache(struct pmap_image4_trust_cache __unused *trust_cache,
14928 const vm_size_t __unused trust_cache_len,
14929 uint8_t const * __unused img4_manifest,
14930 const vm_size_t __unused img4_manifest_buffer_len,
14931 const vm_size_t __unused img4_manifest_actual_len,
14932 bool __unused dry_run)
14933 {
14934 // Unsupported
14935 return PMAP_TC_UNKNOWN_FORMAT;
14936 }
14937
14938 bool
14939 pmap_in_ppl(void)
14940 {
14941 // Unsupported
14942 return false;
14943 }
14944
14945 void
14946 pmap_lockdown_image4_slab(__unused vm_offset_t slab, __unused vm_size_t slab_len, __unused uint64_t flags)
14947 {
14948 // Unsupported
14949 }
14950
14951 void *
14952 pmap_claim_reserved_ppl_page(void)
14953 {
14954 // Unsupported
14955 return NULL;
14956 }
14957
14958 void
14959 pmap_free_reserved_ppl_page(void __unused *kva)
14960 {
14961 // Unsupported
14962 }
14963
14964
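/**
 * Check whether a trust cache with the given UUID has already been loaded.
 *
 * Walks the list of loaded image4 trust caches (and, on bridgeOS, the legacy
 * trust caches) under pmap_loaded_trust_caches_lock.
 *
 * @return True if a loaded trust cache matches the UUID.
 */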
14965 MARK_AS_PMAP_TEXT static bool
14966 pmap_is_trust_cache_loaded_internal(const uuid_t uuid)
14967 {
14968 bool found = false;
14969
14970 pmap_simple_lock(&pmap_loaded_trust_caches_lock);
14971
14972 for (struct pmap_image4_trust_cache const *c = pmap_image4_trust_caches; c != NULL; c = c->next) {
14973 if (bcmp(uuid, c->module->uuid, sizeof(uuid_t)) == 0) {
14974 found = true;
14975 goto done;
14976 }
14977 }
14978
14979 #ifdef PLATFORM_BridgeOS
14980 for (struct pmap_legacy_trust_cache const *c = pmap_legacy_trust_caches; c != NULL; c = c->next) {
14981 if (bcmp(uuid, c->uuid, sizeof(uuid_t)) == 0) {
14982 found = true;
14983 goto done;
14984 }
14985 }
14986 #endif
14987
14988 done:
14989 pmap_simple_unlock(&pmap_loaded_trust_caches_lock);
14990 return found;
14991 }
14992
14993 bool
14994 pmap_is_trust_cache_loaded(const uuid_t uuid)
14995 {
14996 #if XNU_MONITOR
14997 return pmap_is_trust_cache_loaded_ppl(uuid);
14998 #else
14999 return pmap_is_trust_cache_loaded_internal(uuid);
15000 #endif
15001 }
15002
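/**
 * Search every loaded trust cache for the given code directory hash.
 *
 * @return True if the cdhash is present in any loaded image4 trust cache
 *         (or, on bridgeOS, any legacy trust cache).
 */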
15003 MARK_AS_PMAP_TEXT static bool
15004 pmap_lookup_in_loaded_trust_caches_internal(const uint8_t cdhash[CS_CDHASH_LEN])
15005 {
15006 struct pmap_image4_trust_cache const *cache = NULL;
15007 #ifdef PLATFORM_BridgeOS
15008 struct pmap_legacy_trust_cache const *legacy = NULL;
15009 #endif
15010
15011 pmap_simple_lock(&pmap_loaded_trust_caches_lock);
15012
15013 for (cache = pmap_image4_trust_caches; cache != NULL; cache = cache->next) {
15014 uint8_t hash_type = 0, flags = 0;
15015
15016 if (lookup_in_trust_cache_module(cache->module, cdhash, &hash_type, &flags)) {
15017 goto done;
15018 }
15019 }
15020
15021 #ifdef PLATFORM_BridgeOS
15022 for (legacy = pmap_legacy_trust_caches; legacy != NULL; legacy = legacy->next) {
15023 for (uint32_t i = 0; i < legacy->num_hashes; i++) {
15024 if (bcmp(legacy->hashes[i], cdhash, CS_CDHASH_LEN) == 0) {
15025 goto done;
15026 }
15027 }
15028 }
15029 #endif
15030
15031 done:
15032 pmap_simple_unlock(&pmap_loaded_trust_caches_lock);
15033
15034 if (cache != NULL) {
15035 return true;
15036 #ifdef PLATFORM_BridgeOS
15037 } else if (legacy != NULL) {
15038 return true;
15039 #endif
15040 }
15041
15042 return false;
15043 }
15044
15045 bool
15046 pmap_lookup_in_loaded_trust_caches(const uint8_t cdhash[CS_CDHASH_LEN])
15047 {
15048 #if XNU_MONITOR
15049 return pmap_lookup_in_loaded_trust_caches_ppl(cdhash);
15050 #else
15051 return pmap_lookup_in_loaded_trust_caches_internal(cdhash);
15052 #endif
15053 }
15054
15055 MARK_AS_PMAP_TEXT static uint32_t
15056 pmap_lookup_in_static_trust_cache_internal(const uint8_t cdhash[CS_CDHASH_LEN])
15057 {
15058 // Awkward indirection, because the PPL macros currently force their functions to be static.
15059 return lookup_in_static_trust_cache(cdhash);
15060 }
15061
15062 uint32_t
15063 pmap_lookup_in_static_trust_cache(const uint8_t cdhash[CS_CDHASH_LEN])
15064 {
15065 #if XNU_MONITOR
15066 return pmap_lookup_in_static_trust_cache_ppl(cdhash);
15067 #else
15068 return pmap_lookup_in_static_trust_cache_internal(cdhash);
15069 #endif
15070 }
15071
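/**
 * Record the compilation service's cdhash, under
 * pmap_compilation_service_cdhash_lock, so that it can later be matched by
 * pmap_match_compilation_service_cdhash().
 */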
15072 MARK_AS_PMAP_TEXT static void
15073 pmap_set_compilation_service_cdhash_internal(const uint8_t cdhash[CS_CDHASH_LEN])
15074 {
15075 pmap_simple_lock(&pmap_compilation_service_cdhash_lock);
15076 memcpy(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN);
15077 pmap_simple_unlock(&pmap_compilation_service_cdhash_lock);
15078
15079 pmap_cs_log("Added Compilation Service CDHash through the PPL: 0x%02X 0x%02X 0x%02X 0x%02X", cdhash[0], cdhash[1], cdhash[2], cdhash[3]);

15080 }
15081
15082 MARK_AS_PMAP_TEXT static bool
15083 pmap_match_compilation_service_cdhash_internal(const uint8_t cdhash[CS_CDHASH_LEN])
15084 {
15085 bool match = false;
15086
15087 pmap_simple_lock(&pmap_compilation_service_cdhash_lock);
15088 if (bcmp(pmap_compilation_service_cdhash, cdhash, CS_CDHASH_LEN) == 0) {
15089 match = true;
15090 }
15091 pmap_simple_unlock(&pmap_compilation_service_cdhash_lock);
15092
15093 if (match) {
15094 pmap_cs_log("Matched Compilation Service CDHash through the PPL");
15095 }
15096
15097 return match;
15098 }
15099
15100 void
15101 pmap_set_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
15102 {
15103 #if XNU_MONITOR
15104 pmap_set_compilation_service_cdhash_ppl(cdhash);
15105 #else
15106 pmap_set_compilation_service_cdhash_internal(cdhash);
15107 #endif
15108 }
15109
15110 bool
15111 pmap_match_compilation_service_cdhash(const uint8_t cdhash[CS_CDHASH_LEN])
15112 {
15113 #if XNU_MONITOR
15114 return pmap_match_compilation_service_cdhash_ppl(cdhash);
15115 #else
15116 return pmap_match_compilation_service_cdhash_internal(cdhash);
15117 #endif
15118 }
15119
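/**
 * Suspend or resume physical-footprint accounting for the current thread.
 *
 * Only has an effect on DEVELOPMENT and DEBUG kernels; elsewhere the
 * arguments are ignored. While suspending, the map's pmap also records that
 * a suspension occurred (footprint_was_suspended).
 */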
15120 MARK_AS_PMAP_TEXT static void
15121 pmap_footprint_suspend_internal(
15122 vm_map_t map,
15123 boolean_t suspend)
15124 {
15125 #if DEVELOPMENT || DEBUG
15126 if (suspend) {
15127 current_thread()->pmap_footprint_suspended = TRUE;
15128 map->pmap->footprint_was_suspended = TRUE;
15129 } else {
15130 current_thread()->pmap_footprint_suspended = FALSE;
15131 }
15132 #else /* DEVELOPMENT || DEBUG */
15133 (void) map;
15134 (void) suspend;
15135 #endif /* DEVELOPMENT || DEBUG */
15136 }
15137
15138 void
15139 pmap_footprint_suspend(
15140 vm_map_t map,
15141 boolean_t suspend)
15142 {
15143 #if XNU_MONITOR
15144 pmap_footprint_suspend_ppl(map, suspend);
15145 #else
15146 pmap_footprint_suspend_internal(map, suspend);
15147 #endif
15148 }
15149
15150 #if defined(__arm64__) && (DEVELOPMENT || DEBUG)
15151
15152 struct page_table_dump_header {
15153 uint64_t pa;
15154 uint64_t num_entries;
15155 uint64_t start_va;
15156 uint64_t end_va;
15157 };
15158
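/*
 * Recursively copy the page table hierarchy into the supplied buffer. Each
 * table whose level is selected by level_mask is preceded by a
 * page_table_dump_header describing its physical address, entry count, and
 * the VA range it translates. Returns KERN_INSUFFICIENT_BUFFER_SIZE if the
 * buffer cannot hold the next table plus its header.
 */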
15159 static kern_return_t
15160 pmap_dump_page_tables_recurse(pmap_t pmap,
15161 const tt_entry_t *ttp,
15162 unsigned int cur_level,
15163 unsigned int level_mask,
15164 uint64_t start_va,
15165 void *buf_start,
15166 void *buf_end,
15167 size_t *bytes_copied)
15168 {
15169 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
15170 uint64_t num_entries = pt_attr_page_size(pt_attr) / sizeof(*ttp);
15171
15172 uint64_t size = pt_attr->pta_level_info[cur_level].size;
15173 uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
15174 uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
15175 uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
15176
15177 void *bufp = (uint8_t*)buf_start + *bytes_copied;
15178
15179 if (cur_level == pt_attr_root_level(pt_attr)) {
15180 num_entries = pmap_root_alloc_size(pmap) / sizeof(tt_entry_t);
15181 }
15182
15183 uint64_t tt_size = num_entries * sizeof(tt_entry_t);
15184 const tt_entry_t *tt_end = &ttp[num_entries];
15185
15186 if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
15187 return KERN_INSUFFICIENT_BUFFER_SIZE;
15188 }
15189
15190 if (level_mask & (1U << cur_level)) {
15191 struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
15192 header->pa = ml_static_vtop((vm_offset_t)ttp);
15193 header->num_entries = num_entries;
15194 header->start_va = start_va;
15195 header->end_va = start_va + (num_entries * size);
15196
15197 bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
15198 *bytes_copied = *bytes_copied + sizeof(*header) + tt_size;
15199 }
15200 uint64_t current_va = start_va;
15201
15202 for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
15203 tt_entry_t tte = *ttep;
15204
15205 if (!(tte & valid_mask)) {
15206 continue;
15207 }
15208
15209 if ((tte & type_mask) == type_block) {
15210 continue;
15211 } else {
15212 if (cur_level >= pt_attr_leaf_level(pt_attr)) {
15213 panic("%s: corrupt entry %#llx at %p, "
15214 "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
15215 __FUNCTION__, tte, ttep,
15216 ttp, cur_level, bufp, buf_end);
15217 }
15218
15219 const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
15220
15221 kern_return_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1,
15222 level_mask, current_va, buf_start, buf_end, bytes_copied);
15223
15224 if (recurse_result != KERN_SUCCESS) {
15225 return recurse_result;
15226 }
15227 }
15228 }
15229
15230 return KERN_SUCCESS;
15231 }
15232
15233 kern_return_t
15234 pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end, unsigned int level_mask, size_t *bytes_copied)
15235 {
15236 if (not_in_kdp) {
15237 panic("pmap_dump_page_tables must only be called from kernel debugger context");
15238 }
15239 return pmap_dump_page_tables_recurse(pmap, pmap->tte, pt_attr_root_level(pmap_get_pt_attr(pmap)),
15240 level_mask, pmap->min, bufp, buf_end, bytes_copied);
15241 }
15242
15243 #else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
15244
15245 kern_return_t
15246 pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused,
15247 unsigned int level_mask __unused, size_t *bytes_copied __unused)
15248 {
15249 return KERN_NOT_SUPPORTED;
15250 }
15251 #endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
15252
15253
15254 #ifdef CONFIG_XNUPOST
15255 #ifdef __arm64__
15256 static volatile bool pmap_test_took_fault = false;
15257
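/*
 * Expected-fault handler installed around the test accesses below: if a data
 * abort with a level-3 permission or access-flag fault is seen, note that a
 * fault occurred and step the PC past the faulting instruction.
 */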
15258 static bool
15259 pmap_test_fault_handler(arm_saved_state_t * state)
15260 {
15261 bool retval = false;
15262 uint32_t esr = get_saved_state_esr(state);
15263 esr_exception_class_t class = ESR_EC(esr);
15264 fault_status_t fsc = ISS_IA_FSC(ESR_ISS(esr));
15265
15266 if ((class == ESR_EC_DABORT_EL1) &&
15267 ((fsc == FSC_PERMISSION_FAULT_L3) || (fsc == FSC_ACCESS_FLAG_FAULT_L3))) {
15268 pmap_test_took_fault = true;
15269 /* return to the instruction immediately after the faulting access */
15270 set_saved_state_pc(state, get_saved_state_pc(state) + 4);
15271 retval = true;
15272 }
15273
15274 return retval;
15275 }
15276
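/*
 * Perform a single read or write at va while temporarily switched to the
 * test pmap (with interrupts, preemption, and PAN disabled), and return
 * whether the access faulted exactly when it was expected to.
 */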
15277 static bool
15278 pmap_test_access(pmap_t pmap, vm_map_address_t va, bool should_fault, bool is_write)
15279 {
15280 /*
15281 * We're switching pmaps without using the normal thread mechanism;
15282 * disable interrupts and preemption to avoid any unexpected memory
15283 * accesses.
15284 */
15285 uint64_t old_int_state = pmap_interrupts_disable();
15286 pmap_t old_pmap = current_pmap();
15287 mp_disable_preemption();
15288 pmap_switch(pmap);
15289
15290 pmap_test_took_fault = false;
15291
15292 /* Disable PAN; pmap shouldn't be the kernel pmap. */
15293 #if __ARM_PAN_AVAILABLE__
15294 __builtin_arm_wsr("pan", 0);
15295 #endif /* __ARM_PAN_AVAILABLE__ */
15296 ml_expect_fault_begin(pmap_test_fault_handler, va);
15297
15298 if (is_write) {
15299 *((volatile uint64_t*)(va)) = 0xdec0de;
15300 } else {
15301 volatile uint64_t tmp = *((volatile uint64_t*)(va));
15302 (void)tmp;
15303 }
15304
15305 /* Save the fault bool, and undo the gross stuff we did. */
15306 bool took_fault = pmap_test_took_fault;
15307 ml_expect_fault_end();
15308 #if __ARM_PAN_AVAILABLE__
15309 __builtin_arm_wsr("pan", 1);
15310 #endif /* __ARM_PAN_AVAILABLE__ */
15311
15312 pmap_switch(old_pmap);
15313 mp_enable_preemption();
15314 pmap_interrupts_restore(old_int_state);
15315 bool retval = (took_fault == should_fault);
15316 return retval;
15317 }
15318
15319 static bool
15320 pmap_test_read(pmap_t pmap, vm_map_address_t va, bool should_fault)
15321 {
15322 bool retval = pmap_test_access(pmap, va, should_fault, false);
15323
15324 if (!retval) {
15325 T_FAIL("%s: %s, "
15326 "pmap=%p, va=%p, should_fault=%u",
15327 __func__, should_fault ? "did not fault" : "faulted",
15328 pmap, (void*)va, (unsigned)should_fault);
15329 }
15330
15331 return retval;
15332 }
15333
15334 static bool
15335 pmap_test_write(pmap_t pmap, vm_map_address_t va, bool should_fault)
15336 {
15337 bool retval = pmap_test_access(pmap, va, should_fault, true);
15338
15339 if (!retval) {
15340 T_FAIL("%s: %s, "
15341 "pmap=%p, va=%p, should_fault=%u",
15342 __func__, should_fault ? "did not fault" : "faulted",
15343 pmap, (void*)va, (unsigned)should_fault);
15344 }
15345
15346 return retval;
15347 }
15348
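/*
 * Verify that, of VM_MEM_REFERENCED and VM_MEM_MODIFIED, exactly the bits in
 * should_be_set are set for pa; any other ref/mod bit being set is a failure.
 */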
15349 static bool
15350 pmap_test_check_refmod(pmap_paddr_t pa, unsigned int should_be_set)
15351 {
15352 unsigned int should_be_clear = (~should_be_set) & (VM_MEM_REFERENCED | VM_MEM_MODIFIED);
15353 unsigned int bits = pmap_get_refmod((ppnum_t)atop(pa));
15354
15355 bool retval = (((bits & should_be_set) == should_be_set) && ((bits & should_be_clear) == 0));
15356
15357 if (!retval) {
15358 T_FAIL("%s: bits=%u, "
15359 "pa=%p, should_be_set=%u",
15360 __func__, bits,
15361 (void*)pa, should_be_set);
15362 }
15363
15364 return retval;
15365 }
15366
15367 static __attribute__((noinline)) bool
15368 pmap_test_read_write(pmap_t pmap, vm_map_address_t va, bool allow_read, bool allow_write)
15369 {
15370 bool retval = (pmap_test_read(pmap, va, !allow_read) | pmap_test_write(pmap, va, !allow_write));
15371 return retval;
15372 }
15373
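/*
 * Core pmap unit test: create a pmap with the given flags, then exercise
 * mapping creation and removal, protection changes, the ref/mod state
 * machine, arm_fast_fault(), pmap_page_protect(), and pmap_disconnect()
 * against a wired and an unwired test page.
 */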
15374 static int
15375 pmap_test_test_config(unsigned int flags)
15376 {
15377 T_LOG("running pmap_test_test_config flags=0x%X", flags);
15378 unsigned int map_count = 0;
15379 unsigned long page_ratio = 0;
15380 pmap_t pmap = pmap_create_options(NULL, 0, flags);
15381
15382 if (!pmap) {
15383 panic("Failed to allocate pmap");
15384 }
15385
15386 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
15387 uintptr_t native_page_size = pt_attr_page_size(native_pt_attr);
15388 uintptr_t pmap_page_size = pt_attr_page_size(pt_attr);
15389 uintptr_t pmap_twig_size = pt_attr_twig_size(pt_attr);
15390
15391 if (pmap_page_size <= native_page_size) {
15392 page_ratio = native_page_size / pmap_page_size;
15393 } else {
15394 /*
15395 * A page_ratio of less than 1 (pmap page size larger than the native
15396 * page size) is not currently supported by the pmap layer; panic.
15397 */
15398 panic("%s: page_ratio < 1, native_page_size=%lu, pmap_page_size=%lu, "
15399 "flags=%u",
15400 __func__, native_page_size, pmap_page_size,
15401 flags);
15402 }
15403
15404 if (PAGE_RATIO > 1) {
15405 /*
15406 * The kernel is deliberately pretending to have 16KB pages.
15407 * The pmap layer has code that supports this, so pretend the
15408 * page size is larger than it is.
15409 */
15410 pmap_page_size = PAGE_SIZE;
15411 native_page_size = PAGE_SIZE;
15412 }
15413
15414 /*
15415 * Get two pages from the VM; one to be mapped wired, and one to be
15416 * mapped nonwired.
15417 */
15418 vm_page_t unwired_vm_page = vm_page_grab();
15419 vm_page_t wired_vm_page = vm_page_grab();
15420
15421 if ((unwired_vm_page == VM_PAGE_NULL) || (wired_vm_page == VM_PAGE_NULL)) {
15422 panic("Failed to grab VM pages");
15423 }
15424
15425 ppnum_t pn = VM_PAGE_GET_PHYS_PAGE(unwired_vm_page);
15426 ppnum_t wired_pn = VM_PAGE_GET_PHYS_PAGE(wired_vm_page);
15427
15428 pmap_paddr_t pa = ptoa(pn);
15429 pmap_paddr_t wired_pa = ptoa(wired_pn);
15430
15431 /*
15432 * We'll start mappings at the second twig TT. This keeps us from only
15433 * using the first entry in each TT, which would trivially be address
15434 * 0; one of the things we will need to test is retrieving the VA for
15435 * a given PTE.
15436 */
15437 vm_map_address_t va_base = pmap_twig_size;
15438 vm_map_address_t wired_va_base = ((2 * pmap_twig_size) - pmap_page_size);
15439
15440 if (wired_va_base < (va_base + (page_ratio * pmap_page_size))) {
15441 /*
15442 * Not exactly a functional failure, but this test relies on
15443 * there being a spare PTE slot we can use to pin the TT.
15444 */
15445 panic("Cannot pin translation table");
15446 }
15447
15448 /*
15449 * Create the wired mapping; this will prevent the pmap layer from
15450 * reclaiming our test TTs, which would interfere with this test
15451 * ("interfere" -> "make it panic").
15452 */
15453 pmap_enter_addr(pmap, wired_va_base, wired_pa, VM_PROT_READ, VM_PROT_READ, 0, true);
15454
15455 /*
15456 * Create read-only mappings of the nonwired page; if the pmap does
15457 * not use the same page size as the kernel, create multiple mappings
15458 * so that the kernel page is fully mapped.
15459 */
15460 for (map_count = 0; map_count < page_ratio; map_count++) {
15461 pmap_enter_addr(pmap, va_base + (pmap_page_size * map_count), pa + (pmap_page_size * (map_count)), VM_PROT_READ, VM_PROT_READ, 0, false);
15462 }
15463
15464 /* Validate that all the PTEs have the expected PA and VA. */
15465 for (map_count = 0; map_count < page_ratio; map_count++) {
15466 pt_entry_t * ptep = pmap_pte(pmap, va_base + (pmap_page_size * map_count));
15467
15468 if (pte_to_pa(*ptep) != (pa + (pmap_page_size * map_count))) {
15469 T_FAIL("Unexpected pa=%p, expected %p, map_count=%u",
15470 (void*)pte_to_pa(*ptep), (void*)(pa + (pmap_page_size * map_count)), map_count);
15471 }
15472
15473 if (ptep_get_va(ptep) != (va_base + (pmap_page_size * map_count))) {
15474 T_FAIL("Unexpected va=%p, expected %p, map_count=%u",
15475 (void*)ptep_get_va(ptep), (void*)(va_base + (pmap_page_size * map_count)), map_count);
15476 }
15477 }
15478
15479 T_LOG("Validate that reads to our mapping do not fault.");
15480 pmap_test_read(pmap, va_base, false);
15481
15482 T_LOG("Validate that writes to our mapping fault.");
15483 pmap_test_write(pmap, va_base, true);
15484
15485 T_LOG("Make the first mapping writable.");
15486 pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
15487
15488 T_LOG("Validate that writes to our mapping do not fault.");
15489 pmap_test_write(pmap, va_base, false);
15490
15491
15492 T_LOG("Make the first mapping XO.");
15493 pmap_enter_addr(pmap, va_base, pa, VM_PROT_EXECUTE, VM_PROT_EXECUTE, 0, false);
15494
15495 T_LOG("Validate that reads to our mapping do not fault.");
15496 pmap_test_read(pmap, va_base, false);
15497
15498 T_LOG("Validate that writes to our mapping fault.");
15499 pmap_test_write(pmap, va_base, true);
15500
15501
15502 /*
15503 * For page ratios of greater than 1: validate that writes to the other
15504 * mappings still fault. Remove the mappings afterwards (we're done
15505 * with page ratio testing).
15506 */
15507 for (map_count = 1; map_count < page_ratio; map_count++) {
15508 pmap_test_write(pmap, va_base + (pmap_page_size * map_count), true);
15509 pmap_remove(pmap, va_base + (pmap_page_size * map_count), va_base + (pmap_page_size * map_count) + pmap_page_size);
15510 }
15511
15512 T_LOG("Mark the page unreferenced and unmodified.");
15513 pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
15514 pmap_test_check_refmod(pa, 0);
15515
15516 /*
15517 * Begin testing the ref/mod state machine. Re-enter the mapping with
15518 * different protection/fault_type settings, and confirm that the
15519 * ref/mod state matches our expectations at each step.
15520 */
15521 T_LOG("!ref/!mod: read, no fault. Expect ref/!mod");
15522 pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_NONE, 0, false);
15523 pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
15524
15525 T_LOG("!ref/!mod: read, read fault. Expect ref/!mod");
15526 pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
15527 pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
15528 pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
15529
15530 T_LOG("!ref/!mod: rw, read fault. Expect ref/!mod");
15531 pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
15532 pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, 0, false);
15533 pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
15534
15535 T_LOG("ref/!mod: rw, read fault. Expect ref/!mod");
15536 pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ, 0, false);
15537 pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
15538
15539 T_LOG("!ref/!mod: rw, rw fault. Expect ref/mod");
15540 pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
15541 pmap_enter_addr(pmap, va_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
15542 pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
15543
15544 /*
15545 * Shared memory testing; we'll have two mappings; one read-only,
15546 * one read-write.
15547 */
15548 vm_map_address_t rw_base = va_base;
15549 vm_map_address_t ro_base = va_base + pmap_page_size;
15550
15551 pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
15552 pmap_enter_addr(pmap, ro_base, pa, VM_PROT_READ, VM_PROT_READ, 0, false);
15553
15554 /*
15555 * Test that we take faults as expected for unreferenced/unmodified
15556 * pages. Also test the arm_fast_fault interface, to ensure that
15557 * mapping permissions change as expected.
15558 */
15559 T_LOG("!ref/!mod: expect no access");
15560 pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
15561 pmap_test_read_write(pmap, ro_base, false, false);
15562 pmap_test_read_write(pmap, rw_base, false, false);
15563
15564 T_LOG("Read fault; expect !ref/!mod -> ref/!mod, read access");
15565 arm_fast_fault(pmap, rw_base, VM_PROT_READ, false, false);
15566 pmap_test_check_refmod(pa, VM_MEM_REFERENCED);
15567 pmap_test_read_write(pmap, ro_base, true, false);
15568 pmap_test_read_write(pmap, rw_base, true, false);
15569
15570 T_LOG("Write fault; expect ref/!mod -> ref/mod, read and write access");
15571 arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
15572 pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
15573 pmap_test_read_write(pmap, ro_base, true, false);
15574 pmap_test_read_write(pmap, rw_base, true, true);
15575
15576 T_LOG("Write fault; expect !ref/!mod -> ref/mod, read and write access");
15577 pmap_clear_refmod(pn, VM_MEM_MODIFIED | VM_MEM_REFERENCED);
15578 arm_fast_fault(pmap, rw_base, VM_PROT_READ | VM_PROT_WRITE, false, false);
15579 pmap_test_check_refmod(pa, VM_MEM_REFERENCED | VM_MEM_MODIFIED);
15580 pmap_test_read_write(pmap, ro_base, true, false);
15581 pmap_test_read_write(pmap, rw_base, true, true);
15582
15583 T_LOG("RW protect both mappings; should not change protections.");
15584 pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
15585 pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ | VM_PROT_WRITE);
15586 pmap_test_read_write(pmap, ro_base, true, false);
15587 pmap_test_read_write(pmap, rw_base, true, true);
15588
15589 T_LOG("Read protect both mappings; RW mapping should become RO.");
15590 pmap_protect(pmap, ro_base, ro_base + pmap_page_size, VM_PROT_READ);
15591 pmap_protect(pmap, rw_base, rw_base + pmap_page_size, VM_PROT_READ);
15592 pmap_test_read_write(pmap, ro_base, true, false);
15593 pmap_test_read_write(pmap, rw_base, true, false);
15594
15595 T_LOG("RW protect the page; mappings should not change protections.");
15596 pmap_enter_addr(pmap, rw_base, pa, VM_PROT_READ | VM_PROT_WRITE, VM_PROT_READ | VM_PROT_WRITE, 0, false);
15597 pmap_page_protect(pn, VM_PROT_ALL);
15598 pmap_test_read_write(pmap, ro_base, true, false);
15599 pmap_test_read_write(pmap, rw_base, true, true);
15600
15601 T_LOG("Read protect the page; RW mapping should become RO.");
15602 pmap_page_protect(pn, VM_PROT_READ);
15603 pmap_test_read_write(pmap, ro_base, true, false);
15604 pmap_test_read_write(pmap, rw_base, true, false);
15605
15606 T_LOG("Validate that disconnect removes all known mappings of the page.");
15607 pmap_disconnect(pn);
15608 if (!pmap_verify_free(pn)) {
15609 T_FAIL("Page still has mappings");
15610 }
15611
15612 T_LOG("Remove the wired mapping, so we can tear down the test map.");
15613 pmap_remove(pmap, wired_va_base, wired_va_base + pmap_page_size);
15614 pmap_destroy(pmap);
15615
15616 T_LOG("Release the pages back to the VM.");
15617 vm_page_lock_queues();
15618 vm_page_free(unwired_vm_page);
15619 vm_page_free(wired_vm_page);
15620 vm_page_unlock_queues();
15621
15622 T_LOG("Testing successful!");
15623 return 0;
15624 }
15625 #endif /* __arm64__ */
15626
15627 kern_return_t
15628 pmap_test(void)
15629 {
15630 T_LOG("Starting pmap_tests");
15631 #ifdef __arm64__
15632 int flags = 0;
15633 flags |= PMAP_CREATE_64BIT;
15634
15635 #if __ARM_MIXED_PAGE_SIZE__
15636 T_LOG("Testing VM_PAGE_SIZE_4KB");
15637 pmap_test_test_config(flags | PMAP_CREATE_FORCE_4K_PAGES);
15638 T_LOG("Testing VM_PAGE_SIZE_16KB");
15639 pmap_test_test_config(flags);
15640 #else /* __ARM_MIXED_PAGE_SIZE__ */
15641 pmap_test_test_config(flags);
15642 #endif /* __ARM_MIXED_PAGE_SIZE__ */
15643
15644 #endif /* __arm64__ */
15645 T_PASS("completed pmap_test successfully");
15646 return KERN_SUCCESS;
15647 }
15648 #endif /* CONFIG_XNUPOST */
15649
15650 /*
15651 * The following function should never make it to RELEASE code, since
15652 * it provides a way to get the PPL to modify text pages.
15653 */
15654 #if DEVELOPMENT || DEBUG
15655
15656 #define ARM_UNDEFINED_INSN 0xe7f000f0
15657 #define ARM_UNDEFINED_INSN_THUMB 0xde00
15658
15659 /**
15660 * Forcibly overwrite executable text with an illegal instruction.
15661 *
15662 * @note Only used for xnu unit testing.
15663 *
15664 * @param pa The physical address to corrupt.
15665 *
15666 * @return KERN_SUCCESS on success.
15667 */
15668 kern_return_t
15669 pmap_test_text_corruption(pmap_paddr_t pa)
15670 {
15671 #if XNU_MONITOR
15672 return pmap_test_text_corruption_ppl(pa);
15673 #else /* XNU_MONITOR */
15674 return pmap_test_text_corruption_internal(pa);
15675 #endif /* XNU_MONITOR */
15676 }
15677
15678 MARK_AS_PMAP_TEXT kern_return_t
15679 pmap_test_text_corruption_internal(pmap_paddr_t pa)
15680 {
15681 vm_offset_t va = phystokv(pa);
15682 unsigned int pai = pa_index(pa);
15683
15684 assert(pa_valid(pa));
15685
15686 LOCK_PVH(pai);
15687
15688 pv_entry_t **pv_h = pai_to_pvh(pai);
15689 assert(!pvh_test_type(pv_h, PVH_TYPE_NULL));
15690 #if defined(PVH_FLAG_EXEC)
15691 const bool need_ap_twiddle = pvh_get_flags(pv_h) & PVH_FLAG_EXEC;
15692
15693 if (need_ap_twiddle) {
15694 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
15695 }
15696 #endif /* defined(PVH_FLAG_EXEC) */
15697
15698 /*
15699 * The low bit in an instruction address indicates a THUMB instruction
15700 */
15701 if (va & 1) {
15702 va &= ~(vm_offset_t)1;
15703 *(uint16_t *)va = ARM_UNDEFINED_INSN_THUMB;
15704 } else {
15705 *(uint32_t *)va = ARM_UNDEFINED_INSN;
15706 }
15707
15708 #if defined(PVH_FLAG_EXEC)
15709 if (need_ap_twiddle) {
15710 pmap_set_ptov_ap(pai, AP_RONA, FALSE);
15711 }
15712 #endif /* defined(PVH_FLAG_EXEC) */
15713
15714 InvalidatePoU_IcacheRegion(va, sizeof(uint32_t));
15715
15716 UNLOCK_PVH(pai);
15717
15718 return KERN_SUCCESS;
15719 }
15720
15721 #endif /* DEVELOPMENT || DEBUG */