1 /*
2 * Copyright (c) 2011-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <string.h>
29 #include <mach_assert.h>
30 #include <mach_ldebug.h>
31
32 #include <mach/shared_region.h>
33 #include <mach/vm_param.h>
34 #include <mach/vm_prot.h>
35 #include <mach/vm_map.h>
36 #include <mach/machine/vm_param.h>
37 #include <mach/machine/vm_types.h>
38
39 #include <mach/boolean.h>
40 #include <kern/bits.h>
41 #include <kern/thread.h>
42 #include <kern/sched.h>
43 #include <kern/zalloc.h>
44 #include <kern/kalloc.h>
45 #include <kern/ledger.h>
46 #include <kern/spl.h>
47 #include <kern/trustcache.h>
48
49 #include <os/overflow.h>
50
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_protos.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_pageout.h>
58 #include <vm/cpm.h>
59
60 #include <libkern/img4/interface.h>
61 #include <libkern/section_keywords.h>
62
63 #include <machine/atomic.h>
64 #include <machine/thread.h>
65 #include <machine/lowglobals.h>
66
67 #include <arm/caches_internal.h>
68 #include <arm/cpu_data.h>
69 #include <arm/cpu_data_internal.h>
70 #include <arm/cpu_capabilities.h>
71 #include <arm/cpu_number.h>
72 #include <arm/machine_cpu.h>
73 #include <arm/misc_protos.h>
74 #include <arm/trap.h>
75
76 #if (__ARM_VMSA__ > 7)
77 #include <arm64/proc_reg.h>
78 #include <pexpert/arm64/boot.h>
79 #if CONFIG_PGTRACE
80 #include <stdint.h>
81 #include <arm64/pgtrace.h>
82 #if CONFIG_PGTRACE_NONKEXT
83 #include <arm64/pgtrace_decoder.h>
84 #endif // CONFIG_PGTRACE_NONKEXT
85 #endif
86 #endif
87
88 #include <pexpert/device_tree.h>
89
90 #include <san/kasan.h>
91 #include <sys/cdefs.h>
92
93 #if defined(HAS_APPLE_PAC)
94 #include <ptrauth.h>
95 #endif
96
97 #define PMAP_TT_L0_LEVEL 0x0
98 #define PMAP_TT_L1_LEVEL 0x1
99 #define PMAP_TT_L2_LEVEL 0x2
100 #define PMAP_TT_L3_LEVEL 0x3
101 #if (__ARM_VMSA__ == 7)
102 #define PMAP_TT_MAX_LEVEL PMAP_TT_L2_LEVEL
103 #else
104 #define PMAP_TT_MAX_LEVEL PMAP_TT_L3_LEVEL
105 #endif
106 #define PMAP_TT_LEAF_LEVEL PMAP_TT_MAX_LEVEL
107 #define PMAP_TT_TWIG_LEVEL (PMAP_TT_MAX_LEVEL - 1)
108
109 static bool alloc_asid(pmap_t pmap);
110 static void free_asid(pmap_t pmap);
111 static void flush_mmu_tlb_region_asid_async(vm_offset_t va, unsigned length, pmap_t pmap);
112 static void flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap);
113 static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
114 static pt_entry_t wimg_to_pte(unsigned int wimg);
115
116 struct page_table_ops {
117 bool (*alloc_id)(pmap_t pmap);
118 void (*free_id)(pmap_t pmap);
119 void (*flush_tlb_region_async)(vm_offset_t va, unsigned length, pmap_t pmap);
120 void (*flush_tlb_tte_async)(vm_offset_t va, pmap_t pmap);
121 void (*flush_tlb_async)(pmap_t pmap);
122 pt_entry_t (*wimg_to_pte)(unsigned int wimg);
123 };
124
125 static const struct page_table_ops native_pt_ops =
126 {
127 .alloc_id = alloc_asid,
128 .free_id = free_asid,
129 .flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
130 .flush_tlb_tte_async = flush_mmu_tlb_tte_asid_async,
131 .flush_tlb_async = flush_mmu_tlb_full_asid_async,
132 .wimg_to_pte = wimg_to_pte,
133 };
134
135 #if (__ARM_VMSA__ > 7)
136 const struct page_table_level_info pmap_table_level_info_16k[] =
137 {
138 [0] = {
139 .size = ARM_16K_TT_L0_SIZE,
140 .offmask = ARM_16K_TT_L0_OFFMASK,
141 .shift = ARM_16K_TT_L0_SHIFT,
142 .index_mask = ARM_16K_TT_L0_INDEX_MASK,
143 .valid_mask = ARM_TTE_VALID,
144 .type_mask = ARM_TTE_TYPE_MASK,
145 .type_block = ARM_TTE_TYPE_BLOCK
146 },
147 [1] = {
148 .size = ARM_16K_TT_L1_SIZE,
149 .offmask = ARM_16K_TT_L1_OFFMASK,
150 .shift = ARM_16K_TT_L1_SHIFT,
151 .index_mask = ARM_16K_TT_L1_INDEX_MASK,
152 .valid_mask = ARM_TTE_VALID,
153 .type_mask = ARM_TTE_TYPE_MASK,
154 .type_block = ARM_TTE_TYPE_BLOCK
155 },
156 [2] = {
157 .size = ARM_16K_TT_L2_SIZE,
158 .offmask = ARM_16K_TT_L2_OFFMASK,
159 .shift = ARM_16K_TT_L2_SHIFT,
160 .index_mask = ARM_16K_TT_L2_INDEX_MASK,
161 .valid_mask = ARM_TTE_VALID,
162 .type_mask = ARM_TTE_TYPE_MASK,
163 .type_block = ARM_TTE_TYPE_BLOCK
164 },
165 [3] = {
166 .size = ARM_16K_TT_L3_SIZE,
167 .offmask = ARM_16K_TT_L3_OFFMASK,
168 .shift = ARM_16K_TT_L3_SHIFT,
169 .index_mask = ARM_16K_TT_L3_INDEX_MASK,
170 .valid_mask = ARM_PTE_TYPE_VALID,
171 .type_mask = ARM_PTE_TYPE_MASK,
172 .type_block = ARM_TTE_TYPE_L3BLOCK
173 }
174 };
175
176 const struct page_table_level_info pmap_table_level_info_4k[] =
177 {
178 [0] = {
179 .size = ARM_4K_TT_L0_SIZE,
180 .offmask = ARM_4K_TT_L0_OFFMASK,
181 .shift = ARM_4K_TT_L0_SHIFT,
182 .index_mask = ARM_4K_TT_L0_INDEX_MASK,
183 .valid_mask = ARM_TTE_VALID,
184 .type_mask = ARM_TTE_TYPE_MASK,
185 .type_block = ARM_TTE_TYPE_BLOCK
186 },
187 [1] = {
188 .size = ARM_4K_TT_L1_SIZE,
189 .offmask = ARM_4K_TT_L1_OFFMASK,
190 .shift = ARM_4K_TT_L1_SHIFT,
191 .index_mask = ARM_4K_TT_L1_INDEX_MASK,
192 .valid_mask = ARM_TTE_VALID,
193 .type_mask = ARM_TTE_TYPE_MASK,
194 .type_block = ARM_TTE_TYPE_BLOCK
195 },
196 [2] = {
197 .size = ARM_4K_TT_L2_SIZE,
198 .offmask = ARM_4K_TT_L2_OFFMASK,
199 .shift = ARM_4K_TT_L2_SHIFT,
200 .index_mask = ARM_4K_TT_L2_INDEX_MASK,
201 .valid_mask = ARM_TTE_VALID,
202 .type_mask = ARM_TTE_TYPE_MASK,
203 .type_block = ARM_TTE_TYPE_BLOCK
204 },
205 [3] = {
206 .size = ARM_4K_TT_L3_SIZE,
207 .offmask = ARM_4K_TT_L3_OFFMASK,
208 .shift = ARM_4K_TT_L3_SHIFT,
209 .index_mask = ARM_4K_TT_L3_INDEX_MASK,
210 .valid_mask = ARM_PTE_TYPE_VALID,
211 .type_mask = ARM_PTE_TYPE_MASK,
212 .type_block = ARM_TTE_TYPE_L3BLOCK
213 }
214 };
215
216 struct page_table_attr {
217 const struct page_table_level_info * const pta_level_info;
218 const struct page_table_ops * const pta_ops;
219 const uintptr_t ap_ro;
220 const uintptr_t ap_rw;
221 const uintptr_t ap_rona;
222 const uintptr_t ap_rwna;
223 const uintptr_t ap_xn;
224 const uintptr_t ap_x;
225 const unsigned int pta_root_level;
226 const unsigned int pta_max_level;
227 };
228
229 const struct page_table_attr pmap_pt_attr_4k = {
230 .pta_level_info = pmap_table_level_info_4k,
231 .pta_root_level = PMAP_TT_L1_LEVEL,
232 .pta_max_level = PMAP_TT_L3_LEVEL,
233 .pta_ops = &native_pt_ops,
234 .ap_ro = ARM_PTE_AP(AP_RORO),
235 .ap_rw = ARM_PTE_AP(AP_RWRW),
236 .ap_rona = ARM_PTE_AP(AP_RONA),
237 .ap_rwna = ARM_PTE_AP(AP_RWNA),
238 .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
239 .ap_x = ARM_PTE_PNX,
240 };
241
242 const struct page_table_attr pmap_pt_attr_16k = {
243 .pta_level_info = pmap_table_level_info_16k,
244 .pta_root_level = PMAP_TT_L1_LEVEL,
245 .pta_max_level = PMAP_TT_L3_LEVEL,
246 .pta_ops = &native_pt_ops,
247 .ap_ro = ARM_PTE_AP(AP_RORO),
248 .ap_rw = ARM_PTE_AP(AP_RWRW),
249 .ap_rona = ARM_PTE_AP(AP_RONA),
250 .ap_rwna = ARM_PTE_AP(AP_RWNA),
251 .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
252 .ap_x = ARM_PTE_PNX,
253 };
254
255 #if __ARM_16K_PG__
256 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
257 #else /* !__ARM_16K_PG__ */
258 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
259 #endif /* !__ARM_16K_PG__ */
260
261
262 #else /* (__ARM_VMSA__ > 7) */
263 /*
264 * We don't support pmap parameterization for VMSA7, so use an opaque
265 * page_table_attr structure.
266 */
267 const struct page_table_attr * const native_pt_attr = NULL;
268 #endif /* (__ARM_VMSA__ > 7) */
269
270 typedef struct page_table_attr pt_attr_t;
271
272 /* Macro for getting pmap attributes; not a function for const propagation. */
273 #if ARM_PARAMETERIZED_PMAP
274 /* The page table attributes are linked to the pmap */
275 #define pmap_get_pt_attr(pmap) ((pmap)->pmap_pt_attr)
276 #define pmap_get_pt_ops(pmap) ((pmap)->pmap_pt_attr->pta_ops)
277 #else /* !ARM_PARAMETERIZED_PMAP */
278 /* The page table attributes are fixed (to allow for const propagation) */
279 #define pmap_get_pt_attr(pmap) (native_pt_attr)
280 #define pmap_get_pt_ops(pmap) (&native_pt_ops)
281 #endif /* !ARM_PARAMETERIZED_PMAP */
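/*
 * Typical usage of these accessors (illustrative sketch): callers fetch the
 * attribute/ops structures once and dispatch through them, e.g.
 *
 *	const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
 *	uint64_t twig_size = pt_attr_twig_size(pt_attr);
 *	pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
 *
 * In the non-parameterized configuration both macros expand to the native
 * constants, so the whole chain can be const-propagated at compile time.
 */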
282
283 #if (__ARM_VMSA__ > 7)
284 static inline uint64_t
285 pt_attr_ln_size(const pt_attr_t * const pt_attr, unsigned int level)
286 {
287 return pt_attr->pta_level_info[level].size;
288 }
289
290 __unused static inline uint64_t
291 pt_attr_ln_shift(const pt_attr_t * const pt_attr, unsigned int level)
292 {
293 return pt_attr->pta_level_info[level].shift;
294 }
295
296 __unused static inline uint64_t
297 pt_attr_ln_offmask(const pt_attr_t * const pt_attr, unsigned int level)
298 {
299 return pt_attr->pta_level_info[level].offmask;
300 }
301
302 static inline unsigned int
303 pt_attr_twig_level(const pt_attr_t * const pt_attr)
304 {
305 return pt_attr->pta_max_level - 1;
306 }
307
308 static inline unsigned int
309 pt_attr_root_level(const pt_attr_t * const pt_attr)
310 {
311 return pt_attr->pta_root_level;
312 }
313
314 static __unused inline uint64_t
315 pt_attr_leaf_size(const pt_attr_t * const pt_attr)
316 {
317 return pt_attr->pta_level_info[pt_attr->pta_max_level].size;
318 }
319
320 static __unused inline uint64_t
321 pt_attr_leaf_offmask(const pt_attr_t * const pt_attr)
322 {
323 return pt_attr->pta_level_info[pt_attr->pta_max_level].offmask;
324 }
325
326 static inline uint64_t
327 pt_attr_leaf_shift(const pt_attr_t * const pt_attr)
328 {
329 return pt_attr->pta_level_info[pt_attr->pta_max_level].shift;
330 }
331
332 static __unused inline uint64_t
333 pt_attr_leaf_index_mask(const pt_attr_t * const pt_attr)
334 {
335 return pt_attr->pta_level_info[pt_attr->pta_max_level].index_mask;
336 }
337
338 static inline uint64_t
339 pt_attr_twig_size(const pt_attr_t * const pt_attr)
340 {
341 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].size;
342 }
343
344 static inline uint64_t
345 pt_attr_twig_offmask(const pt_attr_t * const pt_attr)
346 {
347 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].offmask;
348 }
349
350 static inline uint64_t
351 pt_attr_twig_shift(const pt_attr_t * const pt_attr)
352 {
353 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].shift;
354 }
355
356 static __unused inline uint64_t
357 pt_attr_twig_index_mask(const pt_attr_t * const pt_attr)
358 {
359 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].index_mask;
360 }
361
362 static inline uint64_t
363 pt_attr_leaf_table_size(const pt_attr_t * const pt_attr)
364 {
365 return pt_attr_twig_size(pt_attr);
366 }
367
368 static inline uint64_t
369 pt_attr_leaf_table_offmask(const pt_attr_t * const pt_attr)
370 {
371 return pt_attr_twig_offmask(pt_attr);
372 }
373
374 static inline uintptr_t
375 pt_attr_leaf_rw(const pt_attr_t * const pt_attr)
376 {
377 return pt_attr->ap_rw;
378 }
379
380 static inline uintptr_t
381 pt_attr_leaf_ro(const pt_attr_t * const pt_attr)
382 {
383 return pt_attr->ap_ro;
384 }
385
386 static inline uintptr_t
387 pt_attr_leaf_rona(const pt_attr_t * const pt_attr)
388 {
389 return pt_attr->ap_rona;
390 }
391
392 static inline uintptr_t
393 pt_attr_leaf_rwna(const pt_attr_t * const pt_attr)
394 {
395 return pt_attr->ap_rwna;
396 }
397
398 static inline uintptr_t
399 pt_attr_leaf_xn(const pt_attr_t * const pt_attr)
400 {
401 return pt_attr->ap_xn;
402 }
403
404 static inline uintptr_t
405 pt_attr_leaf_x(const pt_attr_t * const pt_attr)
406 {
407 return pt_attr->ap_x;
408 }
409
410 #else /* (__ARM_VMSA__ > 7) */
411
412 static inline unsigned int
413 pt_attr_twig_level(__unused const pt_attr_t * const pt_attr)
414 {
415 return PMAP_TT_L1_LEVEL;
416 }
417
418 static inline uint64_t
419 pt_attr_twig_size(__unused const pt_attr_t * const pt_attr)
420 {
421 return ARM_TT_TWIG_SIZE;
422 }
423
424 static inline uint64_t
425 pt_attr_twig_offmask(__unused const pt_attr_t * const pt_attr)
426 {
427 return ARM_TT_TWIG_OFFMASK;
428 }
429
430 static inline uint64_t
431 pt_attr_twig_shift(__unused const pt_attr_t * const pt_attr)
432 {
433 return ARM_TT_TWIG_SHIFT;
434 }
435
436 static __unused inline uint64_t
437 pt_attr_twig_index_mask(__unused const pt_attr_t * const pt_attr)
438 {
439 return ARM_TT_TWIG_INDEX_MASK;
440 }
441
442 __unused static inline uint64_t
443 pt_attr_leaf_size(__unused const pt_attr_t * const pt_attr)
444 {
445 return ARM_TT_LEAF_SIZE;
446 }
447
448 __unused static inline uint64_t
449 pt_attr_leaf_offmask(__unused const pt_attr_t * const pt_attr)
450 {
451 return ARM_TT_LEAF_OFFMASK;
452 }
453
454 static inline uint64_t
455 pt_attr_leaf_shift(__unused const pt_attr_t * const pt_attr)
456 {
457 return ARM_TT_LEAF_SHIFT;
458 }
459
460 static __unused inline uint64_t
461 pt_attr_leaf_index_mask(__unused const pt_attr_t * const pt_attr)
462 {
463 return ARM_TT_LEAF_INDEX_MASK;
464 }
465
466 static inline uint64_t
467 pt_attr_leaf_table_size(__unused const pt_attr_t * const pt_attr)
468 {
469 return ARM_TT_L1_PT_SIZE;
470 }
471
472 static inline uint64_t
473 pt_attr_leaf_table_offmask(__unused const pt_attr_t * const pt_attr)
474 {
475 return ARM_TT_L1_PT_OFFMASK;
476 }
477
478 static inline uintptr_t
479 pt_attr_leaf_rw(__unused const pt_attr_t * const pt_attr)
480 {
481 return ARM_PTE_AP(AP_RWRW);
482 }
483
484 static inline uintptr_t
485 pt_attr_leaf_ro(__unused const pt_attr_t * const pt_attr)
486 {
487 return ARM_PTE_AP(AP_RORO);
488 }
489
490 static inline uintptr_t
491 pt_attr_leaf_rona(__unused const pt_attr_t * const pt_attr)
492 {
493 return ARM_PTE_AP(AP_RONA);
494 }
495
496 static inline uintptr_t
497 pt_attr_leaf_rwna(__unused const pt_attr_t * const pt_attr)
498 {
499 return ARM_PTE_AP(AP_RWNA);
500 }
501
502 static inline uintptr_t
503 pt_attr_leaf_xn(__unused const pt_attr_t * const pt_attr)
504 {
505 return ARM_PTE_NX;
506 }
507
508 #endif /* (__ARM_VMSA__ > 7) */
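/*
 * Worked example (illustrative): for a given VA, the leaf table that maps it
 * spans pt_attr_twig_size() bytes of address space, so
 *
 *	vm_map_address_t table_start = va & ~pt_attr_twig_offmask(pt_attr);
 *	vm_map_address_t table_end   = table_start + pt_attr_twig_size(pt_attr);
 *	unsigned int     pte_idx     = (unsigned int)((va & pt_attr_twig_offmask(pt_attr)) >> pt_attr_leaf_shift(pt_attr));
 *
 * gives the VA range covered by that leaf table and the index of the PTE for
 * "va" within it; this mirrors what pte_index()/tt3_index() compute on the
 * parameterized (__ARM_VMSA__ > 7) configurations.
 */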
509
510 static inline void
511 pmap_sync_tlb(bool strong __unused)
512 {
513 sync_tlb_flush();
514 }
515
516 #if MACH_ASSERT
517 int vm_footprint_suspend_allowed = 1;
518
519 extern int pmap_ledgers_panic;
520 extern int pmap_ledgers_panic_leeway;
521
522 int pmap_stats_assert = 1;
523 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...) \
524 MACRO_BEGIN \
525 if (pmap_stats_assert && (pmap)->pmap_stats_assert) \
526 assertf(cond, fmt, ##__VA_ARGS__); \
527 MACRO_END
528 #else /* MACH_ASSERT */
529 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
530 #endif /* MACH_ASSERT */
531
532 #if DEVELOPMENT || DEBUG
533 #define PMAP_FOOTPRINT_SUSPENDED(pmap) \
534 (current_thread()->pmap_footprint_suspended)
535 #else /* DEVELOPMENT || DEBUG */
536 #define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
537 #endif /* DEVELOPMENT || DEBUG */
538
539
540
541 #define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
542 #define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
543
544
545 #if DEVELOPMENT || DEBUG
546 int panic_on_unsigned_execute = 0;
547 #endif /* DEVELOPMENT || DEBUG */
548
549
550 /* Virtual memory region for early allocation */
551 #if (__ARM_VMSA__ == 7)
552 #define VREGION1_HIGH_WINDOW (0)
553 #else
554 #define VREGION1_HIGH_WINDOW (PE_EARLY_BOOT_VA)
555 #endif
556 #define VREGION1_START ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
557 #define VREGION1_SIZE (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
558
559 extern unsigned int not_in_kdp;
560
561 extern vm_offset_t first_avail;
562
563 extern pmap_paddr_t avail_start;
564 extern pmap_paddr_t avail_end;
565
566 extern vm_offset_t virtual_space_start; /* Next available kernel VA */
567 extern vm_offset_t virtual_space_end; /* End of kernel address space */
568 extern vm_offset_t static_memory_end;
569
570 extern int maxproc, hard_maxproc;
571
572 #if (__ARM_VMSA__ > 7)
573 /* The number of address bits one TTBR can cover. */
574 #define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
575
576 /*
577 * The bounds on our TTBRs. These are for sanity checking that
578 * an address is accessible by a TTBR before we attempt to map it.
579 */
580 #define ARM64_TTBR0_MIN_ADDR (0ULL)
581 #define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
582 #define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
583 #define ARM64_TTBR1_MAX_ADDR (~0ULL)
584
585 /* The level of the root of a page table. */
586 const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));
587
588 /* The number of entries in the root TT of a page table. */
589 const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
590 #else
591 const uint64_t arm64_root_pgtable_level = 0;
592 const uint64_t arm64_root_pgtable_num_ttes = 0;
593 #endif
594
595 struct pmap kernel_pmap_store MARK_AS_PMAP_DATA;
596 SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;
597
598 struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* store pt pages */
599 vm_object_t pmap_object = &pmap_object_store;
600
601 static struct zone *pmap_zone; /* zone of pmap structures */
602
603 decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA);
604 decl_simple_lock_data(, tt1_lock MARK_AS_PMAP_DATA);
605 unsigned int pmap_stamp MARK_AS_PMAP_DATA;
606 queue_head_t map_pmap_list MARK_AS_PMAP_DATA;
607
608 decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA);
609 queue_head_t pt_page_list MARK_AS_PMAP_DATA; /* pt page ptd entries list */
610
611 decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA);
612
613 typedef struct page_free_entry {
614 struct page_free_entry *next;
615 } page_free_entry_t;
616
617 #define PAGE_FREE_ENTRY_NULL ((page_free_entry_t *) 0)
618
619 page_free_entry_t *pmap_pages_reclaim_list MARK_AS_PMAP_DATA; /* Reclaimed pt page list */
620 unsigned int pmap_pages_request_count MARK_AS_PMAP_DATA; /* Pending requests to reclaim pt page */
621 unsigned long long pmap_pages_request_acum MARK_AS_PMAP_DATA;
622
623
624 typedef struct tt_free_entry {
625 struct tt_free_entry *next;
626 } tt_free_entry_t;
627
628 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
629
630 tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
631 unsigned int free_page_size_tt_count MARK_AS_PMAP_DATA;
632 unsigned int free_page_size_tt_max MARK_AS_PMAP_DATA;
633 #define FREE_PAGE_SIZE_TT_MAX 4
634 tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
635 unsigned int free_two_page_size_tt_count MARK_AS_PMAP_DATA;
636 unsigned int free_two_page_size_tt_max MARK_AS_PMAP_DATA;
637 #define FREE_TWO_PAGE_SIZE_TT_MAX 4
638 tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
639 unsigned int free_tt_count MARK_AS_PMAP_DATA;
640 unsigned int free_tt_max MARK_AS_PMAP_DATA;
641
643
644 boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
645 boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
646 boolean_t pmap_gc_allowed_by_time_throttle = TRUE;
647
648 unsigned int inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
649 unsigned int inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf user pagetable pages, in units of PAGE_SIZE */
650 unsigned int inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0; /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
651 unsigned int inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
652 unsigned int inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
653 unsigned int inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0; /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
654 unsigned int inuse_pmap_pages_count = 0; /* debugging */
655
656 SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte = 0;
657 SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;
658
659 SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte = 0; /* set by arm_vm_init() - keep out of bss */
660 SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0; /* set by arm_vm_init() - phys tte addr */
661
662 #if DEVELOPMENT || DEBUG
663 int nx_enabled = 1; /* enable no-execute protection */
664 int allow_data_exec = 0; /* No apps may execute data */
665 int allow_stack_exec = 0; /* No apps may execute from the stack */
666 unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
667 #else /* DEVELOPMENT || DEBUG */
668 const int nx_enabled = 1; /* enable no-execute protection */
669 const int allow_data_exec = 0; /* No apps may execute data */
670 const int allow_stack_exec = 0; /* No apps may execute from the stack */
671 #endif /* DEVELOPMENT || DEBUG */
672
673 /*
674 * pv_entry_t - structure to track the active mappings for a given page
675 */
676 typedef struct pv_entry {
677 struct pv_entry *pve_next; /* next alias */
678 pt_entry_t *pve_ptep; /* page table entry */
679 }
680 #if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
681 /* For the newer ARMv7k ABI, where 64-bit types are 64-bit aligned but
682  * pointers are 32-bit:
683  * force 8-byte alignment, since pt_desc is 64-bit aligned and we often
684  * cast from pv_entry to pt_desc.
685  */
686 __attribute__ ((aligned(8))) pv_entry_t;
687 #else
688 pv_entry_t;
689 #endif
690
691 #define PV_ENTRY_NULL ((pv_entry_t *) 0)
692
693 /*
694 * PMAP LEDGERS:
695 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
696 * as a marker for pages mapped through an "alternate accounting" mapping.
697 * These macros set, clear and test for this marker and extract the actual
698 * value of the "pve_next" pointer.
699 */
700 #define PVE_NEXT_ALTACCT ((uintptr_t) 0x1)
701 #define PVE_NEXT_SET_ALTACCT(pve_next_p) \
702 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
703 PVE_NEXT_ALTACCT)
704 #define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
705 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
706 ~PVE_NEXT_ALTACCT)
707 #define PVE_NEXT_IS_ALTACCT(pve_next) \
708 ((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
709 #define PVE_NEXT_PTR(pve_next) \
710 ((struct pv_entry *)(((uintptr_t) (pve_next)) & \
711 ~PVE_NEXT_ALTACCT))
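/*
 * Example (illustrative): the marker round-trips without disturbing the
 * pointer value, because pv_entry pointers are at least 4-byte aligned and
 * therefore have a zero low bit.  Here "pvep" stands for any pv_entry_t *
 * whose chain link is being tagged:
 *
 *	pv_entry_t **pve_next_p = &pve_next(pvep);
 *	pv_entry_t *unmarked = PVE_NEXT_PTR(*pve_next_p);
 *	PVE_NEXT_SET_ALTACCT(pve_next_p);
 *	assert(PVE_NEXT_IS_ALTACCT(*pve_next_p));
 *	assert(PVE_NEXT_PTR(*pve_next_p) == unmarked);
 *	PVE_NEXT_CLR_ALTACCT(pve_next_p);
 */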
712 #if MACH_ASSERT
713 static void pmap_check_ledgers(pmap_t pmap);
714 #else
715 static inline void
716 pmap_check_ledgers(__unused pmap_t pmap)
717 {
718 }
719 #endif /* MACH_ASSERT */
720
721 SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table; /* array of pv entry pointers */
722
723 pv_entry_t *pv_free_list MARK_AS_PMAP_DATA;
724 pv_entry_t *pv_kern_free_list MARK_AS_PMAP_DATA;
725 decl_simple_lock_data(, pv_free_list_lock MARK_AS_PMAP_DATA);
726 decl_simple_lock_data(, pv_kern_free_list_lock MARK_AS_PMAP_DATA);
727
728 decl_simple_lock_data(, phys_backup_lock);
729
730 /*
731 * pt_desc - structure to keep info on page assigned to page tables
732 */
733 #if (__ARM_VMSA__ == 7)
734 #define PT_INDEX_MAX 1
735 #else
736 #if (ARM_PGSHIFT == 14)
737 #define PT_INDEX_MAX 1
738 #else
739 #define PT_INDEX_MAX 4
740 #endif
741 #endif
742
743 #define PT_DESC_REFCOUNT 0x4000U
744 #define PT_DESC_IOMMU_REFCOUNT 0x8000U
745
746 typedef struct pt_desc {
747 queue_chain_t pt_page;
748 union {
749 struct pmap *pmap;
750 };
751 /*
752 * Locate this struct towards the end of the pt_desc; our long term
753 * goal is to make this a VLA to avoid wasting memory if we don't need
754 * multiple entries.
755 */
756 struct {
757 /*
758 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
759 * For leaf pagetables, should reflect the number of non-empty PTEs
760 * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
761 */
762 unsigned short refcnt;
763 /*
764 * For non-leaf pagetables, should be 0
765 * For leaf pagetables, should reflect the number of wired entries
766 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
767 */
768 unsigned short wiredcnt;
769 vm_offset_t va;
770 } ptd_info[PT_INDEX_MAX];
771 } pt_desc_t;
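/*
 * Example (illustrative): pmap_pages_reclaim() below scans these counters to
 * pick a victim page from a non-nested user pmap; summing over the ptd_info
 * slots of a candidate pt page, the eligibility test is roughly
 *
 *	no slot has refcnt == PT_DESC_REFCOUNT   (page holds only leaf tables)
 *	wiredcnt_acc == 0                        (no wired mappings)
 *	refcnt_acc   != 0                        (still maps something removable)
 *
 * so the page can be emptied with pmap_remove_range_options() and then freed.
 */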
772
773
774 #define PTD_ENTRY_NULL ((pt_desc_t *) 0)
775
776 SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;
777
778 pt_desc_t *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
779 SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
780 unsigned int ptd_free_count MARK_AS_PMAP_DATA = 0;
781 decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA);
782
783 /*
784 * physical page attribute
785 */
786 typedef u_int16_t pp_attr_t;
787
788 #define PP_ATTR_WIMG_MASK 0x003F
789 #define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
790
791 #define PP_ATTR_REFERENCED 0x0040
792 #define PP_ATTR_MODIFIED 0x0080
793
794 #define PP_ATTR_INTERNAL 0x0100
795 #define PP_ATTR_REUSABLE 0x0200
796 #define PP_ATTR_ALTACCT 0x0400
797 #define PP_ATTR_NOENCRYPT 0x0800
798
799 #define PP_ATTR_REFFAULT 0x1000
800 #define PP_ATTR_MODFAULT 0x2000
801
802
803 SECURITY_READ_ONLY_LATE(pp_attr_t*) pp_attr_table;
804
805 typedef struct pmap_io_range {
806 uint64_t addr;
807 uint64_t len;
808 #define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31) // Strong DSB required for pages in this range
809 #define PMAP_IO_RANGE_CARVEOUT (1UL << 30) // Corresponds to memory carved out by bootloader
810 uint32_t wimg; // lower 16 bits treated as pp_attr_t, upper 16 bits contain additional mapping flags
811 uint32_t signature; // 4CC
812 } __attribute__((packed)) pmap_io_range_t;
813
814 SECURITY_READ_ONLY_LATE(pmap_io_range_t*) io_attr_table;
815
816 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_first_phys = (pmap_paddr_t) 0;
817 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_last_phys = (pmap_paddr_t) 0;
818
819 SECURITY_READ_ONLY_LATE(unsigned int) num_io_rgns = 0;
820
821 SECURITY_READ_ONLY_LATE(boolean_t) pmap_initialized = FALSE; /* Has pmap_init completed? */
822
823 SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
824 SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;
825
826 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default = 0x0;
827 #if defined(__arm64__)
828 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
829 #endif
830
831 #define PMAP_MAX_SW_ASID ((MAX_ASID + MAX_HW_ASID - 1) / MAX_HW_ASID)
832 _Static_assert(PMAP_MAX_SW_ASID <= (UINT8_MAX + 1),
833 "VASID bits can't be represented by an 8-bit integer");
834
835 decl_simple_lock_data(, asid_lock MARK_AS_PMAP_DATA);
836 static bitmap_t asid_bitmap[BITMAP_LEN(MAX_ASID)] MARK_AS_PMAP_DATA;
837
838
839 #if (__ARM_VMSA__ > 7)
840 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
841 #endif
842
843
844 #define pa_index(pa) \
845 (atop((pa) - vm_first_phys))
846
847 #define pai_to_pvh(pai) \
848 (&pv_head_table[pai])
849
850 #define pa_valid(x) \
851 ((x) >= vm_first_phys && (x) < vm_last_phys)
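/*
 * Typical lookup pattern (illustrative sketch): a managed physical address is
 * converted to a physical page index, which indexes both pv_head_table and
 * pp_attr_table, with the PV head lock held around any inspection:
 *
 *	if (pa_valid(pa)) {
 *		int pai = (int)pa_index(pa);
 *		LOCK_PVH(pai);
 *		pv_entry_t **pvh = pai_to_pvh(pai);
 *		... walk or update the mappings of this page ...
 *		UNLOCK_PVH(pai);
 *	}
 *
 * (LOCK_PVH()/UNLOCK_PVH() are defined further below.)
 */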
852
853 /* PTE Define Macros */
854
855 #define pte_is_wired(pte) \
856 (((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)
857
858 #define pte_set_wired(ptep, wired) \
859 do { \
860 SInt16 *ptd_wiredcnt_ptr; \
861 ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->ptd_info[ARM_PT_DESC_INDEX(ptep)].wiredcnt); \
862 if (wired) { \
863 *ptep |= ARM_PTE_WIRED; \
864 OSAddAtomic16(1, ptd_wiredcnt_ptr); \
865 } else { \
866 *ptep &= ~ARM_PTE_WIRED; \
867 OSAddAtomic16(-1, ptd_wiredcnt_ptr); \
868 } \
869 } while(0)
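/*
 * Usage note (illustrative): pte_set_wired() keeps the owning leaf table's
 * wiredcnt in sync with the ARM_PTE_WIRED software bit:
 *
 *	pte_set_wired(ptep, TRUE);	sets the bit, increments ptd_info[...].wiredcnt
 *	pte_set_wired(ptep, FALSE);	clears the bit, decrements it again
 *
 * A non-zero wiredcnt is also what keeps a leaf table page from being chosen
 * by pmap_pages_reclaim().
 */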
870
871 #define pte_was_writeable(pte) \
872 (((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)
873
874 #define pte_set_was_writeable(pte, was_writeable) \
875 do { \
876 if ((was_writeable)) { \
877 (pte) |= ARM_PTE_WRITEABLE; \
878 } else { \
879 (pte) &= ~ARM_PTE_WRITEABLE; \
880 } \
881 } while(0)
882
883 /* PVE Define Macros */
884
885 #define pve_next(pve) \
886 ((pve)->pve_next)
887
888 #define pve_link_field(pve) \
889 (&pve_next(pve))
890
891 #define pve_link(pp, e) \
892 ((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))
893
894 #define pve_unlink(pp, e) \
895 (pve_next(pp) = pve_next(e))
896
897 /* bits held in the ptep pointer field */
898
899 #define pve_get_ptep(pve) \
900 ((pve)->pve_ptep)
901
902 #define pve_set_ptep(pve, ptep_new) \
903 do { \
904 (pve)->pve_ptep = (ptep_new); \
905 } while (0)
906
907 /* PTEP Define Macros */
908
909 /* mask for page descriptor index */
910 #define ARM_TT_PT_INDEX_MASK ARM_PGMASK
911
912 #if (__ARM_VMSA__ == 7)
913 #define ARM_PT_DESC_INDEX_MASK 0x00000
914 #define ARM_PT_DESC_INDEX_SHIFT 0
915
916 /*
917 * Shift value used for reconstructing the virtual address for a PTE.
918 */
919 #define ARM_TT_PT_ADDR_SHIFT (10U)
920
921 #define ptep_get_va(ptep) \
922 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
923
924 #define ptep_get_pmap(ptep) \
925 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
926
927 #else
928
929 #if (ARM_PGSHIFT == 12)
930 #define ARM_PT_DESC_INDEX_MASK ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
931 #define ARM_PT_DESC_INDEX_SHIFT ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
932 /*
933 * Shift value used for reconstructing the virtual address for a PTE.
934 */
935 #define ARM_TT_PT_ADDR_SHIFT (9ULL)
936 #else
937
938 #define ARM_PT_DESC_INDEX_MASK (0x00000)
939 #define ARM_PT_DESC_INDEX_SHIFT (0)
940 /*
941 * Shift value used for reconstructing the virtual address for a PTE.
942 */
943 #define ARM_TT_PT_ADDR_SHIFT (11ULL)
944 #endif
945
946
947 #define ARM_PT_DESC_INDEX(ptep) \
948 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
949
950 #define ptep_get_va(ptep) \
951 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
952
953 #define ptep_get_pmap(ptep) \
954 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
955
956 #endif
957
958 #define ARM_PT_DESC_INDEX(ptep) \
959 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
960
961 #define ptep_get_ptd(ptep) \
962 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
963
964
965 /* PVH Define Macros */
966
967 /* pvhead type */
968 #define PVH_TYPE_NULL 0x0UL
969 #define PVH_TYPE_PVEP 0x1UL
970 #define PVH_TYPE_PTEP 0x2UL
971 #define PVH_TYPE_PTDP 0x3UL
972
973 #define PVH_TYPE_MASK (0x3UL)
974
975 #ifdef __arm64__
976
977 /* All flags listed below are stored in the PV head pointer unless otherwise noted */
978 #define PVH_FLAG_IOMMU 0x4UL /* Stored in each PTE, or in PV head for single-PTE PV heads */
979 #define PVH_FLAG_IOMMU_TABLE (1ULL << 63) /* Stored in each PTE, or in PV head for single-PTE PV heads */
980 #define PVH_FLAG_CPU (1ULL << 62)
981 #define PVH_LOCK_BIT 61
982 #define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
983 #define PVH_FLAG_EXEC (1ULL << 60)
984 #define PVH_FLAG_LOCKDOWN (1ULL << 59)
985 #define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN)
986
987 #else /* !__arm64__ */
988
989 #define PVH_LOCK_BIT 31
990 #define PVH_FLAG_LOCK (1UL << PVH_LOCK_BIT)
991 #define PVH_HIGH_FLAGS PVH_FLAG_LOCK
992
993 #endif
994
995 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
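/*
 * Decoding sketch (illustrative): each pv_head_table entry is a single word
 * that packs a pointer, the 2-bit type, and (on arm64) flag bits stored in
 * the pointer's high bits, which are always set for kernel addresses:
 *
 *	vm_offset_t head = *(vm_offset_t *)pai_to_pvh(pai);
 *	unsigned long type = head & PVH_TYPE_MASK;
 *	pv_entry_t *list = (pv_entry_t *)((head & PVH_LIST_MASK) | PVH_HIGH_FLAGS);
 *
 * Re-OR'ing PVH_HIGH_FLAGS restores the canonical high bits of the kernel
 * pointer after any flag bits have been cleared; that is exactly what the
 * pvh_list()/pvh_ptep() macros below do.
 */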
996
997 #define pvh_test_type(h, b) \
998 ((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))
999
1000 #define pvh_ptep(h) \
1001 ((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
1002
1003 #define pvh_list(h) \
1004 ((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
1005
1006 #define pvh_get_flags(h) \
1007 (*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)
1008
1009 #define pvh_set_flags(h, f) \
1010 do { \
1011 os_atomic_store((vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
1012 relaxed); \
1013 } while (0)
1014
1015 #define pvh_update_head(h, e, t) \
1016 do { \
1017 assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK); \
1018 os_atomic_store((vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
1019 relaxed); \
1020 } while (0)
1021
1022 #define pvh_update_head_unlocked(h, e, t) \
1023 do { \
1024 assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK)); \
1025 *(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK; \
1026 } while (0)
1027
1028 #define pvh_add(h, e) \
1029 do { \
1030 assert(!pvh_test_type((h), PVH_TYPE_PTEP)); \
1031 pve_next(e) = pvh_list(h); \
1032 pvh_update_head((h), (e), PVH_TYPE_PVEP); \
1033 } while (0)
1034
1035 #define pvh_remove(h, p, e) \
1036 do { \
1037 assert(!PVE_NEXT_IS_ALTACCT(pve_next((e)))); \
1038 if ((p) == (h)) { \
1039 if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
1040 pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
1041 } else { \
1042 pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
1043 } \
1044 } else { \
1045 /* \
1046 * PMAP LEDGERS: \
1047 * preserve the "alternate accounting" bit \
1048 * when updating "p" (the previous entry's \
1049 * "pve_next"). \
1050 */ \
1051 boolean_t __is_altacct; \
1052 __is_altacct = PVE_NEXT_IS_ALTACCT(*(p)); \
1053 *(p) = PVE_NEXT_PTR(pve_next((e))); \
1054 if (__is_altacct) { \
1055 PVE_NEXT_SET_ALTACCT((p)); \
1056 } else { \
1057 PVE_NEXT_CLR_ALTACCT((p)); \
1058 } \
1059 } \
1060 } while (0)
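/*
 * Traversal sketch (illustrative): with the PVH lock held, visiting every
 * mapping of a physical page handles both head encodings:
 *
 *	if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
 *		pt_entry_t *ptep = pvh_ptep(pvh);
 *		... single mapping ...
 *	} else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
 *		for (pv_entry_t *pvep = pvh_list(pvh); pvep != PV_ENTRY_NULL;
 *		    pvep = PVE_NEXT_PTR(pve_next(pvep))) {
 *			pt_entry_t *ptep = pve_get_ptep(pvep);
 *			... one mapping per pv_entry ...
 *		}
 *	}
 */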
1061
1062
1063 /* PPATTR Define Macros */
1064
1065 #define ppattr_set_bits(h, b) \
1066 do { \
1067 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
1068 } while (0)
1069
1070 #define ppattr_clear_bits(h, b) \
1071 do { \
1072 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
1073 } while (0)
1074
1075 #define ppattr_test_bits(h, b) \
1076 ((*(pp_attr_t *)(h) & (b)) == (b))
1077
1078 #define pa_set_bits(x, b) \
1079 do { \
1080 if (pa_valid(x)) \
1081 ppattr_set_bits(&pp_attr_table[pa_index(x)], \
1082 (b)); \
1083 } while (0)
1084
1085 #define pa_test_bits(x, b) \
1086 (pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
1087 (b)) : FALSE)
1088
1089 #define pa_clear_bits(x, b) \
1090 do { \
1091 if (pa_valid(x)) \
1092 ppattr_clear_bits(&pp_attr_table[pa_index(x)], \
1093 (b)); \
1094 } while (0)
1095
1096 #define pa_set_modify(x) \
1097 pa_set_bits(x, PP_ATTR_MODIFIED)
1098
1099 #define pa_clear_modify(x) \
1100 pa_clear_bits(x, PP_ATTR_MODIFIED)
1101
1102 #define pa_set_reference(x) \
1103 pa_set_bits(x, PP_ATTR_REFERENCED)
1104
1105 #define pa_clear_reference(x) \
1106 pa_clear_bits(x, PP_ATTR_REFERENCED)
1107
1108
1109 #define IS_INTERNAL_PAGE(pai) \
1110 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1111 #define SET_INTERNAL_PAGE(pai) \
1112 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1113 #define CLR_INTERNAL_PAGE(pai) \
1114 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1115
1116 #define IS_REUSABLE_PAGE(pai) \
1117 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1118 #define SET_REUSABLE_PAGE(pai) \
1119 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1120 #define CLR_REUSABLE_PAGE(pai) \
1121 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1122
1123 #define IS_ALTACCT_PAGE(pai, pve_p) \
1124 (((pve_p) == NULL) \
1125 ? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
1126 : PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
1127 #define SET_ALTACCT_PAGE(pai, pve_p) \
1128 if ((pve_p) == NULL) { \
1129 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1130 } else { \
1131 PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
1132 }
1133 #define CLR_ALTACCT_PAGE(pai, pve_p) \
1134 if ((pve_p) == NULL) { \
1135 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1136 } else { \
1137 PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
1138 }
1139
1140 #define IS_REFFAULT_PAGE(pai) \
1141 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1142 #define SET_REFFAULT_PAGE(pai) \
1143 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1144 #define CLR_REFFAULT_PAGE(pai) \
1145 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1146
1147 #define IS_MODFAULT_PAGE(pai) \
1148 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1149 #define SET_MODFAULT_PAGE(pai) \
1150 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1151 #define CLR_MODFAULT_PAGE(pai) \
1152 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1153
1154 #define tte_get_ptd(tte) \
1155 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))
1156
1157
1158 #if (__ARM_VMSA__ == 7)
1159
1160 #define tte_index(pmap, pt_attr, addr) \
1161 ttenum((addr))
1162
1163 #define pte_index(pmap, pt_attr, addr) \
1164 ptenum((addr))
1165
1166 #else
1167
1168 #define ttn_index(pmap, pt_attr, addr, pt_level) \
1169 (((addr) & (pt_attr)->pta_level_info[(pt_level)].index_mask) >> (pt_attr)->pta_level_info[(pt_level)].shift)
1170
1171 #define tt0_index(pmap, pt_attr, addr) \
1172 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L0_LEVEL)
1173
1174 #define tt1_index(pmap, pt_attr, addr) \
1175 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L1_LEVEL)
1176
1177 #define tt2_index(pmap, pt_attr, addr) \
1178 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L2_LEVEL)
1179
1180 #define tt3_index(pmap, pt_attr, addr) \
1181 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L3_LEVEL)
1182
1183 #define tte_index(pmap, pt_attr, addr) \
1184 tt2_index((pmap), (pt_attr), (addr))
1185
1186 #define pte_index(pmap, pt_attr, addr) \
1187 tt3_index((pmap), (pt_attr), (addr))
1188
1189 #endif
1190
1191 /*
1192 * Lock on pmap system
1193 */
1194
1195 lck_grp_t pmap_lck_grp;
1196
1197 #define PMAP_LOCK_INIT(pmap) { \
1198 simple_lock_init(&(pmap)->lock, 0); \
1199 }
1200
1201 #define PMAP_LOCK(pmap) { \
1202 pmap_simple_lock(&(pmap)->lock); \
1203 }
1204
1205 #define PMAP_UNLOCK(pmap) { \
1206 pmap_simple_unlock(&(pmap)->lock); \
1207 }
1208
1209 #if MACH_ASSERT
1210 #define PMAP_ASSERT_LOCKED(pmap) { \
1211 simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED); \
1212 }
1213 #else
1214 #define PMAP_ASSERT_LOCKED(pmap)
1215 #endif
1216
1217 #if defined(__arm64__)
1218 #define PVH_LOCK_WORD 1 /* Assumes little-endian */
1219 #else
1220 #define PVH_LOCK_WORD 0
1221 #endif
1222
1223 #define ASSERT_PVH_LOCKED(index) \
1224 do { \
1225 assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK); \
1226 } while (0)
1227
1228 #define LOCK_PVH(index) \
1229 do { \
1230 pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
1231 } while (0)
1232
1233 #define UNLOCK_PVH(index) \
1234 do { \
1235 ASSERT_PVH_LOCKED(index); \
1236 pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
1237 } while (0)
1238
1239 #define PMAP_UPDATE_TLBS(pmap, s, e, strong) { \
1240 pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (unsigned)(e - s), pmap); \
1241 pmap_sync_tlb(strong); \
1242 }
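/*
 * Usage sketch (illustrative): after modifying the PTEs covering [s, e) the
 * caller issues
 *
 *	PMAP_UPDATE_TLBS(pmap, s, e, need_strong_sync);
 *
 * which queues an ASID-tagged invalidate for the range through the pmap's
 * page-table ops and then synchronizes.  The "strong" argument is threaded
 * through for ranges that need heavier barriers (e.g. I/O ranges marked
 * PMAP_IO_RANGE_STRONG_SYNC), though pmap_sync_tlb() above currently issues
 * the same sync_tlb_flush() in either case.
 */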
1243
1244 #define FLUSH_PTE_RANGE(spte, epte) \
1245 __builtin_arm_dmb(DMB_ISH);
1246
1247 #define FLUSH_PTE(pte_p) \
1248 __builtin_arm_dmb(DMB_ISH);
1249
1250 #define FLUSH_PTE_STRONG(pte_p) \
1251 __builtin_arm_dsb(DSB_ISH);
1252
1253 #define FLUSH_PTE_RANGE_STRONG(spte, epte) \
1254 __builtin_arm_dsb(DSB_ISH);
1255
1256 #define WRITE_PTE_FAST(pte_p, pte_entry) \
1257 __unreachable_ok_push \
1258 if (TEST_PAGE_RATIO_4) { \
1259 if (((unsigned)(pte_p)) & 0x1f) { \
1260 panic("%s: WRITE_PTE_FAST is unaligned, " \
1261 "pte_p=%p, pte_entry=%p", \
1262 __FUNCTION__, \
1263 pte_p, (void*)pte_entry); \
1264 } \
1265 if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
1266 *(pte_p) = (pte_entry); \
1267 *((pte_p)+1) = (pte_entry); \
1268 *((pte_p)+2) = (pte_entry); \
1269 *((pte_p)+3) = (pte_entry); \
1270 } else { \
1271 *(pte_p) = (pte_entry); \
1272 *((pte_p)+1) = (pte_entry) | 0x1000; \
1273 *((pte_p)+2) = (pte_entry) | 0x2000; \
1274 *((pte_p)+3) = (pte_entry) | 0x3000; \
1275 } \
1276 } else { \
1277 *(pte_p) = (pte_entry); \
1278 } \
1279 __unreachable_ok_pop
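/*
 * Example (illustrative): with TEST_PAGE_RATIO_4 (16K VM pages backed by 4K
 * hardware pages), writing one valid mapping fills four consecutive hardware
 * PTEs whose output addresses step by 0x1000, so the 16K page is covered by
 * its four 4K subpages; empty/compressed markers are simply replicated into
 * all four slots.
 */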
1280
1281 #define WRITE_PTE(pte_p, pte_entry) \
1282 WRITE_PTE_FAST(pte_p, pte_entry); \
1283 FLUSH_PTE(pte_p);
1284
1285 #define WRITE_PTE_STRONG(pte_p, pte_entry) \
1286 WRITE_PTE_FAST(pte_p, pte_entry); \
1287 FLUSH_PTE_STRONG(pte_p);
1288
1289 /*
1290 * Other useful macros.
1291 */
1292 #define current_pmap() \
1293 (vm_map_pmap(current_thread()->map))
1294
1295
1296 #define VALIDATE_USER_PMAP(x)
1297 #define VALIDATE_PMAP(x)
1298 #define VALIDATE_LEDGER(x)
1299
1300
1301 #if DEVELOPMENT || DEBUG
1302
1303 /*
1304 * Trace levels are controlled by a bitmask in which each
1305 * level can be enabled/disabled by the (1<<level) position
1306 * in the boot arg.
1307 * Level 1: pmap lifecycle (create/destroy/switch)
1308 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
1309 * Level 3: internal state management (tte/attributes/fast-fault)
1310 */
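/*
 * For example (illustrative), a boot-arg mask of 0x6 == (1 << 1) | (1 << 2)
 * enables the pmap-lifecycle and mapping-lifecycle levels while leaving the
 * internal-state level (3) disabled.
 */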
1311
1312 SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;
1313
1314 #define PMAP_TRACE(level, ...) \
1315 if (__improbable((1 << (level)) & pmap_trace_mask)) { \
1316 KDBG_RELEASE(__VA_ARGS__); \
1317 }
1318 #else
1319
1320 #define PMAP_TRACE(level, ...)
1321
1322 #endif
1323
1324
1325 /*
1326 * Internal function prototypes (forward declarations).
1327 */
1328
1329 static void pv_init(
1330 void);
1331
1332 static boolean_t pv_alloc(
1333 pmap_t pmap,
1334 unsigned int pai,
1335 pv_entry_t **pvepp);
1336
1337 static void pv_free(
1338 pv_entry_t *pvep);
1339
1340 static void pv_list_free(
1341 pv_entry_t *pvehp,
1342 pv_entry_t *pvetp,
1343 unsigned int cnt);
1344
1345 static void ptd_bootstrap(
1346 pt_desc_t *ptdp, unsigned int ptd_cnt);
1347
1348 static inline pt_desc_t *ptd_alloc_unlinked(bool reclaim);
1349
1350 static pt_desc_t *ptd_alloc(pmap_t pmap, bool reclaim);
1351
1352 static void ptd_deallocate(pt_desc_t *ptdp);
1353
1354 static void ptd_init(
1355 pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);
1356
1357 static void pmap_zone_init(
1358 void);
1359
1360 static void pmap_set_reference(
1361 ppnum_t pn);
1362
1363 ppnum_t pmap_vtophys(
1364 pmap_t pmap, addr64_t va);
1365
1366 void pmap_switch_user_ttb(
1367 pmap_t pmap);
1368
1369 static kern_return_t pmap_expand(
1370 pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
1371
1372 static int pmap_remove_range(
1373 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);
1374
1375 static int pmap_remove_range_options(
1376 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, bool *, int);
1377
1378 static tt_entry_t *pmap_tt1_allocate(
1379 pmap_t, vm_size_t, unsigned int);
1380
1381 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1382
1383 static void pmap_tt1_deallocate(
1384 pmap_t, tt_entry_t *, vm_size_t, unsigned int);
1385
1386 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1387
1388 static kern_return_t pmap_tt_allocate(
1389 pmap_t, tt_entry_t **, unsigned int, unsigned int);
1390
1391 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1392
1393 static void pmap_tte_deallocate(
1394 pmap_t, tt_entry_t *, unsigned int);
1395
1396 #ifdef __ARM64_PMAP_SUBPAGE_L1__
1397 #if (__ARM_VMSA__ <= 7)
1398 #error This is not supported for old-style page tables
1399 #endif /* (__ARM_VMSA__ <= 7) */
1400 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
1401 #else /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
1402 #if (__ARM_VMSA__ <= 7)
1403 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES * 2)
1404 #else /* (__ARM_VMSA__ > 7) */
1405 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
1406 #endif /* (__ARM_VMSA__ > 7) */
1407 #endif /* !defined(__ARM64_PMAP_SUBPAGE_L1__) */
1408
1409 const unsigned int arm_hardware_page_size = ARM_PGBYTES;
1410 const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
1411 const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;
1412
1413 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1414
1415 #if (__ARM_VMSA__ > 7)
1416
1417 static inline tt_entry_t *pmap_tt1e(
1418 pmap_t, vm_map_address_t);
1419
1420 static inline tt_entry_t *pmap_tt2e(
1421 pmap_t, vm_map_address_t);
1422
1423 static inline pt_entry_t *pmap_tt3e(
1424 pmap_t, vm_map_address_t);
1425
1426 static inline pt_entry_t *pmap_ttne(
1427 pmap_t, unsigned int, vm_map_address_t);
1428
1429 static void pmap_unmap_sharedpage(
1430 pmap_t pmap);
1431
1432 static boolean_t
1433 pmap_is_64bit(pmap_t);
1434
1435
1436 #endif
1437 static inline tt_entry_t *pmap_tte(
1438 pmap_t, vm_map_address_t);
1439
1440 static inline pt_entry_t *pmap_pte(
1441 pmap_t, vm_map_address_t);
1442
1443 static void pmap_update_cache_attributes_locked(
1444 ppnum_t, unsigned);
1445
1446 boolean_t arm_clear_fast_fault(
1447 ppnum_t ppnum,
1448 vm_prot_t fault_type);
1449
1450 static pmap_paddr_t pmap_pages_reclaim(
1451 void);
1452
1453 static kern_return_t pmap_pages_alloc(
1454 pmap_paddr_t *pa,
1455 unsigned size,
1456 unsigned option);
1457
1458 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1459 #define PMAP_PAGES_RECLAIM_NOWAIT 0x2
1460
1461 static void pmap_pages_free(
1462 pmap_paddr_t pa,
1463 unsigned size);
1464
1465 static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);
1466
1467 static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);
1468
1469 static void pmap_trim_self(pmap_t pmap);
1470 static void pmap_trim_subord(pmap_t subord);
1471
1472
1473 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
1474 static __return_type __function_name##_internal __function_args
1475
1476 PMAP_SUPPORT_PROTOTYPES(
1477 kern_return_t,
1478 arm_fast_fault, (pmap_t pmap,
1479 vm_map_address_t va,
1480 vm_prot_t fault_type,
1481 bool was_af_fault,
1482 bool from_user), ARM_FAST_FAULT_INDEX);
1483
1484
1485 PMAP_SUPPORT_PROTOTYPES(
1486 boolean_t,
1487 arm_force_fast_fault, (ppnum_t ppnum,
1488 vm_prot_t allow_mode,
1489 int options), ARM_FORCE_FAST_FAULT_INDEX);
1490
1491 PMAP_SUPPORT_PROTOTYPES(
1492 kern_return_t,
1493 mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);
1494
1495 PMAP_SUPPORT_PROTOTYPES(
1496 kern_return_t,
1497 mapping_replenish, (uint32_t kern_target_count, uint32_t user_target_count), MAPPING_REPLENISH_INDEX);
1498
1499 PMAP_SUPPORT_PROTOTYPES(
1500 boolean_t,
1501 pmap_batch_set_cache_attributes, (ppnum_t pn,
1502 unsigned int cacheattr,
1503 unsigned int page_cnt,
1504 unsigned int page_index,
1505 boolean_t doit,
1506 unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);
1507
1508 PMAP_SUPPORT_PROTOTYPES(
1509 void,
1510 pmap_change_wiring, (pmap_t pmap,
1511 vm_map_address_t v,
1512 boolean_t wired), PMAP_CHANGE_WIRING_INDEX);
1513
1514 PMAP_SUPPORT_PROTOTYPES(
1515 pmap_t,
1516 pmap_create_options, (ledger_t ledger,
1517 vm_map_size_t size,
1518 unsigned int flags), PMAP_CREATE_INDEX);
1519
1520 PMAP_SUPPORT_PROTOTYPES(
1521 void,
1522 pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
1523
1524 PMAP_SUPPORT_PROTOTYPES(
1525 kern_return_t,
1526 pmap_enter_options, (pmap_t pmap,
1527 vm_map_address_t v,
1528 ppnum_t pn,
1529 vm_prot_t prot,
1530 vm_prot_t fault_type,
1531 unsigned int flags,
1532 boolean_t wired,
1533 unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
1534
1535 PMAP_SUPPORT_PROTOTYPES(
1536 vm_offset_t,
1537 pmap_extract, (pmap_t pmap,
1538 vm_map_address_t va), PMAP_EXTRACT_INDEX);
1539
1540 PMAP_SUPPORT_PROTOTYPES(
1541 ppnum_t,
1542 pmap_find_phys, (pmap_t pmap,
1543 addr64_t va), PMAP_FIND_PHYS_INDEX);
1544
1545 #if (__ARM_VMSA__ > 7)
1546 PMAP_SUPPORT_PROTOTYPES(
1547 kern_return_t,
1548 pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
1549 #endif
1550
1551
1552 PMAP_SUPPORT_PROTOTYPES(
1553 boolean_t,
1554 pmap_is_empty, (pmap_t pmap,
1555 vm_map_offset_t va_start,
1556 vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
1557
1558
1559 PMAP_SUPPORT_PROTOTYPES(
1560 unsigned int,
1561 pmap_map_cpu_windows_copy, (ppnum_t pn,
1562 vm_prot_t prot,
1563 unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);
1564
1565 PMAP_SUPPORT_PROTOTYPES(
1566 kern_return_t,
1567 pmap_nest, (pmap_t grand,
1568 pmap_t subord,
1569 addr64_t vstart,
1570 addr64_t nstart,
1571 uint64_t size), PMAP_NEST_INDEX);
1572
1573 PMAP_SUPPORT_PROTOTYPES(
1574 void,
1575 pmap_page_protect_options, (ppnum_t ppnum,
1576 vm_prot_t prot,
1577 unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
1578
1579 PMAP_SUPPORT_PROTOTYPES(
1580 void,
1581 pmap_protect_options, (pmap_t pmap,
1582 vm_map_address_t start,
1583 vm_map_address_t end,
1584 vm_prot_t prot,
1585 unsigned int options,
1586 void *args), PMAP_PROTECT_OPTIONS_INDEX);
1587
1588 PMAP_SUPPORT_PROTOTYPES(
1589 kern_return_t,
1590 pmap_query_page_info, (pmap_t pmap,
1591 vm_map_offset_t va,
1592 int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);
1593
1594 PMAP_SUPPORT_PROTOTYPES(
1595 mach_vm_size_t,
1596 pmap_query_resident, (pmap_t pmap,
1597 vm_map_address_t start,
1598 vm_map_address_t end,
1599 mach_vm_size_t * compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);
1600
1601 PMAP_SUPPORT_PROTOTYPES(
1602 void,
1603 pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
1604
1605 PMAP_SUPPORT_PROTOTYPES(
1606 int,
1607 pmap_remove_options, (pmap_t pmap,
1608 vm_map_address_t start,
1609 vm_map_address_t end,
1610 int options), PMAP_REMOVE_OPTIONS_INDEX);
1611
1612 PMAP_SUPPORT_PROTOTYPES(
1613 kern_return_t,
1614 pmap_return, (boolean_t do_panic,
1615 boolean_t do_recurse), PMAP_RETURN_INDEX);
1616
1617 PMAP_SUPPORT_PROTOTYPES(
1618 void,
1619 pmap_set_cache_attributes, (ppnum_t pn,
1620 unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);
1621
1622 PMAP_SUPPORT_PROTOTYPES(
1623 void,
1624 pmap_update_compressor_page, (ppnum_t pn,
1625 unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);
1626
1627 PMAP_SUPPORT_PROTOTYPES(
1628 void,
1629 pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
1630
1631 #if MACH_ASSERT
1632 PMAP_SUPPORT_PROTOTYPES(
1633 void,
1634 pmap_set_process, (pmap_t pmap,
1635 int pid,
1636 char *procname), PMAP_SET_PROCESS_INDEX);
1637 #endif
1638
1639 PMAP_SUPPORT_PROTOTYPES(
1640 void,
1641 pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);
1642
1643 PMAP_SUPPORT_PROTOTYPES(
1644 kern_return_t,
1645 pmap_unnest_options, (pmap_t grand,
1646 addr64_t vaddr,
1647 uint64_t size,
1648 unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
1649
1650
1651 PMAP_SUPPORT_PROTOTYPES(
1652 void,
1653 phys_attribute_set, (ppnum_t pn,
1654 unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
1655
1656
1657 PMAP_SUPPORT_PROTOTYPES(
1658 void,
1659 phys_attribute_clear, (ppnum_t pn,
1660 unsigned int bits,
1661 int options,
1662 void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);
1663
1664 PMAP_SUPPORT_PROTOTYPES(
1665 void,
1666 pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);
1667
1668 PMAP_SUPPORT_PROTOTYPES(
1669 void,
1670 pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);
1671
1672 PMAP_SUPPORT_PROTOTYPES(
1673 void,
1674 pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);
1675
1676
1677 PMAP_SUPPORT_PROTOTYPES(
1678 void,
1679 pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);
1680
1681 PMAP_SUPPORT_PROTOTYPES(
1682 void,
1683 pmap_trim, (pmap_t grand,
1684 pmap_t subord,
1685 addr64_t vstart,
1686 addr64_t nstart,
1687 uint64_t size), PMAP_TRIM_INDEX);
1688
1689
1690
1691
1692
1693
1694 void pmap_footprint_suspend(vm_map_t map,
1695 boolean_t suspend);
1696 PMAP_SUPPORT_PROTOTYPES(
1697 void,
1698 pmap_footprint_suspend, (vm_map_t map,
1699 boolean_t suspend),
1700 PMAP_FOOTPRINT_SUSPEND_INDEX);
1701
1702
1703 #if CONFIG_PGTRACE
1704 boolean_t pgtrace_enabled = 0;
1705
1706 typedef struct {
1707 queue_chain_t chain;
1708
1709 /*
1710 * pmap - pmap for below addresses
1711 * ova - original va page address
1712 * cva - clone va addresses for pre, target and post pages
1713 * cva_spte - clone saved ptes
1714 * range - trace range in this map
1715 * cloned - has been cloned or not
1716 */
1717 pmap_t pmap;
1718 vm_map_offset_t ova;
1719 vm_map_offset_t cva[3];
1720 pt_entry_t cva_spte[3];
1721 struct {
1722 pmap_paddr_t start;
1723 pmap_paddr_t end;
1724 } range;
1725 bool cloned;
1726 } pmap_pgtrace_map_t;
1727
1728 static void pmap_pgtrace_init(void);
1729 static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
1730 static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
1731 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
1732 #endif
1733
1734 #if (__ARM_VMSA__ > 7)
1735 /*
1736 * The low global vector page is mapped at a fixed alias.
1737 * Since the page size is 16k for H8 and newer, we map the globals to a
1738 * 16k-aligned address. Readers of the globals (e.g. lldb, panic server)
1739 * need to check both addresses anyway for backward compatibility, so for
1740 * now we leave H6 and H7 where they were.
1741 */
1742 #if (ARM_PGSHIFT == 14)
1743 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
1744 #else
1745 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
1746 #endif
1747
1748 #else
1749 #define LOWGLOBAL_ALIAS (0xFFFF1000)
1750 #endif
1751
1752 long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1753 long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1754 long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1755 long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;
1756
1757 int pt_fake_zone_index = -1; /* index of pmap fake zone */
1758
1759
1760
1761 /*
1762 * Allocates and initializes a per-CPU data structure for the pmap.
1763 */
1764 MARK_AS_PMAP_TEXT static void
1765 pmap_cpu_data_init_internal(unsigned int cpu_number)
1766 {
1767 pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();
1768
1769 pmap_cpu_data->cpu_number = cpu_number;
1770 }
1771
1772 void
1773 pmap_cpu_data_init(void)
1774 {
1775 pmap_cpu_data_init_internal(cpu_number());
1776 }
1777
1778 static void
1779 pmap_cpu_data_array_init(void)
1780 {
1781
1782 pmap_cpu_data_init();
1783 }
1784
1785 pmap_cpu_data_t *
1786 pmap_get_cpu_data(void)
1787 {
1788 pmap_cpu_data_t * pmap_cpu_data = NULL;
1789
1790 pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
1791
1792 return pmap_cpu_data;
1793 }
1794
1795
1796
1797 /* TODO */
1798 pmap_paddr_t
1799 pmap_pages_reclaim(
1800 void)
1801 {
1802 boolean_t found_page;
1803 unsigned i;
1804 pt_desc_t *ptdp;
1805
1806 /*
1807 * pmap_pages_reclaim() returns a page by freeing an active pt page.
1808 * To be eligible, a pt page must be assigned to a user pmap, must not contain
1809 * any wired pte entries, and must contain at least one valid pte entry.
1810 *
1811 * In a loop, check for a page in the reclaimed pt page list.
1812 * If one is present, unlink that page and return its physical address.
1813 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
1814 * If one is found, invoke pmap_remove_range() on its pmap and address range,
1815 * then deallocate that pt page. This ends up adding the pt page to the
1816 * reclaimed pt page list.
1817 * If no eligible page is found in the pt page list, panic.
1818 */
1819
1820 pmap_simple_lock(&pmap_pages_lock);
1821 pmap_pages_request_count++;
1822 pmap_pages_request_acum++;
1823
1824 while (1) {
1825 if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
1826 page_free_entry_t *page_entry;
1827
1828 page_entry = pmap_pages_reclaim_list;
1829 pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
1830 pmap_simple_unlock(&pmap_pages_lock);
1831
1832 return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
1833 }
1834
1835 pmap_simple_unlock(&pmap_pages_lock);
1836
1837 pmap_simple_lock(&pt_pages_lock);
1838 ptdp = (pt_desc_t *)queue_first(&pt_page_list);
1839 found_page = FALSE;
1840
1841 while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
1842 if ((ptdp->pmap->nested == FALSE)
1843 && (pmap_simple_lock_try(&ptdp->pmap->lock))) {
1844 assert(ptdp->pmap != kernel_pmap);
1845 unsigned refcnt_acc = 0;
1846 unsigned wiredcnt_acc = 0;
1847
1848 for (i = 0; i < PT_INDEX_MAX; i++) {
1849 if (ptdp->ptd_info[i].refcnt == PT_DESC_REFCOUNT) {
1850 /* Do not attempt to free a page that contains an L2 table */
1851 refcnt_acc = 0;
1852 break;
1853 }
1854 refcnt_acc += ptdp->ptd_info[i].refcnt;
1855 wiredcnt_acc += ptdp->ptd_info[i].wiredcnt;
1856 }
1857 if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
1858 found_page = TRUE;
1859 /* Leave ptdp->pmap locked here. We're about to reclaim
1860 * a tt page from it, so we don't want anyone else messing
1861 * with it while we do that. */
1862 break;
1863 }
1864 pmap_simple_unlock(&ptdp->pmap->lock);
1865 }
1866 ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
1867 }
1868 if (!found_page) {
1869 panic("%s: No eligible page in pt_page_list", __FUNCTION__);
1870 } else {
1871 int remove_count = 0;
1872 bool need_strong_sync = false;
1873 vm_map_address_t va;
1874 pmap_t pmap;
1875 pt_entry_t *bpte, *epte;
1876 pt_entry_t *pte_p;
1877 tt_entry_t *tte_p;
1878 uint32_t rmv_spte = 0;
1879
1880 pmap_simple_unlock(&pt_pages_lock);
1881 pmap = ptdp->pmap;
1882 PMAP_ASSERT_LOCKED(pmap); // pmap lock should be held from loop above
1883
1884 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
1885
1886 for (i = 0; i < PT_INDEX_MAX; i++) {
1887 va = ptdp->ptd_info[i].va;
1888
1889 /* If the VA is bogus, this may represent an unallocated region
1890 * or one which is in transition (already being freed or expanded).
1891 * Don't try to remove mappings here. */
1892 if (va == (vm_offset_t)-1) {
1893 continue;
1894 }
1895
1896 tte_p = pmap_tte(pmap, va);
1897 if ((tte_p != (tt_entry_t *) NULL)
1898 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
1899 pte_p = (pt_entry_t *) ttetokv(*tte_p);
1900 bpte = &pte_p[pte_index(pmap, pt_attr, va)];
1901 epte = bpte + PAGE_SIZE / sizeof(pt_entry_t);
1902 /*
1903 * Use PMAP_OPTIONS_REMOVE to clear any
1904 * "compressed" markers and update the
1905 * "compressed" counter in pmap->stats.
1906 * This means that we lose accounting for
1907 * any compressed pages in this range
1908 * but the alternative is to not be able
1909 * to account for their future decompression,
1910 * which could cause the counter to drift
1911 * more and more.
1912 */
1913 remove_count += pmap_remove_range_options(
1914 pmap, va, bpte, epte,
1915 &rmv_spte, &need_strong_sync, PMAP_OPTIONS_REMOVE);
1916 if (ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt != 0) {
1917 panic("%s: ptdp %p, count %d", __FUNCTION__, ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
1918 }
1919
1920 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_TWIG_LEVEL);
1921
1922 if (remove_count > 0) {
1923 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, (unsigned int)pt_attr_leaf_table_size(pt_attr), pmap);
1924 } else {
1925 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(va, pmap);
1926 }
1927 }
1928 }
1929 // Undo the lock we grabbed when we found ptdp above
1930 PMAP_UNLOCK(pmap);
1931 pmap_sync_tlb(need_strong_sync);
1932 }
1933 pmap_simple_lock(&pmap_pages_lock);
1934 }
1935 }
1936
1937
1938 static kern_return_t
1939 pmap_pages_alloc(
1940 pmap_paddr_t *pa,
1941 unsigned size,
1942 unsigned option)
1943 {
1944 vm_page_t m = VM_PAGE_NULL, m_prev;
1945
1946 if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
1947 assert(size == PAGE_SIZE);
1948 *pa = pmap_pages_reclaim();
1949 return KERN_SUCCESS;
1950 }
1951 if (size == PAGE_SIZE) {
1952 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
1953 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
1954 return KERN_RESOURCE_SHORTAGE;
1955 }
1956
1957 VM_PAGE_WAIT();
1958 }
1959 vm_page_lock_queues();
1960 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
1961 vm_page_unlock_queues();
1962 }
1963 if (size == 2 * PAGE_SIZE) {
1964 while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
1965 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
1966 return KERN_RESOURCE_SHORTAGE;
1967 }
1968
1969 VM_PAGE_WAIT();
1970 }
1971 }
1972
1973 *pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
1974
1975 vm_object_lock(pmap_object);
1976 while (m != VM_PAGE_NULL) {
1977 vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
1978 m_prev = m;
1979 m = NEXT_PAGE(m_prev);
1980 *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
1981 }
1982 vm_object_unlock(pmap_object);
1983
1984 OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
1985 OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);
1986
1987 return KERN_SUCCESS;
1988 }
1989
1990
1991 static void
1992 pmap_pages_free(
1993 pmap_paddr_t pa,
1994 unsigned size)
1995 {
1996 pmap_simple_lock(&pmap_pages_lock);
1997
1998 if (pmap_pages_request_count != 0) {
1999 page_free_entry_t *page_entry;
2000
2001 pmap_pages_request_count--;
2002 page_entry = (page_free_entry_t *)phystokv(pa);
2003 page_entry->next = pmap_pages_reclaim_list;
2004 pmap_pages_reclaim_list = page_entry;
2005 pmap_simple_unlock(&pmap_pages_lock);
2006
2007 return;
2008 }
2009
2010 pmap_simple_unlock(&pmap_pages_lock);
2011
2012 vm_page_t m;
2013 pmap_paddr_t pa_max;
2014
2015 OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);
2016
2017 for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
2018 vm_object_lock(pmap_object);
2019 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
2020 assert(m != VM_PAGE_NULL);
2021 assert(VM_PAGE_WIRED(m));
2022 vm_page_lock_queues();
2023 vm_page_free(m);
2024 vm_page_unlock_queues();
2025 vm_object_unlock(pmap_object);
2026 }
2027 }
2028
2029 static inline void
2030 PMAP_ZINFO_PALLOC(
2031 pmap_t pmap, int bytes)
2032 {
2033 pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
2034 }
2035
2036 static inline void
2037 PMAP_ZINFO_PFREE(
2038 pmap_t pmap,
2039 int bytes)
2040 {
2041 pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
2042 }
2043
2044 static inline void
2045 pmap_tt_ledger_credit(
2046 pmap_t pmap,
2047 vm_size_t size)
2048 {
2049 if (pmap != kernel_pmap) {
2050 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
2051 pmap_ledger_credit(pmap, task_ledgers.page_table, size);
2052 }
2053 }
2054
2055 static inline void
2056 pmap_tt_ledger_debit(
2057 pmap_t pmap,
2058 vm_size_t size)
2059 {
2060 if (pmap != kernel_pmap) {
2061 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
2062 pmap_ledger_debit(pmap, task_ledgers.page_table, size);
2063 }
2064 }
2065
2066 static bool
2067 alloc_asid(pmap_t pmap)
2068 {
2069 int vasid;
2070 uint16_t hw_asid;
2071
2072 pmap_simple_lock(&asid_lock);
2073 vasid = bitmap_first(&asid_bitmap[0], MAX_ASID);
2074 if (vasid < 0) {
2075 pmap_simple_unlock(&asid_lock);
2076 return false;
2077 }
2078 assert(vasid < MAX_ASID);
2079 bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
2080 pmap_simple_unlock(&asid_lock);
2081 // bitmap_first() returns highest-order bits first, but a 0-based scheme works
2082 // slightly better with the collision detection scheme used by pmap_switch_internal().
2083 vasid = MAX_ASID - 1 - vasid;
2084 hw_asid = vasid % MAX_HW_ASID;
2085 pmap->sw_asid = vasid / MAX_HW_ASID;
2086 hw_asid += 1; // Account for ASID 0, which is reserved for the kernel
2087 #if __ARM_KERNEL_PROTECT__
2088 hw_asid <<= 1; // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
2089 #endif
2090 pmap->hw_asid = hw_asid;
2091 return true;
2092 }
2093
2094 static void
2095 free_asid(pmap_t pmap)
2096 {
2097 unsigned int vasid;
2098 uint16_t hw_asid = pmap->hw_asid;
2099 assert(hw_asid != 0); // Should not try to free kernel ASID
2100
2101 #if __ARM_KERNEL_PROTECT__
2102 hw_asid >>= 1;
2103 #endif
2104 hw_asid -= 1;
2105
2106 vasid = ((unsigned int)pmap->sw_asid * MAX_HW_ASID) + hw_asid;
2107 vasid = MAX_ASID - 1 - vasid;
2108
2109 pmap_simple_lock(&asid_lock);
2110 assert(!bitmap_test(&asid_bitmap[0], vasid));
2111 bitmap_set(&asid_bitmap[0], vasid);
2112 pmap_simple_unlock(&asid_lock);
2113 }
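/*
 * Illustrative note on the ASID split done by alloc_asid()/free_asid() above:
 * a flat virtual ASID index is inverted, then divided into a hardware ASID
 * (modulo) and a software "generation" (quotient), and free_asid() reverses
 * both steps.  The sketch below is a standalone, userland-style round-trip
 * check; EX_MAX_ASID/EX_MAX_HW_ASID and the ex_* names are made up for the
 * example, and the __ARM_KERNEL_PROTECT__ doubling is ignored.
 */
#if 0 /* example only — never built */
#include <assert.h>
#include <stdint.h>

#define EX_MAX_ASID    512   /* illustrative, not the real MAX_ASID */
#define EX_MAX_HW_ASID 256   /* illustrative, not the real MAX_HW_ASID */

static void
ex_asid_roundtrip(int vasid_from_bitmap)
{
	/* alloc path: invert the bitmap index, then split it */
	int vasid = EX_MAX_ASID - 1 - vasid_from_bitmap;
	uint16_t hw_asid = (uint16_t)(vasid % EX_MAX_HW_ASID) + 1; /* +1 skips kernel ASID 0 */
	uint8_t  sw_asid = (uint8_t)(vasid / EX_MAX_HW_ASID);

	/* free path: undo the split, then the inversion */
	int rebuilt = ((int)sw_asid * EX_MAX_HW_ASID) + (hw_asid - 1);
	assert((EX_MAX_ASID - 1 - rebuilt) == vasid_from_bitmap);
}
#endif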
2114
2115
2116 #ifndef PMAP_PV_LOAD_FACTOR
2117 #define PMAP_PV_LOAD_FACTOR 1
2118 #endif
2119
2120 #define PV_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
2121 #define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
2122 #define PV_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
2123 #define PV_KERN_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
2124 #define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
2125 #define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
2126
2127 uint32_t pv_free_count MARK_AS_PMAP_DATA = 0;
2128 uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
2129 uint32_t pv_kern_free_count MARK_AS_PMAP_DATA = 0;
2130
2131 uint32_t pv_low_water_mark MARK_AS_PMAP_DATA;
2132 uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA;
2133 uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA;
2134 uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA;
2135
2136 thread_t mapping_replenish_thread;
2137 event_t mapping_replenish_event;
2138 volatile uint32_t mappingrecurse = 0;
2139
2140 unsigned pmap_mapping_thread_wakeups;
2141 unsigned pmap_reserve_replenish_stat MARK_AS_PMAP_DATA;
2142 unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
2143
2144
2145 static void
2146 pv_init(
2147 void)
2148 {
2149 simple_lock_init(&pv_free_list_lock, 0);
2150 simple_lock_init(&pv_kern_free_list_lock, 0);
2151 pv_free_list = PV_ENTRY_NULL;
2152 pv_free_count = 0x0U;
2153 pv_kern_free_list = PV_ENTRY_NULL;
2154 pv_kern_free_count = 0x0U;
2155 }
2156
2157 static inline void PV_ALLOC(pv_entry_t **pv_ep);
2158 static inline void PV_KERN_ALLOC(pv_entry_t **pv_e);
2159 static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target);
2160
2161 static boolean_t
2162 pv_alloc(
2163 pmap_t pmap,
2164 unsigned int pai,
2165 pv_entry_t **pvepp)
2166 {
2167 if (pmap != NULL) {
2168 PMAP_ASSERT_LOCKED(pmap);
2169 }
2170 ASSERT_PVH_LOCKED(pai);
2171 PV_ALLOC(pvepp);
2172 if (PV_ENTRY_NULL == *pvepp) {
2173 if ((pmap == NULL) || (kernel_pmap == pmap)) {
2174 PV_KERN_ALLOC(pvepp);
2175
2176 if (PV_ENTRY_NULL == *pvepp) {
2177 pv_entry_t *pv_e;
2178 pv_entry_t *pv_eh;
2179 pv_entry_t *pv_et;
2180 int pv_cnt;
2181 unsigned j;
2182 pmap_paddr_t pa;
2183 kern_return_t ret;
2184
2185 UNLOCK_PVH(pai);
2186 if (pmap != NULL) {
2187 PMAP_UNLOCK(pmap);
2188 }
2189
2190 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
2191
2192 if (ret == KERN_RESOURCE_SHORTAGE) {
2193 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
2194 }
2195
2196 if (ret != KERN_SUCCESS) {
2197 panic("%s: failed to alloc page for kernel, ret=%d, "
2198 "pmap=%p, pai=%u, pvepp=%p",
2199 __FUNCTION__, ret,
2200 pmap, pai, pvepp);
2201 }
2202
2203 pv_page_count++;
2204
2205 pv_e = (pv_entry_t *)phystokv(pa);
2206 pv_cnt = 0;
2207 pv_eh = pv_et = PV_ENTRY_NULL;
2208 *pvepp = pv_e;
2209 pv_e++;
2210
2211 for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2212 pv_e->pve_next = pv_eh;
2213 pv_eh = pv_e;
2214
2215 if (pv_et == PV_ENTRY_NULL) {
2216 pv_et = pv_e;
2217 }
2218 pv_cnt++;
2219 pv_e++;
2220 }
2221 PV_FREE_LIST(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);
2222 if (pmap != NULL) {
2223 PMAP_LOCK(pmap);
2224 }
2225 LOCK_PVH(pai);
2226 return FALSE;
2227 }
2228 } else {
2229 UNLOCK_PVH(pai);
2230 PMAP_UNLOCK(pmap);
2231
2232 pv_entry_t *pv_e;
2233 pv_entry_t *pv_eh;
2234 pv_entry_t *pv_et;
2235 int pv_cnt;
2236 unsigned j;
2237 pmap_paddr_t pa;
2238 kern_return_t ret;
2239
2240 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
2241
2242 if (ret != KERN_SUCCESS) {
2243 panic("%s: failed to alloc page, ret=%d, "
2244 "pmap=%p, pai=%u, pvepp=%p",
2245 __FUNCTION__, ret,
2246 pmap, pai, pvepp);
2247 }
2248
2249 pv_page_count++;
2250
2251 pv_e = (pv_entry_t *)phystokv(pa);
2252 pv_cnt = 0;
2253 pv_eh = pv_et = PV_ENTRY_NULL;
2254 *pvepp = pv_e;
2255 pv_e++;
2256
2257 for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2258 pv_e->pve_next = pv_eh;
2259 pv_eh = pv_e;
2260
2261 if (pv_et == PV_ENTRY_NULL) {
2262 pv_et = pv_e;
2263 }
2264 pv_cnt++;
2265 pv_e++;
2266 }
2267
2268 PV_FREE_LIST(pv_eh, pv_et, pv_cnt, pv_kern_low_water_mark);
2269
2270 PMAP_LOCK(pmap);
2271 LOCK_PVH(pai);
2272 return FALSE;
2273 }
2274 }
2275 assert(PV_ENTRY_NULL != *pvepp);
2276 return TRUE;
2277 }
2278
2279 static void
2280 pv_free(
2281 pv_entry_t *pvep)
2282 {
2283 PV_FREE_LIST(pvep, pvep, 1, pv_kern_low_water_mark);
2284 }
2285
2286 static void
2287 pv_list_free(
2288 pv_entry_t *pvehp,
2289 pv_entry_t *pvetp,
2290 unsigned int cnt)
2291 {
2292 PV_FREE_LIST(pvehp, pvetp, cnt, pv_kern_low_water_mark);
2293 }
2294
2295 static inline void
2296 pv_water_mark_check(void)
2297 {
2298 if (__improbable((pv_free_count < pv_low_water_mark) || (pv_kern_free_count < pv_kern_low_water_mark))) {
2299 if (!mappingrecurse && os_atomic_cmpxchg(&mappingrecurse, 0, 1, acq_rel)) {
2300 thread_wakeup(&mapping_replenish_event);
2301 }
2302 }
2303 }
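/*
 * pv_water_mark_check() above wakes the replenish thread at most once per
 * depletion episode: only the caller that wins the 0 -> 1 transition of
 * mappingrecurse issues the wakeup.  A minimal standalone sketch of that
 * gate using C11 atomics follows; ex_recurse and ex_wake_replenisher_once
 * are illustrative names, and the real code uses os_atomic_cmpxchg() plus
 * thread_wakeup().
 */
#if 0 /* example only — never built */
#include <stdatomic.h>

static _Atomic int ex_recurse;   /* stands in for mappingrecurse */

static void
ex_wake_replenisher_once(void)
{
	int expected = 0;

	/* Cheap read first, then a single winner performs the wakeup. */
	if (atomic_load(&ex_recurse) == 0 &&
	    atomic_compare_exchange_strong(&ex_recurse, &expected, 1)) {
		/* thread_wakeup(&mapping_replenish_event) would go here */
	}
}
#endif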
2304
2305 static inline void
2306 PV_ALLOC(pv_entry_t **pv_ep)
2307 {
2308 assert(*pv_ep == PV_ENTRY_NULL);
2309 if (pv_kern_free_count < pv_kern_low_water_mark) {
2310 /*
2311 * If the kernel reserved pool is low, let non-kernel mappings wait for a page
2312 * from the VM.
2313 */
2314 return;
2315 }
2316 pmap_simple_lock(&pv_free_list_lock);
2317
2318 if ((*pv_ep = pv_free_list) != 0) {
2319 pv_free_list = (pv_entry_t *)(*pv_ep)->pve_next;
2320 (*pv_ep)->pve_next = PV_ENTRY_NULL;
2321 pv_free_count--;
2322 }
2323
2324 pmap_simple_unlock(&pv_free_list_lock);
2325 }
2326
2327 static inline void
2328 PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt, uint32_t kern_target)
2329 {
2330 bool use_kernel_list = false;
2331 pmap_simple_lock(&pv_kern_free_list_lock);
2332 if (pv_kern_free_count < kern_target) {
2333 pv_et->pve_next = pv_kern_free_list;
2334 pv_kern_free_list = pv_eh;
2335 pv_kern_free_count += pv_cnt;
2336 use_kernel_list = true;
2337 }
2338 pmap_simple_unlock(&pv_kern_free_list_lock);
2339
2340 if (!use_kernel_list) {
2341 pmap_simple_lock(&pv_free_list_lock);
2342 pv_et->pve_next = (pv_entry_t *)pv_free_list;
2343 pv_free_list = pv_eh;
2344 pv_free_count += pv_cnt;
2345 pmap_simple_unlock(&pv_free_list_lock);
2346 }
2347 }
2348
2349 static inline void
2350 PV_KERN_ALLOC(pv_entry_t **pv_e)
2351 {
2352 assert(*pv_e == PV_ENTRY_NULL);
2353 pmap_simple_lock(&pv_kern_free_list_lock);
2354
2355 if ((*pv_e = pv_kern_free_list) != 0) {
2356 pv_kern_free_list = (pv_entry_t *)(*pv_e)->pve_next;
2357 (*pv_e)->pve_next = PV_ENTRY_NULL;
2358 pv_kern_free_count--;
2359 pmap_kern_reserve_alloc_stat++;
2360 }
2361
2362 pmap_simple_unlock(&pv_kern_free_list_lock);
2363 }
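/*
 * PV_ALLOC(), PV_FREE_LIST() and PV_KERN_ALLOC() above implement a two-tier
 * pool: freed entries top up the kernel-reserved list first (up to the
 * caller-supplied target) and only then spill into the general list, while
 * allocations on behalf of user pmaps refuse to proceed once the kernel
 * reserve is below its low-water mark.  The sketch below restates that
 * policy over two plain counters; the ex_* names are illustrative and all
 * locking is omitted.
 */
#if 0 /* example only — never built */
#include <stdbool.h>
#include <stdint.h>

struct ex_pv_pools {
	uint32_t kern_free;   /* stands in for pv_kern_free_count */
	uint32_t gen_free;    /* stands in for pv_free_count */
};

/* Freeing: refill the kernel reserve first, then the general pool. */
static void
ex_pv_free(struct ex_pv_pools *p, uint32_t cnt, uint32_t kern_target)
{
	if (p->kern_free < kern_target) {
		p->kern_free += cnt;
	} else {
		p->gen_free += cnt;
	}
}

/*
 * Allocating for a user pmap: fail (so the caller falls back to
 * pmap_pages_alloc()) once the kernel reserve dips below its low-water mark
 * or the general pool is empty.
 */
static bool
ex_pv_alloc_user(struct ex_pv_pools *p, uint32_t kern_low_water)
{
	if (p->kern_free < kern_low_water || p->gen_free == 0) {
		return false;
	}
	p->gen_free--;
	return true;
}
#endif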
2364
2365 /*
2366 * Creates a target number of free pv_entry_t objects for the kernel free list
2367 * and the general free list.
2368 */
2369 MARK_AS_PMAP_TEXT static kern_return_t
2370 mapping_free_prime_internal(void)
2371 {
2372 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_called = FALSE;
2373 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_done = FALSE;
2374
2375 if (mapping_free_prime_internal_done) {
2376 return KERN_FAILURE;
2377 }
2378
2379 if (!mapping_free_prime_internal_called) {
2380 mapping_free_prime_internal_called = TRUE;
2381
2382 pv_low_water_mark = PV_LOW_WATER_MARK_DEFAULT;
2383
2384 /* Alterable via sysctl */
2385 pv_kern_low_water_mark = PV_KERN_LOW_WATER_MARK_DEFAULT;
2386
2387 pv_kern_alloc_chunk = PV_KERN_ALLOC_CHUNK_INITIAL;
2388 pv_alloc_chunk = PV_ALLOC_CHUNK_INITIAL;
2389 }
2390
2391 return mapping_replenish_internal(PV_KERN_ALLOC_INITIAL_TARGET, PV_ALLOC_INITIAL_TARGET);
2392 }
2393
2394 void
2395 mapping_free_prime(void)
2396 {
2397 kern_return_t kr = KERN_FAILURE;
2398
2399 kr = mapping_free_prime_internal();
2400
2401 if (kr != KERN_SUCCESS) {
2402 panic("%s: failed, kr=%d",
2403 __FUNCTION__, kr);
2404 }
2405 }
2406
2407 void mapping_replenish(void);
2408
2409 void
2410 mapping_adjust(void)
2411 {
2412 kern_return_t mres;
2413
2414 mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
2415 if (mres != KERN_SUCCESS) {
2416 panic("%s: mapping_replenish thread creation failed",
2417 __FUNCTION__);
2418 }
2419 thread_deallocate(mapping_replenish_thread);
2420 }
2421
2422 /*
2423 * Fills the kernel and general PV free lists back up to the given target counts.
2424 */
2425 MARK_AS_PMAP_TEXT static kern_return_t
2426 mapping_replenish_internal(uint32_t kern_target_count, uint32_t user_target_count)
2427 {
2428 pv_entry_t *pv_e;
2429 pv_entry_t *pv_eh;
2430 pv_entry_t *pv_et;
2431 int pv_cnt;
2432 unsigned j;
2433 pmap_paddr_t pa;
2434 kern_return_t ret = KERN_SUCCESS;
2435
2436 while ((pv_free_count < user_target_count) || (pv_kern_free_count < kern_target_count)) {
2437 pv_cnt = 0;
2438 pv_eh = pv_et = PV_ENTRY_NULL;
2439
2440 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
2441 assert(ret == KERN_SUCCESS);
2442
2443 pv_page_count++;
2444
2445 pv_e = (pv_entry_t *)phystokv(pa);
2446
2447 for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2448 pv_e->pve_next = pv_eh;
2449 pv_eh = pv_e;
2450
2451 if (pv_et == PV_ENTRY_NULL) {
2452 pv_et = pv_e;
2453 }
2454 pv_cnt++;
2455 pv_e++;
2456 }
2457 pmap_reserve_replenish_stat += pv_cnt;
2458 PV_FREE_LIST(pv_eh, pv_et, pv_cnt, kern_target_count);
2459 }
2460
2461 return ret;
2462 }
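/*
 * mapping_replenish_internal() above (like pv_alloc() before it) carves each
 * freshly allocated page into PAGE_SIZE / sizeof(pv_entry_t) free-list nodes,
 * chained head first, before handing the whole chunk to PV_FREE_LIST().  The
 * standalone sketch below shows just that carving step; the ex_* names, the
 * 4 KB page size and the aligned_alloc() stand-in for pmap_pages_alloc() are
 * all illustrative.
 */
#if 0 /* example only — never built */
#include <stdlib.h>

struct ex_node {
	struct ex_node *next;
};

#define EX_PAGE_SIZE 4096u   /* illustrative page size */

/* Chain every node in one page onto a singly linked list; return its head. */
static struct ex_node *
ex_carve_page(void *page, unsigned *out_cnt)
{
	struct ex_node *e = page;
	struct ex_node *head = NULL;
	unsigned cnt = 0;

	for (unsigned j = 0; j < EX_PAGE_SIZE / sizeof(*e); j++, e++) {
		e->next = head;   /* push onto the chain */
		head = e;
		cnt++;
	}
	*out_cnt = cnt;
	return head;
}

static struct ex_node *
ex_make_free_chunk(unsigned *out_cnt)
{
	void *page = aligned_alloc(EX_PAGE_SIZE, EX_PAGE_SIZE); /* stand-in for pmap_pages_alloc() */

	*out_cnt = 0;
	return (page != NULL) ? ex_carve_page(page, out_cnt) : NULL;
}
#endif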
2463
2464 /*
2465 * Continuation function that keeps the PV free lists from running out of free
2466 * elements.
2467 */
2468 __attribute__((noreturn))
2469 void
2470 mapping_replenish(void)
2471 {
2472 kern_return_t kr;
2473
2474 /* We qualify for VM privileges...*/
2475 current_thread()->options |= TH_OPT_VMPRIV;
2476
2477 for (;;) {
2478 kr = mapping_replenish_internal(pv_kern_low_water_mark, pv_low_water_mark);
2479
2480 if (kr != KERN_SUCCESS) {
2481 panic("%s: failed, kr=%d", __FUNCTION__, kr);
2482 }
2483
2484 /* Check if the kernel pool has been depleted since the
2485 * first pass, to reduce refill latency.
2486 */
2487 if (pv_kern_free_count < pv_kern_low_water_mark) {
2488 continue;
2489 }
2490 /* Block sans continuation to avoid yielding kernel stack */
2491 assert_wait(&mapping_replenish_event, THREAD_UNINT);
2492 mappingrecurse = 0;
2493 thread_block(THREAD_CONTINUE_NULL);
2494 pmap_mapping_thread_wakeups++;
2495 }
2496 }
2497
2498
2499 static void
2500 ptd_bootstrap(
2501 pt_desc_t *ptdp,
2502 unsigned int ptd_cnt)
2503 {
2504 simple_lock_init(&ptd_free_list_lock, 0);
2505 while (ptd_cnt != 0) {
2506 (*(void **)ptdp) = (void *)ptd_free_list;
2507 ptd_free_list = ptdp;
2508 ptdp++;
2509 ptd_cnt--;
2510 ptd_free_count++;
2511 }
2512 ptd_preboot = FALSE;
2513 }
2514
2515 static pt_desc_t*
2516 ptd_alloc_unlinked(bool reclaim)
2517 {
2518 pt_desc_t *ptdp;
2519 unsigned i;
2520
2521 if (!ptd_preboot) {
2522 pmap_simple_lock(&ptd_free_list_lock);
2523 }
2524
2525 if (ptd_free_count == 0) {
2526 unsigned int ptd_cnt;
2527 pt_desc_t *ptdp_next;
2528
2529 if (ptd_preboot) {
2530 ptdp = (pt_desc_t *)avail_start;
2531 avail_start += ARM_PGBYTES;
2532 ptdp_next = ptdp;
2533 ptd_cnt = ARM_PGBYTES / sizeof(pt_desc_t);
2534 } else {
2535 pmap_paddr_t pa;
2536 kern_return_t ret;
2537
2538 pmap_simple_unlock(&ptd_free_list_lock);
2539
2540 if (pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
2541 if (reclaim) {
2542 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
2543 assert(ret == KERN_SUCCESS);
2544 } else {
2545 return NULL;
2546 }
2547 }
2548 ptdp = (pt_desc_t *)phystokv(pa);
2549
2550 pmap_simple_lock(&ptd_free_list_lock);
2551 ptdp_next = ptdp;
2552 ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
2553 }
2554
2555 while (ptd_cnt != 0) {
2556 (*(void **)ptdp_next) = (void *)ptd_free_list;
2557 ptd_free_list = ptdp_next;
2558 ptdp_next++;
2559 ptd_cnt--;
2560 ptd_free_count++;
2561 }
2562 }
2563
2564 if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
2565 ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
2566 ptd_free_count--;
2567 } else {
2568 panic("%s: out of ptd entry",
2569 __FUNCTION__);
2570 }
2571
2572 if (!ptd_preboot) {
2573 pmap_simple_unlock(&ptd_free_list_lock);
2574 }
2575
2576 ptdp->pt_page.next = NULL;
2577 ptdp->pt_page.prev = NULL;
2578 ptdp->pmap = NULL;
2579
2580 for (i = 0; i < PT_INDEX_MAX; i++) {
2581 ptdp->ptd_info[i].va = (vm_offset_t)-1;
2582 ptdp->ptd_info[i].refcnt = 0;
2583 ptdp->ptd_info[i].wiredcnt = 0;
2584 }
2585
2586 return ptdp;
2587 }
2588
2589 static inline pt_desc_t*
2590 ptd_alloc(pmap_t pmap, bool reclaim)
2591 {
2592 pt_desc_t *ptdp = ptd_alloc_unlinked(reclaim);
2593
2594 if (ptdp == NULL) {
2595 return NULL;
2596 }
2597
2598 ptdp->pmap = pmap;
2599 if (pmap != kernel_pmap) {
2600 /* We should never try to reclaim kernel pagetable pages in
2601 * pmap_pages_reclaim(), so don't enter them into the list. */
2602 pmap_simple_lock(&pt_pages_lock);
2603 queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
2604 pmap_simple_unlock(&pt_pages_lock);
2605 }
2606
2607 pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
2608 return ptdp;
2609 }
2610
2611 static void
2612 ptd_deallocate(pt_desc_t *ptdp)
2613 {
2614 pmap_t pmap = ptdp->pmap;
2615
2616 if (ptd_preboot) {
2617 panic("%s: early boot, "
2618 "ptdp=%p",
2619 __FUNCTION__,
2620 ptdp);
2621 }
2622
2623 if (ptdp->pt_page.next != NULL) {
2624 pmap_simple_lock(&pt_pages_lock);
2625 queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
2626 pmap_simple_unlock(&pt_pages_lock);
2627 }
2628 pmap_simple_lock(&ptd_free_list_lock);
2629 (*(void **)ptdp) = (void *)ptd_free_list;
2630 ptd_free_list = (pt_desc_t *)ptdp;
2631 ptd_free_count++;
2632 pmap_simple_unlock(&ptd_free_list_lock);
2633 if (pmap != NULL) {
2634 pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
2635 }
2636 }
2637
2638 static void
2639 ptd_init(
2640 pt_desc_t *ptdp,
2641 pmap_t pmap,
2642 vm_map_address_t va,
2643 unsigned int level,
2644 pt_entry_t *pte_p)
2645 {
2646 if (ptdp->pmap != pmap) {
2647 panic("%s: pmap mismatch, "
2648 "ptdp=%p, pmap=%p, va=%p, level=%u, pte_p=%p",
2649 __FUNCTION__,
2650 ptdp, pmap, (void*)va, level, pte_p);
2651 }
2652
2653 #if (__ARM_VMSA__ == 7)
2654 assert(level == 2);
2655 ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(ARM_TT_L1_PT_OFFMASK);
2656 #else
2657 assert(level > pt_attr_root_level(pmap_get_pt_attr(pmap)));
2658 ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(pt_attr_ln_offmask(pmap_get_pt_attr(pmap), level - 1));
2659 #endif
2660 if (level < PMAP_TT_MAX_LEVEL) {
2661 ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt = PT_DESC_REFCOUNT;
2662 }
2663 }
2664
2665
2666 boolean_t
2667 pmap_valid_address(
2668 pmap_paddr_t addr)
2669 {
2670 return pa_valid(addr);
2671 }
2672
2673 #if (__ARM_VMSA__ == 7)
2674
2675 /*
2676 * Given an offset and a map, compute the address of the
2677 * corresponding translation table entry.
2678 */
2679 static inline tt_entry_t *
2680 pmap_tte(pmap_t pmap,
2681 vm_map_address_t addr)
2682 {
2683 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
2684
2685 if (!(tte_index(pmap, pt_attr, addr) < pmap->tte_index_max)) {
2686 return (tt_entry_t *)NULL;
2687 }
2688 return &pmap->tte[tte_index(pmap, pt_attr, addr)];
2689 }
2690
2691
2692 /*
2693 * Given an offset and a map, compute the address of the
2694 * pte. If the address is invalid with respect to the map
2695 * then PT_ENTRY_NULL is returned (and the map may need to grow).
2696 *
2697 * This is only used internally.
2698 */
2699 static inline pt_entry_t *
2700 pmap_pte(
2701 pmap_t pmap,
2702 vm_map_address_t addr)
2703 {
2704 pt_entry_t *ptp;
2705 tt_entry_t *ttp;
2706 tt_entry_t tte;
2707
2708 ttp = pmap_tte(pmap, addr);
2709 if (ttp == (tt_entry_t *)NULL) {
2710 return PT_ENTRY_NULL;
2711 }
2712 tte = *ttp;
2713 #if MACH_ASSERT
2714 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
2715 panic("%s: Attempt to demote L1 block, tte=0x%lx, "
2716 "pmap=%p, addr=%p",
2717 __FUNCTION__, (unsigned long)tte,
2718 pmap, (void*)addr);
2719 }
2720 #endif
2721 if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
2722 return PT_ENTRY_NULL;
2723 }
2724 ptp = (pt_entry_t *) ttetokv(tte) + ptenum(addr);
2725 return ptp;
2726 }
2727
2728 __unused static inline tt_entry_t *
2729 pmap_ttne(pmap_t pmap,
2730 unsigned int target_level,
2731 vm_map_address_t addr)
2732 {
2733 tt_entry_t * ret_ttep = NULL;
2734
2735 switch (target_level) {
2736 case 1:
2737 ret_ttep = pmap_tte(pmap, addr);
2738 break;
2739 case 2:
2740 ret_ttep = (tt_entry_t *)pmap_pte(pmap, addr);
2741 break;
2742 default:
2743 panic("%s: bad level, "
2744 "pmap=%p, target_level=%u, addr=%p",
2745 __FUNCTION__,
2746 pmap, target_level, (void *)addr);
2747 }
2748
2749 return ret_ttep;
2750 }
2751
2752 #else
2753
2754 static inline tt_entry_t *
2755 pmap_ttne(pmap_t pmap,
2756 unsigned int target_level,
2757 vm_map_address_t addr)
2758 {
2759 tt_entry_t * ttp = NULL;
2760 tt_entry_t * ttep = NULL;
2761 tt_entry_t tte = ARM_TTE_EMPTY;
2762 unsigned int cur_level;
2763
2764 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
2765
2766 ttp = pmap->tte;
2767
2768 assert(target_level <= pt_attr->pta_max_level);
2769
2770 for (cur_level = pt_attr->pta_root_level; cur_level <= target_level; cur_level++) {
2771 ttep = &ttp[ttn_index(pmap, pt_attr, addr, cur_level)];
2772
2773 if (cur_level == target_level) {
2774 break;
2775 }
2776
2777 tte = *ttep;
2778
2779 #if MACH_ASSERT
2780 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
2781 panic("%s: Attempt to demote L%u block, tte=0x%llx, "
2782 "pmap=%p, target_level=%u, addr=%p",
2783 __FUNCTION__, cur_level, tte,
2784 pmap, target_level, (void*)addr);
2785 }
2786 #endif
2787 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
2788 return TT_ENTRY_NULL;
2789 }
2790
2791 ttp = (tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
2792 }
2793
2794 return ttep;
2795 }
2796
2797 /*
2798 * Given an offset and a map, compute the address of the level 1 translation table entry.
2799 * If the translation is invalid then TT_ENTRY_NULL is returned.
2800 */
2801 static inline tt_entry_t *
2802 pmap_tt1e(pmap_t pmap,
2803 vm_map_address_t addr)
2804 {
2805 return pmap_ttne(pmap, PMAP_TT_L1_LEVEL, addr);
2806 }
2807
2808 /*
2809 * Given an offset and a map, compute the address of the level 2 translation table entry.
2810 * If the translation is invalid then TT_ENTRY_NULL is returned.
2811 */
2812 static inline tt_entry_t *
2813 pmap_tt2e(pmap_t pmap,
2814 vm_map_address_t addr)
2815 {
2816 return pmap_ttne(pmap, PMAP_TT_L2_LEVEL, addr);
2817 }
2818
2819
2820 /*
2821 * Given an offset and a map, compute the address of the level 3 translation table entry.
2822 * If the translation is invalid then PT_ENTRY_NULL is returned.
2823 */
2824 static inline pt_entry_t *
2825 pmap_tt3e(
2826 pmap_t pmap,
2827 vm_map_address_t addr)
2828 {
2829 return (pt_entry_t*)pmap_ttne(pmap, PMAP_TT_L3_LEVEL, addr);
2830 }
2831
2832 static inline tt_entry_t *
2833 pmap_tte(
2834 pmap_t pmap,
2835 vm_map_address_t addr)
2836 {
2837 return pmap_tt2e(pmap, addr);
2838 }
2839
2840 static inline pt_entry_t *
2841 pmap_pte(
2842 pmap_t pmap,
2843 vm_map_address_t addr)
2844 {
2845 return pmap_tt3e(pmap, addr);
2846 }
2847
2848 #endif
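/*
 * Whichever side of the __ARM_VMSA__ split above is built, pmap_pte() hands
 * back either a pointer to the leaf PTE covering a VA or PT_ENTRY_NULL when
 * no leaf table is mapped there.  The sketch below shows the typical
 * snapshot-under-lock read pattern used elsewhere in this file; it is an
 * illustration only (ex_read_leaf_pte is not a real helper), not part of the
 * pmap interface.
 */
#if 0 /* example only — never built */
static pt_entry_t
ex_read_leaf_pte(pmap_t pmap, vm_map_address_t va)
{
	pt_entry_t spte = ARM_PTE_TYPE_FAULT;   /* "no mapping" */

	PMAP_LOCK(pmap);
	pt_entry_t *pte_p = pmap_pte(pmap, va);
	if (pte_p != PT_ENTRY_NULL) {
		spte = *pte_p;   /* snapshot of the leaf entry */
	}
	PMAP_UNLOCK(pmap);

	return spte;
}
#endif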
2849
2850
2851
2852
2853
2854
2855 /*
2856 * Map memory at initialization. The physical addresses being
2857 * mapped are not managed and are never unmapped.
2858 *
2859 * For now, the VM is already on, so we only need to map the
2860 * specified memory.
2861 */
2862 vm_map_address_t
2863 pmap_map(
2864 vm_map_address_t virt,
2865 vm_offset_t start,
2866 vm_offset_t end,
2867 vm_prot_t prot,
2868 unsigned int flags)
2869 {
2870 kern_return_t kr;
2871 vm_size_t ps;
2872
2873 ps = PAGE_SIZE;
2874 while (start < end) {
2875 kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
2876 prot, VM_PROT_NONE, flags, FALSE);
2877
2878 if (kr != KERN_SUCCESS) {
2879 panic("%s: failed pmap_enter, "
2880 "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
2881 __FUNCTION__,
2882 (void *) virt, (void *) start, (void *) end, prot, flags);
2883 }
2884
2885 virt += ps;
2886 start += ps;
2887 }
2888 return virt;
2889 }
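/*
 * A hedged usage sketch of pmap_map(): the caller supplies the kernel VA at
 * which to start plus a physical [start, end) range, and gets back the VA
 * just past the last page entered.  Everything below is illustrative — the
 * addresses are placeholders and ex_map_init_range is not a real caller.
 */
#if 0 /* example only — never built */
static vm_map_address_t
ex_map_init_range(vm_map_address_t kva_cursor)
{
	vm_offset_t phys_start = 0x80000000;                     /* placeholder PA */
	vm_offset_t phys_end   = phys_start + 4 * PAGE_SIZE;

	/* Enters four pages into the kernel pmap and advances the VA cursor. */
	return pmap_map(kva_cursor, phys_start, phys_end,
	    VM_PROT_READ | VM_PROT_WRITE, 0);
}
#endif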
2890
2891 vm_map_address_t
2892 pmap_map_bd_with_options(
2893 vm_map_address_t virt,
2894 vm_offset_t start,
2895 vm_offset_t end,
2896 vm_prot_t prot,
2897 int32_t options)
2898 {
2899 pt_entry_t tmplate;
2900 pt_entry_t *ptep;
2901 vm_map_address_t vaddr;
2902 vm_offset_t paddr;
2903 pt_entry_t mem_attr;
2904
2905 switch (options & PMAP_MAP_BD_MASK) {
2906 case PMAP_MAP_BD_WCOMB:
2907 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
2908 #if (__ARM_VMSA__ > 7)
2909 mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
2910 #else
2911 mem_attr |= ARM_PTE_SH;
2912 #endif
2913 break;
2914 case PMAP_MAP_BD_POSTED:
2915 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
2916 break;
2917 case PMAP_MAP_BD_POSTED_REORDERED:
2918 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
2919 break;
2920 case PMAP_MAP_BD_POSTED_COMBINED_REORDERED:
2921 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
2922 break;
2923 default:
2924 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
2925 break;
2926 }
2927
2928 tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
2929 mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
2930 #if __ARM_KERNEL_PROTECT__
2931 tmplate |= ARM_PTE_NG;
2932 #endif /* __ARM_KERNEL_PROTECT__ */
2933
2934 vaddr = virt;
2935 paddr = start;
2936 while (paddr < end) {
2937 ptep = pmap_pte(kernel_pmap, vaddr);
2938 if (ptep == PT_ENTRY_NULL) {
2939 panic("%s: no PTE for vaddr=%p, "
2940 "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
2941 __FUNCTION__, (void*)vaddr,
2942 (void*)virt, (void*)start, (void*)end, prot, options);
2943 }
2944
2945 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
2946 WRITE_PTE_STRONG(ptep, tmplate);
2947
2948 pte_increment_pa(tmplate);
2949 vaddr += PAGE_SIZE;
2950 paddr += PAGE_SIZE;
2951 }
2952
2953 if (end >= start) {
2954 flush_mmu_tlb_region(virt, (unsigned)(end - start));
2955 }
2956
2957 return vaddr;
2958 }
2959
2960 /*
2961 * Back-door routine for mapping kernel VM at initialization.
2962 * Useful for mapping memory outside the range
2963 * [vm_first_phys, vm_last_phys] (i.e., devices).
2964 * Otherwise like pmap_map.
2965 */
2966 vm_map_address_t
2967 pmap_map_bd(
2968 vm_map_address_t virt,
2969 vm_offset_t start,
2970 vm_offset_t end,
2971 vm_prot_t prot)
2972 {
2973 pt_entry_t tmplate;
2974 pt_entry_t *ptep;
2975 vm_map_address_t vaddr;
2976 vm_offset_t paddr;
2977
2978 /* not cacheable and not buffered */
2979 tmplate = pa_to_pte(start)
2980 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
2981 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
2982 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
2983 #if __ARM_KERNEL_PROTECT__
2984 tmplate |= ARM_PTE_NG;
2985 #endif /* __ARM_KERNEL_PROTECT__ */
2986
2987 vaddr = virt;
2988 paddr = start;
2989 while (paddr < end) {
2990 ptep = pmap_pte(kernel_pmap, vaddr);
2991 if (ptep == PT_ENTRY_NULL) {
2992 panic("pmap_map_bd");
2993 }
2994 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
2995 WRITE_PTE_STRONG(ptep, tmplate);
2996
2997 pte_increment_pa(tmplate);
2998 vaddr += PAGE_SIZE;
2999 paddr += PAGE_SIZE;
3000 }
3001
3002 if (end >= start) {
3003 flush_mmu_tlb_region(virt, (unsigned)(end - start));
3004 }
3005
3006 return vaddr;
3007 }
3008
3009 /*
3010 * Back-door routine for mapping kernel VM at initialization.
3011 * Useful for mapping specific physical addresses in early
3012 * boot (i.e., before kernel_map is initialized).
3013 *
3014 * Maps are in the VM_HIGH_KERNEL_WINDOW area.
3015 */
3016
3017 vm_map_address_t
3018 pmap_map_high_window_bd(
3019 vm_offset_t pa_start,
3020 vm_size_t len,
3021 vm_prot_t prot)
3022 {
3023 pt_entry_t *ptep, pte;
3024 #if (__ARM_VMSA__ == 7)
3025 vm_map_address_t va_start = VM_HIGH_KERNEL_WINDOW;
3026 vm_map_address_t va_max = VM_MAX_KERNEL_ADDRESS;
3027 #else
3028 vm_map_address_t va_start = VREGION1_START;
3029 vm_map_address_t va_max = VREGION1_START + VREGION1_SIZE;
3030 #endif
3031 vm_map_address_t va_end;
3032 vm_map_address_t va;
3033 vm_size_t offset;
3034
3035 offset = pa_start & PAGE_MASK;
3036 pa_start -= offset;
3037 len += offset;
3038
3039 if (len > (va_max - va_start)) {
3040 panic("%s: area too large, "
3041 "pa_start=%p, len=%p, prot=0x%x",
3042 __FUNCTION__,
3043 (void*)pa_start, (void*)len, prot);
3044 }
3045
3046 scan:
3047 for (; va_start < va_max; va_start += PAGE_SIZE) {
3048 ptep = pmap_pte(kernel_pmap, va_start);
3049 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
3050 if (*ptep == ARM_PTE_TYPE_FAULT) {
3051 break;
3052 }
3053 }
3054 if (va_start > va_max) {
3055 panic("%s: insufficient pages, "
3056 "pa_start=%p, len=%p, prot=0x%x",
3057 __FUNCTION__,
3058 (void*)pa_start, (void*)len, prot);
3059 }
3060
3061 for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
3062 ptep = pmap_pte(kernel_pmap, va_end);
3063 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
3064 if (*ptep != ARM_PTE_TYPE_FAULT) {
3065 va_start = va_end + PAGE_SIZE;
3066 goto scan;
3067 }
3068 }
3069
3070 for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
3071 ptep = pmap_pte(kernel_pmap, va);
3072 pte = pa_to_pte(pa_start)
3073 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
3074 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
3075 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
3076 #if (__ARM_VMSA__ > 7)
3077 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
3078 #else
3079 pte |= ARM_PTE_SH;
3080 #endif
3081 #if __ARM_KERNEL_PROTECT__
3082 pte |= ARM_PTE_NG;
3083 #endif /* __ARM_KERNEL_PROTECT__ */
3084 WRITE_PTE_STRONG(ptep, pte);
3085 }
3086 PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len, false);
3087 #if KASAN
3088 kasan_notify_address(va_start, len);
3089 #endif
3090 return va_start;
3091 }
3092
3093 #define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))
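/*
 * PMAP_ALIGN() rounds addr up to the next multiple of align, which must be a
 * power of two; since + binds tighter than &, the expression groups as
 * ((addr) + (align - 1)) & ~(align - 1).  A tiny standalone check follows
 * (EX_ALIGN and the values are illustrative only).
 */
#if 0 /* example only — never built */
#include <assert.h>
#include <stdint.h>

#define EX_ALIGN(addr, align) (((addr) + ((align) - 1)) & ~((uintptr_t)(align) - 1))

static void
ex_align_examples(void)
{
	assert(EX_ALIGN((uintptr_t)0x1001, 0x40) == 0x1040);   /* rounds up */
	assert(EX_ALIGN((uintptr_t)0x1040, 0x40) == 0x1040);   /* already aligned */
}
#endif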
3094
3095 static vm_size_t
3096 pmap_compute_io_rgns(void)
3097 {
3098 DTEntry entry;
3099 pmap_io_range_t *ranges;
3100 uint64_t rgn_end;
3101 void *prop = NULL;
3102 int err;
3103 unsigned int prop_size;
3104
3105 err = DTLookupEntry(NULL, "/defaults", &entry);
3106 assert(err == kSuccess);
3107
3108 if (kSuccess != DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
3109 return 0;
3110 }
3111
3112 ranges = prop;
3113 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
3114 if (ranges[i].addr & PAGE_MASK) {
3115 panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
3116 }
3117 if (ranges[i].len & PAGE_MASK) {
3118 panic("pmap I/O region %u length 0x%llx is not page-aligned", i, ranges[i].len);
3119 }
3120 if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end)) {
3121 panic("pmap I/O region %u addr 0x%llx length 0x%llx wraps around", i, ranges[i].addr, ranges[i].len);
3122 }
3123 if (((ranges[i].addr <= gPhysBase) && (rgn_end > gPhysBase)) ||
3124 ((ranges[i].addr < avail_end) && (rgn_end >= avail_end)) ||
3125 ((ranges[i].addr > gPhysBase) && (rgn_end < avail_end))) {
3126 panic("pmap I/O region %u addr 0x%llx length 0x%llx overlaps physical memory", i, ranges[i].addr, ranges[i].len);
3127 }
3128
3129 ++num_io_rgns;
3130 }
3131
3132 return num_io_rgns * sizeof(*ranges);
3133 }
3134
3135 /*
3136 * return < 0 for a < b
3137 * 0 for a == b
3138 * > 0 for a > b
3139 */
3140 typedef int (*cmpfunc_t)(const void *a, const void *b);
3141
3142 extern void
3143 qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
3144
3145 static int
3146 cmp_io_rgns(const void *a, const void *b)
3147 {
3148 const pmap_io_range_t *range_a = a;
3149 const pmap_io_range_t *range_b = b;
3150 if ((range_b->addr + range_b->len) <= range_a->addr) {
3151 return 1;
3152 } else if ((range_a->addr + range_a->len) <= range_b->addr) {
3153 return -1;
3154 } else {
3155 return 0;
3156 }
3157 }
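/*
 * cmp_io_rgns() orders disjoint ranges by address and reports any overlapping
 * pair as equal, so once pmap_load_io_rgns() has qsort()ed io_attr_table the
 * table can be probed with a binary search by comparing against a one-page
 * probe range.  The sketch below illustrates such a lookup; ex_find_io_rgn is
 * a hypothetical helper, not the routine this file actually uses.
 */
#if 0 /* example only — never built */
static pmap_io_range_t *
ex_find_io_rgn(pmap_io_range_t *table, unsigned nrgns, pmap_paddr_t pa)
{
	pmap_io_range_t probe = { .addr = pa & ~(pmap_paddr_t)PAGE_MASK, .len = PAGE_SIZE };
	unsigned lo = 0, hi = nrgns;

	while (lo < hi) {
		unsigned mid = lo + (hi - lo) / 2;
		int c = cmp_io_rgns(&probe, &table[mid]);

		if (c == 0) {
			return &table[mid];   /* probe overlaps this range */
		} else if (c < 0) {
			hi = mid;             /* probe lies below table[mid] */
		} else {
			lo = mid + 1;         /* probe lies above table[mid] */
		}
	}
	return NULL;
}
#endif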
3158
3159 static void
3160 pmap_load_io_rgns(void)
3161 {
3162 DTEntry entry;
3163 pmap_io_range_t *ranges;
3164 void *prop = NULL;
3165 int err;
3166 unsigned int prop_size;
3167
3168 if (num_io_rgns == 0) {
3169 return;
3170 }
3171
3172 err = DTLookupEntry(NULL, "/defaults", &entry);
3173 assert(err == kSuccess);
3174
3175 err = DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
3176 assert(err == kSuccess);
3177
3178 ranges = prop;
3179 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
3180 io_attr_table[i] = ranges[i];
3181 }
3182
3183 qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
3184 }
3185
3186 #if __arm64__
3187 /*
3188 * pmap_get_arm64_prot
3189 *
3190 * Return the effective ARMv8 VMSA block protections for a pmap entry,
3191 * including any table-level AP/PXN/XN overrides.
3192 *
3193 */
3194
3195 uint64_t
3196 pmap_get_arm64_prot(
3197 pmap_t pmap,
3198 vm_offset_t addr)
3199 {
3200 tt_entry_t tte = 0;
3201 unsigned int level = 0;
3202 uint64_t tte_type = 0;
3203 uint64_t effective_prot_bits = 0;
3204 uint64_t aggregate_tte = 0;
3205 uint64_t table_ap_bits = 0, table_xn = 0, table_pxn = 0;
3206 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3207
3208 for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
3209 tte = *pmap_ttne(pmap, level, addr);
3210
3211 if (!(tte & ARM_TTE_VALID)) {
3212 return 0;
3213 }
3214
3215 tte_type = tte & ARM_TTE_TYPE_MASK;
3216
3217 if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
3218 (level == pt_attr->pta_max_level)) {
3219 /* Block or page mapping; both have the same protection bit layout. */
3220 break;
3221 } else if (tte_type == ARM_TTE_TYPE_TABLE) {
3222 /* All of the table bits we care about are overrides, so just OR them together. */
3223 aggregate_tte |= tte;
3224 }
3225 }
3226
3227 table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
3228 table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
3229 table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);
3230
3231 /* Start with the PTE bits. */
3232 effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);
3233
3234 /* Table AP bits mask out block/page AP bits */
3235 effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));
3236
3237 /* XN/PXN bits can be OR'd in. */
3238 effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
3239 effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);
3240
3241 return effective_prot_bits;
3242 }
3243 #endif /* __arm64__ */
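/*
 * The combination rule in pmap_get_arm64_prot() above is: table-level XN/PXN
 * can only take execute permission away, so they OR into the leaf NX/PNX
 * bits, while table-level AP bits can only restrict access, so they mask the
 * leaf AP field.  The sketch below restates that rule on an invented bit
 * layout (the EX_* constants and ex_combine_prot are illustrative, not the
 * real TTE encoding).
 */
#if 0 /* example only — never built */
#include <stdint.h>

#define EX_XN  0x1u
#define EX_PXN 0x2u
#define EX_AP  0xCu   /* two-bit access-permission field, illustrative */

static uint32_t
ex_combine_prot(uint32_t leaf, uint32_t table_overrides)
{
	uint32_t prot = leaf & (EX_AP | EX_XN | EX_PXN);

	prot &= ~(table_overrides & EX_AP);             /* AP override masks the leaf AP bits */
	prot |= (table_overrides & (EX_XN | EX_PXN));   /* XN/PXN overrides OR in */

	return prot;
}
#endif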
3244
3245
3246 /*
3247 * Bootstrap the system enough to run with virtual memory.
3248 *
3249 * The early VM initialization code has already allocated
3250 * the first CPU's translation table and made entries for
3251 * all the one-to-one mappings to be found there.
3252 *
3253 * We must set up the kernel pmap structures and the
3254 * physical-to-virtual translation lookup tables for the
3255 * physical memory to be managed (between avail_start and
3256 * avail_end).
3257 *
3258 * Map the kernel's code and data, and allocate the system page table.
3259 * Page_size must already be set.
3260 *
3261 * Parameters:
3262 * first_avail first available physical page -
3263 * after kernel page tables
3264 * avail_start PA of first managed physical page
3265 * avail_end PA of last managed physical page
3266 */
3267
3268 void
3269 pmap_bootstrap(
3270 vm_offset_t vstart)
3271 {
3272 pmap_paddr_t pmap_struct_start;
3273 vm_size_t pv_head_size;
3274 vm_size_t ptd_root_table_size;
3275 vm_size_t pp_attr_table_size;
3276 vm_size_t io_attr_table_size;
3277 unsigned int npages;
3278 vm_map_offset_t maxoffset;
3279
3280 lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);
3281
3282
3283 #if DEVELOPMENT || DEBUG
3284 if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
3285 kprintf("Kernel traces for pmap operations enabled\n");
3286 }
3287 #endif
3288
3289 /*
3290 * Initialize the kernel pmap.
3291 */
3292 pmap_stamp = 1;
3293 #if ARM_PARAMETERIZED_PMAP
3294 kernel_pmap->pmap_pt_attr = native_pt_attr;
3295 #endif /* ARM_PARAMETERIZED_PMAP */
3296 #if HAS_APPLE_PAC
3297 kernel_pmap->disable_jop = 0;
3298 #endif /* HAS_APPLE_PAC */
3299 kernel_pmap->tte = cpu_tte;
3300 kernel_pmap->ttep = cpu_ttep;
3301 #if (__ARM_VMSA__ > 7)
3302 kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
3303 #else
3304 kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
3305 #endif
3306 kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
3307 os_atomic_init(&kernel_pmap->ref_count, 1);
3308 kernel_pmap->gc_status = 0;
3309 kernel_pmap->nx_enabled = TRUE;
3310 #ifdef __arm64__
3311 kernel_pmap->is_64bit = TRUE;
3312 #else
3313 kernel_pmap->is_64bit = FALSE;
3314 #endif
3315 kernel_pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);
3316
3317 kernel_pmap->nested_region_grand_addr = 0x0ULL;
3318 kernel_pmap->nested_region_subord_addr = 0x0ULL;
3319 kernel_pmap->nested_region_size = 0x0ULL;
3320 kernel_pmap->nested_region_asid_bitmap = NULL;
3321 kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;
3322
3323 #if (__ARM_VMSA__ == 7)
3324 kernel_pmap->tte_index_max = 4 * NTTES;
3325 #endif
3326 kernel_pmap->hw_asid = 0;
3327 kernel_pmap->sw_asid = 0;
3328
3329 PMAP_LOCK_INIT(kernel_pmap);
3330 memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));
3331
3332 /* allocate space for and initialize the bookkeeping structures */
3333 io_attr_table_size = pmap_compute_io_rgns();
3334 npages = (unsigned int)atop(mem_size);
3335 pp_attr_table_size = npages * sizeof(pp_attr_t);
3336 pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
3337 // allocate enough initial PTDs to map twice the available physical memory
3338 ptd_root_table_size = sizeof(pt_desc_t) * (mem_size / ((PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES)) * 2;
3339
3340 pmap_struct_start = avail_start;
3341
3342 pp_attr_table = (pp_attr_t *) phystokv(avail_start);
3343 avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
3344 io_attr_table = (pmap_io_range_t *) phystokv(avail_start);
3345 avail_start = PMAP_ALIGN(avail_start + io_attr_table_size, __alignof(pv_entry_t*));
3346 pv_head_table = (pv_entry_t **) phystokv(avail_start);
3347 avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
3348 ptd_root_table = (pt_desc_t *)phystokv(avail_start);
3349 avail_start = round_page(avail_start + ptd_root_table_size);
3350
3351 memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);
3352
3353 pmap_load_io_rgns();
3354 ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));
3355
3356 pmap_cpu_data_array_init();
3357
3358 vm_first_phys = gPhysBase;
3359 vm_last_phys = trunc_page(avail_end);
3360
3361 simple_lock_init(&pmaps_lock, 0);
3362 simple_lock_init(&asid_lock, 0);
3363 simple_lock_init(&tt1_lock, 0);
3364 queue_init(&map_pmap_list);
3365 queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
3366 free_page_size_tt_list = TT_FREE_ENTRY_NULL;
3367 free_page_size_tt_count = 0;
3368 free_page_size_tt_max = 0;
3369 free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
3370 free_two_page_size_tt_count = 0;
3371 free_two_page_size_tt_max = 0;
3372 free_tt_list = TT_FREE_ENTRY_NULL;
3373 free_tt_count = 0;
3374 free_tt_max = 0;
3375
3376 simple_lock_init(&pt_pages_lock, 0);
3377 queue_init(&pt_page_list);
3378
3379 simple_lock_init(&pmap_pages_lock, 0);
3380 pmap_pages_request_count = 0;
3381 pmap_pages_request_acum = 0;
3382 pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;
3383
3384 virtual_space_start = vstart;
3385 virtual_space_end = VM_MAX_KERNEL_ADDRESS;
3386
3387 bitmap_full(&asid_bitmap[0], MAX_ASID);
3388
3389
3390
3391 if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
3392 maxoffset = trunc_page(maxoffset);
3393 if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
3394 && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
3395 arm_pmap_max_offset_default = maxoffset;
3396 }
3397 }
3398 #if defined(__arm64__)
3399 if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
3400 maxoffset = trunc_page(maxoffset);
3401 if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
3402 && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
3403 arm64_pmap_max_offset_default = maxoffset;
3404 }
3405 }
3406 #endif
3407
3408 #if DEVELOPMENT || DEBUG
3409 PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute, sizeof(panic_on_unsigned_execute));
3410 #endif /* DEVELOPMENT || DEBUG */
3411
3412 pmap_nesting_size_min = ARM_NESTING_SIZE_MIN;
3413 pmap_nesting_size_max = ARM_NESTING_SIZE_MAX;
3414
3415 simple_lock_init(&phys_backup_lock, 0);
3416
3417
3418 #if MACH_ASSERT
3419 PE_parse_boot_argn("pmap_stats_assert",
3420 &pmap_stats_assert,
3421 sizeof(pmap_stats_assert));
3422 PE_parse_boot_argn("vm_footprint_suspend_allowed",
3423 &vm_footprint_suspend_allowed,
3424 sizeof(vm_footprint_suspend_allowed));
3425 #endif /* MACH_ASSERT */
3426
3427 #if KASAN
3428 /* Shadow the CPU copy windows, as they fall outside of the physical aperture */
3429 kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
3430 #endif /* KASAN */
3431 }
3432
3433
3434 void
3435 pmap_virtual_space(
3436 vm_offset_t *startp,
3437 vm_offset_t *endp
3438 )
3439 {
3440 *startp = virtual_space_start;
3441 *endp = virtual_space_end;
3442 }
3443
3444
3445 boolean_t
3446 pmap_virtual_region(
3447 unsigned int region_select,
3448 vm_map_offset_t *startp,
3449 vm_map_size_t *size
3450 )
3451 {
3452 boolean_t ret = FALSE;
3453 #if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
3454 if (region_select == 0) {
3455 /*
3456 * In this config, the bootstrap mappings should occupy their own L2
3457 * TTs, as they should be immutable after boot. Having the associated
3458 * TTEs and PTEs in their own pages allows us to lock down those pages,
3459 * while allowing the rest of the kernel address range to be remapped.
3460 */
3461 #if (__ARM_VMSA__ > 7)
3462 *startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
3463 #else
3464 #error Unsupported configuration
3465 #endif
3466 *size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
3467 ret = TRUE;
3468 }
3469 #else
3470 #if (__ARM_VMSA__ > 7)
3471 unsigned long low_global_vr_mask = 0;
3472 vm_map_size_t low_global_vr_size = 0;
3473 #endif
3474
3475 if (region_select == 0) {
3476 #if (__ARM_VMSA__ == 7)
3477 *startp = gVirtBase & 0xFFC00000;
3478 *size = ((virtual_space_start - (gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
3479 #else
3480 /* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
3481 if (!TEST_PAGE_SIZE_4K) {
3482 *startp = gVirtBase & 0xFFFFFFFFFE000000;
3483 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
3484 } else {
3485 *startp = gVirtBase & 0xFFFFFFFFFF800000;
3486 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
3487 }
3488 #endif
3489 ret = TRUE;
3490 }
3491 if (region_select == 1) {
3492 *startp = VREGION1_START;
3493 *size = VREGION1_SIZE;
3494 ret = TRUE;
3495 }
3496 #if (__ARM_VMSA__ > 7)
3497 /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
3498 if (!TEST_PAGE_SIZE_4K) {
3499 low_global_vr_mask = 0xFFFFFFFFFE000000;
3500 low_global_vr_size = 0x2000000;
3501 } else {
3502 low_global_vr_mask = 0xFFFFFFFFFF800000;
3503 low_global_vr_size = 0x800000;
3504 }
3505
3506 if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
3507 *startp = LOW_GLOBAL_BASE_ADDRESS;
3508 *size = low_global_vr_size;
3509 ret = TRUE;
3510 }
3511
3512 if (region_select == 3) {
3513 /* In this config, we allow the bootstrap mappings to occupy the same
3514 * page table pages as the heap.
3515 */
3516 *startp = VM_MIN_KERNEL_ADDRESS;
3517 *size = LOW_GLOBAL_BASE_ADDRESS - *startp;
3518 ret = TRUE;
3519 }
3520 #endif
3521 #endif
3522 return ret;
3523 }
3524
3525 unsigned int
3526 pmap_free_pages(
3527 void)
3528 {
3529 return (unsigned int)atop(avail_end - first_avail);
3530 }
3531
3532
3533 boolean_t
3534 pmap_next_page_hi(
3535 ppnum_t * pnum,
3536 __unused boolean_t might_free)
3537 {
3538 return pmap_next_page(pnum);
3539 }
3540
3541
3542 boolean_t
3543 pmap_next_page(
3544 ppnum_t *pnum)
3545 {
3546 if (first_avail != avail_end) {
3547 *pnum = (ppnum_t)atop(first_avail);
3548 first_avail += PAGE_SIZE;
3549 return TRUE;
3550 }
3551 return FALSE;
3552 }
3553
3554
3555 /*
3556 * Initialize the pmap module.
3557 * Called by vm_init, to initialize any structures that the pmap
3558 * system needs to map virtual memory.
3559 */
3560 void
3561 pmap_init(
3562 void)
3563 {
3564 /*
3565 * Protect page zero in the kernel map.
3566 * (This can be overridden by permanent translation
3567 * table entries at page zero - see arm_vm_init).
3568 */
3569 vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);
3570
3571 pmap_initialized = TRUE;
3572
3573 pmap_zone_init();
3574
3575
3576 /*
3577 * Initialize the pmap object (for tracking the vm_page_t
3578 * structures for pages we allocate to be page tables in
3579 * pmap_expand()).
3580 */
3581 _vm_object_allocate(mem_size, pmap_object);
3582 pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3583
3584 pv_init();
3585
3586 /*
3587 * The values of [hard_]maxproc may have been scaled; make sure
3588 * they do not exceed MAX_ASID.
3589 */
3590 if (maxproc > MAX_ASID) {
3591 maxproc = MAX_ASID;
3592 }
3593 if (hard_maxproc > MAX_ASID) {
3594 hard_maxproc = MAX_ASID;
3595 }
3596
3597 #if CONFIG_PGTRACE
3598 pmap_pgtrace_init();
3599 #endif
3600 }
3601
3602 boolean_t
3603 pmap_verify_free(
3604 ppnum_t ppnum)
3605 {
3606 pv_entry_t **pv_h;
3607 int pai;
3608 pmap_paddr_t phys = ptoa(ppnum);
3609
3610 assert(phys != vm_page_fictitious_addr);
3611
3612 if (!pa_valid(phys)) {
3613 return FALSE;
3614 }
3615
3616 pai = (int)pa_index(phys);
3617 pv_h = pai_to_pvh(pai);
3618
3619 return pvh_test_type(pv_h, PVH_TYPE_NULL);
3620 }
3621
3622 #if MACH_ASSERT
3623 void
3624 pmap_assert_free(ppnum_t ppnum)
3625 {
3626 assertf(pmap_verify_free(ppnum), "page = 0x%x", ppnum);
3627 (void)ppnum;
3628 }
3629 #endif
3630
3631
3632 /*
3633 * Initialize zones used by pmap.
3634 */
3635 static void
3636 pmap_zone_init(
3637 void)
3638 {
3639 /*
3640 * Create the zone of physical maps
3641 * and the physical-to-virtual entries.
3642 */
3643 pmap_zone = zinit((vm_size_t) sizeof(struct pmap), (vm_size_t) sizeof(struct pmap) * 256,
3644 PAGE_SIZE, "pmap");
3645 }
3646
3647 __dead2
3648 void
3649 pmap_ledger_alloc_init(size_t size)
3650 {
3651 panic("%s: unsupported, "
3652 "size=%lu",
3653 __func__, size);
3654 }
3655
3656 __dead2
3657 ledger_t
3658 pmap_ledger_alloc(void)
3659 {
3660 panic("%s: unsupported",
3661 __func__);
3662 }
3663
3664 __dead2
3665 void
3666 pmap_ledger_free(ledger_t ledger)
3667 {
3668 panic("%s: unsupported, "
3669 "ledger=%p",
3670 __func__, ledger);
3671 }
3672
3673 /*
3674 * Create and return a physical map.
3675 *
3676 * If the size specified for the map
3677 * is zero, the map is an actual physical
3678 * map, and may be referenced by the
3679 * hardware.
3680 *
3681 * If the size specified is non-zero,
3682 * the map will be used in software only, and
3683 * is bounded by that size.
3684 */
3685 MARK_AS_PMAP_TEXT static pmap_t
3686 pmap_create_options_internal(
3687 ledger_t ledger,
3688 vm_map_size_t size,
3689 unsigned int flags)
3690 {
3691 unsigned i;
3692 unsigned tte_index_max;
3693 pmap_t p;
3694 bool is_64bit = flags & PMAP_CREATE_64BIT;
3695 #if defined(HAS_APPLE_PAC)
3696 bool disable_jop = flags & PMAP_CREATE_DISABLE_JOP;
3697 #endif /* defined(HAS_APPLE_PAC) */
3698
3699 /*
3700 * A software use-only map doesn't even need a pmap.
3701 */
3702 if (size != 0) {
3703 return PMAP_NULL;
3704 }
3705
3706 /*
3707 * Allocate a pmap struct from the pmap_zone. Then allocate
3708 * the translation table of the right size for the pmap.
3709 */
3710 if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
3711 return PMAP_NULL;
3712 }
3713
3714 if (flags & PMAP_CREATE_64BIT) {
3715 p->min = MACH_VM_MIN_ADDRESS;
3716 p->max = MACH_VM_MAX_ADDRESS;
3717 } else {
3718 p->min = VM_MIN_ADDRESS;
3719 p->max = VM_MAX_ADDRESS;
3720 }
3721
3722 #if defined(HAS_APPLE_PAC)
3723 p->disable_jop = disable_jop;
3724 #endif /* defined(HAS_APPLE_PAC) */
3725
3726 p->nested_region_true_start = 0;
3727 p->nested_region_true_end = ~0;
3728
3729 os_atomic_init(&p->ref_count, 1);
3730 p->gc_status = 0;
3731 p->stamp = os_atomic_inc(&pmap_stamp, relaxed);
3732 p->nx_enabled = TRUE;
3733 p->is_64bit = is_64bit;
3734 p->nested = FALSE;
3735 p->nested_pmap = PMAP_NULL;
3736
3737 #if ARM_PARAMETERIZED_PMAP
3738 p->pmap_pt_attr = native_pt_attr;
3739 #endif /* ARM_PARAMETERIZED_PMAP */
3740
3741 if (!pmap_get_pt_ops(p)->alloc_id(p)) {
3742 goto id_alloc_fail;
3743 }
3744
3745
3746
3747 p->ledger = ledger;
3748
3749 PMAP_LOCK_INIT(p);
3750 memset((void *) &p->stats, 0, sizeof(p->stats));
3751
3752 p->tt_entry_free = (tt_entry_t *)0;
3753 tte_index_max = PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t);
3754
3755 #if (__ARM_VMSA__ == 7)
3756 p->tte_index_max = tte_index_max;
3757 #endif
3758
3759 p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
3760 if (!(p->tte)) {
3761 goto tt1_alloc_fail;
3762 }
3763
3764 p->ttep = ml_static_vtop((vm_offset_t)p->tte);
3765 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);
3766
3767 /* nullify the translation table */
3768 for (i = 0; i < tte_index_max; i++) {
3769 p->tte[i] = ARM_TTE_TYPE_FAULT;
3770 }
3771
3772 FLUSH_PTE_RANGE(p->tte, p->tte + tte_index_max);
3773
3774 /*
3775 * initialize the rest of the structure
3776 */
3777 p->nested_region_grand_addr = 0x0ULL;
3778 p->nested_region_subord_addr = 0x0ULL;
3779 p->nested_region_size = 0x0ULL;
3780 p->nested_region_asid_bitmap = NULL;
3781 p->nested_region_asid_bitmap_size = 0x0UL;
3782
3783 p->nested_has_no_bounds_ref = false;
3784 p->nested_no_bounds_refcnt = 0;
3785 p->nested_bounds_set = false;
3786
3787
3788 #if MACH_ASSERT
3789 p->pmap_stats_assert = TRUE;
3790 p->pmap_pid = 0;
3791 strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
3792 #endif /* MACH_ASSERT */
3793 #if DEVELOPMENT || DEBUG
3794 p->footprint_was_suspended = FALSE;
3795 #endif /* DEVELOPMENT || DEBUG */
3796
3797 pmap_simple_lock(&pmaps_lock);
3798 queue_enter(&map_pmap_list, p, pmap_t, pmaps);
3799 pmap_simple_unlock(&pmaps_lock);
3800
3801 return p;
3802
3803 tt1_alloc_fail:
3804 pmap_get_pt_ops(p)->free_id(p);
3805 id_alloc_fail:
3806 zfree(pmap_zone, p);
3807 return PMAP_NULL;
3808 }
3809
3810 pmap_t
3811 pmap_create_options(
3812 ledger_t ledger,
3813 vm_map_size_t size,
3814 unsigned int flags)
3815 {
3816 pmap_t pmap;
3817
3818 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);
3819
3820 ledger_reference(ledger);
3821
3822 pmap = pmap_create_options_internal(ledger, size, flags);
3823
3824 if (pmap == PMAP_NULL) {
3825 ledger_dereference(ledger);
3826 }
3827
3828 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
3829
3830 return pmap;
3831 }
3832
3833 #if MACH_ASSERT
3834 MARK_AS_PMAP_TEXT static void
3835 pmap_set_process_internal(
3836 __unused pmap_t pmap,
3837 __unused int pid,
3838 __unused char *procname)
3839 {
3840 #if MACH_ASSERT
3841 if (pmap == NULL) {
3842 return;
3843 }
3844
3845 VALIDATE_PMAP(pmap);
3846
3847 pmap->pmap_pid = pid;
3848 strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
3849 if (pmap_ledgers_panic_leeway) {
3850 /*
3851 * XXX FBDP
3852 * Some processes somehow trigger some issues that make
3853 * the pmap stats and ledgers go off track, causing
3854 * some assertion failures and ledger panics.
3855 * Turn off the sanity checks if we allow some ledger leeway
3856 * because of that. We'll still do a final check in
3857 * pmap_check_ledgers() for discrepancies larger than the
3858 * allowed leeway after the address space has been fully
3859 * cleaned up.
3860 */
3861 pmap->pmap_stats_assert = FALSE;
3862 ledger_disable_panic_on_negative(pmap->ledger,
3863 task_ledgers.phys_footprint);
3864 ledger_disable_panic_on_negative(pmap->ledger,
3865 task_ledgers.internal);
3866 ledger_disable_panic_on_negative(pmap->ledger,
3867 task_ledgers.internal_compressed);
3868 ledger_disable_panic_on_negative(pmap->ledger,
3869 task_ledgers.iokit_mapped);
3870 ledger_disable_panic_on_negative(pmap->ledger,
3871 task_ledgers.alternate_accounting);
3872 ledger_disable_panic_on_negative(pmap->ledger,
3873 task_ledgers.alternate_accounting_compressed);
3874 }
3875 #endif /* MACH_ASSERT */
3876 }
3877 #endif /* MACH_ASSERT*/
3878
3879 #if MACH_ASSERT
3880 void
3881 pmap_set_process(
3882 pmap_t pmap,
3883 int pid,
3884 char *procname)
3885 {
3886 pmap_set_process_internal(pmap, pid, procname);
3887 }
3888 #endif /* MACH_ASSERT */
3889
3890 /*
3891 * We maintain stats and ledgers so that a task's physical footprint is:
3892 * phys_footprint = ((internal - alternate_accounting)
3893 * + (internal_compressed - alternate_accounting_compressed)
3894 * + iokit_mapped
3895 * + purgeable_nonvolatile
3896 * + purgeable_nonvolatile_compressed
3897 * + page_table)
3898 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
3899 */
3900
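/*
 * A minimal sketch of the identity above, assuming the standard
 * ledger_get_balance() accessor and a hypothetical local helper
 * ledger_balance() that returns the signed balance of one entry:
 *
 *	ledger_amount_t
 *	ledger_balance(ledger_t l, int entry)
 *	{
 *		ledger_amount_t balance = 0;
 *		ledger_get_balance(l, entry, &balance);
 *		return balance;
 *	}
 *
 *	expected_phys_footprint =
 *	    (ledger_balance(ledger, task_ledgers.internal) -
 *	     ledger_balance(ledger, task_ledgers.alternate_accounting)) +
 *	    (ledger_balance(ledger, task_ledgers.internal_compressed) -
 *	     ledger_balance(ledger, task_ledgers.alternate_accounting_compressed)) +
 *	    ledger_balance(ledger, task_ledgers.iokit_mapped) +
 *	    ledger_balance(ledger, task_ledgers.purgeable_nonvolatile) +
 *	    ledger_balance(ledger, task_ledgers.purgeable_nonvolatile_compressed) +
 *	    ledger_balance(ledger, task_ledgers.page_table);
 */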
3901
3902 /*
3903 * Retire the given physical map from service.
3904 * Should only be called if the map contains
3905 * no valid mappings.
3906 */
3907 MARK_AS_PMAP_TEXT static void
3908 pmap_destroy_internal(
3909 pmap_t pmap)
3910 {
3911 if (pmap == PMAP_NULL) {
3912 return;
3913 }
3914
3915 VALIDATE_PMAP(pmap);
3916
3917 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3918
3919 int32_t ref_count = os_atomic_dec(&pmap->ref_count, relaxed);
3920 if (ref_count > 0) {
3921 return;
3922 } else if (ref_count < 0) {
3923 panic("pmap %p: refcount underflow", pmap);
3924 } else if (pmap == kernel_pmap) {
3925 panic("pmap %p: attempt to destroy kernel pmap", pmap);
3926 }
3927
3928 pt_entry_t *ttep;
3929
3930 #if (__ARM_VMSA__ > 7)
3931 pmap_unmap_sharedpage(pmap);
3932 #endif /* (__ARM_VMSA__ > 7) */
3933
3934 pmap_simple_lock(&pmaps_lock);
3935 while (pmap->gc_status & PMAP_GC_INFLIGHT) {
3936 pmap->gc_status |= PMAP_GC_WAIT;
3937 assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
3938 pmap_simple_unlock(&pmaps_lock);
3939 (void) thread_block(THREAD_CONTINUE_NULL);
3940 pmap_simple_lock(&pmaps_lock);
3941 }
3942 queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
3943 pmap_simple_unlock(&pmaps_lock);
3944
3945 pmap_trim_self(pmap);
3946
3947 /*
3948 * Free the memory maps, then the
3949 * pmap structure.
3950 */
3951 #if (__ARM_VMSA__ == 7)
3952 unsigned int i = 0;
3953
3954 PMAP_LOCK(pmap);
3955 for (i = 0; i < pmap->tte_index_max; i++) {
3956 ttep = &pmap->tte[i];
3957 if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3958 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
3959 }
3960 }
3961 PMAP_UNLOCK(pmap);
3962 #else /* (__ARM_VMSA__ == 7) */
3963 vm_map_address_t c;
3964 unsigned int level;
3965
3966 for (level = pt_attr->pta_max_level - 1; level >= pt_attr->pta_root_level; level--) {
3967 for (c = pmap->min; c < pmap->max; c += pt_attr_ln_size(pt_attr, level)) {
3968 ttep = pmap_ttne(pmap, level, c);
3969
3970 if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
3971 PMAP_LOCK(pmap);
3972 pmap_tte_deallocate(pmap, ttep, level);
3973 PMAP_UNLOCK(pmap);
3974 }
3975 }
3976 }
3977 #endif /* (__ARM_VMSA__ == 7) */
3978
3979
3980
3981 if (pmap->tte) {
3982 #if (__ARM_VMSA__ == 7)
3983 pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
3984 pmap->tte_index_max = 0;
3985 #else /* (__ARM_VMSA__ == 7) */
3986 pmap_tt1_deallocate(pmap, pmap->tte, PMAP_ROOT_ALLOC_SIZE, 0);
3987 #endif /* (__ARM_VMSA__ == 7) */
3988 pmap->tte = (tt_entry_t *) NULL;
3989 pmap->ttep = 0;
3990 }
3991
3992 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
3993
3994 pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
3995 sync_tlb_flush();
3996
3997 /* return its asid to the pool */
3998 pmap_get_pt_ops(pmap)->free_id(pmap);
3999 pmap_check_ledgers(pmap);
4000
4001 if (pmap->nested_region_asid_bitmap) {
4002 kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
4003 }
4004
4005 zfree(pmap_zone, pmap);
4006 }
4007
4008 void
4009 pmap_destroy(
4010 pmap_t pmap)
4011 {
4012 ledger_t ledger;
4013
4014 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
4015
4016 ledger = pmap->ledger;
4017
4018 pmap_destroy_internal(pmap);
4019
4020 ledger_dereference(ledger);
4021
4022 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
4023 }
4024
4025
4026 /*
4027 * Add a reference to the specified pmap.
4028 */
4029 MARK_AS_PMAP_TEXT static void
4030 pmap_reference_internal(
4031 pmap_t pmap)
4032 {
4033 if (pmap != PMAP_NULL) {
4034 VALIDATE_PMAP(pmap);
4035 os_atomic_inc(&pmap->ref_count, relaxed);
4036 }
4037 }
4038
4039 void
4040 pmap_reference(
4041 pmap_t pmap)
4042 {
4043 pmap_reference_internal(pmap);
4044 }
4045
4046 static tt_entry_t *
4047 pmap_tt1_allocate(
4048 pmap_t pmap,
4049 vm_size_t size,
4050 unsigned option)
4051 {
4052 tt_entry_t *tt1 = NULL;
4053 tt_free_entry_t *tt1_free;
4054 pmap_paddr_t pa;
4055 vm_address_t va;
4056 vm_address_t va_end;
4057 kern_return_t ret;
4058
4059 pmap_simple_lock(&tt1_lock);
4060 if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
4061 free_page_size_tt_count--;
4062 tt1 = (tt_entry_t *)free_page_size_tt_list;
4063 free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
4064 } else if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
4065 free_two_page_size_tt_count--;
4066 tt1 = (tt_entry_t *)free_two_page_size_tt_list;
4067 free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
4068 } else if ((size < PAGE_SIZE) && (free_tt_count != 0)) {
4069 free_tt_count--;
4070 tt1 = (tt_entry_t *)free_tt_list;
4071 free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
4072 }
4073
4074 pmap_simple_unlock(&tt1_lock);
4075
4076 if (tt1 != NULL) {
4077 pmap_tt_ledger_credit(pmap, size);
4078 return (tt_entry_t *)tt1;
4079 }
4080
4081 ret = pmap_pages_alloc(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));
4082
4083 if (ret == KERN_RESOURCE_SHORTAGE) {
4084 return (tt_entry_t *)0;
4085 }
4086
4087
4088 if (size < PAGE_SIZE) {
4089 va = phystokv(pa) + size;
4090 tt_free_entry_t *local_free_list = (tt_free_entry_t*)va;
4091 tt_free_entry_t *next_free = NULL;
4092 for (va_end = phystokv(pa) + PAGE_SIZE; va < va_end; va = va + size) {
4093 tt1_free = (tt_free_entry_t *)va;
4094 tt1_free->next = next_free;
4095 next_free = tt1_free;
4096 }
4097 pmap_simple_lock(&tt1_lock);
4098 local_free_list->next = free_tt_list;
4099 free_tt_list = next_free;
4100 free_tt_count += ((PAGE_SIZE / size) - 1);
4101 if (free_tt_count > free_tt_max) {
4102 free_tt_max = free_tt_count;
4103 }
4104 pmap_simple_unlock(&tt1_lock);
4105 }
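/*
 * For example, with 4KB pages and a hypothetical 1KB root-table size, the
 * three remaining 1KB chunks of the freshly allocated page are threaded
 * onto free_tt_list above, and the first chunk is returned to the caller
 * below.
 */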
4106
4107 /* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained via the arm_pt_root_size sysctl.
4108 * Depending on the device, this can vary between 512 bytes and 16KB. */
4109 OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
4110 OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
4111 pmap_tt_ledger_credit(pmap, size);
4112
4113 return (tt_entry_t *) phystokv(pa);
4114 }
4115
4116 static void
4117 pmap_tt1_deallocate(
4118 pmap_t pmap,
4119 tt_entry_t *tt,
4120 vm_size_t size,
4121 unsigned option)
4122 {
4123 tt_free_entry_t *tt_entry;
4124
4125 tt_entry = (tt_free_entry_t *)tt;
4126 assert(not_in_kdp);
4127 pmap_simple_lock(&tt1_lock);
4128
4129 if (size < PAGE_SIZE) {
4130 free_tt_count++;
4131 if (free_tt_count > free_tt_max) {
4132 free_tt_max = free_tt_count;
4133 }
4134 tt_entry->next = free_tt_list;
4135 free_tt_list = tt_entry;
4136 }
4137
4138 if (size == PAGE_SIZE) {
4139 free_page_size_tt_count++;
4140 if (free_page_size_tt_count > free_page_size_tt_max) {
4141 free_page_size_tt_max = free_page_size_tt_count;
4142 }
4143 tt_entry->next = free_page_size_tt_list;
4144 free_page_size_tt_list = tt_entry;
4145 }
4146
4147 if (size == 2 * PAGE_SIZE) {
4148 free_two_page_size_tt_count++;
4149 if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
4150 free_two_page_size_tt_max = free_two_page_size_tt_count;
4151 }
4152 tt_entry->next = free_two_page_size_tt_list;
4153 free_two_page_size_tt_list = tt_entry;
4154 }
4155
4156 if (option & PMAP_TT_DEALLOCATE_NOBLOCK) {
4157 pmap_simple_unlock(&tt1_lock);
4158 pmap_tt_ledger_debit(pmap, size);
4159 return;
4160 }
4161
4162 while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
4163 free_page_size_tt_count--;
4164 tt = (tt_entry_t *)free_page_size_tt_list;
4165 free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
4166
4167 pmap_simple_unlock(&tt1_lock);
4168
4169 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);
4170
4171 OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
4172
4173 pmap_simple_lock(&tt1_lock);
4174 }
4175
4176 while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
4177 free_two_page_size_tt_count--;
4178 tt = (tt_entry_t *)free_two_page_size_tt_list;
4179 free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
4180
4181 pmap_simple_unlock(&tt1_lock);
4182
4183 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);
4184
4185 OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
4186
4187 pmap_simple_lock(&tt1_lock);
4188 }
4189 pmap_simple_unlock(&tt1_lock);
4190 pmap_tt_ledger_debit(pmap, size);
4191 }
4192
4193 static kern_return_t
4194 pmap_tt_allocate(
4195 pmap_t pmap,
4196 tt_entry_t **ttp,
4197 unsigned int level,
4198 unsigned int options)
4199 {
4200 pmap_paddr_t pa;
4201 *ttp = NULL;
4202
4203 PMAP_LOCK(pmap);
4204 if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
4205 tt_free_entry_t *tt_free_next;
4206
4207 tt_free_next = ((tt_free_entry_t *)pmap->tt_entry_free)->next;
4208 *ttp = (tt_entry_t *)pmap->tt_entry_free;
4209 pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
4210 }
4211 PMAP_UNLOCK(pmap);
4212
4213 if (*ttp == NULL) {
4214 pt_desc_t *ptdp;
4215
4216 /*
4217 * Allocate a VM page for the page table entries at the requested level.
4218 */
4219 while (pmap_pages_alloc(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
4220 if (options & PMAP_OPTIONS_NOWAIT) {
4221 return KERN_RESOURCE_SHORTAGE;
4222 }
4223 VM_PAGE_WAIT();
4224 }
4225
4226 while ((ptdp = ptd_alloc(pmap, false)) == NULL) {
4227 if (options & PMAP_OPTIONS_NOWAIT) {
4228 pmap_pages_free(pa, PAGE_SIZE);
4229 return KERN_RESOURCE_SHORTAGE;
4230 }
4231 VM_PAGE_WAIT();
4232 }
4233
4234 if (level < PMAP_TT_MAX_LEVEL) {
4235 OSAddAtomic64(1, &alloc_ttepages_count);
4236 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
4237 } else {
4238 OSAddAtomic64(1, &alloc_ptepages_count);
4239 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
4240 }
4241
4242 pmap_tt_ledger_credit(pmap, PAGE_SIZE);
4243
4244 PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);
4245
4246 pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);
4247
4248 __unreachable_ok_push
4249 if (TEST_PAGE_RATIO_4) {
4250 vm_address_t va;
4251 vm_address_t va_end;
4252
4253 PMAP_LOCK(pmap);
4254
4255 for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + ARM_PGBYTES; va < va_end; va = va + ARM_PGBYTES) {
4256 ((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
4257 pmap->tt_entry_free = (tt_entry_t *)va;
4258 }
4259 PMAP_UNLOCK(pmap);
4260 }
4261 __unreachable_ok_pop
4262
4263 *ttp = (tt_entry_t *)phystokv(pa);
4264 }
4265
4266
4267 return KERN_SUCCESS;
4268 }
4269
4270
4271 static void
4272 pmap_tt_deallocate(
4273 pmap_t pmap,
4274 tt_entry_t *ttp,
4275 unsigned int level)
4276 {
4277 pt_desc_t *ptdp;
4278 unsigned pt_acc_cnt;
4279 unsigned i, max_pt_index = PAGE_RATIO;
4280 vm_offset_t free_page = 0;
4281
4282 PMAP_LOCK(pmap);
4283
4284 ptdp = ptep_get_ptd((vm_offset_t)ttp);
4285
4286 ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].va = (vm_offset_t)-1;
4287
4288 if ((level < PMAP_TT_MAX_LEVEL) && (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt == PT_DESC_REFCOUNT)) {
4289 ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
4290 }
4291
4292 if (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt != 0) {
4293 panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt);
4294 }
4295
4296 ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
4297
4298 for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
4299 pt_acc_cnt += ptdp->ptd_info[i].refcnt;
4300 }
4301
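/*
 * If the refcounts across all of this descriptor's sub-page tables have
 * dropped to zero, walk the pmap's free list and count how many of this
 * page's other sub-page tables are already free. If the whole page is
 * now free, unlink those entries and release the page after dropping the
 * pmap lock below; otherwise just push this table onto the free list.
 */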
4302 if (pt_acc_cnt == 0) {
4303 tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
4304 unsigned pt_free_entry_cnt = 1;
4305
4306 while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
4307 tt_free_entry_t *tt_free_list_next;
4308
4309 tt_free_list_next = tt_free_list->next;
4310 if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
4311 pt_free_entry_cnt++;
4312 }
4313 tt_free_list = tt_free_list_next;
4314 }
4315 if (pt_free_entry_cnt == max_pt_index) {
4316 tt_free_entry_t *tt_free_list_cur;
4317
4318 free_page = (vm_offset_t)ttp & ~PAGE_MASK;
4319 tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
4320 tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;
4321
4322 while (tt_free_list_cur) {
4323 tt_free_entry_t *tt_free_list_next;
4324
4325 tt_free_list_next = tt_free_list_cur->next;
4326 if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
4327 tt_free_list->next = tt_free_list_next->next;
4328 } else {
4329 tt_free_list = tt_free_list_next;
4330 }
4331 tt_free_list_cur = tt_free_list_next;
4332 }
4333 } else {
4334 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
4335 pmap->tt_entry_free = ttp;
4336 }
4337 } else {
4338 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
4339 pmap->tt_entry_free = ttp;
4340 }
4341
4342 PMAP_UNLOCK(pmap);
4343
4344 if (free_page != 0) {
4345 ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
4346 *(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
4347 pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
4348 if (level < PMAP_TT_MAX_LEVEL) {
4349 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
4350 } else {
4351 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
4352 }
4353 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
4354 pmap_tt_ledger_debit(pmap, PAGE_SIZE);
4355 }
4356 }
4357
4358 static void
4359 pmap_tte_remove(
4360 pmap_t pmap,
4361 tt_entry_t *ttep,
4362 unsigned int level)
4363 {
4364 tt_entry_t tte = *ttep;
4365
4366 if (tte == 0) {
4367 panic("pmap_tte_deallocate(): null tt_entry ttep==%p\n", ttep);
4368 }
4369
4370 if (((level + 1) == PMAP_TT_MAX_LEVEL) && (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt != 0)) {
4371 panic("pmap_tte_deallocate(): pmap=%p ttep=%p ptd=%p refcnt=0x%x \n", pmap, ttep,
4372 tte_get_ptd(tte), (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt));
4373 }
4374
4375 #if (__ARM_VMSA__ == 7)
4376 {
4377 tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
4378 unsigned i;
4379
4380 for (i = 0; i < 4; i++, ttep_4M++) {
4381 *ttep_4M = (tt_entry_t) 0;
4382 }
4383 FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
4384 }
4385 #else
4386 *ttep = (tt_entry_t) 0;
4387 FLUSH_PTE_STRONG(ttep);
4388 #endif
4389 }
4390
4391 static void
4392 pmap_tte_deallocate(
4393 pmap_t pmap,
4394 tt_entry_t *ttep,
4395 unsigned int level)
4396 {
4397 pmap_paddr_t pa;
4398 tt_entry_t tte;
4399
4400 PMAP_ASSERT_LOCKED(pmap);
4401
4402 tte = *ttep;
4403
4404 #if MACH_ASSERT
4405 if (tte_get_ptd(tte)->pmap != pmap) {
4406 panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
4407 tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
4408 }
4409 #endif
4410
4411 pmap_tte_remove(pmap, ttep, level);
4412
4413 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
4414 #if MACH_ASSERT
4415 {
4416 pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~ARM_PGMASK));
4417 unsigned i;
4418
4419 for (i = 0; i < (ARM_PGBYTES / sizeof(*pte_p)); i++, pte_p++) {
4420 if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
4421 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
4422 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
4423 } else if (((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT) {
4424 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
4425 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
4426 }
4427 }
4428 }
4429 #endif
4430 PMAP_UNLOCK(pmap);
4431
4432 /* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
4433 * aligned on 1K boundaries. We clear the surrounding "chunk" of 4 TTEs above. */
4434 pa = tte_to_pa(tte) & ~ARM_PGMASK;
4435 pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
4436 PMAP_LOCK(pmap);
4437 }
4438 }
4439
4440 /*
4441 * Remove a range of hardware page-table entries.
4442 * The entries given are the first (inclusive)
4443 * and last (exclusive) entries for the VM pages.
4444 * The virtual address is the va for the first pte.
4445 *
4446 * The pmap must be locked.
4447 * If the pmap is not the kernel pmap, the range must lie
4448 * entirely within one pte-page. This is NOT checked.
4449 * Assumes that the pte-page exists.
4450 *
4451 * Returns the number of PTEs changed, and sets *rmv_cnt
4452 * to the number of SPTEs changed.
4453 */
4454 static int
4455 pmap_remove_range(
4456 pmap_t pmap,
4457 vm_map_address_t va,
4458 pt_entry_t *bpte,
4459 pt_entry_t *epte,
4460 uint32_t *rmv_cnt)
4461 {
4462 bool need_strong_sync = false;
4463 int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
4464 &need_strong_sync, PMAP_OPTIONS_REMOVE);
4465 if (num_changed > 0) {
4466 PMAP_UPDATE_TLBS(pmap, va, va + (PAGE_SIZE * (epte - bpte)), need_strong_sync);
4467 }
4468 return num_changed;
4469 }
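/*
 * A minimal usage sketch, assuming the caller has already looked up the
 * leaf table for "va", that PAGE_SIZE == ARM_PGBYTES, and that the
 * hypothetical "npages" run lies within a single pte-page:
 *
 *	uint32_t removed_sptes = 0;
 *	PMAP_LOCK(pmap);
 *	pt_entry_t *bpte = pmap_pte(pmap, va);
 *	int changed = pmap_remove_range(pmap, va, bpte, bpte + npages, &removed_sptes);
 *	PMAP_UNLOCK(pmap);
 */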
4470
4471
4472 #ifdef PVH_FLAG_EXEC
4473
4474 /*
4475 * Update the access protection bits of the physical aperture mapping for a page.
4476 * This is useful, for example, in guaranteeing that a verified executable page
4477 * has no writable mappings anywhere in the system, including the physical
4478 * aperture. flush_tlb_async can be set to true to avoid unnecessary TLB
4479 * synchronization overhead in cases where the call to this function is
4480 * guaranteed to be followed by other TLB operations.
4481 */
4482 static void
4483 pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
4484 {
4485 #if __ARM_PTE_PHYSMAP__
4486 ASSERT_PVH_LOCKED(pai);
4487 vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
4488 pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
4489
4490 pt_entry_t tmplate = *pte_p;
4491 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
4492 return;
4493 }
4494 tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
4495 #if (__ARM_VMSA__ > 7)
4496 if (tmplate & ARM_PTE_HINT_MASK) {
4497 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
4498 __func__, pte_p, (void *)kva, tmplate);
4499 }
4500 #endif
4501 WRITE_PTE_STRONG(pte_p, tmplate);
4502 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
4503 if (!flush_tlb_async) {
4504 sync_tlb_flush();
4505 }
4506 #endif
4507 }
4508
4509 #endif /* defined(PVH_FLAG_EXEC) */
4510
4511 static void
4512 pmap_remove_pv(
4513 pmap_t pmap,
4514 pt_entry_t *cpte,
4515 int pai,
4516 int *num_internal,
4517 int *num_alt_internal,
4518 int *num_reusable,
4519 int *num_external)
4520 {
4521 pv_entry_t **pv_h, **pve_pp;
4522 pv_entry_t *pve_p;
4523
4524 ASSERT_PVH_LOCKED(pai);
4525 pv_h = pai_to_pvh(pai);
4526 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
4527
4528
4529 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
4530 if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0)) {
4531 panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
4532 }
4533 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
4534 assert(IS_INTERNAL_PAGE(pai));
4535 (*num_internal)++;
4536 (*num_alt_internal)++;
4537 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
4538 } else if (IS_INTERNAL_PAGE(pai)) {
4539 if (IS_REUSABLE_PAGE(pai)) {
4540 (*num_reusable)++;
4541 } else {
4542 (*num_internal)++;
4543 }
4544 } else {
4545 (*num_external)++;
4546 }
4547 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
4548 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
4549 pve_pp = pv_h;
4550 pve_p = pvh_list(pv_h);
4551
4552 while (pve_p != PV_ENTRY_NULL &&
4553 (pve_get_ptep(pve_p) != cpte)) {
4554 pve_pp = pve_link_field(pve_p);
4555 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
4556 }
4557
4558 if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0)) {
4559 panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
4560 }
4561
4562 #if MACH_ASSERT
4563 if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
4564 pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
4565 while (check_pve_p != PV_ENTRY_NULL) {
4566 if (pve_get_ptep(check_pve_p) == cpte) {
4567 panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
4568 __func__, cpte, pmap, pv_h, pve_p, pai);
4569 }
4570 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
4571 }
4572 }
4573 #endif
4574
4575 if (IS_ALTACCT_PAGE(pai, pve_p)) {
4576 assert(IS_INTERNAL_PAGE(pai));
4577 (*num_internal)++;
4578 (*num_alt_internal)++;
4579 CLR_ALTACCT_PAGE(pai, pve_p);
4580 } else if (IS_INTERNAL_PAGE(pai)) {
4581 if (IS_REUSABLE_PAGE(pai)) {
4582 (*num_reusable)++;
4583 } else {
4584 (*num_internal)++;
4585 }
4586 } else {
4587 (*num_external)++;
4588 }
4589
4590 pvh_remove(pv_h, pve_pp, pve_p);
4591 pv_free(pve_p);
4592 if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
4593 pvh_set_flags(pv_h, pvh_flags);
4594 }
4595 } else {
4596 panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
4597 __func__, *pv_h, cpte, pmap, pv_h, pai);
4598 }
4599
4600 #ifdef PVH_FLAG_EXEC
4601 if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
4602 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
4603 }
4604 #endif
4605 }
4606
4607 static int
4608 pmap_remove_range_options(
4609 pmap_t pmap,
4610 vm_map_address_t va,
4611 pt_entry_t *bpte,
4612 pt_entry_t *epte,
4613 uint32_t *rmv_cnt,
4614 bool *need_strong_sync __unused,
4615 int options)
4616 {
4617 pt_entry_t *cpte;
4618 int num_removed, num_unwired;
4619 int num_pte_changed;
4620 int pai = 0;
4621 pmap_paddr_t pa;
4622 int num_external, num_internal, num_reusable;
4623 int num_alt_internal;
4624 uint64_t num_compressed, num_alt_compressed;
4625
4626 PMAP_ASSERT_LOCKED(pmap);
4627
4628 num_removed = 0;
4629 num_unwired = 0;
4630 num_pte_changed = 0;
4631 num_external = 0;
4632 num_internal = 0;
4633 num_reusable = 0;
4634 num_compressed = 0;
4635 num_alt_internal = 0;
4636 num_alt_compressed = 0;
4637
4638 for (cpte = bpte; cpte < epte;
4639 cpte += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
4640 pt_entry_t spte;
4641 boolean_t managed = FALSE;
4642
4643 spte = *cpte;
4644
4645 #if CONFIG_PGTRACE
4646 if (pgtrace_enabled) {
4647 pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
4648 }
4649 #endif
4650
4651 while (!managed) {
4652 if (pmap != kernel_pmap &&
4653 (options & PMAP_OPTIONS_REMOVE) &&
4654 (ARM_PTE_IS_COMPRESSED(spte, cpte))) {
4655 /*
4656 * "pmap" must be locked at this point,
4657 * so this should not race with another
4658 * pmap_remove_range() or pmap_enter().
4659 */
4660
4661 /* one less "compressed"... */
4662 num_compressed++;
4663 if (spte & ARM_PTE_COMPRESSED_ALT) {
4664 /* ... but it used to be "ALTACCT" */
4665 num_alt_compressed++;
4666 }
4667
4668 /* clear marker */
4669 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
4670 /*
4671 * "refcnt" also accounts for
4672 * our "compressed" markers,
4673 * so let's update it here.
4674 */
4675 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
4676 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
4677 }
4678 spte = *cpte;
4679 }
4680 /*
4681 * It may be possible for the pte to transition from managed
4682 * to unmanaged in this timeframe; for now, elide the assert.
4683 * We should break out as a consequence of checking pa_valid.
4684 */
4685 //assert(!ARM_PTE_IS_COMPRESSED(spte));
4686 pa = pte_to_pa(spte);
4687 if (!pa_valid(pa)) {
4688 break;
4689 }
4690 pai = (int)pa_index(pa);
4691 LOCK_PVH(pai);
4692 spte = *cpte;
4693 pa = pte_to_pa(spte);
4694 if (pai == (int)pa_index(pa)) {
4695 managed = TRUE;
4696 break; // Leave pai locked as we will unlock it after we free the PV entry
4697 }
4698 UNLOCK_PVH(pai);
4699 }
4700
4701 if (ARM_PTE_IS_COMPRESSED(*cpte, cpte)) {
4702 /*
4703 * There used to be a valid mapping here but it
4704 * has already been removed when the page was
4705 * sent to the VM compressor, so nothing left to
4706 * remove now...
4707 */
4708 continue;
4709 }
4710
4711 /* remove the translation, do not flush the TLB */
4712 if (*cpte != ARM_PTE_TYPE_FAULT) {
4713 assertf(!ARM_PTE_IS_COMPRESSED(*cpte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
4714 assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
4715 #if MACH_ASSERT
4716 if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
4717 panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
4718 cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
4719 }
4720 #endif
4721 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
4722 num_pte_changed++;
4723 }
4724
4725 if ((spte != ARM_PTE_TYPE_FAULT) &&
4726 (pmap != kernel_pmap)) {
4727 assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
4728 assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
4729 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
4730 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
4731 }
4732 if (rmv_cnt) {
4733 (*rmv_cnt)++;
4734 }
4735 }
4736
4737 if (pte_is_wired(spte)) {
4738 pte_set_wired(cpte, 0);
4739 num_unwired++;
4740 }
4741 /*
4742 * if not managed, we're done
4743 */
4744 if (!managed) {
4745 continue;
4746 }
4747 /*
4748 * find and remove the mapping from the chain for this
4749 * physical address.
4750 */
4751
4752 pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);
4753
4754 UNLOCK_PVH(pai);
4755 num_removed++;
4756 }
4757
4758 /*
4759 * Update the counts
4760 */
4761 OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
4762 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
4763
4764 if (pmap != kernel_pmap) {
4765 /* sanity checks... */
4766 #if MACH_ASSERT
4767 if (pmap->stats.internal < num_internal) {
4768 if ((!pmap_stats_assert ||
4769 !pmap->pmap_stats_assert)) {
4770 printf("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
4771 pmap->pmap_pid,
4772 pmap->pmap_procname,
4773 pmap,
4774 (uint64_t) va,
4775 bpte,
4776 epte,
4777 options,
4778 num_internal,
4779 num_removed,
4780 num_unwired,
4781 num_external,
4782 num_reusable,
4783 num_compressed,
4784 num_alt_internal,
4785 num_alt_compressed,
4786 num_pte_changed,
4787 pmap->stats.internal,
4788 pmap->stats.reusable);
4789 } else {
4790 panic("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
4791 pmap->pmap_pid,
4792 pmap->pmap_procname,
4793 pmap,
4794 (uint64_t) va,
4795 bpte,
4796 epte,
4797 options,
4798 num_internal,
4799 num_removed,
4800 num_unwired,
4801 num_external,
4802 num_reusable,
4803 num_compressed,
4804 num_alt_internal,
4805 num_alt_compressed,
4806 num_pte_changed,
4807 pmap->stats.internal,
4808 pmap->stats.reusable);
4809 }
4810 }
4811 #endif /* MACH_ASSERT */
4812 PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
4813 pmap,
4814 "pmap=%p num_external=%d stats.external=%d",
4815 pmap, num_external, pmap->stats.external);
4816 PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
4817 pmap,
4818 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
4819 pmap,
4820 num_internal, pmap->stats.internal,
4821 num_reusable, pmap->stats.reusable);
4822 PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
4823 pmap,
4824 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
4825 pmap,
4826 num_internal, pmap->stats.internal,
4827 num_reusable, pmap->stats.reusable);
4828 PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
4829 pmap,
4830 "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
4831 pmap, num_compressed, num_alt_compressed,
4832 pmap->stats.compressed);
4833
4834 /* update pmap stats... */
4835 OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
4836 if (num_external) {
4837 OSAddAtomic(-num_external, &pmap->stats.external);
4838 }
4839 if (num_internal) {
4840 OSAddAtomic(-num_internal, &pmap->stats.internal);
4841 }
4842 if (num_reusable) {
4843 OSAddAtomic(-num_reusable, &pmap->stats.reusable);
4844 }
4845 if (num_compressed) {
4846 OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
4847 }
4848 /* ... and ledgers */
4849 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
4850 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
4851 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
4852 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
4853 pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
4854 /* make needed adjustments to phys_footprint */
4855 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
4856 machine_ptob((num_internal -
4857 num_alt_internal) +
4858 (num_compressed -
4859 num_alt_compressed)));
4860 }
4861
4862 /* flush the ptable entries we have written */
4863 if (num_pte_changed > 0) {
4864 FLUSH_PTE_RANGE_STRONG(bpte, epte);
4865 }
4866
4867 return num_pte_changed;
4868 }
4869
4870
4871 /*
4872 * Remove the given range of addresses
4873 * from the specified map.
4874 *
4875 * It is assumed that the start and end are properly
4876 * rounded to the hardware page size.
4877 */
4878 void
4879 pmap_remove(
4880 pmap_t pmap,
4881 vm_map_address_t start,
4882 vm_map_address_t end)
4883 {
4884 pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
4885 }
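/*
 * A minimal sketch: drop every mapping covering one page-aligned page;
 * start and end must be rounded to the hardware page size as noted above.
 *
 *	pmap_remove(pmap, va, va + PAGE_SIZE);
 */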
4886
4887 MARK_AS_PMAP_TEXT static int
4888 pmap_remove_options_internal(
4889 pmap_t pmap,
4890 vm_map_address_t start,
4891 vm_map_address_t end,
4892 int options)
4893 {
4894 int remove_count = 0;
4895 pt_entry_t *bpte, *epte;
4896 pt_entry_t *pte_p;
4897 tt_entry_t *tte_p;
4898 uint32_t rmv_spte = 0;
4899 bool need_strong_sync = false;
4900 bool flush_tte = false;
4901
4902 if (__improbable(end < start)) {
4903 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
4904 }
4905
4906 VALIDATE_PMAP(pmap);
4907
4908 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4909
4910 PMAP_LOCK(pmap);
4911
4912 tte_p = pmap_tte(pmap, start);
4913
4914 if (tte_p == (tt_entry_t *) NULL) {
4915 goto done;
4916 }
4917
4918 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
4919 pte_p = (pt_entry_t *) ttetokv(*tte_p);
4920 bpte = &pte_p[ptenum(start)];
4921 epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));
4922
4923 remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
4924 &rmv_spte, &need_strong_sync, options);
4925
4926 if (rmv_spte && (ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
4927 (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
4928 pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
4929 flush_tte = true;
4930 }
4931 }
4932
4933 done:
4934 PMAP_UNLOCK(pmap);
4935
4936 if (remove_count > 0) {
4937 PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
4938 } else if (flush_tte) {
4939 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(start, pmap);
4940 sync_tlb_flush();
4941 }
4942 return remove_count;
4943 }
4944
4945 void
4946 pmap_remove_options(
4947 pmap_t pmap,
4948 vm_map_address_t start,
4949 vm_map_address_t end,
4950 int options)
4951 {
4952 int remove_count = 0;
4953 vm_map_address_t va;
4954
4955 if (pmap == PMAP_NULL) {
4956 return;
4957 }
4958
4959 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4960
4961 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
4962 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
4963 VM_KERNEL_ADDRHIDE(end));
4964
4965 #if MACH_ASSERT
4966 if ((start | end) & PAGE_MASK) {
4967 panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
4968 pmap, (uint64_t)start, (uint64_t)end);
4969 }
4970 if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
4971 panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
4972 pmap, (uint64_t)start, (uint64_t)end);
4973 }
4974 #endif
4975
4976 /*
4977 * Remove the mappings one twig-sized (leaf-table) chunk at a time
4978 */
4979 va = start;
4980 while (va < end) {
4981 vm_map_address_t l;
4982
4983 l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
4984 if (l > end) {
4985 l = end;
4986 }
4987
4988 remove_count += pmap_remove_options_internal(pmap, va, l, options);
4989
4990 va = l;
4991 }
4992
4993 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
4994 }
4995
4996
4997 /*
4998 * Remove phys addr if mapped in specified map
4999 */
5000 void
5001 pmap_remove_some_phys(
5002 __unused pmap_t map,
5003 __unused ppnum_t pn)
5004 {
5005 /* Implement to support working set code */
5006 }
5007
5008 void
5009 pmap_set_pmap(
5010 pmap_t pmap,
5011 #if !__ARM_USER_PROTECT__
5012 __unused
5013 #endif
5014 thread_t thread)
5015 {
5016 pmap_switch(pmap);
5017 #if __ARM_USER_PROTECT__
5018 if (pmap->tte_index_max == NTTES) {
5019 thread->machine.uptw_ttc = 2;
5020 } else {
5021 thread->machine.uptw_ttc = 1;
5022 }
5023 thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
5024 thread->machine.asid = pmap->hw_asid;
5025 #endif
5026 }
5027
5028 static void
5029 pmap_flush_core_tlb_asid(pmap_t pmap)
5030 {
5031 #if (__ARM_VMSA__ == 7)
5032 flush_core_tlb_asid(pmap->hw_asid);
5033 #else
5034 flush_core_tlb_asid(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
5035 #endif
5036 }
5037
5038 MARK_AS_PMAP_TEXT static void
5039 pmap_switch_internal(
5040 pmap_t pmap)
5041 {
5042 VALIDATE_PMAP(pmap);
5043 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
5044 uint16_t asid_index = pmap->hw_asid;
5045 boolean_t do_asid_flush = FALSE;
5046
5047 #if __ARM_KERNEL_PROTECT__
5048 asid_index >>= 1;
5049 #endif
5050
5051 #if (__ARM_VMSA__ > 7)
5052 pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
5053 #endif
5054
5055 #if MAX_ASID > MAX_HW_ASID
5056 if (asid_index > 0) {
5057 asid_index -= 1;
5058 /* Paranoia. */
5059 assert(asid_index < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));
5060
5061 /* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
5062 uint8_t asid_high_bits = pmap->sw_asid;
5063 uint8_t last_asid_high_bits = cpu_data_ptr->cpu_asid_high_bits[asid_index];
5064
5065 if (asid_high_bits != last_asid_high_bits) {
5066 /*
5067 * If the virtual ASID of the new pmap does not match the virtual ASID
5068 * last seen on this CPU for the physical ASID (that was a mouthful),
5069 * then this switch runs the risk of aliasing. We need to flush the
5070 * TLB for this physical ASID in this case.
5071 */
5072 cpu_data_ptr->cpu_asid_high_bits[asid_index] = asid_high_bits;
5073 do_asid_flush = TRUE;
5074 }
5075 }
5076 #endif /* MAX_ASID > MAX_HW_ASID */
5077
5078 pmap_switch_user_ttb_internal(pmap);
5079
5080 #if (__ARM_VMSA__ > 7)
5081 /* If we're switching to a different nested pmap (i.e. shared region), we'll need
5082 * to flush the userspace mappings for that region. Those mappings are global
5083 * and will not be protected by the ASID. It should also be cheaper to flush the
5084 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
5085 if ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap)) {
5086 flush_core_tlb();
5087 } else
5088 #endif
5089 if (do_asid_flush) {
5090 pmap_flush_core_tlb_asid(pmap);
5091 #if DEVELOPMENT || DEBUG
5092 os_atomic_inc(&pmap_asid_flushes, relaxed);
5093 #endif
5094 }
5095 }
5096
5097 void
5098 pmap_switch(
5099 pmap_t pmap)
5100 {
5101 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
5102 pmap_switch_internal(pmap);
5103 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
5104 }
5105
5106 void
5107 pmap_page_protect(
5108 ppnum_t ppnum,
5109 vm_prot_t prot)
5110 {
5111 pmap_page_protect_options(ppnum, prot, 0, NULL);
5112 }
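/*
 * A minimal sketch: downgrade every existing mapping of a physical page
 * to read-only, or remove the mappings entirely by passing VM_PROT_NONE.
 *
 *	pmap_page_protect(ppnum, VM_PROT_READ);
 *	pmap_page_protect(ppnum, VM_PROT_NONE);
 */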
5113
5114 /*
5115 * Routine: pmap_page_protect_options
5116 *
5117 * Function:
5118 * Lower the permission for all mappings to a given
5119 * page.
5120 */
5121 MARK_AS_PMAP_TEXT static void
5122 pmap_page_protect_options_internal(
5123 ppnum_t ppnum,
5124 vm_prot_t prot,
5125 unsigned int options)
5126 {
5127 pmap_paddr_t phys = ptoa(ppnum);
5128 pv_entry_t **pv_h;
5129 pv_entry_t **pve_pp;
5130 pv_entry_t *pve_p;
5131 pv_entry_t *pveh_p;
5132 pv_entry_t *pvet_p;
5133 pt_entry_t *pte_p;
5134 pv_entry_t *new_pve_p;
5135 pt_entry_t *new_pte_p;
5136 vm_offset_t pvh_flags;
5137 int pai;
5138 boolean_t remove;
5139 boolean_t set_NX;
5140 boolean_t tlb_flush_needed = FALSE;
5141 unsigned int pvh_cnt = 0;
5142
5143 assert(ppnum != vm_page_fictitious_addr);
5144
5145 /* Only work with managed pages. */
5146 if (!pa_valid(phys)) {
5147 return;
5148 }
5149
5150 /*
5151 * Determine the new protection.
5152 */
5153 switch (prot) {
5154 case VM_PROT_ALL:
5155 return; /* nothing to do */
5156 case VM_PROT_READ:
5157 case VM_PROT_READ | VM_PROT_EXECUTE:
5158 remove = FALSE;
5159 break;
5160 default:
5161 remove = TRUE;
5162 break;
5163 }
5164
5165 pai = (int)pa_index(phys);
5166 LOCK_PVH(pai);
5167 pv_h = pai_to_pvh(pai);
5168 pvh_flags = pvh_get_flags(pv_h);
5169
5170
5171 pte_p = PT_ENTRY_NULL;
5172 pve_p = PV_ENTRY_NULL;
5173 pve_pp = pv_h;
5174 pveh_p = PV_ENTRY_NULL;
5175 pvet_p = PV_ENTRY_NULL;
5176 new_pve_p = PV_ENTRY_NULL;
5177 new_pte_p = PT_ENTRY_NULL;
5178 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
5179 pte_p = pvh_ptep(pv_h);
5180 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
5181 pve_p = pvh_list(pv_h);
5182 pveh_p = pve_p;
5183 }
5184
5185 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
5186 vm_map_address_t va;
5187 pmap_t pmap;
5188 pt_entry_t tmplate;
5189 boolean_t update = FALSE;
5190
5191 if (pve_p != PV_ENTRY_NULL) {
5192 pte_p = pve_get_ptep(pve_p);
5193 }
5194
5195 #ifdef PVH_FLAG_IOMMU
5196 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
5197 if (remove) {
5198 if (options & PMAP_OPTIONS_COMPRESSOR) {
5199 panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
5200 ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
5201 }
5202 if (pve_p != PV_ENTRY_NULL) {
5203 pv_entry_t *temp_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
5204 pvh_remove(pv_h, pve_pp, pve_p);
5205 pveh_p = pvh_list(pv_h);
5206 pve_next(pve_p) = new_pve_p;
5207 new_pve_p = pve_p;
5208 pve_p = temp_pve_p;
5209 continue;
5210 } else {
5211 new_pte_p = pte_p;
5212 break;
5213 }
5214 }
5215 goto protect_skip_pve;
5216 }
5217 #endif
5218 pmap = ptep_get_pmap(pte_p);
5219 va = ptep_get_va(pte_p);
5220
5221 if (pte_p == PT_ENTRY_NULL) {
5222 panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
5223 pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
5224 } else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
5225 #if MACH_ASSERT
5226 if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
5227 pv_entry_t *check_pve_p = pveh_p;
5228 while (check_pve_p != PV_ENTRY_NULL) {
5229 if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
5230 panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
5231 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
5232 }
5233 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
5234 }
5235 }
5236 #endif
5237 panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
5238 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
5239 }
5240
5241 #if DEVELOPMENT || DEBUG
5242 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5243 #else
5244 if ((prot & VM_PROT_EXECUTE))
5245 #endif
5246 { set_NX = FALSE;} else {
5247 set_NX = TRUE;
5248 }
5249
5250 /* Remove the mapping if new protection is NONE */
5251 if (remove) {
5252 boolean_t is_altacct = FALSE;
5253
5254 if (IS_ALTACCT_PAGE(pai, pve_p)) {
5255 is_altacct = TRUE;
5256 } else {
5257 is_altacct = FALSE;
5258 }
5259
5260 if (pte_is_wired(*pte_p)) {
5261 pte_set_wired(pte_p, 0);
5262 if (pmap != kernel_pmap) {
5263 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
5264 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
5265 }
5266 }
5267
5268 if (*pte_p != ARM_PTE_TYPE_FAULT &&
5269 pmap != kernel_pmap &&
5270 (options & PMAP_OPTIONS_COMPRESSOR) &&
5271 IS_INTERNAL_PAGE(pai)) {
5272 assert(!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p));
5273 /* mark this PTE as having been "compressed" */
5274 tmplate = ARM_PTE_COMPRESSED;
5275 if (is_altacct) {
5276 tmplate |= ARM_PTE_COMPRESSED_ALT;
5277 is_altacct = TRUE;
5278 }
5279 } else {
5280 tmplate = ARM_PTE_TYPE_FAULT;
5281 }
5282
5283 if ((*pte_p != ARM_PTE_TYPE_FAULT) &&
5284 tmplate == ARM_PTE_TYPE_FAULT &&
5285 (pmap != kernel_pmap)) {
5286 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt)) <= 0) {
5287 panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
5288 }
5289 }
5290
5291 if (*pte_p != tmplate) {
5292 WRITE_PTE_STRONG(pte_p, tmplate);
5293 update = TRUE;
5294 }
5295 pvh_cnt++;
5296 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
5297 OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
5298
5299 #if MACH_ASSERT
5300 /*
5301 * We only ever compress internal pages.
5302 */
5303 if (options & PMAP_OPTIONS_COMPRESSOR) {
5304 assert(IS_INTERNAL_PAGE(pai));
5305 }
5306 #endif
5307
5308 if (pmap != kernel_pmap) {
5309 if (IS_REUSABLE_PAGE(pai) &&
5310 IS_INTERNAL_PAGE(pai) &&
5311 !is_altacct) {
5312 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
5313 OSAddAtomic(-1, &pmap->stats.reusable);
5314 } else if (IS_INTERNAL_PAGE(pai)) {
5315 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
5316 OSAddAtomic(-1, &pmap->stats.internal);
5317 } else {
5318 PMAP_STATS_ASSERTF(pmap->stats.external > 0, pmap, "stats.external %d", pmap->stats.external);
5319 OSAddAtomic(-1, &pmap->stats.external);
5320 }
5321 if ((options & PMAP_OPTIONS_COMPRESSOR) &&
5322 IS_INTERNAL_PAGE(pai)) {
5323 /* adjust "compressed" stats */
5324 OSAddAtomic64(+1, &pmap->stats.compressed);
5325 PMAP_STATS_PEAK(pmap->stats.compressed);
5326 pmap->stats.compressed_lifetime++;
5327 }
5328
5329 if (IS_ALTACCT_PAGE(pai, pve_p)) {
5330 assert(IS_INTERNAL_PAGE(pai));
5331 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
5332 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
5333 if (options & PMAP_OPTIONS_COMPRESSOR) {
5334 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
5335 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
5336 }
5337
5338 /*
5339 * Clean up our marker before
5340 * we free this pv_entry.
5341 */
5342 CLR_ALTACCT_PAGE(pai, pve_p);
5343 } else if (IS_REUSABLE_PAGE(pai)) {
5344 assert(IS_INTERNAL_PAGE(pai));
5345 if (options & PMAP_OPTIONS_COMPRESSOR) {
5346 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
5347 /* was not in footprint, but is now */
5348 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
5349 }
5350 } else if (IS_INTERNAL_PAGE(pai)) {
5351 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
5352
5353 /*
5354 * Update all stats related to physical footprint, which only
5355 * deals with internal pages.
5356 */
5357 if (options & PMAP_OPTIONS_COMPRESSOR) {
5358 /*
5359 * This removal is only being done so we can send this page to
5360 * the compressor; therefore it mustn't affect total task footprint.
5361 */
5362 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
5363 } else {
5364 /*
5365 * This internal page isn't going to the compressor, so adjust stats to keep
5366 * phys_footprint up to date.
5367 */
5368 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
5369 }
5370 } else {
5371 /* external page: no impact on ledgers */
5372 }
5373 }
5374
5375 if (pve_p != PV_ENTRY_NULL) {
5376 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
5377 }
5378 } else {
5379 pt_entry_t spte;
5380 const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
5381
5382 spte = *pte_p;
5383
5384 if (pmap == kernel_pmap) {
5385 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
5386 } else {
5387 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
5388 }
5389
5390 pte_set_was_writeable(tmplate, false);
5391 /*
5392 * While the naive implementation of this would serve to add execute
5393 * permission, this is not how the VM uses this interface, or how
5394 * x86_64 implements it. So ignore requests to add execute permissions.
5395 */
5396 if (set_NX) {
5397 tmplate |= pt_attr_leaf_xn(pt_attr);
5398 }
5399
5400
5401 if (*pte_p != ARM_PTE_TYPE_FAULT &&
5402 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p) &&
5403 *pte_p != tmplate) {
5404 WRITE_PTE_STRONG(pte_p, tmplate);
5405 update = TRUE;
5406 }
5407 }
5408
5409 /* Invalidate TLBs for all CPUs using it */
5410 if (update) {
5411 tlb_flush_needed = TRUE;
5412 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
5413 }
5414
5415 #ifdef PVH_FLAG_IOMMU
5416 protect_skip_pve:
5417 #endif
5418 pte_p = PT_ENTRY_NULL;
5419 pvet_p = pve_p;
5420 if (pve_p != PV_ENTRY_NULL) {
5421 if (remove) {
5422 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
5423 }
5424 pve_pp = pve_link_field(pve_p);
5425 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
5426 }
5427 }
5428
5429 #ifdef PVH_FLAG_EXEC
5430 if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
5431 pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
5432 }
5433 #endif
5434 if (tlb_flush_needed) {
5435 sync_tlb_flush();
5436 }
5437
5438 /* if we removed a bunch of entries, take care of them now */
5439 if (remove) {
5440 if (new_pve_p != PV_ENTRY_NULL) {
5441 pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
5442 pvh_set_flags(pv_h, pvh_flags);
5443 } else if (new_pte_p != PT_ENTRY_NULL) {
5444 pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
5445 pvh_set_flags(pv_h, pvh_flags);
5446 } else {
5447 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
5448 }
5449 }
5450
5451 UNLOCK_PVH(pai);
5452
5453 if (remove && (pvet_p != PV_ENTRY_NULL)) {
5454 pv_list_free(pveh_p, pvet_p, pvh_cnt);
5455 }
5456 }
5457
5458 void
5459 pmap_page_protect_options(
5460 ppnum_t ppnum,
5461 vm_prot_t prot,
5462 unsigned int options,
5463 __unused void *arg)
5464 {
5465 pmap_paddr_t phys = ptoa(ppnum);
5466
5467 assert(ppnum != vm_page_fictitious_addr);
5468
5469 /* Only work with managed pages. */
5470 if (!pa_valid(phys)) {
5471 return;
5472 }
5473
5474 /*
5475 * Determine the new protection.
5476 */
5477 if (prot == VM_PROT_ALL) {
5478 return; /* nothing to do */
5479 }
5480
5481 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
5482
5483 pmap_page_protect_options_internal(ppnum, prot, options);
5484
5485 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
5486 }
5487
5488 /*
5489 * Indicates if the pmap layer enforces some additional restrictions on the
5490 * given set of protections.
5491 */
5492 bool
5493 pmap_has_prot_policy(__unused vm_prot_t prot)
5494 {
5495 return FALSE;
5496 }
5497
5498 /*
5499 * Set the physical protection on the
5500 * specified range of this map as requested.
5501 * VERY IMPORTANT: Will not increase permissions.
5502 * VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
5503 */
5504 void
5505 pmap_protect(
5506 pmap_t pmap,
5507 vm_map_address_t b,
5508 vm_map_address_t e,
5509 vm_prot_t prot)
5510 {
5511 pmap_protect_options(pmap, b, e, prot, 0, NULL);
5512 }
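/*
 * A minimal sketch: make a page-aligned range read-only. This path only
 * reduces permissions; a prot value that strips all access is turned into
 * a remove by pmap_protect_options() below.
 *
 *	pmap_protect(pmap, start, start + PAGE_SIZE, VM_PROT_READ);
 */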
5513
5514 MARK_AS_PMAP_TEXT static void
5515 pmap_protect_options_internal(
5516 pmap_t pmap,
5517 vm_map_address_t start,
5518 vm_map_address_t end,
5519 vm_prot_t prot,
5520 unsigned int options,
5521 __unused void *args)
5522 {
5523 const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
5524 tt_entry_t *tte_p;
5525 pt_entry_t *bpte_p, *epte_p;
5526 pt_entry_t *pte_p;
5527 boolean_t set_NX = TRUE;
5528 #if (__ARM_VMSA__ > 7)
5529 boolean_t set_XO = FALSE;
5530 #endif
5531 boolean_t should_have_removed = FALSE;
5532 bool need_strong_sync = false;
5533
5534 if (__improbable(end < start)) {
5535 panic("%s called with bogus range: %p, %p", __func__, (void*)start, (void*)end);
5536 }
5537
5538 #if DEVELOPMENT || DEBUG
5539 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5540 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5541 should_have_removed = TRUE;
5542 }
5543 } else
5544 #endif
5545 {
5546 /* Determine the new protection. */
5547 switch (prot) {
5548 #if (__ARM_VMSA__ > 7)
5549 case VM_PROT_EXECUTE:
5550 set_XO = TRUE;
5551 /* fall through */
5552 #endif
5553 case VM_PROT_READ:
5554 case VM_PROT_READ | VM_PROT_EXECUTE:
5555 break;
5556 case VM_PROT_READ | VM_PROT_WRITE:
5557 case VM_PROT_ALL:
5558 return; /* nothing to do */
5559 default:
5560 should_have_removed = TRUE;
5561 }
5562 }
5563
5564 if (should_have_removed) {
5565 panic("%s: should have been a remove operation, "
5566 "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
5567 __FUNCTION__,
5568 pmap, (void *)start, (void *)end, prot, options, args);
5569 }
5570
5571 #if DEVELOPMENT || DEBUG
5572 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5573 #else
5574 if ((prot & VM_PROT_EXECUTE))
5575 #endif
5576 {
5577 set_NX = FALSE;
5578 } else {
5579 set_NX = TRUE;
5580 }
5581
5582 VALIDATE_PMAP(pmap);
5583 PMAP_LOCK(pmap);
5584 tte_p = pmap_tte(pmap, start);
5585
5586 if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5587 bpte_p = (pt_entry_t *) ttetokv(*tte_p);
5588 bpte_p = &bpte_p[ptenum(start)];
5589 epte_p = bpte_p + arm_atop(end - start);
5590 pte_p = bpte_p;
5591
5592 for (pte_p = bpte_p;
5593 pte_p < epte_p;
5594 pte_p += PAGE_SIZE / ARM_PGBYTES) {
5595 pt_entry_t spte;
5596 #if DEVELOPMENT || DEBUG
5597 boolean_t force_write = FALSE;
5598 #endif
5599
5600 spte = *pte_p;
5601
5602 if ((spte == ARM_PTE_TYPE_FAULT) ||
5603 ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
5604 continue;
5605 }
5606
5607 pmap_paddr_t pa;
5608 int pai = 0;
5609 boolean_t managed = FALSE;
5610
5611 while (!managed) {
5612 /*
5613 * It may be possible for the pte to transition from managed
5614 * to unmanaged in this timeframe; for now, elide the assert.
5615 * We should break out as a consequence of checking pa_valid.
5616 */
5617 // assert(!ARM_PTE_IS_COMPRESSED(spte));
5618 pa = pte_to_pa(spte);
5619 if (!pa_valid(pa)) {
5620 break;
5621 }
5622 pai = (int)pa_index(pa);
5623 LOCK_PVH(pai);
5624 spte = *pte_p;
5625 pa = pte_to_pa(spte);
5626 if (pai == (int)pa_index(pa)) {
5627 managed = TRUE;
5628 break; // Leave the PVH locked as we will unlock it after we free the PTE
5629 }
5630 UNLOCK_PVH(pai);
5631 }
5632
5633 if ((spte == ARM_PTE_TYPE_FAULT) ||
5634 ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
5635 continue;
5636 }
5637
5638 pt_entry_t tmplate;
5639
5640 if (pmap == kernel_pmap) {
5641 #if DEVELOPMENT || DEBUG
5642 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
5643 force_write = TRUE;
5644 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
5645 } else
5646 #endif
5647 {
5648 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
5649 }
5650 } else {
5651 #if DEVELOPMENT || DEBUG
5652 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
5653 force_write = TRUE;
5654 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pt_attr));
5655 } else
5656 #endif
5657 {
5658 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
5659 }
5660 }
5661
5662 /*
5663 * XXX Removing "NX" would
5664 * grant "execute" access
5665 * immediately, bypassing any
5666 * checks VM might want to do
5667 * in its soft fault path.
5668 * pmap_protect() and co. are
5669 * not allowed to increase
5670 * access permissions.
5671 */
5672 if (set_NX) {
5673 tmplate |= pt_attr_leaf_xn(pt_attr);
5674 } else {
5675 #if (__ARM_VMSA__ > 7)
5676 if (pmap == kernel_pmap) {
5677 /* do NOT clear "PNX"! */
5678 tmplate |= ARM_PTE_NX;
5679 } else {
5680 /* do NOT clear "NX"! */
5681 tmplate |= pt_attr_leaf_x(pt_attr);
5682 if (set_XO) {
5683 tmplate &= ~ARM_PTE_APMASK;
5684 tmplate |= pt_attr_leaf_rona(pt_attr);
5685 }
5686 }
5687 #endif
5688 }
5689
5690 #if DEVELOPMENT || DEBUG
5691 if (force_write) {
5692 /*
5693 * TODO: Run CS/Monitor checks here.
5694 */
5695 if (managed) {
5696 /*
5697 * We are marking the page as writable,
5698 * so we consider it to be modified and
5699 * referenced.
5700 */
5701 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
5702 tmplate |= ARM_PTE_AF;
5703
5704 if (IS_REFFAULT_PAGE(pai)) {
5705 CLR_REFFAULT_PAGE(pai);
5706 }
5707
5708 if (IS_MODFAULT_PAGE(pai)) {
5709 CLR_MODFAULT_PAGE(pai);
5710 }
5711 }
5712 } else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5713 /*
5714 * An immediate request for anything other than
5715 * write should still mark the page as
5716 * referenced if managed.
5717 */
5718 if (managed) {
5719 pa_set_bits(pa, PP_ATTR_REFERENCED);
5720 tmplate |= ARM_PTE_AF;
5721
5722 if (IS_REFFAULT_PAGE(pai)) {
5723 CLR_REFFAULT_PAGE(pai);
5724 }
5725 }
5726 }
5727 #endif
5728
5729 /* We do not expect to take a write fast-fault on this entry. */
5730 pte_set_was_writeable(tmplate, false);
5731
5732 WRITE_PTE_FAST(pte_p, tmplate);
5733
5734 if (managed) {
5735 ASSERT_PVH_LOCKED(pai);
5736 UNLOCK_PVH(pai);
5737 }
5738 }
5739 FLUSH_PTE_RANGE_STRONG(bpte_p, epte_p);
5740 PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
5741 }
5742
5743 PMAP_UNLOCK(pmap);
5744 }
5745
5746 void
5747 pmap_protect_options(
5748 pmap_t pmap,
5749 vm_map_address_t b,
5750 vm_map_address_t e,
5751 vm_prot_t prot,
5752 unsigned int options,
5753 __unused void *args)
5754 {
5755 vm_map_address_t l, beg;
5756
5757 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5758
5759 if ((b | e) & PAGE_MASK) {
5760 panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
5761 pmap, (uint64_t)b, (uint64_t)e);
5762 }
5763
5764 #if DEVELOPMENT || DEBUG
5765 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5766 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5767 pmap_remove_options(pmap, b, e, options);
5768 return;
5769 }
5770 } else
5771 #endif
5772 {
5773 /* Determine the new protection. */
5774 switch (prot) {
5775 case VM_PROT_EXECUTE:
5776 case VM_PROT_READ:
5777 case VM_PROT_READ | VM_PROT_EXECUTE:
5778 break;
5779 case VM_PROT_READ | VM_PROT_WRITE:
5780 case VM_PROT_ALL:
5781 return; /* nothing to do */
5782 default:
5783 pmap_remove_options(pmap, b, e, options);
5784 return;
5785 }
5786 }
5787
5788 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
5789 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
5790 VM_KERNEL_ADDRHIDE(e));
5791
5792 beg = b;
5793
5794 while (beg < e) {
5795 l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
5796
5797 if (l > e) {
5798 l = e;
5799 }
5800
5801 pmap_protect_options_internal(pmap, beg, l, prot, options, args);
5802
5803 beg = l;
5804 }
5805
5806 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
5807 }
5808
5809 /* Map a (possibly) autogenned block */
5810 kern_return_t
5811 pmap_map_block(
5812 pmap_t pmap,
5813 addr64_t va,
5814 ppnum_t pa,
5815 uint32_t size,
5816 vm_prot_t prot,
5817 int attr,
5818 __unused unsigned int flags)
5819 {
5820 kern_return_t kr;
5821 addr64_t original_va = va;
5822 uint32_t page;
5823
5824 for (page = 0; page < size; page++) {
5825 kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
5826
5827 if (kr != KERN_SUCCESS) {
5828 /*
5829 * This will panic for now, as it is unclear that
5830 * removing the mappings is correct.
5831 */
5832 panic("%s: failed pmap_enter, "
5833 "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
5834 __FUNCTION__,
5835 pmap, va, pa, size, prot, flags);
5836
5837 pmap_remove(pmap, original_va, va - original_va);
5838 return kr;
5839 }
5840
5841 va += PAGE_SIZE;
5842 pa++;
5843 }
5844
5845 return KERN_SUCCESS;
5846 }
5847
5848 /*
5849 * Insert the given physical page (p) at
5850 * the specified virtual address (v) in the
5851 * target physical map with the protection requested.
5852 *
5853 * If specified, the page will be wired down, meaning
5854 * that the related pte can not be reclaimed.
5855 *
5856 * NB: This is the only routine which MAY NOT lazy-evaluate
5857 * or lose information. That is, this routine must actually
5858 * insert this page into the given map eventually (must make
5859 * forward progress eventually).
5860 */
5861 kern_return_t
5862 pmap_enter(
5863 pmap_t pmap,
5864 vm_map_address_t v,
5865 ppnum_t pn,
5866 vm_prot_t prot,
5867 vm_prot_t fault_type,
5868 unsigned int flags,
5869 boolean_t wired)
5870 {
5871 return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
5872 }
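
/*
 * Illustrative only (not part of this file's code): a minimal sketch of a
 * caller wiring one page read/write into the kernel pmap with default
 * cache attributes.  "va" and "pn" are hypothetical, page-aligned values
 * obtained elsewhere.
 *
 *	kern_return_t kr;
 *
 *	kr = pmap_enter(kernel_pmap, va, pn,
 *	    VM_PROT_READ | VM_PROT_WRITE,	// prot
 *	    VM_PROT_NONE,			// fault_type
 *	    VM_WIMG_USE_DEFAULT,		// flags
 *	    TRUE);				// wired
 *	assert(kr == KERN_SUCCESS);
 */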
5873
5874
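/*
 * pmap_enter_pte: helper used by pmap_enter_options_internal() to publish
 * a leaf PTE.  For user pmaps it keeps the page table's wired refcount,
 * the wired_mem ledger and stats.wired_count in sync with the ARM_PTE_WIRED
 * bit.  If a valid, non-compressed entry is being replaced, the PTE is
 * written with WRITE_PTE_STRONG and the TLB entry for the page is updated;
 * otherwise a plain WRITE_PTE followed by an ISB is sufficient.
 */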
5875 static inline void
5876 pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
5877 {
5878 if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
5879 SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
5880 if (pte & ARM_PTE_WIRED) {
5881 OSAddAtomic16(1, ptd_wiredcnt_ptr);
5882 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
5883 OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
5884 } else {
5885 OSAddAtomic16(-1, ptd_wiredcnt_ptr);
5886 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
5887 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
5888 }
5889 }
5890 if (*pte_p != ARM_PTE_TYPE_FAULT &&
5891 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
5892 WRITE_PTE_STRONG(pte_p, pte);
5893 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE, false);
5894 } else {
5895 WRITE_PTE(pte_p, pte);
5896 __builtin_arm_isb(ISB_SY);
5897 }
5898
5899 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + PAGE_SIZE), pte);
5900 }
5901
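/*
 * wimg_to_pte: translate a VM_WIMG_* cache-attribute code into PTE bits:
 * the memory-attribute index plus, for device/posted memory, execute-never
 * (NX/PNX) bits, or, for normal memory, the appropriate shareability.
 * Unrecognized codes fall back to the default (write-back) attributes.
 */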
5902 MARK_AS_PMAP_TEXT static pt_entry_t
5903 wimg_to_pte(unsigned int wimg)
5904 {
5905 pt_entry_t pte;
5906
5907 switch (wimg & (VM_WIMG_MASK)) {
5908 case VM_WIMG_IO:
5909 case VM_WIMG_RT:
5910 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
5911 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5912 break;
5913 case VM_WIMG_POSTED:
5914 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
5915 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5916 break;
5917 case VM_WIMG_POSTED_REORDERED:
5918 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
5919 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5920 break;
5921 case VM_WIMG_POSTED_COMBINED_REORDERED:
5922 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
5923 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5924 break;
5925 case VM_WIMG_WCOMB:
5926 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
5927 pte |= ARM_PTE_NX | ARM_PTE_PNX;
5928 break;
5929 case VM_WIMG_WTHRU:
5930 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
5931 #if (__ARM_VMSA__ > 7)
5932 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5933 #else
5934 pte |= ARM_PTE_SH;
5935 #endif
5936 break;
5937 case VM_WIMG_COPYBACK:
5938 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
5939 #if (__ARM_VMSA__ > 7)
5940 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5941 #else
5942 pte |= ARM_PTE_SH;
5943 #endif
5944 break;
5945 case VM_WIMG_INNERWBACK:
5946 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
5947 #if (__ARM_VMSA__ > 7)
5948 pte |= ARM_PTE_SH(SH_INNER_MEMORY);
5949 #else
5950 pte |= ARM_PTE_SH;
5951 #endif
5952 break;
5953 default:
5954 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
5955 #if (__ARM_VMSA__ > 7)
5956 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
5957 #else
5958 pte |= ARM_PTE_SH;
5959 #endif
5960 }
5961
5962 return pte;
5963 }
5964
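/*
 * pmap_enter_pv: record a new mapping in the PV list for physical page
 * index "pai".  A page's first mapping is stored directly in the PV head
 * (PVH_TYPE_PTEP); subsequent mappings convert the head into a chain of
 * pv_entry structures (PVH_TYPE_PVEP).  On the first CPU mapping the
 * internal/reusable attributes are set from the caller's options, and
 * "alternate accounting" is flagged so the correct ledger is debited on
 * removal.  Returns FALSE when pv_alloc() forces a retry (the PVH lock
 * may have been dropped to allocate a pv_entry).
 */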
5965 static boolean_t
5966 pmap_enter_pv(
5967 pmap_t pmap,
5968 pt_entry_t *pte_p,
5969 int pai,
5970 unsigned int options,
5971 pv_entry_t **pve_p,
5972 boolean_t *is_altacct)
5973 {
5974 pv_entry_t **pv_h;
5975 pv_h = pai_to_pvh(pai);
5976 boolean_t first_cpu_mapping;
5977
5978 ASSERT_PVH_LOCKED(pai);
5979
5980 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
5981
5982
5983 #ifdef PVH_FLAG_CPU
5984 /* An IOMMU mapping may already be present for a page that hasn't yet
5985 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
5986 * if this is the first CPU mapping. We base internal/reusable
5987 * accounting on the options specified for the first CPU mapping.
5988 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
5989 * as there are *any* mappings of the page. The accounting for a
5990 * page should not need to change until the page is recycled by the
5991 * VM layer, and we assert that there are no mappings when a page
5992 * is recycled. An IOMMU mapping of a freed/recycled page is
5993 * considered a security violation & potential DMA corruption path. */
5994 first_cpu_mapping = ((pmap != NULL) && !(pvh_flags & PVH_FLAG_CPU));
5995 if (first_cpu_mapping) {
5996 pvh_flags |= PVH_FLAG_CPU;
5997 }
5998 #else
5999 first_cpu_mapping = pvh_test_type(pv_h, PVH_TYPE_NULL);
6000 #endif
6001
6002 if (first_cpu_mapping) {
6003 if (options & PMAP_OPTIONS_INTERNAL) {
6004 SET_INTERNAL_PAGE(pai);
6005 } else {
6006 CLR_INTERNAL_PAGE(pai);
6007 }
6008 if ((options & PMAP_OPTIONS_INTERNAL) &&
6009 (options & PMAP_OPTIONS_REUSABLE)) {
6010 SET_REUSABLE_PAGE(pai);
6011 } else {
6012 CLR_REUSABLE_PAGE(pai);
6013 }
6014 }
6015 if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
6016 pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
6017 if (pmap != NULL && pmap != kernel_pmap &&
6018 ((options & PMAP_OPTIONS_ALT_ACCT) ||
6019 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
6020 IS_INTERNAL_PAGE(pai)) {
6021 /*
6022 * Make a note to ourselves that this mapping is using alternative
6023 * accounting. We'll need this in order to know which ledger to
6024 * debit when the mapping is removed.
6025 *
6026 * The altacct bit must be set while the pv head is locked. Defer
6027 * the ledger accounting until after we've dropped the lock.
6028 */
6029 SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
6030 *is_altacct = TRUE;
6031 } else {
6032 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
6033 }
6034 } else {
6035 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
6036 pt_entry_t *pte1_p;
6037
6038 /*
6039 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
6040 */
6041 pte1_p = pvh_ptep(pv_h);
6042 pvh_set_flags(pv_h, pvh_flags);
6043 if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
6044 return FALSE;
6045 }
6046
6047 pve_set_ptep(*pve_p, pte1_p);
6048 (*pve_p)->pve_next = PV_ENTRY_NULL;
6049
6050 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
6051 /*
6052 * transfer "altacct" from
6053 * pp_attr to this pve
6054 */
6055 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
6056 SET_ALTACCT_PAGE(pai, *pve_p);
6057 }
6058 pvh_update_head(pv_h, *pve_p, PVH_TYPE_PVEP);
6059 *pve_p = PV_ENTRY_NULL;
6060 } else if (!pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
6061 panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
6062 __func__, *pv_h, pte_p, pmap, pv_h);
6063 }
6064 /*
6065 * Set up pv_entry for this new mapping and then
6066 * add it to the list for this physical page.
6067 */
6068 pvh_set_flags(pv_h, pvh_flags);
6069 if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
6070 return FALSE;
6071 }
6072
6073 pve_set_ptep(*pve_p, pte_p);
6074 (*pve_p)->pve_next = PV_ENTRY_NULL;
6075
6076 pvh_add(pv_h, *pve_p);
6077
6078 if (pmap != NULL && pmap != kernel_pmap &&
6079 ((options & PMAP_OPTIONS_ALT_ACCT) ||
6080 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
6081 IS_INTERNAL_PAGE(pai)) {
6082 /*
6083 * Make a note to ourselves that this
6084 * mapping is using alternative
6085 * accounting. We'll need this in order
6086 * to know which ledger to debit when
6087 * the mapping is removed.
6088 *
6089 * The altacct bit must be set while
6090 * the pv head is locked. Defer the
6091 * ledger accounting until after we've
6092 * dropped the lock.
6093 */
6094 SET_ALTACCT_PAGE(pai, *pve_p);
6095 *is_altacct = TRUE;
6096 }
6097
6098 *pve_p = PV_ENTRY_NULL;
6099 }
6100
6101 pvh_set_flags(pv_h, pvh_flags);
6102
6103 return TRUE;
6104 }
6105
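/*
 * pmap_enter_options_internal: the core of pmap_enter().  It expands the
 * page tables as needed, reverses any "compressed" marker found in the
 * slot, and builds the PTE template (access permissions, XN bits, wiring,
 * global/non-global, cache attributes).  For managed (pa_valid) pages it
 * takes the PVH lock, links the mapping into the PV list via
 * pmap_enter_pv(), updates internal/reusable/external statistics and the
 * task ledgers, and publishes the PTE with pmap_enter_pte().  Unmanaged
 * pages must not be mapped executable and are entered directly.
 */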
6106 MARK_AS_PMAP_TEXT static kern_return_t
6107 pmap_enter_options_internal(
6108 pmap_t pmap,
6109 vm_map_address_t v,
6110 ppnum_t pn,
6111 vm_prot_t prot,
6112 vm_prot_t fault_type,
6113 unsigned int flags,
6114 boolean_t wired,
6115 unsigned int options)
6116 {
6117 pmap_paddr_t pa = ptoa(pn);
6118 pt_entry_t pte;
6119 pt_entry_t spte;
6120 pt_entry_t *pte_p;
6121 pv_entry_t *pve_p;
6122 boolean_t set_NX;
6123 boolean_t set_XO = FALSE;
6124 boolean_t refcnt_updated;
6125 boolean_t wiredcnt_updated;
6126 unsigned int wimg_bits;
6127 boolean_t was_compressed, was_alt_compressed;
6128 kern_return_t kr = KERN_SUCCESS;
6129
6130 VALIDATE_PMAP(pmap);
6131
6132 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6133
6134 if ((v) & PAGE_MASK) {
6135 panic("pmap_enter_options() pmap %p v 0x%llx\n",
6136 pmap, (uint64_t)v);
6137 }
6138
6139 if ((prot & VM_PROT_EXECUTE) && (prot & VM_PROT_WRITE) && (pmap == kernel_pmap)) {
6140 panic("pmap_enter_options(): WX request on kernel_pmap");
6141 }
6142
6143 #if DEVELOPMENT || DEBUG
6144 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
6145 #else
6146 if ((prot & VM_PROT_EXECUTE))
6147 #endif
6148 {
set_NX = FALSE;
} else {
6149 set_NX = TRUE;
6150 }
6151
6152 #if (__ARM_VMSA__ > 7)
6153 if (prot == VM_PROT_EXECUTE) {
6154 set_XO = TRUE;
6155 }
6156 #endif
6157
6158 assert(pn != vm_page_fictitious_addr);
6159
6160 refcnt_updated = FALSE;
6161 wiredcnt_updated = FALSE;
6162 pve_p = PV_ENTRY_NULL;
6163 was_compressed = FALSE;
6164 was_alt_compressed = FALSE;
6165
6166 PMAP_LOCK(pmap);
6167
6168 /*
6169 * Expand pmap to include this pte. Assume that
6170 * pmap is always expanded to include enough hardware
6171 * pages to map one VM page.
6172 */
6173 while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
6174 /* Must unlock to expand the pmap. */
6175 PMAP_UNLOCK(pmap);
6176
6177 kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);
6178
6179 if (kr != KERN_SUCCESS) {
6180 return kr;
6181 }
6182
6183 PMAP_LOCK(pmap);
6184 }
6185
6186 if (options & PMAP_OPTIONS_NOENTER) {
6187 PMAP_UNLOCK(pmap);
6188 return KERN_SUCCESS;
6189 }
6190
6191 Pmap_enter_retry:
6192
6193 spte = *pte_p;
6194
6195 if (ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
6196 /*
6197 * "pmap" should be locked at this point, so this should
6198 * not race with another pmap_enter() or pmap_remove_range().
6199 */
6200 assert(pmap != kernel_pmap);
6201
6202 /* one less "compressed" */
6203 OSAddAtomic64(-1, &pmap->stats.compressed);
6204 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
6205 PAGE_SIZE);
6206
6207 was_compressed = TRUE;
6208 if (spte & ARM_PTE_COMPRESSED_ALT) {
6209 was_alt_compressed = TRUE;
6210 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
6211 } else {
6212 /* was part of the footprint */
6213 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6214 }
6215
6216 /* clear "compressed" marker */
6217 /* XXX is this necessary, given that we're about to overwrite it? */
6218 WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
6219 spte = ARM_PTE_TYPE_FAULT;
6220
6221 /*
6222 * We're replacing a "compressed" marker with a valid PTE,
6223 * so no change for "refcnt".
6224 */
6225 refcnt_updated = TRUE;
6226 }
6227
6228 if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
6229 pmap_remove_range(pmap, v, pte_p, pte_p + 1, 0);
6230 }
6231
6232 pte = pa_to_pte(pa) | ARM_PTE_TYPE;
6233
6234 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
6235 * wired memory statistics for user pmaps, but kernel PTEs are assumed
6236 * to be wired in nearly all cases. For VM layer functionality, the wired
6237 * count in vm_page_t is sufficient. */
6238 if (wired && pmap != kernel_pmap) {
6239 pte |= ARM_PTE_WIRED;
6240 }
6241
6242 if (set_NX) {
6243 pte |= pt_attr_leaf_xn(pt_attr);
6244 } else {
6245 #if (__ARM_VMSA__ > 7)
6246 if (pmap == kernel_pmap) {
6247 pte |= ARM_PTE_NX;
6248 } else {
6249 pte |= pt_attr_leaf_x(pt_attr);
6250 }
6251 #endif
6252 }
6253
6254 if (pmap == kernel_pmap) {
6255 #if __ARM_KERNEL_PROTECT__
6256 pte |= ARM_PTE_NG;
6257 #endif /* __ARM_KERNEL_PROTECT__ */
6258 if (prot & VM_PROT_WRITE) {
6259 pte |= ARM_PTE_AP(AP_RWNA);
6260 pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
6261 } else {
6262 pte |= ARM_PTE_AP(AP_RONA);
6263 pa_set_bits(pa, PP_ATTR_REFERENCED);
6264 }
6265 #if (__ARM_VMSA__ == 7)
6266 if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH)) {
6267 pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
6268 }
6269 #endif
6270 } else {
6271 if (!pmap->nested) {
6272 pte |= ARM_PTE_NG;
6273 } else if ((pmap->nested_region_asid_bitmap)
6274 && (v >= pmap->nested_region_subord_addr)
6275 && (v < (pmap->nested_region_subord_addr + pmap->nested_region_size))) {
6276 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));
6277
6278 if ((pmap->nested_region_asid_bitmap)
6279 && testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
6280 pte |= ARM_PTE_NG;
6281 }
6282 }
6283 #if MACH_ASSERT
6284 if (pmap->nested_pmap != NULL) {
6285 vm_map_address_t nest_vaddr;
6286 pt_entry_t *nest_pte_p;
6287
6288 nest_vaddr = v - pmap->nested_region_grand_addr + pmap->nested_region_subord_addr;
6289
6290 if ((nest_vaddr >= pmap->nested_region_subord_addr)
6291 && (nest_vaddr < (pmap->nested_region_subord_addr + pmap->nested_region_size))
6292 && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
6293 && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
6294 && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p, nest_pte_p))
6295 && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
6296 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));
6297
6298 if ((pmap->nested_pmap->nested_region_asid_bitmap)
6299 && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
6300 panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
6301 nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
6302 }
6303 }
6304 }
6305 #endif
6306 if (prot & VM_PROT_WRITE) {
6307 if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
6308 if (fault_type & VM_PROT_WRITE) {
6309 if (set_XO) {
6310 pte |= pt_attr_leaf_rwna(pt_attr);
6311 } else {
6312 pte |= pt_attr_leaf_rw(pt_attr);
6313 }
6314 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
6315 } else {
6316 if (set_XO) {
6317 pte |= pt_attr_leaf_rona(pt_attr);
6318 } else {
6319 pte |= pt_attr_leaf_ro(pt_attr);
6320 }
6321 pa_set_bits(pa, PP_ATTR_REFERENCED);
6322 pte_set_was_writeable(pte, true);
6323 }
6324 } else {
6325 if (set_XO) {
6326 pte |= pt_attr_leaf_rwna(pt_attr);
6327 } else {
6328 pte |= pt_attr_leaf_rw(pt_attr);
6329 }
6330 pa_set_bits(pa, PP_ATTR_REFERENCED);
6331 }
6332 } else {
6333 if (set_XO) {
6334 pte |= pt_attr_leaf_rona(pt_attr);
6335 } else {
6336 pte |= pt_attr_leaf_ro(pt_attr);
6337 }
6338 pa_set_bits(pa, PP_ATTR_REFERENCED);
6339 }
6340 }
6341
6342 pte |= ARM_PTE_AF;
6343
6344 volatile uint16_t *refcnt = NULL;
6345 volatile uint16_t *wiredcnt = NULL;
6346 if (pmap != kernel_pmap) {
6347 refcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
6348 wiredcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
6349 /* Bump the wired count to keep the PTE page from being reclaimed. We need this because
6350 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
6351 * a new PV entry. */
6352 if (!wiredcnt_updated) {
6353 OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
6354 wiredcnt_updated = TRUE;
6355 }
6356 if (!refcnt_updated) {
6357 OSAddAtomic16(1, (volatile int16_t*)refcnt);
6358 refcnt_updated = TRUE;
6359 }
6360 }
6361
6362 if (pa_valid(pa)) {
6363 int pai;
6364 boolean_t is_altacct, is_internal;
6365
6366 is_internal = FALSE;
6367 is_altacct = FALSE;
6368
6369 pai = (int)pa_index(pa);
6370
6371 LOCK_PVH(pai);
6372
6373 Pmap_enter_loop:
6374 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
6375 wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
6376 } else {
6377 wimg_bits = pmap_cache_attributes(pn);
6378 }
6379
6380 /* We may be retrying this operation after dropping the PVH lock.
6381 * Cache attributes for the physical page may have changed while the lock
6382 * was dropped, so clear any cache attributes we may have previously set
6383 * in the PTE template. */
6384 pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
6385 pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
6386
6387
6388
6389 if (pte == *pte_p) {
6390 /*
6391 * This pmap_enter operation has already been completed by another thread;
6392 * undo the refcnt on the page table and return.
6393 */
6394 UNLOCK_PVH(pai);
6395 goto Pmap_enter_cleanup;
6396 } else if (pte_to_pa(*pte_p) == pa) {
6397 pmap_enter_pte(pmap, pte_p, pte, v);
6398 UNLOCK_PVH(pai);
6399 goto Pmap_enter_cleanup;
6400 } else if (*pte_p != ARM_PTE_TYPE_FAULT) {
6401 /*
6402 * The PTE has been modified by another thread;
6403 * hold the refcnt on the page table and retry the pmap_enter operation.
6404 */
6405 UNLOCK_PVH(pai);
6406 goto Pmap_enter_retry;
6407 }
6408 if (!pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct)) {
6409 goto Pmap_enter_loop;
6410 }
6411
6412 pmap_enter_pte(pmap, pte_p, pte, v);
6413
6414 if (pmap != kernel_pmap) {
6415 if (IS_REUSABLE_PAGE(pai) &&
6416 !is_altacct) {
6417 assert(IS_INTERNAL_PAGE(pai));
6418 OSAddAtomic(+1, &pmap->stats.reusable);
6419 PMAP_STATS_PEAK(pmap->stats.reusable);
6420 } else if (IS_INTERNAL_PAGE(pai)) {
6421 OSAddAtomic(+1, &pmap->stats.internal);
6422 PMAP_STATS_PEAK(pmap->stats.internal);
6423 is_internal = TRUE;
6424 } else {
6425 OSAddAtomic(+1, &pmap->stats.external);
6426 PMAP_STATS_PEAK(pmap->stats.external);
6427 }
6428 }
6429
6430 UNLOCK_PVH(pai);
6431
6432 if (pmap != kernel_pmap) {
6433 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
6434
6435 if (is_internal) {
6436 /*
6437 * Make corresponding adjustments to
6438 * phys_footprint statistics.
6439 */
6440 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
6441 if (is_altacct) {
6442 /*
6443 * If this page is internal and
6444 * in an IOKit region, credit
6445 * the task's total count of
6446 * dirty, internal IOKit pages.
6447 * It should *not* count towards
6448 * the task's total physical
6449 * memory footprint, because
6450 * this entire region was
6451 * already billed to the task
6452 * at the time the mapping was
6453 * created.
6454 *
6455 * Put another way, this is
6456 * internal++ and
6457 * alternate_accounting++, so
6458 * net effect on phys_footprint
6459 * is 0. That means: don't
6460 * touch phys_footprint here.
6461 */
6462 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
6463 } else {
6464 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6465 }
6466 }
6467 }
6468
6469 OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
6470 if (pmap->stats.resident_count > pmap->stats.resident_max) {
6471 pmap->stats.resident_max = pmap->stats.resident_count;
6472 }
6473 } else {
6474 if (prot & VM_PROT_EXECUTE) {
6475 kr = KERN_FAILURE;
6476 goto Pmap_enter_cleanup;
6477 }
6478
6479 wimg_bits = pmap_cache_attributes(pn);
6480 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
6481 wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
6482 }
6483
6484 pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
6485
6486 pmap_enter_pte(pmap, pte_p, pte, v);
6487 }
6488
6489 goto Pmap_enter_return;
6490
6491 Pmap_enter_cleanup:
6492
6493 if (refcnt != NULL) {
6494 assert(refcnt_updated);
6495 if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
6496 panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
6497 }
6498 }
6499
6500 Pmap_enter_return:
6501
6502 #if CONFIG_PGTRACE
6503 if (pgtrace_enabled) {
6504 // Clone and invalidate original mapping if eligible
6505 for (int i = 0; i < PAGE_RATIO; i++) {
6506 pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES * i, 0, 0);
6507 }
6508 }
6509 #endif
6510
6511 if (pve_p != PV_ENTRY_NULL) {
6512 pv_free(pve_p);
6513 }
6514
6515 if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
6516 panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
6517 }
6518
6519 PMAP_UNLOCK(pmap);
6520
6521 return kr;
6522 }
6523
6524 kern_return_t
6525 pmap_enter_options(
6526 pmap_t pmap,
6527 vm_map_address_t v,
6528 ppnum_t pn,
6529 vm_prot_t prot,
6530 vm_prot_t fault_type,
6531 unsigned int flags,
6532 boolean_t wired,
6533 unsigned int options,
6534 __unused void *arg)
6535 {
6536 kern_return_t kr = KERN_FAILURE;
6537
6538 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
6539 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);
6540
6541 kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);
6542 pv_water_mark_check();
6543
6544 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
6545
6546 return kr;
6547 }
6548
6549 /*
6550 * Routine: pmap_change_wiring
6551 * Function: Change the wiring attribute for a map/virtual-address
6552 * pair.
6553 * In/out conditions:
6554 * The mapping must already exist in the pmap.
6555 */
6556 MARK_AS_PMAP_TEXT static void
6557 pmap_change_wiring_internal(
6558 pmap_t pmap,
6559 vm_map_address_t v,
6560 boolean_t wired)
6561 {
6562 pt_entry_t *pte_p;
6563 pmap_paddr_t pa;
6564
6565 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
6566 * wired memory statistics for user pmaps, but kernel PTEs are assumed
6567 * to be wired in nearly all cases. For VM layer functionality, the wired
6568 * count in vm_page_t is sufficient. */
6569 if (pmap == kernel_pmap) {
6570 return;
6571 }
6572 VALIDATE_USER_PMAP(pmap);
6573
6574 PMAP_LOCK(pmap);
6575 pte_p = pmap_pte(pmap, v);
6576 assert(pte_p != PT_ENTRY_NULL);
6577 pa = pte_to_pa(*pte_p);
6578
6579 while (pa_valid(pa)) {
6580 pmap_paddr_t new_pa;
6581
6582 LOCK_PVH((int)pa_index(pa));
6583 new_pa = pte_to_pa(*pte_p);
6584
6585 if (pa == new_pa) {
6586 break;
6587 }
6588
6589 UNLOCK_PVH((int)pa_index(pa));
6590 pa = new_pa;
6591 }
6592
6593 if (wired && !pte_is_wired(*pte_p)) {
6594 pte_set_wired(pte_p, wired);
6595 OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
6596 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6597 } else if (!wired && pte_is_wired(*pte_p)) {
6598 PMAP_STATS_ASSERTF(pmap->stats.wired_count >= 1, pmap, "stats.wired_count %d", pmap->stats.wired_count);
6599 pte_set_wired(pte_p, wired);
6600 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
6601 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6602 }
6603
6604 if (pa_valid(pa)) {
6605 UNLOCK_PVH((int)pa_index(pa));
6606 }
6607
6608 PMAP_UNLOCK(pmap);
6609 }
6610
6611 void
6612 pmap_change_wiring(
6613 pmap_t pmap,
6614 vm_map_address_t v,
6615 boolean_t wired)
6616 {
6617 pmap_change_wiring_internal(pmap, v, wired);
6618 }
6619
6620 MARK_AS_PMAP_TEXT static ppnum_t
6621 pmap_find_phys_internal(
6622 pmap_t pmap,
6623 addr64_t va)
6624 {
6625 ppnum_t ppn = 0;
6626
6627 VALIDATE_PMAP(pmap);
6628
6629 if (pmap != kernel_pmap) {
6630 PMAP_LOCK(pmap);
6631 }
6632
6633 ppn = pmap_vtophys(pmap, va);
6634
6635 if (pmap != kernel_pmap) {
6636 PMAP_UNLOCK(pmap);
6637 }
6638
6639 return ppn;
6640 }
6641
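/*
 * pmap_find_phys: return the physical page number backing va, or 0 if
 * there is no valid translation.  The hardware translation helpers
 * (mmu_kvtop / mmu_uvtop) are tried first for the kernel pmap and the
 * current thread's user pmap; otherwise a software walk is used, going
 * through the locked pmap_find_phys_internal() path normally or calling
 * pmap_vtophys() directly when running in the kernel debugger.
 */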
6642 ppnum_t
6643 pmap_find_phys(
6644 pmap_t pmap,
6645 addr64_t va)
6646 {
6647 pmap_paddr_t pa = 0;
6648
6649 if (pmap == kernel_pmap) {
6650 pa = mmu_kvtop(va);
6651 } else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
6652 pa = mmu_uvtop(va);
6653 }
6654
6655 if (pa) {
6656 return (ppnum_t)(pa >> PAGE_SHIFT);
6657 }
6658
6659 if (not_in_kdp) {
6660 return pmap_find_phys_internal(pmap, va);
6661 } else {
6662 return pmap_vtophys(pmap, va);
6663 }
6664 }
6665
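/*
 * kvtophys: kernel virtual to physical translation.  Tries the MMU
 * helper mmu_kvtop() first and falls back to a software walk of the
 * kernel pmap, preserving the byte offset within the page.
 */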
6666 pmap_paddr_t
6667 kvtophys(
6668 vm_offset_t va)
6669 {
6670 pmap_paddr_t pa;
6671
6672 pa = mmu_kvtop(va);
6673 if (pa) {
6674 return pa;
6675 }
6676 pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va)) << PAGE_SHIFT;
6677 if (pa) {
6678 pa |= (va & PAGE_MASK);
6679 }
6680
6681 return (pmap_paddr_t)pa;
6682 }
6683
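/*
 * pmap_vtophys: software page-table walk.  Returns the physical page
 * number for va, or 0 if va is outside the pmap's range or has no valid
 * translation.  The ARMv7 variant handles both page-table and
 * section/supersection mappings; the ARM64 variant walks from the root
 * level in use down to an L2 block or L3 page entry.
 */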
6684 ppnum_t
6685 pmap_vtophys(
6686 pmap_t pmap,
6687 addr64_t va)
6688 {
6689 if ((va < pmap->min) || (va >= pmap->max)) {
6690 return 0;
6691 }
6692
6693 #if (__ARM_VMSA__ == 7)
6694 tt_entry_t *tte_p, tte;
6695 pt_entry_t *pte_p;
6696 ppnum_t ppn;
6697
6698 tte_p = pmap_tte(pmap, va);
6699 if (tte_p == (tt_entry_t *) NULL) {
6700 return (ppnum_t) 0;
6701 }
6702
6703 tte = *tte_p;
6704 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
6705 pte_p = (pt_entry_t *) ttetokv(tte) + ptenum(va);
6706 ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
6707 #if DEVELOPMENT || DEBUG
6708 if (ppn != 0 &&
6709 ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
6710 panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
6711 pmap, va, pte_p, (uint64_t) (*pte_p), ppn);
6712 }
6713 #endif /* DEVELOPMENT || DEBUG */
6714 } else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
6715 if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER) {
6716 ppn = (ppnum_t) atop(suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK));
6717 } else {
6718 ppn = (ppnum_t) atop(sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK));
6719 }
6720 } else {
6721 ppn = 0;
6722 }
6723 #else
6724 tt_entry_t *ttp;
6725 tt_entry_t tte;
6726 ppnum_t ppn = 0;
6727
6728 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6729
6730 /* Level 0 currently unused */
6731
6732 /* Get first-level (1GB) entry */
6733 ttp = pmap_tt1e(pmap, va);
6734 tte = *ttp;
6735 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
6736 return ppn;
6737 }
6738
6739 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, va)];
6740
6741 if ((tte & ARM_TTE_VALID) != (ARM_TTE_VALID)) {
6742 return ppn;
6743 }
6744
6745 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
6746 ppn = (ppnum_t) atop((tte & ARM_TTE_BLOCK_L2_MASK) | (va & ARM_TT_L2_OFFMASK));
6747 return ppn;
6748 }
6749 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, va)];
6750 ppn = (ppnum_t) atop((tte & ARM_PTE_MASK) | (va & ARM_TT_L3_OFFMASK));
6751 #endif
6752
6753 return ppn;
6754 }
6755
6756 MARK_AS_PMAP_TEXT static vm_offset_t
6757 pmap_extract_internal(
6758 pmap_t pmap,
6759 vm_map_address_t va)
6760 {
6761 pmap_paddr_t pa = 0;
6762 ppnum_t ppn = 0;
6763
6764 if (pmap == NULL) {
6765 return 0;
6766 }
6767
6768 VALIDATE_PMAP(pmap);
6769
6770 PMAP_LOCK(pmap);
6771
6772 ppn = pmap_vtophys(pmap, va);
6773
6774 if (ppn != 0) {
6775 pa = ptoa(ppn) | ((va) & PAGE_MASK);
6776 }
6777
6778 PMAP_UNLOCK(pmap);
6779
6780 return pa;
6781 }
6782
6783 /*
6784 * Routine: pmap_extract
6785 * Function:
6786 * Extract the physical page address associated
6787 * with the given map/virtual_address pair.
6788 *
6789 */
6790 vm_offset_t
6791 pmap_extract(
6792 pmap_t pmap,
6793 vm_map_address_t va)
6794 {
6795 pmap_paddr_t pa = 0;
6796
6797 if (pmap == kernel_pmap) {
6798 pa = mmu_kvtop(va);
6799 } else if (pmap == vm_map_pmap(current_thread()->map)) {
6800 pa = mmu_uvtop(va);
6801 }
6802
6803 if (pa) {
6804 return pa;
6805 }
6806
6807 return pmap_extract_internal(pmap, va);
6808 }
6809
6810 /*
6811 * pmap_init_pte_page - Initialize a page table page.
6812 */
6813 void
6814 pmap_init_pte_page(
6815 pmap_t pmap,
6816 pt_entry_t *pte_p,
6817 vm_offset_t va,
6818 unsigned int ttlevel,
6819 boolean_t alloc_ptd,
6820 boolean_t clear)
6821 {
6822 pt_desc_t *ptdp = NULL;
6823 vm_offset_t *pvh;
6824
6825 pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));
6826
6827 if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
6828 if (alloc_ptd) {
6829 /*
6830 * This path should only be invoked from arm_vm_init. If we are emulating 16KB pages
6831 * on 4KB hardware, we may already have allocated a page table descriptor for a
6832 * bootstrap request, so we check for an existing PTD here.
6833 */
6834 ptdp = ptd_alloc(pmap, true);
6835 pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
6836 } else {
6837 panic("pmap_init_pte_page(): pte_p %p", pte_p);
6838 }
6839 } else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
6840 ptdp = (pt_desc_t*)(pvh_list(pvh));
6841 } else {
6842 panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
6843 }
6844
6845 if (clear) {
6846 bzero(pte_p, ARM_PGBYTES);
6847 // below barrier ensures the page zeroing is visible to PTW before
6848 // it is linked to the PTE of previous level
6849 __builtin_arm_dmb(DMB_ISHST);
6850 }
6851 ptd_init(ptdp, pmap, va, ttlevel, pte_p);
6852 }
6853
6854 /*
6855 * Routine: pmap_expand
6856 *
6857 * Expands a pmap to be able to map the specified virtual address.
6858 *
6859 * Allocates new memory for the default (COARSE) translation table
6860 * entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
6861 * also allocates space for the corresponding pv entries.
6862 *
6863 * Nothing should be locked.
6864 */
6865 static kern_return_t
6866 pmap_expand(
6867 pmap_t pmap,
6868 vm_map_address_t v,
6869 unsigned int options,
6870 unsigned int level)
6871 {
6872 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6873
6874 #if (__ARM_VMSA__ == 7)
6875 vm_offset_t pa;
6876 tt_entry_t *tte_p;
6877 tt_entry_t *tt_p;
6878 unsigned int i;
6879
6880 #if DEVELOPMENT || DEBUG
6881 /*
6882 * We no longer support root level expansion; panic in case something
6883 * still attempts to trigger it.
6884 */
6885 i = tte_index(pmap, pt_attr, v);
6886
6887 if (i >= pmap->tte_index_max) {
6888 panic("%s: index out of range, index=%u, max=%u, "
6889 "pmap=%p, addr=%p, options=%u, level=%u",
6890 __func__, i, pmap->tte_index_max,
6891 pmap, (void *)v, options, level);
6892 }
6893 #endif /* DEVELOPMENT || DEBUG */
6894
6895 if (level == 1) {
6896 return KERN_SUCCESS;
6897 }
6898
6899 {
6900 tt_entry_t *tte_next_p;
6901
6902 PMAP_LOCK(pmap);
6903 pa = 0;
6904 if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
6905 PMAP_UNLOCK(pmap);
6906 return KERN_SUCCESS;
6907 }
6908 tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
6909 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
6910 if (tte_to_pa(*tte_next_p)) {
6911 pa = tte_to_pa(*tte_next_p);
6912 break;
6913 }
6914 tte_next_p++;
6915 }
6916 pa = pa & ~PAGE_MASK;
6917 if (pa) {
6918 tte_p = &pmap->tte[ttenum(v)];
6919 *tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
6920 FLUSH_PTE(tte_p);
6921 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
6922 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
6923 PMAP_UNLOCK(pmap);
6924 return KERN_SUCCESS;
6925 }
6926 PMAP_UNLOCK(pmap);
6927 }
6928 v = v & ~ARM_TT_L1_PT_OFFMASK;
6929
6930
6931 while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
6932 /*
6933 * Allocate a VM page for the level 2 page table entries.
6934 */
6935 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6936 if (options & PMAP_OPTIONS_NOWAIT) {
6937 return KERN_RESOURCE_SHORTAGE;
6938 }
6939 VM_PAGE_WAIT();
6940 }
6941
6942 PMAP_LOCK(pmap);
6943 /*
6944 * See if someone else expanded us first
6945 */
6946 if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
6947 tt_entry_t *tte_next_p;
6948
6949 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE, TRUE);
6950 pa = kvtophys((vm_offset_t)tt_p);
6951 tte_p = &pmap->tte[ttenum(v)];
6952 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
6953 *tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
6954 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
6955 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
6956 tte_next_p++;
6957 pa = pa + 0x400;
6958 }
6959 FLUSH_PTE_RANGE(tte_p, tte_p + 4);
6960
6961 pa = 0x0ULL;
6962 tt_p = (tt_entry_t *)NULL;
6963 }
6964 PMAP_UNLOCK(pmap);
6965 if (tt_p != (tt_entry_t *)NULL) {
6966 pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
6967 tt_p = (tt_entry_t *)NULL;
6968 }
6969 }
6970 return KERN_SUCCESS;
6971 #else
6972 pmap_paddr_t pa;
6973 unsigned int ttlevel = pt_attr_root_level(pt_attr);
6974 tt_entry_t *tte_p;
6975 tt_entry_t *tt_p;
6976
6977 pa = 0x0ULL;
6978 tt_p = (tt_entry_t *)NULL;
6979
6980 for (; ttlevel < level; ttlevel++) {
6981 PMAP_LOCK(pmap);
6982
6983 if (pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL) {
6984 PMAP_UNLOCK(pmap);
6985 while (pmap_tt_allocate(pmap, &tt_p, ttlevel + 1, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
6986 if (options & PMAP_OPTIONS_NOWAIT) {
6987 return KERN_RESOURCE_SHORTAGE;
6988 }
6989 VM_PAGE_WAIT();
6990 }
6991 PMAP_LOCK(pmap);
6992 if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
6993 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE, TRUE);
6994 pa = kvtophys((vm_offset_t)tt_p);
6995 tte_p = pmap_ttne(pmap, ttlevel, v);
6996 *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
6997 PMAP_TRACE(ttlevel + 1, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
6998 VM_KERNEL_ADDRHIDE((v & ~pt_attr_ln_offmask(pt_attr, ttlevel)) + pt_attr_ln_size(pt_attr, ttlevel)), *tte_p);
6999 pa = 0x0ULL;
7000 tt_p = (tt_entry_t *)NULL;
7001 }
7002 }
7003
7004 PMAP_UNLOCK(pmap);
7005
7006 if (tt_p != (tt_entry_t *)NULL) {
7007 pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
7008 tt_p = (tt_entry_t *)NULL;
7009 }
7010 }
7011
7012 return KERN_SUCCESS;
7013 #endif
7014 }
7015
7016 /*
7017 * Routine: pmap_collect
7018 * Function:
7019 * Garbage collects the physical map system for
7020 * pages which are no longer used.
7021 * Success need not be guaranteed -- that is, there
7022 * may well be pages which are not referenced, but
7023 * others may be collected.
7024 */
7025 void
7026 pmap_collect(pmap_t pmap)
7027 {
7028 if (pmap == PMAP_NULL) {
7029 return;
7030 }
7031
7032 #if 0
7033 PMAP_LOCK(pmap);
7034 if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
7035 /* TODO: Scan for vm page assigned to top level page tables with no reference */
7036 }
7037 PMAP_UNLOCK(pmap);
7038 #endif
7039
7040 return;
7041 }
7042
7043 /*
7044 * Routine: pmap_gc
7045 * Function:
7046 * Pmap garbage collection
7047 * Called by the pageout daemon when pages are scarce.
7048 *
7049 */
7050 void
7051 pmap_gc(
7052 void)
7053 {
7054 pmap_t pmap, pmap_next;
7055 boolean_t gc_wait;
7056
7057 if (pmap_gc_allowed &&
7058 (pmap_gc_allowed_by_time_throttle ||
7059 pmap_gc_forced)) {
7060 pmap_gc_forced = FALSE;
7061 pmap_gc_allowed_by_time_throttle = FALSE;
7062 pmap_simple_lock(&pmaps_lock);
7063 pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
7064 while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
7065 if (!(pmap->gc_status & PMAP_GC_INFLIGHT)) {
7066 pmap->gc_status |= PMAP_GC_INFLIGHT;
7067 }
7068 pmap_simple_unlock(&pmaps_lock);
7069
7070 pmap_collect(pmap);
7071
7072 pmap_simple_lock(&pmaps_lock);
7073 gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
7074 pmap->gc_status &= ~(PMAP_GC_INFLIGHT | PMAP_GC_WAIT);
7075 pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
7076 if (gc_wait) {
7077 if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next)) {
7078 pmap_next->gc_status |= PMAP_GC_INFLIGHT;
7079 }
7080 pmap_simple_unlock(&pmaps_lock);
7081 thread_wakeup((event_t) &pmap->gc_status);
7082 pmap_simple_lock(&pmaps_lock);
7083 }
7084 pmap = pmap_next;
7085 }
7086 pmap_simple_unlock(&pmaps_lock);
7087 }
7088 }
7089
7090 /*
7091 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
7092 */
7093 uint64_t
7094 pmap_release_pages_fast(void)
7095 {
7096 return 0;
7097 }
7098
7099 /*
7100 * By default, don't attempt pmap GC more frequently
7101 * than once per minute.
7102 */
7103
7104 void
7105 compute_pmap_gc_throttle(
7106 void *arg __unused)
7107 {
7108 pmap_gc_allowed_by_time_throttle = TRUE;
7109 }
7110
7111 /*
7112 * pmap_attribute_cache_sync(vm_offset_t pa)
7113 *
7114 * Invalidates all of the instruction cache on a physical page and
7115 * pushes any dirty data from the data cache for the same physical page
7116 */
7117
7118 kern_return_t
7119 pmap_attribute_cache_sync(
7120 ppnum_t pp,
7121 vm_size_t size,
7122 __unused vm_machine_attribute_t attribute,
7123 __unused vm_machine_attribute_val_t * value)
7124 {
7125 if (size > PAGE_SIZE) {
7126 panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
7127 } else {
7128 cache_sync_page(pp);
7129 }
7130
7131 return KERN_SUCCESS;
7132 }
7133
7134 /*
7135 * pmap_sync_page_data_phys(ppnum_t pp)
7136 *
7137 * Invalidates all of the instruction cache on a physical page and
7138 * pushes any dirty data from the data cache for the same physical page
7139 */
7140 void
7141 pmap_sync_page_data_phys(
7142 ppnum_t pp)
7143 {
7144 cache_sync_page(pp);
7145 }
7146
7147 /*
7148 * pmap_sync_page_attributes_phys(ppnum_t pp)
7149 *
7150 * Write back and invalidate all cachelines on a physical page.
7151 */
7152 void
7153 pmap_sync_page_attributes_phys(
7154 ppnum_t pp)
7155 {
7156 flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
7157 }
7158
7159 #if CONFIG_COREDUMP
7160 /* temporary workaround */
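/*
 * coredumpok: returns TRUE only when the PTE backing va uses the default
 * (cacheable) memory-attribute index, presumably so the coredump path can
 * avoid touching device/uncached mappings.  Returns FALSE if no PTE exists.
 */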
7161 boolean_t
7162 coredumpok(
7163 vm_map_t map,
7164 vm_offset_t va)
7165 {
7166 pt_entry_t *pte_p;
7167 pt_entry_t spte;
7168
7169 pte_p = pmap_pte(map->pmap, va);
7170 if (0 == pte_p) {
7171 return FALSE;
7172 }
7173 spte = *pte_p;
7174 return (spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
7175 }
7176 #endif
7177
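/*
 * fillPage: fill physical page pn with the 32-bit pattern "fill", writing
 * through the page's kernel virtual (phystokv) alias.
 */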
7178 void
7179 fillPage(
7180 ppnum_t pn,
7181 unsigned int fill)
7182 {
7183 unsigned int *addr;
7184 int count;
7185
7186 addr = (unsigned int *) phystokv(ptoa(pn));
7187 count = PAGE_SIZE / sizeof(unsigned int);
7188 while (count--) {
7189 *addr++ = fill;
7190 }
7191 }
7192
7193 extern void mapping_set_mod(ppnum_t pn);
7194
7195 void
7196 mapping_set_mod(
7197 ppnum_t pn)
7198 {
7199 pmap_set_modify(pn);
7200 }
7201
7202 extern void mapping_set_ref(ppnum_t pn);
7203
7204 void
7205 mapping_set_ref(
7206 ppnum_t pn)
7207 {
7208 pmap_set_reference(pn);
7209 }
7210
7211 /*
7212 * Clear specified attribute bits.
7213 *
7214 * Try to force an arm_fast_fault() for all mappings of
7215 * the page - to force attributes to be set again at fault time.
7216 * If the forcing succeeds, clear the cached bits at the head.
7217 * Otherwise, something must have been wired, so leave the cached
7218 * attributes alone.
7219 */
7220 MARK_AS_PMAP_TEXT static void
7221 phys_attribute_clear_internal(
7222 ppnum_t pn,
7223 unsigned int bits,
7224 int options,
7225 void *arg)
7226 {
7227 pmap_paddr_t pa = ptoa(pn);
7228 vm_prot_t allow_mode = VM_PROT_ALL;
7229
7230
7231 if ((bits & PP_ATTR_MODIFIED) &&
7232 (options & PMAP_OPTIONS_NOFLUSH) &&
7233 (arg == NULL)) {
7234 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
7235 "should not clear 'modified' without flushing TLBs\n",
7236 pn, bits, options, arg);
7237 }
7238
7239 assert(pn != vm_page_fictitious_addr);
7240
7241 if (options & PMAP_OPTIONS_CLEAR_WRITE) {
7242 assert(bits == PP_ATTR_MODIFIED);
7243
7244 pmap_page_protect_options_internal(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0);
7245 /*
7246 * We short circuit this case; it should not need to
7247 * invoke arm_force_fast_fault, so just clear the modified bit.
7248 * pmap_page_protect has taken care of resetting
7249 * the state so that we'll see the next write as a fault to
7250 * the VM (i.e. we don't want a fast fault).
7251 */
7252 pa_clear_bits(pa, bits);
7253 return;
7254 }
7255 if (bits & PP_ATTR_REFERENCED) {
7256 allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
7257 }
7258 if (bits & PP_ATTR_MODIFIED) {
7259 allow_mode &= ~VM_PROT_WRITE;
7260 }
7261
7262 if (bits == PP_ATTR_NOENCRYPT) {
7263 /*
7264 * We short circuit this case; it should not need to
7265 * invoke arm_force_fast_fault, so just clear and
7266 * return. On ARM, this bit is just a debugging aid.
7267 */
7268 pa_clear_bits(pa, bits);
7269 return;
7270 }
7271
7272 if (arm_force_fast_fault_internal(pn, allow_mode, options)) {
7273 pa_clear_bits(pa, bits);
7274 }
7275 return;
7276 }
7277
7278 static void
7279 phys_attribute_clear(
7280 ppnum_t pn,
7281 unsigned int bits,
7282 int options,
7283 void *arg)
7284 {
7285 /*
7286 * Do we really want this tracepoint? It will be extremely chatty.
7287 * Also, should we have a corresponding trace point for the set path?
7288 */
7289 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
7290
7291 phys_attribute_clear_internal(pn, bits, options, arg);
7292
7293 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
7294 }
7295
7296 /*
7297 * Set specified attribute bits.
7298 *
7299 * Set cached value in the pv head because we have
7300 * no per-mapping hardware support for referenced and
7301 * modify bits.
7302 */
7303 MARK_AS_PMAP_TEXT static void
7304 phys_attribute_set_internal(
7305 ppnum_t pn,
7306 unsigned int bits)
7307 {
7308 pmap_paddr_t pa = ptoa(pn);
7309 assert(pn != vm_page_fictitious_addr);
7310
7311
7312 pa_set_bits(pa, bits);
7313
7314 return;
7315 }
7316
7317 static void
7318 phys_attribute_set(
7319 ppnum_t pn,
7320 unsigned int bits)
7321 {
7322 phys_attribute_set_internal(pn, bits);
7323 }
7324
7325
7326 /*
7327 * Check specified attribute bits.
7328 *
7329 * use the software cached bits (since no hw support).
7330 */
7331 static boolean_t
7332 phys_attribute_test(
7333 ppnum_t pn,
7334 unsigned int bits)
7335 {
7336 pmap_paddr_t pa = ptoa(pn);
7337 assert(pn != vm_page_fictitious_addr);
7338 return pa_test_bits(pa, bits);
7339 }
7340
7341
7342 /*
7343 * Set the modify/reference bits on the specified physical page.
7344 */
7345 void
7346 pmap_set_modify(ppnum_t pn)
7347 {
7348 phys_attribute_set(pn, PP_ATTR_MODIFIED);
7349 }
7350
7351
7352 /*
7353 * Clear the modify bits on the specified physical page.
7354 */
7355 void
7356 pmap_clear_modify(
7357 ppnum_t pn)
7358 {
7359 phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
7360 }
7361
7362
7363 /*
7364 * pmap_is_modified:
7365 *
7366 * Return whether or not the specified physical page is modified
7367 * by any physical maps.
7368 */
7369 boolean_t
7370 pmap_is_modified(
7371 ppnum_t pn)
7372 {
7373 return phys_attribute_test(pn, PP_ATTR_MODIFIED);
7374 }
7375
7376
7377 /*
7378 * Set the reference bit on the specified physical page.
7379 */
7380 static void
7381 pmap_set_reference(
7382 ppnum_t pn)
7383 {
7384 phys_attribute_set(pn, PP_ATTR_REFERENCED);
7385 }
7386
7387 /*
7388 * Clear the reference bits on the specified physical page.
7389 */
7390 void
7391 pmap_clear_reference(
7392 ppnum_t pn)
7393 {
7394 phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
7395 }
7396
7397
7398 /*
7399 * pmap_is_referenced:
7400 *
7401 * Return whether or not the specified physical page is referenced
7402 * by any physical maps.
7403 */
7404 boolean_t
7405 pmap_is_referenced(
7406 ppnum_t pn)
7407 {
7408 return phys_attribute_test(pn, PP_ATTR_REFERENCED);
7409 }
7410
7411 /*
7412 * pmap_get_refmod(phys)
7413 * returns the referenced and modified bits of the specified
7414 * physical page.
7415 */
7416 unsigned int
7417 pmap_get_refmod(
7418 ppnum_t pn)
7419 {
7420 return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
7421 | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
7422 }
7423
7424 /*
7425 * pmap_clear_refmod(phys, mask)
7426 * clears the referenced and modified bits as specified by the mask
7427 * of the specified physical page.
7428 */
7429 void
7430 pmap_clear_refmod_options(
7431 ppnum_t pn,
7432 unsigned int mask,
7433 unsigned int options,
7434 void *arg)
7435 {
7436 unsigned int bits;
7437
7438 bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
7439 ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
7440 phys_attribute_clear(pn, bits, options, arg);
7441 }
7442
7443 void
7444 pmap_clear_refmod(
7445 ppnum_t pn,
7446 unsigned int mask)
7447 {
7448 pmap_clear_refmod_options(pn, mask, 0, NULL);
7449 }
7450
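/*
 * pmap_disconnect_options: like pmap_disconnect(), but passes options and
 * arg through to pmap_page_protect_options().  With
 * PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED, the software-maintained "modified"
 * bit decides whether the page is accounted as bound for the compressor
 * before it is disconnected.  Returns the ref/mod state in VM_MEM_* form.
 */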
7451 unsigned int
7452 pmap_disconnect_options(
7453 ppnum_t pn,
7454 unsigned int options,
7455 void *arg)
7456 {
7457 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
7458 /*
7459 * On ARM, the "modified" bit is managed by software, so
7460 * we know up-front if the physical page is "modified",
7461 * without having to scan all the PTEs pointing to it.
7462 * The caller should have made the VM page "busy" so no one
7463 * should be able to establish any new mapping and "modify"
7464 * the page behind us.
7465 */
7466 if (pmap_is_modified(pn)) {
7467 /*
7468 * The page has been modified and will be sent to
7469 * the VM compressor.
7470 */
7471 options |= PMAP_OPTIONS_COMPRESSOR;
7472 } else {
7473 /*
7474 * The page hasn't been modified and will be freed
7475 * instead of compressed.
7476 */
7477 }
7478 }
7479
7480 /* disconnect the page */
7481 pmap_page_protect_options(pn, 0, options, arg);
7482
7483 /* return ref/chg status */
7484 return pmap_get_refmod(pn);
7485 }
7486
7487 /*
7488 * Routine:
7489 * pmap_disconnect
7490 *
7491 * Function:
7492 * Disconnect all mappings for this page and return reference and change status
7493 * in generic format.
7494 *
7495 */
7496 unsigned int
7497 pmap_disconnect(
7498 ppnum_t pn)
7499 {
7500 pmap_page_protect(pn, 0); /* disconnect the page */
7501 return pmap_get_refmod(pn); /* return ref/chg status */
7502 }
7503
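/*
 * pmap_has_managed_page: return TRUE if the inclusive physical page range
 * [first, last] overlaps the managed range [vm_first_phys, vm_last_phys).
 */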
7504 boolean_t
7505 pmap_has_managed_page(ppnum_t first, ppnum_t last)
7506 {
7507 if (ptoa(first) >= vm_last_phys) {
7508 return FALSE;
7509 }
7510 if (ptoa(last) < vm_first_phys) {
7511 return FALSE;
7512 }
7513
7514 return TRUE;
7515 }
7516
7517 /*
7518 * The state maintained by the noencrypt functions is used as a
7519 * debugging aid on ARM. This incurs some overhead on the part
7520 * of the caller. A special case check in phys_attribute_clear
7521 * (the most expensive path) currently minimizes this overhead,
7522 * but stubbing these functions out on RELEASE kernels yields
7523 * further wins.
7524 */
7525 boolean_t
7526 pmap_is_noencrypt(
7527 ppnum_t pn)
7528 {
7529 #if DEVELOPMENT || DEBUG
7530 boolean_t result = FALSE;
7531
7532 if (!pa_valid(ptoa(pn))) {
7533 return FALSE;
7534 }
7535
7536 result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));
7537
7538 return result;
7539 #else
7540 #pragma unused(pn)
7541 return FALSE;
7542 #endif
7543 }
7544
7545 void
7546 pmap_set_noencrypt(
7547 ppnum_t pn)
7548 {
7549 #if DEVELOPMENT || DEBUG
7550 if (!pa_valid(ptoa(pn))) {
7551 return;
7552 }
7553
7554 phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
7555 #else
7556 #pragma unused(pn)
7557 #endif
7558 }
7559
7560 void
7561 pmap_clear_noencrypt(
7562 ppnum_t pn)
7563 {
7564 #if DEVELOPMENT || DEBUG
7565 if (!pa_valid(ptoa(pn))) {
7566 return;
7567 }
7568
7569 phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
7570 #else
7571 #pragma unused(pn)
7572 #endif
7573 }
7574
7575
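/*
 * pmap_lock_phys_page / pmap_unlock_phys_page: take or release the PV head
 * lock for a managed page; unmanaged pages fall back to the global
 * phys_backup_lock.
 */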
7576 void
7577 pmap_lock_phys_page(ppnum_t pn)
7578 {
7579 int pai;
7580 pmap_paddr_t phys = ptoa(pn);
7581
7582 if (pa_valid(phys)) {
7583 pai = (int)pa_index(phys);
7584 LOCK_PVH(pai);
7585 } else {
7586 simple_lock(&phys_backup_lock, LCK_GRP_NULL);
}
7587 }
7588
7589
7590 void
7591 pmap_unlock_phys_page(ppnum_t pn)
7592 {
7593 int pai;
7594 pmap_paddr_t phys = ptoa(pn);
7595
7596 if (pa_valid(phys)) {
7597 pai = (int)pa_index(phys);
7598 UNLOCK_PVH(pai);
7599 } else {
7600 simple_unlock(&phys_backup_lock);
}
7601 }
7602
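/*
 * pmap_switch_user_ttb_internal: install the user pmap's translation table
 * on the current CPU.  On ARMv7 it selects the TTBR0/TTBR1 split via
 * TTBCR.N and, unless __ARM_USER_PROTECT__ is in effect, loads TTBR0 and
 * the context ID (ASID).  On ARM64 it records the nested pmap for this CPU
 * and loads TTBR0 with the table base and ASID; on PAC-capable hardware it
 * may also toggle the JOP keys in SCTLR_EL1 to match the pmap's
 * disable_jop setting.
 */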
7603 MARK_AS_PMAP_TEXT static void
7604 pmap_switch_user_ttb_internal(
7605 pmap_t pmap)
7606 {
7607 VALIDATE_PMAP(pmap);
7608 pmap_cpu_data_t *cpu_data_ptr;
7609 cpu_data_ptr = pmap_get_cpu_data();
7610
7611 #if (__ARM_VMSA__ == 7)
7612 cpu_data_ptr->cpu_user_pmap = pmap;
7613 cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
7614
7615 #if MACH_ASSERT && __ARM_USER_PROTECT__
7616 {
7617 unsigned int ttbr0_val, ttbr1_val;
7618 __asm__ volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
7619 __asm__ volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
7620 if (ttbr0_val != ttbr1_val) {
7621 panic("Misaligned ttbr0 %08X\n", ttbr0_val);
7622 }
7623 }
7624 #endif
7625 if (pmap->tte_index_max == NTTES) {
7626 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x40000000 */
7627 __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(2));
7628 __builtin_arm_isb(ISB_SY);
7629 #if !__ARM_USER_PROTECT__
7630 set_mmu_ttb(pmap->ttep);
7631 #endif
7632 } else {
7633 #if !__ARM_USER_PROTECT__
7634 set_mmu_ttb(pmap->ttep);
7635 #endif
7636 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x80000000 */
7637 __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(1));
7638 __builtin_arm_isb(ISB_SY);
7639 #if MACH_ASSERT && __ARM_USER_PROTECT__
7640 if (pmap->ttep & 0x1000) {
7641 panic("Misaligned ttbr0 %08X\n", pmap->ttep);
7642 }
7643 #endif
7644 }
7645
7646 #if !__ARM_USER_PROTECT__
7647 set_context_id(pmap->hw_asid);
7648 #endif
7649
7650 #else /* (__ARM_VMSA__ == 7) */
7651
7652 if (pmap != kernel_pmap) {
7653 cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
7654 }
7655
7656 if (pmap == kernel_pmap) {
7657 pmap_clear_user_ttb_internal();
7658 } else {
7659 set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->hw_asid) << TTBR_ASID_SHIFT));
7660 }
7661
7662 #if defined(HAS_APPLE_PAC) && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__)
7663 if (!(BootArgs->bootFlags & kBootFlagsDisableJOP) && !(BootArgs->bootFlags & kBootFlagsDisableUserJOP)) {
7664 uint64_t sctlr = __builtin_arm_rsr64("SCTLR_EL1");
7665 bool jop_enabled = sctlr & SCTLR_JOP_KEYS_ENABLED;
7666 if (!jop_enabled && !pmap->disable_jop) {
7667 // turn on JOP
7668 sctlr |= SCTLR_JOP_KEYS_ENABLED;
7669 __builtin_arm_wsr64("SCTLR_EL1", sctlr);
7670 // no ISB necessary because this won't take effect until eret returns to EL0
7671 } else if (jop_enabled && pmap->disable_jop) {
7672 // turn off JOP
7673 sctlr &= ~SCTLR_JOP_KEYS_ENABLED;
7674 __builtin_arm_wsr64("SCTLR_EL1", sctlr);
7675 }
7676 }
7677 #endif /* HAS_APPLE_PAC && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__) */
7678 #endif /* (__ARM_VMSA__ == 7) */
7679 }
7680
7681 void
7682 pmap_switch_user_ttb(
7683 pmap_t pmap)
7684 {
7685 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
7686 pmap_switch_user_ttb_internal(pmap);
7687 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
7688 }
7689
7690 MARK_AS_PMAP_TEXT static void
7691 pmap_clear_user_ttb_internal(void)
7692 {
7693 #if (__ARM_VMSA__ > 7)
7694 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
7695 #else
7696 set_mmu_ttb(kernel_pmap->ttep);
7697 #endif
7698 }
7699
7700 void
7701 pmap_clear_user_ttb(void)
7702 {
7703 pmap_clear_user_ttb_internal();
7704 }
7705
7706 /*
7707 * Routine: arm_force_fast_fault
7708 *
7709 * Function:
7710 * Force all mappings for this page to fault according
7711 * to the access modes allowed, so we can gather ref/modify
7712 * bits again.
7713 */
7714 MARK_AS_PMAP_TEXT static boolean_t
7715 arm_force_fast_fault_internal(
7716 ppnum_t ppnum,
7717 vm_prot_t allow_mode,
7718 int options)
7719 {
7720 pmap_paddr_t phys = ptoa(ppnum);
7721 pv_entry_t *pve_p;
7722 pt_entry_t *pte_p;
7723 int pai;
7724 boolean_t result;
7725 pv_entry_t **pv_h;
7726 boolean_t is_reusable, is_internal;
7727 boolean_t tlb_flush_needed = FALSE;
7728 boolean_t ref_fault;
7729 boolean_t mod_fault;
7730
7731 assert(ppnum != vm_page_fictitious_addr);
7732
7733 if (!pa_valid(phys)) {
7734 return FALSE; /* Not a managed page. */
7735 }
7736
7737 result = TRUE;
7738 ref_fault = FALSE;
7739 mod_fault = FALSE;
7740 pai = (int)pa_index(phys);
7741 LOCK_PVH(pai);
7742 pv_h = pai_to_pvh(pai);
7743
7744 pte_p = PT_ENTRY_NULL;
7745 pve_p = PV_ENTRY_NULL;
7746 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7747 pte_p = pvh_ptep(pv_h);
7748 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7749 pve_p = pvh_list(pv_h);
7750 }
7751
7752 is_reusable = IS_REUSABLE_PAGE(pai);
7753 is_internal = IS_INTERNAL_PAGE(pai);
7754
7755 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
7756 vm_map_address_t va;
7757 pt_entry_t spte;
7758 pt_entry_t tmplate;
7759 pmap_t pmap;
7760 boolean_t update_pte;
7761
7762 if (pve_p != PV_ENTRY_NULL) {
7763 pte_p = pve_get_ptep(pve_p);
7764 }
7765
7766 if (pte_p == PT_ENTRY_NULL) {
7767 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
7768 }
7769 #ifdef PVH_FLAG_IOMMU
7770 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
7771 goto fff_skip_pve;
7772 }
7773 #endif
7774 if (*pte_p == ARM_PTE_EMPTY) {
7775 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
7776 }
7777 if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
7778 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
7779 }
7780
7781 pmap = ptep_get_pmap(pte_p);
7782 va = ptep_get_va(pte_p);
7783
7784 assert(va >= pmap->min && va < pmap->max);
7785
7786 if (pte_is_wired(*pte_p) || pmap == kernel_pmap) {
7787 result = FALSE;
7788 break;
7789 }
7790
7791 spte = *pte_p;
7792 tmplate = spte;
7793 update_pte = FALSE;
7794
7795 if ((allow_mode & VM_PROT_READ) != VM_PROT_READ) {
7796 /* read protection sets the pte to fault */
7797 tmplate = tmplate & ~ARM_PTE_AF;
7798 update_pte = TRUE;
7799 ref_fault = TRUE;
7800 }
7801 if ((allow_mode & VM_PROT_WRITE) != VM_PROT_WRITE) {
7802 /* take away write permission if set */
7803 if (pmap == kernel_pmap) {
7804 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) {
7805 tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
7806 pte_set_was_writeable(tmplate, true);
7807 update_pte = TRUE;
7808 mod_fault = TRUE;
7809 }
7810 } else {
7811 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWRW)) {
7812 tmplate = ((tmplate & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pmap_get_pt_attr(pmap)));
7813 pte_set_was_writeable(tmplate, true);
7814 update_pte = TRUE;
7815 mod_fault = TRUE;
7816 }
7817 }
7818 }
7819
7820
7821 if (update_pte) {
7822 if (*pte_p != ARM_PTE_TYPE_FAULT &&
7823 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
7824 WRITE_PTE_STRONG(pte_p, tmplate);
7825 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
7826 tlb_flush_needed = TRUE;
7827 } else {
7828 WRITE_PTE(pte_p, tmplate);
7829 __builtin_arm_isb(ISB_SY);
7830 }
7831 }
7832
7833 /* update pmap stats and ledgers */
7834 if (IS_ALTACCT_PAGE(pai, pve_p)) {
7835 /*
7836 * We do not track "reusable" status for
7837 * "alternate accounting" mappings.
7838 */
7839 } else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
7840 is_reusable &&
7841 is_internal &&
7842 pmap != kernel_pmap) {
7843 /* one less "reusable" */
7844 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
7845 OSAddAtomic(-1, &pmap->stats.reusable);
7846 /* one more "internal" */
7847 OSAddAtomic(+1, &pmap->stats.internal);
7848 PMAP_STATS_PEAK(pmap->stats.internal);
7849 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
7850 pmap_ledger_credit(pmap, task_ledgers.internal, machine_ptob(1));
7851 assert(!IS_ALTACCT_PAGE(pai, pve_p));
7852 assert(IS_INTERNAL_PAGE(pai));
7853 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
7854
7855 /*
7856 * Avoid the cost of another trap to handle the fast
7857 * fault when we next write to this page: let's just
7858 * handle that now since we already have all the
7859 * necessary information.
7860 */
7861 {
7862 arm_clear_fast_fault(ppnum, VM_PROT_WRITE);
7863 }
7864 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
7865 !is_reusable &&
7866 is_internal &&
7867 pmap != kernel_pmap) {
7868 /* one more "reusable" */
7869 OSAddAtomic(+1, &pmap->stats.reusable);
7870 PMAP_STATS_PEAK(pmap->stats.reusable);
7871 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
7872 /* one less "internal" */
7873 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
7874 OSAddAtomic(-1, &pmap->stats.internal);
7875 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(1));
7876 assert(!IS_ALTACCT_PAGE(pai, pve_p));
7877 assert(IS_INTERNAL_PAGE(pai));
7878 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
7879 }
7880
7881 #ifdef PVH_FLAG_IOMMU
7882 fff_skip_pve:
7883 #endif
7884 pte_p = PT_ENTRY_NULL;
7885 if (pve_p != PV_ENTRY_NULL) {
7886 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
7887 }
7888 }
7889
7890 if (tlb_flush_needed) {
7891 sync_tlb_flush();
7892 }
7893
7894 /* update global "reusable" status for this page */
7895 if (is_internal) {
7896 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
7897 is_reusable) {
7898 CLR_REUSABLE_PAGE(pai);
7899 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
7900 !is_reusable) {
7901 SET_REUSABLE_PAGE(pai);
7902 }
7903 }
7904
7905 if (mod_fault) {
7906 SET_MODFAULT_PAGE(pai);
7907 }
7908 if (ref_fault) {
7909 SET_REFFAULT_PAGE(pai);
7910 }
7911
7912 UNLOCK_PVH(pai);
7913 return result;
7914 }
7915
7916 boolean_t
7917 arm_force_fast_fault(
7918 ppnum_t ppnum,
7919 vm_prot_t allow_mode,
7920 int options,
7921 __unused void *arg)
7922 {
7923 pmap_paddr_t phys = ptoa(ppnum);
7924
7925 assert(ppnum != vm_page_fictitious_addr);
7926
7927 if (!pa_valid(phys)) {
7928 return FALSE; /* Not a managed page. */
7929 }
7930
7931 return arm_force_fast_fault_internal(ppnum, allow_mode, options);
7932 }
7933
7934 /*
7935 * Routine: arm_clear_fast_fault
7936 *
7937 * Function:
7938 * Clear the pending force-fault state for all mappings of this page,
7939 * based on the observed fault type, and update the ref/modify bits.
7940 */
7941 boolean_t
7942 arm_clear_fast_fault(
7943 ppnum_t ppnum,
7944 vm_prot_t fault_type)
7945 {
7946 pmap_paddr_t pa = ptoa(ppnum);
7947 pv_entry_t *pve_p;
7948 pt_entry_t *pte_p;
7949 int pai;
7950 boolean_t result;
7951 boolean_t tlb_flush_needed = FALSE;
7952 pv_entry_t **pv_h;
7953
7954 assert(ppnum != vm_page_fictitious_addr);
7955
7956 if (!pa_valid(pa)) {
7957 return FALSE; /* Not a managed page. */
7958 }
7959
7960 result = FALSE;
7961 pai = (int)pa_index(pa);
7962 ASSERT_PVH_LOCKED(pai);
7963 pv_h = pai_to_pvh(pai);
7964
7965 pte_p = PT_ENTRY_NULL;
7966 pve_p = PV_ENTRY_NULL;
7967 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7968 pte_p = pvh_ptep(pv_h);
7969 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7970 pve_p = pvh_list(pv_h);
7971 }
7972
7973 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
7974 vm_map_address_t va;
7975 pt_entry_t spte;
7976 pt_entry_t tmplate;
7977 pmap_t pmap;
7978
7979 if (pve_p != PV_ENTRY_NULL) {
7980 pte_p = pve_get_ptep(pve_p);
7981 }
7982
7983 if (pte_p == PT_ENTRY_NULL) {
7984 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
7985 }
7986 #ifdef PVH_FLAG_IOMMU
7987 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
7988 goto cff_skip_pve;
7989 }
7990 #endif
7991 if (*pte_p == ARM_PTE_EMPTY) {
7992 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
7993 }
7994
7995 pmap = ptep_get_pmap(pte_p);
7996 va = ptep_get_va(pte_p);
7997
7998 assert(va >= pmap->min && va < pmap->max);
7999
8000 spte = *pte_p;
8001 tmplate = spte;
8002
8003 if ((fault_type & VM_PROT_WRITE) && (pte_was_writeable(spte))) {
8004 {
8005 if (pmap == kernel_pmap) {
8006 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
8007 } else {
8008 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap)));
8009 }
8010 }
8011
8012 tmplate |= ARM_PTE_AF;
8013
8014 pte_set_was_writeable(tmplate, false);
8015 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
8016 } else if ((fault_type & VM_PROT_READ) && ((spte & ARM_PTE_AF) != ARM_PTE_AF)) {
8017 tmplate = spte | ARM_PTE_AF;
8018
8019 {
8020 pa_set_bits(pa, PP_ATTR_REFERENCED);
8021 }
8022 }
8023
8024
8025 if (spte != tmplate) {
8026 if (spte != ARM_PTE_TYPE_FAULT) {
8027 WRITE_PTE_STRONG(pte_p, tmplate);
8028 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
8029 tlb_flush_needed = TRUE;
8030 } else {
8031 WRITE_PTE(pte_p, tmplate);
8032 __builtin_arm_isb(ISB_SY);
8033 }
8034 result = TRUE;
8035 }
8036
8037 #ifdef PVH_FLAG_IOMMU
8038 cff_skip_pve:
8039 #endif
8040 pte_p = PT_ENTRY_NULL;
8041 if (pve_p != PV_ENTRY_NULL) {
8042 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
8043 }
8044 }
8045 if (tlb_flush_needed) {
8046 sync_tlb_flush();
8047 }
8048 return result;
8049 }
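/*
 * Summary sketch (not compiled) of the software ref/mod emulation
 * implemented by arm_force_fast_fault_internal() and arm_clear_fast_fault()
 * above, for a non-kernel mapping on the (__ARM_VMSA__ > 7) path.
 * pte, pa and pt_attr stand in for the local variables used in those
 * routines.
 */
#if 0
/* force-fault: revoke read access by clearing the access flag */
pte &= ~ARM_PTE_AF;
/* force-fault: revoke write access by downgrading to read-only, remembering it was writeable */
pte = (pte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr);
pte_set_was_writeable(pte, true);

/* clear-fault on a write fault: restore RW + AF and latch ref/mod */
pte = (pte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pt_attr);
pte |= ARM_PTE_AF;
pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);

/* clear-fault on a read fault: restore AF and latch only the reference bit */
pte |= ARM_PTE_AF;
pa_set_bits(pa, PP_ATTR_REFERENCED);
#endif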
8050
8051 /*
8052 * Determine if the fault was induced by software tracking of
8053 * modify/reference bits. If so, re-enable the mapping (and set
8054 * the appropriate bits).
8055 *
8056 * Returns KERN_SUCCESS if the fault was induced and was
8057 * successfully handled.
8058 *
8059 * Returns KERN_FAILURE if the fault was not induced and
8060 * the function was unable to deal with it.
8061 *
8062 * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
8063 * disallows this type of access.
8064 */
8065 MARK_AS_PMAP_TEXT static kern_return_t
8066 arm_fast_fault_internal(
8067 pmap_t pmap,
8068 vm_map_address_t va,
8069 vm_prot_t fault_type,
8070 __unused bool was_af_fault,
8071 __unused bool from_user)
8072 {
8073 kern_return_t result = KERN_FAILURE;
8074 pt_entry_t *ptep;
8075 pt_entry_t spte = ARM_PTE_TYPE_FAULT;
8076 int pai;
8077 pmap_paddr_t pa;
8078 VALIDATE_PMAP(pmap);
8079
8080 PMAP_LOCK(pmap);
8081
8082 /*
8083 * If the entry doesn't exist, is completely invalid, or is already
8084 * valid, we can't fix it here.
8085 */
8086
8087 ptep = pmap_pte(pmap, va);
8088 if (ptep != PT_ENTRY_NULL) {
8089 while (true) {
8090 spte = *ptep;
8091
8092 pa = pte_to_pa(spte);
8093
8094 if ((spte == ARM_PTE_TYPE_FAULT) ||
8095 ARM_PTE_IS_COMPRESSED(spte, ptep)) {
8096 PMAP_UNLOCK(pmap);
8097 return result;
8098 }
8099
8100 if (!pa_valid(pa)) {
8101 PMAP_UNLOCK(pmap);
8102 return result;
8103 }
8104 pai = (int)pa_index(pa);
8105 LOCK_PVH(pai);
8106 break;
8107 }
8108 } else {
8109 PMAP_UNLOCK(pmap);
8110 return result;
8111 }
8112
8113
8114 if ((IS_REFFAULT_PAGE(pai)) ||
8115 ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
8116 /*
8117 * An attempted access will always clear ref/mod fault state, as
8118 * appropriate for the fault type. arm_clear_fast_fault will
8119 * update the associated PTEs for the page as appropriate; if
8120 * any PTEs are updated, we redrive the access. If the mapping
8121 * does not actually allow for the attempted access, the
8122 * following fault will (hopefully) fail to update any PTEs, and
8123 * thus cause arm_fast_fault to decide that it failed to handle
8124 * the fault.
8125 */
8126 if (IS_REFFAULT_PAGE(pai)) {
8127 CLR_REFFAULT_PAGE(pai);
8128 }
8129 if ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)) {
8130 CLR_MODFAULT_PAGE(pai);
8131 }
8132
8133 if (arm_clear_fast_fault((ppnum_t)atop(pa), fault_type)) {
8134 /*
8135 * Should this preserve KERN_PROTECTION_FAILURE? The
8136 * cost of not doing so is another fault in a case
8137 * that should already result in an exception.
8138 */
8139 result = KERN_SUCCESS;
8140 }
8141 }
8142
8143 UNLOCK_PVH(pai);
8144 PMAP_UNLOCK(pmap);
8145 return result;
8146 }
8147
8148 kern_return_t
8149 arm_fast_fault(
8150 pmap_t pmap,
8151 vm_map_address_t va,
8152 vm_prot_t fault_type,
8153 bool was_af_fault,
8154 __unused bool from_user)
8155 {
8156 kern_return_t result = KERN_FAILURE;
8157
8158 if (va < pmap->min || va >= pmap->max) {
8159 return result;
8160 }
8161
8162 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
8163 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
8164 from_user);
8165
8166 #if (__ARM_VMSA__ == 7)
8167 if (pmap != kernel_pmap) {
8168 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
8169 pmap_t cur_pmap;
8170 pmap_t cur_user_pmap;
8171
8172 cur_pmap = current_pmap();
8173 cur_user_pmap = cpu_data_ptr->cpu_user_pmap;
8174
8175 if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
8176 if (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp) {
8177 pmap_set_pmap(pmap, current_thread());
8178 result = KERN_SUCCESS;
8179 goto done;
8180 }
8181 }
8182 }
8183 #endif
8184
8185 result = arm_fast_fault_internal(pmap, va, fault_type, was_af_fault, from_user);
8186
8187 #if (__ARM_VMSA__ == 7)
8188 done:
8189 #endif
8190
8191 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);
8192
8193 return result;
8194 }
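/*
 * Illustrative sketch (not compiled): the expected caller of
 * arm_fast_fault() is the data-abort handler, which tries this fast path
 * before taking the full VM fault path.  The surrounding handler variables
 * (map, fault_addr, fault_type, is_af_fault, user_fault) are simplified
 * assumptions, not the actual sleh call site.
 */
#if 0
if (arm_fast_fault(map->pmap, fault_addr, fault_type, is_af_fault, user_fault) == KERN_SUCCESS) {
	/* ref/mod emulation repaired the PTE; just retry the faulting instruction */
	return;
}
/* otherwise fall back to the full vm_fault() path to resolve the fault */
#endif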
8195
8196 void
8197 pmap_copy_page(
8198 ppnum_t psrc,
8199 ppnum_t pdst)
8200 {
8201 bcopy_phys((addr64_t) (ptoa(psrc)),
8202 (addr64_t) (ptoa(pdst)),
8203 PAGE_SIZE);
8204 }
8205
8206
8207 /*
8208 * pmap_copy_part_page copies part of the specified (machine independent) page.
8209 */
8210 void
8211 pmap_copy_part_page(
8212 ppnum_t psrc,
8213 vm_offset_t src_offset,
8214 ppnum_t pdst,
8215 vm_offset_t dst_offset,
8216 vm_size_t len)
8217 {
8218 bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
8219 (addr64_t) (ptoa(pdst) + dst_offset),
8220 len);
8221 }
8222
8223
8224 /*
8225 * pmap_zero_page zeros the specified (machine independent) page.
8226 */
8227 void
8228 pmap_zero_page(
8229 ppnum_t pn)
8230 {
8231 assert(pn != vm_page_fictitious_addr);
8232 bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
8233 }
8234
8235 /*
8236 * pmap_zero_part_page
8237 * zeros the specified (machine independent) part of a page.
8238 */
8239 void
8240 pmap_zero_part_page(
8241 ppnum_t pn,
8242 vm_offset_t offset,
8243 vm_size_t len)
8244 {
8245 assert(pn != vm_page_fictitious_addr);
8246 assert(offset + len <= PAGE_SIZE);
8247 bzero_phys((addr64_t) (ptoa(pn) + offset), len);
8248 }
8249
8250
8251 /*
8252 * No-op in the current ARM implementation.
8253 */
8254 void
8255 inval_copy_windows(
8256 __unused thread_t t)
8257 {
8258 }
8259
8260 void
8261 pmap_map_globals(
8262 void)
8263 {
8264 pt_entry_t *ptep, pte;
8265
8266 ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
8267 assert(ptep != PT_ENTRY_NULL);
8268 assert(*ptep == ARM_PTE_EMPTY);
8269
8270 pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
8271 #if __ARM_KERNEL_PROTECT__
8272 pte |= ARM_PTE_NG;
8273 #endif /* __ARM_KERNEL_PROTECT__ */
8274 pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
8275 #if (__ARM_VMSA__ > 7)
8276 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
8277 #else
8278 pte |= ARM_PTE_SH;
8279 #endif
8280 *ptep = pte;
8281 FLUSH_PTE_RANGE(ptep, (ptep + 1));
8282 PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE, false);
8283 }
8284
8285 vm_offset_t
8286 pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
8287 {
8288 if (__improbable(index >= CPUWINDOWS_MAX)) {
8289 panic("%s: invalid index %u", __func__, index);
8290 }
8291 return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
8292 }
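/*
 * Worked example of the layout computed above, assuming four windows per
 * CPU (CPUWINDOWS_MAX == 4, an illustrative value): window 2 on CPU 3 maps
 * at CPUWINDOWS_BASE + PAGE_SIZE * (4 * 3 + 2) = CPUWINDOWS_BASE + 14 pages,
 * so each CPU owns a disjoint, contiguous run of CPUWINDOWS_MAX pages.
 */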
8293
8294 MARK_AS_PMAP_TEXT static unsigned int
8295 pmap_map_cpu_windows_copy_internal(
8296 ppnum_t pn,
8297 vm_prot_t prot,
8298 unsigned int wimg_bits)
8299 {
8300 pt_entry_t *ptep = NULL, pte;
8301 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
8302 unsigned int cpu_num;
8303 unsigned int i;
8304 vm_offset_t cpu_copywindow_vaddr = 0;
8305 bool need_strong_sync = false;
8306
8307
8308 cpu_num = pmap_cpu_data->cpu_number;
8309
8310 for (i = 0; i < CPUWINDOWS_MAX; i++) {
8311 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
8312 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
8313 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
8314 if (*ptep == ARM_PTE_TYPE_FAULT) {
8315 break;
8316 }
8317 }
8318 if (i == CPUWINDOWS_MAX) {
8319 panic("pmap_map_cpu_windows_copy: out of window\n");
8320 }
8321
8322 pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
8323 #if __ARM_KERNEL_PROTECT__
8324 pte |= ARM_PTE_NG;
8325 #endif /* __ARM_KERNEL_PROTECT__ */
8326
8327 pte |= wimg_to_pte(wimg_bits);
8328
8329 if (prot & VM_PROT_WRITE) {
8330 pte |= ARM_PTE_AP(AP_RWNA);
8331 } else {
8332 pte |= ARM_PTE_AP(AP_RONA);
8333 }
8334
8335 WRITE_PTE_FAST(ptep, pte);
8336 /*
8337 * Invalidate the TLB. Nested use of cpu_copywindow_vaddr by an interrupted context is
8338 * covered in pmap_unmap_cpu_windows_copy(), after the pte is cleared and before the TLB invalidate.
8339 */
8340 FLUSH_PTE_STRONG(ptep);
8341 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[i]);
8342 pmap_cpu_data->copywindow_strong_sync[i] = need_strong_sync;
8343
8344 return i;
8345 }
8346
8347 unsigned int
8348 pmap_map_cpu_windows_copy(
8349 ppnum_t pn,
8350 vm_prot_t prot,
8351 unsigned int wimg_bits)
8352 {
8353 return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
8354 }
8355
8356 MARK_AS_PMAP_TEXT static void
8357 pmap_unmap_cpu_windows_copy_internal(
8358 unsigned int index)
8359 {
8360 pt_entry_t *ptep;
8361 unsigned int cpu_num;
8362 vm_offset_t cpu_copywindow_vaddr = 0;
8363 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
8364
8365 cpu_num = pmap_cpu_data->cpu_number;
8366
8367 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
8368 /* Issue full-system DSB to ensure prior operations on the per-CPU window
8369 * (which are likely to have been on I/O memory) are complete before
8370 * tearing down the mapping. */
8371 __builtin_arm_dsb(DSB_SY);
8372 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
8373 WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
8374 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index]);
8375 }
8376
8377 void
8378 pmap_unmap_cpu_windows_copy(
8379 unsigned int index)
8380 {
8381 return pmap_unmap_cpu_windows_copy_internal(index);
8382 }
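/*
 * Illustrative sketch (not compiled): a typical pairing of the copy-window
 * primitives above.  copy_into_phys_page() is a hypothetical helper; the
 * pmap_* calls use the signatures defined in this file, and preemption must
 * stay disabled so the window remains bound to the current CPU.
 */
#if 0
static void
copy_into_phys_page(ppnum_t dst_pn, const void *src, vm_size_t len)
{
	disable_preemption();
	unsigned int index = pmap_map_cpu_windows_copy(dst_pn,
	    VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_DEFAULT);
	vm_offset_t window = pmap_cpu_windows_copy_addr(cpu_number(), index);

	bcopy(src, (void *)window, len);

	pmap_unmap_cpu_windows_copy(index);
	enable_preemption();
}
#endif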
8383
8384 /*
8385 * Indicate that a pmap is intended to be used as a nested pmap
8386 * within one or more larger address spaces. This must be set
8387 * before pmap_nest() is called with this pmap as the 'subordinate'.
8388 */
8389 MARK_AS_PMAP_TEXT static void
8390 pmap_set_nested_internal(
8391 pmap_t pmap)
8392 {
8393 VALIDATE_PMAP(pmap);
8394 pmap->nested = TRUE;
8395 }
8396
8397 void
8398 pmap_set_nested(
8399 pmap_t pmap)
8400 {
8401 pmap_set_nested_internal(pmap);
8402 }
8403
8404 /*
8405 * pmap_trim_range(pmap, start, end)
8406 *
8407 * pmap = pmap to operate on
8408 * start = start of the range
8409 * end = end of the range
8410 *
8411 * Attempts to deallocate TTEs for the given range within the nested region.
8412 */
8413 MARK_AS_PMAP_TEXT static void
8414 pmap_trim_range(
8415 pmap_t pmap,
8416 addr64_t start,
8417 addr64_t end)
8418 {
8419 addr64_t cur;
8420 addr64_t nested_region_start;
8421 addr64_t nested_region_end;
8422 addr64_t adjusted_start;
8423 addr64_t adjusted_end;
8424 addr64_t adjust_offmask;
8425 tt_entry_t * tte_p;
8426 pt_entry_t * pte_p;
8427 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
8428
8429 if (__improbable(end < start)) {
8430 panic("%s: invalid address range, "
8431 "pmap=%p, start=%p, end=%p",
8432 __func__,
8433 pmap, (void*)start, (void*)end);
8434 }
8435
8436 nested_region_start = pmap->nested ? pmap->nested_region_subord_addr : pmap->nested_region_grand_addr;
8437 nested_region_end = nested_region_start + pmap->nested_region_size;
8438
8439 if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
8440 panic("%s: range outside nested region %p-%p, "
8441 "pmap=%p, start=%p, end=%p",
8442 __func__, (void *)nested_region_start, (void *)nested_region_end,
8443 pmap, (void*)start, (void*)end);
8444 }
8445
8446 /* Contract the range to TT page boundaries. */
8447 adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
8448 adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
8449 adjusted_end = end & ~adjust_offmask;
8450 bool modified = false;
8451
8452 /* Iterate over the range, trying to remove TTEs. */
8453 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
8454 PMAP_LOCK(pmap);
8455
8456 tte_p = pmap_tte(pmap, cur);
8457
8458 if (tte_p == (tt_entry_t *) NULL) {
8459 goto done;
8460 }
8461
8462 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
8463 pte_p = (pt_entry_t *) ttetokv(*tte_p);
8464
8465 if ((ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
8466 (pmap != kernel_pmap)) {
8467 if (pmap->nested == TRUE) {
8468 /* Deallocate for the nested map. */
8469 pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
8470 } else {
8471 /* Just remove for the parent map. */
8472 pmap_tte_remove(pmap, tte_p, pt_attr_twig_level(pt_attr));
8473 }
8474
8475 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(cur, pmap);
8476 modified = true;
8477 }
8478 }
8479
8480 done:
8481 PMAP_UNLOCK(pmap);
8482 }
8483
8484 if (modified) {
8485 sync_tlb_flush();
8486 }
8487
8488 #if (__ARM_VMSA__ > 7)
8489 /* Remove empty L2 TTs. */
8490 adjusted_start = ((start + ARM_TT_L1_OFFMASK) & ~ARM_TT_L1_OFFMASK);
8491 adjusted_end = end & ~ARM_TT_L1_OFFMASK;
8492
8493 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += ARM_TT_L1_SIZE) {
8494 /* For each L1 entry in our range... */
8495 PMAP_LOCK(pmap);
8496
8497 bool remove_tt1e = true;
8498 tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
8499 tt_entry_t * tt2e_start;
8500 tt_entry_t * tt2e_end;
8501 tt_entry_t * tt2e_p;
8502 tt_entry_t tt1e;
8503
8504 if (tt1e_p == NULL) {
8505 PMAP_UNLOCK(pmap);
8506 continue;
8507 }
8508
8509 tt1e = *tt1e_p;
8510
8511 if (tt1e == ARM_TTE_TYPE_FAULT) {
8512 PMAP_UNLOCK(pmap);
8513 continue;
8514 }
8515
8516 tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
8517 tt2e_end = &tt2e_start[TTE_PGENTRIES];
8518
8519 for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
8520 if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
8521 /*
8522 * If any TTEs are populated, don't remove the
8523 * L1 TT.
8524 */
8525 remove_tt1e = false;
8526 }
8527 }
8528
8529 if (remove_tt1e) {
8530 pmap_tte_deallocate(pmap, tt1e_p, PMAP_TT_L1_LEVEL);
8531 PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE, false);
8532 }
8533
8534 PMAP_UNLOCK(pmap);
8535 }
8536 #endif /* (__ARM_VMSA__ > 7) */
8537 }
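/*
 * Worked example of the contraction above, assuming the 16KB-page arm64
 * geometry where one leaf table spans 32MB (pt_attr_leaf_table_offmask()
 * == 0x1ffffff): a request to trim [0x4000, 0x6000000) contracts to
 * [0x2000000, 0x6000000), since only whole twig-sized (leaf-table-sized)
 * chunks can have their TTEs removed; the partial chunk at the front is
 * left in place.
 */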
8538
8539 /*
8540 * pmap_trim_internal(grand, subord, vstart, nstart, size)
8541 *
8542 * grand = pmap subord is nested in
8543 * subord = nested pmap
8544 * vstart = start of the used range in grand
8545 * nstart = start of the used range in subord
8546 * size = size of the used range
8547 *
8548 * Attempts to trim the shared region page tables down to only cover the given
8549 * range in subord and grand.
8550 */
8551 MARK_AS_PMAP_TEXT static void
8552 pmap_trim_internal(
8553 pmap_t grand,
8554 pmap_t subord,
8555 addr64_t vstart,
8556 addr64_t nstart,
8557 uint64_t size)
8558 {
8559 addr64_t vend, nend;
8560 addr64_t adjust_offmask;
8561
8562 if (__improbable(os_add_overflow(vstart, size, &vend))) {
8563 panic("%s: grand addr wraps around, "
8564 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8565 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8566 }
8567
8568 if (__improbable(os_add_overflow(nstart, size, &nend))) {
8569 panic("%s: nested addr wraps around, "
8570 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8571 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8572 }
8573
8574 VALIDATE_PMAP(grand);
8575 VALIDATE_PMAP(subord);
8576
8577 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
8578
8579 PMAP_LOCK(subord);
8580
8581 if (!subord->nested) {
8582 panic("%s: subord is not nestable, "
8583 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8584 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8585 }
8586
8587 if (grand->nested) {
8588 panic("%s: grand is nestable, "
8589 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8590 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8591 }
8592
8593 if (grand->nested_pmap != subord) {
8594 panic("%s: grand->nested != subord, "
8595 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8596 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8597 }
8598
8599 if (size != 0) {
8600 if ((vstart < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
8601 panic("%s: grand range not in nested region, "
8602 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8603 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8604 }
8605
8606 if ((nstart < grand->nested_region_grand_addr) || (nend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
8607 panic("%s: subord range not in nested region, "
8608 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8609 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8610 }
8611 }
8612
8613
8614 if (!grand->nested_has_no_bounds_ref) {
8615 assert(subord->nested_bounds_set);
8616
8617 if (!grand->nested_bounds_set) {
8618 /* Inherit the bounds from subord. */
8619 grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8620 grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8621 grand->nested_bounds_set = true;
8622 }
8623
8624 PMAP_UNLOCK(subord);
8625 return;
8626 }
8627
8628 if ((!subord->nested_bounds_set) && size) {
8629 adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
8630
8631 subord->nested_region_true_start = nstart;
8632 subord->nested_region_true_end = nend;
8633 subord->nested_region_true_start &= ~adjust_offmask;
8634
8635 if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
8636 panic("%s: padded true end wraps around, "
8637 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8638 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8639 }
8640
8641 subord->nested_region_true_end &= ~adjust_offmask;
8642 subord->nested_bounds_set = true;
8643 }
8644
8645 if (subord->nested_bounds_set) {
8646 /* Inherit the bounds from subord. */
8647 grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8648 grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8649 grand->nested_bounds_set = true;
8650
8651 /* If we know the bounds, we can trim the pmap. */
8652 grand->nested_has_no_bounds_ref = false;
8653 PMAP_UNLOCK(subord);
8654 } else {
8655 /* Don't trim if we don't know the bounds. */
8656 PMAP_UNLOCK(subord);
8657 return;
8658 }
8659
8660 /* Trim grand to only cover the given range. */
8661 pmap_trim_range(grand, grand->nested_region_grand_addr, grand->nested_region_true_start);
8662 pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_grand_addr + grand->nested_region_size));
8663
8664 /* Try to trim subord. */
8665 pmap_trim_subord(subord);
8666 }
8667
8668 MARK_AS_PMAP_TEXT static void
8669 pmap_trim_self(pmap_t pmap)
8670 {
8671 if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
8672 /* If we have a no bounds ref, we need to drop it. */
8673 PMAP_LOCK(pmap->nested_pmap);
8674 pmap->nested_has_no_bounds_ref = false;
8675 boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
8676 vm_map_offset_t nested_region_true_start = (pmap->nested_pmap->nested_region_true_start - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
8677 vm_map_offset_t nested_region_true_end = (pmap->nested_pmap->nested_region_true_end - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
8678 PMAP_UNLOCK(pmap->nested_pmap);
8679
8680 if (nested_bounds_set) {
8681 pmap_trim_range(pmap, pmap->nested_region_grand_addr, nested_region_true_start);
8682 pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_grand_addr + pmap->nested_region_size));
8683 }
8684 /*
8685 * Try trimming the nested pmap, in case we had the
8686 * last reference.
8687 */
8688 pmap_trim_subord(pmap->nested_pmap);
8689 }
8690 }
8691
8692 /*
8693 * pmap_trim_subord(subord)
8694 *
8695 * subord = nested pmap we are attempting to trim
8696 *
8697 * Trims subord if possible, i.e. once the last no-bounds reference has
8698 * been dropped and the true bounds of the nested region are known.
8699 */
8700 MARK_AS_PMAP_TEXT static void
8701 pmap_trim_subord(pmap_t subord)
8702 {
8703 bool contract_subord = false;
8704
8705 PMAP_LOCK(subord);
8706
8707 subord->nested_no_bounds_refcnt--;
8708
8709 if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
8710 /* If this was the last no bounds reference, trim subord. */
8711 contract_subord = true;
8712 }
8713
8714 PMAP_UNLOCK(subord);
8715
8716 if (contract_subord) {
8717 pmap_trim_range(subord, subord->nested_region_subord_addr, subord->nested_region_true_start);
8718 pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_subord_addr + subord->nested_region_size);
8719 }
8720 }
8721
8722 void
8723 pmap_trim(
8724 pmap_t grand,
8725 pmap_t subord,
8726 addr64_t vstart,
8727 addr64_t nstart,
8728 uint64_t size)
8729 {
8730 pmap_trim_internal(grand, subord, vstart, nstart, size);
8731 }
8732
8733
8734 /*
8735 * kern_return_t pmap_nest(grand, subord, vstart, nstart, size)
8736 *
8737 * grand = the pmap that we will nest subord into
8738 * subord = the pmap that goes into the grand
8739 * vstart = start of range in pmap to be inserted
8740 * nstart = start of the range in the nested pmap
8741 * size = Size of nest area (up to 16TB)
8742 *
8743 * Inserts a pmap into another. This is used to implement shared segments.
8744 *
8745 */
8746
8747 MARK_AS_PMAP_TEXT static kern_return_t
8748 pmap_nest_internal(
8749 pmap_t grand,
8750 pmap_t subord,
8751 addr64_t vstart,
8752 addr64_t nstart,
8753 uint64_t size)
8754 {
8755 kern_return_t kr = KERN_FAILURE;
8756 vm_map_offset_t vaddr, nvaddr;
8757 tt_entry_t *stte_p;
8758 tt_entry_t *gtte_p;
8759 unsigned int i;
8760 unsigned int num_tte;
8761 unsigned int nested_region_asid_bitmap_size;
8762 unsigned int* nested_region_asid_bitmap;
8763 int expand_options = 0;
8764
8765 addr64_t vend, nend;
8766 if (__improbable(os_add_overflow(vstart, size, &vend))) {
8767 panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
8768 }
8769 if (__improbable(os_add_overflow(nstart, size, &nend))) {
8770 panic("%s: %p nested addr wraps around: 0x%llx + 0x%llx", __func__, subord, nstart, size);
8771 }
8772
8773 VALIDATE_PMAP(grand);
8774 VALIDATE_PMAP(subord);
8775
8776 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
8777 assert(pmap_get_pt_attr(subord) == pt_attr);
8778
8779
8780 if (((size | vstart | nstart) & (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL) {
8781 panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx\n", grand, vstart, nstart, size);
8782 }
8783
8784 if (!subord->nested) {
8785 panic("%s: subordinate pmap %p is not nestable", __func__, subord);
8786 }
8787
8788 if ((grand->nested_pmap != PMAP_NULL) && (grand->nested_pmap != subord)) {
8789 panic("pmap_nest() pmap %p has a nested pmap\n", grand);
8790 }
8791
8792 if (subord->nested_region_asid_bitmap == NULL) {
8793 nested_region_asid_bitmap_size = (unsigned int)(size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY);
8794
8795 nested_region_asid_bitmap = kalloc(nested_region_asid_bitmap_size * sizeof(unsigned int));
8796 bzero(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
8797
8798 PMAP_LOCK(subord);
8799 if (subord->nested_region_asid_bitmap == NULL) {
8800 subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
8801 subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
8802 subord->nested_region_subord_addr = nstart;
8803 subord->nested_region_size = (mach_vm_offset_t) size;
8804 nested_region_asid_bitmap = NULL;
8805 }
8806 PMAP_UNLOCK(subord);
8807 if (nested_region_asid_bitmap != NULL) {
8808 kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
8809 }
8810 }
8811 if ((subord->nested_region_subord_addr + subord->nested_region_size) < nend) {
8812 uint64_t new_size;
8813 unsigned int new_nested_region_asid_bitmap_size;
8814 unsigned int* new_nested_region_asid_bitmap;
8815
8816 nested_region_asid_bitmap = NULL;
8817 nested_region_asid_bitmap_size = 0;
8818 new_size = nend - subord->nested_region_subord_addr;
8819
8820 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
8821 new_nested_region_asid_bitmap_size = (unsigned int)((new_size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY)) + 1;
8822
8823 new_nested_region_asid_bitmap = kalloc(new_nested_region_asid_bitmap_size * sizeof(unsigned int));
8824 PMAP_LOCK(subord);
8825 if (subord->nested_region_size < new_size) {
8826 bzero(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));
8827 bcopy(subord->nested_region_asid_bitmap, new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
8828 nested_region_asid_bitmap_size = subord->nested_region_asid_bitmap_size;
8829 nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
8830 subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
8831 subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
8832 subord->nested_region_size = new_size;
8833 new_nested_region_asid_bitmap = NULL;
8834 }
8835 PMAP_UNLOCK(subord);
8836 if (nested_region_asid_bitmap != NULL)
8837 { kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));}
8838 if (new_nested_region_asid_bitmap != NULL)
8839 { kfree(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));}
8840 }
8841
8842 PMAP_LOCK(subord);
8843 if (grand->nested_pmap == PMAP_NULL) {
8844 grand->nested_pmap = subord;
8845
8846 if (!subord->nested_bounds_set) {
8847 /*
8848 * We are nesting without the shared region's bounds
8849 * being known. We'll have to trim the pmap later.
8850 */
8851 grand->nested_has_no_bounds_ref = true;
8852 subord->nested_no_bounds_refcnt++;
8853 }
8854
8855 grand->nested_region_grand_addr = vstart;
8856 grand->nested_region_subord_addr = nstart;
8857 grand->nested_region_size = (mach_vm_offset_t) size;
8858 } else {
8859 if ((grand->nested_region_grand_addr > vstart)) {
8860 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand);
8861 } else if ((grand->nested_region_grand_addr + grand->nested_region_size) < vend) {
8862 grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_grand_addr + size);
8863 }
8864 }
8865
8866 #if (__ARM_VMSA__ == 7)
8867 nvaddr = (vm_map_offset_t) nstart;
8868 vaddr = (vm_map_offset_t) vstart;
8869 num_tte = size >> ARM_TT_L1_SHIFT;
8870
8871 for (i = 0; i < num_tte; i++) {
8872 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
8873 goto expand_next;
8874 }
8875
8876 stte_p = pmap_tte(subord, nvaddr);
8877 if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
8878 PMAP_UNLOCK(subord);
8879 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_L2_LEVEL);
8880
8881 if (kr != KERN_SUCCESS) {
8882 PMAP_LOCK(grand);
8883 goto done;
8884 }
8885
8886 PMAP_LOCK(subord);
8887 }
8888 PMAP_UNLOCK(subord);
8889 PMAP_LOCK(grand);
8890 stte_p = pmap_tte(grand, vaddr);
8891 if (stte_p == (tt_entry_t *)NULL) {
8892 PMAP_UNLOCK(grand);
8893 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);
8894
8895 if (kr != KERN_SUCCESS) {
8896 PMAP_LOCK(grand);
8897 goto done;
8898 }
8899 } else {
8900 PMAP_UNLOCK(grand);
8901 kr = KERN_SUCCESS;
8902 }
8903 PMAP_LOCK(subord);
8904
8905 expand_next:
8906 nvaddr += ARM_TT_L1_SIZE;
8907 vaddr += ARM_TT_L1_SIZE;
8908 }
8909
8910 #else
8911 nvaddr = (vm_map_offset_t) nstart;
8912 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
8913
8914 for (i = 0; i < num_tte; i++) {
8915 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
8916 goto expand_next;
8917 }
8918
8919 stte_p = pmap_tte(subord, nvaddr);
8920 if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
8921 PMAP_UNLOCK(subord);
8922 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_LEAF_LEVEL);
8923
8924 if (kr != KERN_SUCCESS) {
8925 PMAP_LOCK(grand);
8926 goto done;
8927 }
8928
8929 PMAP_LOCK(subord);
8930 }
8931 expand_next:
8932 nvaddr += pt_attr_twig_size(pt_attr);
8933 }
8934 #endif
8935 PMAP_UNLOCK(subord);
8936
8937 /*
8938 * copy TTEs from the subord pmap into the grand pmap
8939 */
8940
8941 PMAP_LOCK(grand);
8942 nvaddr = (vm_map_offset_t) nstart;
8943 vaddr = (vm_map_offset_t) vstart;
8944
8945
8946 #if (__ARM_VMSA__ == 7)
8947 for (i = 0; i < num_tte; i++) {
8948 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
8949 goto nest_next;
8950 }
8951
8952 stte_p = pmap_tte(subord, nvaddr);
8953 gtte_p = pmap_tte(grand, vaddr);
8954 *gtte_p = *stte_p;
8955
8956 nest_next:
8957 nvaddr += ARM_TT_L1_SIZE;
8958 vaddr += ARM_TT_L1_SIZE;
8959 }
8960 #else
8961 for (i = 0; i < num_tte; i++) {
8962 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
8963 goto nest_next;
8964 }
8965
8966 stte_p = pmap_tte(subord, nvaddr);
8967 gtte_p = pmap_tte(grand, vaddr);
8968 if (gtte_p == PT_ENTRY_NULL) {
8969 PMAP_UNLOCK(grand);
8970 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_TWIG_LEVEL);
8971 PMAP_LOCK(grand);
8972
8973 if (kr != KERN_SUCCESS) {
8974 goto done;
8975 }
8976
8977 gtte_p = pmap_tt2e(grand, vaddr);
8978 }
8979 *gtte_p = *stte_p;
8980
8981 nest_next:
8982 vaddr += pt_attr_twig_size(pt_attr);
8983 nvaddr += pt_attr_twig_size(pt_attr);
8984 }
8985 #endif
8986
8987 kr = KERN_SUCCESS;
8988 done:
8989
8990 stte_p = pmap_tte(grand, vstart);
8991 FLUSH_PTE_RANGE_STRONG(stte_p, stte_p + num_tte);
8992
8993 #if (__ARM_VMSA__ > 7)
8994 /*
8995 * check for overflow on LP64 arch
8996 */
8997 assert((size & 0xFFFFFFFF00000000ULL) == 0);
8998 #endif
8999 PMAP_UPDATE_TLBS(grand, vstart, vend, false);
9000
9001 PMAP_UNLOCK(grand);
9002 return kr;
9003 }
9004
9005 kern_return_t
9006 pmap_nest(
9007 pmap_t grand,
9008 pmap_t subord,
9009 addr64_t vstart,
9010 addr64_t nstart,
9011 uint64_t size)
9012 {
9013 kern_return_t kr = KERN_FAILURE;
9014
9015 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
9016 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
9017 VM_KERNEL_ADDRHIDE(vstart));
9018
9019 kr = pmap_nest_internal(grand, subord, vstart, nstart, size);
9020
9021 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);
9022
9023 return kr;
9024 }
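/*
 * Illustrative call sequence (not compiled) showing how the VM shared-region
 * layer is expected to use the nesting and trimming entry points in this
 * file.  task_pmap, shared_region_pmap, used_start and used_size are
 * hypothetical names for that caller's state; SHARED_REGION_BASE/SIZE come
 * from <mach/shared_region.h>.
 */
#if 0
pmap_set_nested(shared_region_pmap);            /* mark as nestable, exactly once */
pmap_nest(task_pmap, shared_region_pmap,
    SHARED_REGION_BASE, SHARED_REGION_BASE, SHARED_REGION_SIZE);
/* ... later, once the actually used subrange of the region is known ... */
pmap_trim(task_pmap, shared_region_pmap, used_start, used_start, used_size);
#endif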
9025
9026 /*
9027 * kern_return_t pmap_unnest(grand, vaddr, size)
9028 *
9029 * grand = the pmap that will have the virtual range unnested
9030 * vaddr = start of range in pmap to be unnested
9031 * size = size of range in pmap to be unnested
9032 *
9033 */
9034
9035 kern_return_t
9036 pmap_unnest(
9037 pmap_t grand,
9038 addr64_t vaddr,
9039 uint64_t size)
9040 {
9041 return pmap_unnest_options(grand, vaddr, size, 0);
9042 }
9043
9044 MARK_AS_PMAP_TEXT static kern_return_t
9045 pmap_unnest_options_internal(
9046 pmap_t grand,
9047 addr64_t vaddr,
9048 uint64_t size,
9049 unsigned int option)
9050 {
9051 vm_map_offset_t start;
9052 vm_map_offset_t addr;
9053 tt_entry_t *tte_p;
9054 unsigned int current_index;
9055 unsigned int start_index;
9056 unsigned int max_index;
9057 unsigned int num_tte;
9058 unsigned int i;
9059
9060 addr64_t vend;
9061 if (__improbable(os_add_overflow(vaddr, size, &vend))) {
9062 panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__, grand, vaddr, size);
9063 }
9064
9065 VALIDATE_PMAP(grand);
9066
9067 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
9068
9069 if (((size | vaddr) & pt_attr_twig_offmask(pt_attr)) != 0x0ULL) {
9070 panic("pmap_unnest(): unaligned request");
9071 }
9072
9073 if ((option & PMAP_UNNEST_CLEAN) == 0) {
9074 if (grand->nested_pmap == NULL) {
9075 panic("%s: %p has no nested pmap", __func__, grand);
9076 }
9077
9078 if ((vaddr < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
9079 panic("%s: %p: unnest request to not-fully-nested region [%p, %p)", __func__, grand, (void*)vaddr, (void*)vend);
9080 }
9081
9082 PMAP_LOCK(grand->nested_pmap);
9083
9084 start = vaddr - grand->nested_region_grand_addr + grand->nested_region_subord_addr;
9085 start_index = (unsigned int)((vaddr - grand->nested_region_grand_addr) >> pt_attr_twig_shift(pt_attr));
9086 max_index = (unsigned int)(start_index + (size >> pt_attr_twig_shift(pt_attr)));
9087 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
9088
9089 for (current_index = start_index, addr = start; current_index < max_index; current_index++, addr += pt_attr_twig_size(pt_attr)) {
9090 pt_entry_t *bpte, *epte, *cpte;
9091
9092 if (addr < grand->nested_pmap->nested_region_true_start) {
9093 /* We haven't reached the interesting range. */
9094 continue;
9095 }
9096
9097 if (addr >= grand->nested_pmap->nested_region_true_end) {
9098 /* We're done with the interesting range. */
9099 break;
9100 }
9101
9102 bpte = pmap_pte(grand->nested_pmap, addr);
9103 epte = bpte + (pt_attr_leaf_index_mask(pt_attr) >> pt_attr_leaf_shift(pt_attr));
9104
9105 if (!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap)) {
9106 setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);
9107
9108 for (cpte = bpte; cpte <= epte; cpte++) {
9109 pmap_paddr_t pa;
9110 int pai = 0;
9111 boolean_t managed = FALSE;
9112 pt_entry_t spte;
9113
9114 if ((*cpte != ARM_PTE_TYPE_FAULT)
9115 && (!ARM_PTE_IS_COMPRESSED(*cpte, cpte))) {
9116 spte = *cpte;
9117 while (!managed) {
9118 pa = pte_to_pa(spte);
9119 if (!pa_valid(pa)) {
9120 break;
9121 }
9122 pai = (int)pa_index(pa);
9123 LOCK_PVH(pai);
9124 spte = *cpte;
9125 pa = pte_to_pa(spte);
9126 if (pai == (int)pa_index(pa)) {
9127 managed = TRUE;
9128 break; // Leave the PVH locked as we'll unlock it after we update the PTE
9129 }
9130 UNLOCK_PVH(pai);
9131 }
9132
9133 if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
9134 WRITE_PTE_FAST(cpte, (spte | ARM_PTE_NG));
9135 }
9136
9137 if (managed) {
9138 ASSERT_PVH_LOCKED(pai);
9139 UNLOCK_PVH(pai);
9140 }
9141 }
9142 }
9143 }
9144
9145 FLUSH_PTE_RANGE_STRONG(bpte, epte);
9146 flush_mmu_tlb_region_asid_async(start, (unsigned)size, grand->nested_pmap);
9147 }
9148
9149 sync_tlb_flush();
9150
9151 PMAP_UNLOCK(grand->nested_pmap);
9152 }
9153
9154 PMAP_LOCK(grand);
9155
9156 /*
9157 * invalidate all TTEs for the segment at vaddr in pmap grand
9158 */
9159 start = vaddr;
9160 addr = vaddr;
9161
9162 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
9163
9164 for (i = 0; i < num_tte; i++, addr += pt_attr_twig_size(pt_attr)) {
9165 if (addr < grand->nested_pmap->nested_region_true_start) {
9166 /* We haven't reached the interesting range. */
9167 continue;
9168 }
9169
9170 if (addr >= grand->nested_pmap->nested_region_true_end) {
9171 /* We're done with the interesting range. */
9172 break;
9173 }
9174
9175 tte_p = pmap_tte(grand, addr);
9176 *tte_p = ARM_TTE_TYPE_FAULT;
9177 }
9178
9179 tte_p = pmap_tte(grand, start);
9180 FLUSH_PTE_RANGE_STRONG(tte_p, tte_p + num_tte);
9181 PMAP_UPDATE_TLBS(grand, start, vend, false);
9182
9183 PMAP_UNLOCK(grand);
9184
9185 return KERN_SUCCESS;
9186 }
9187
9188 kern_return_t
9189 pmap_unnest_options(
9190 pmap_t grand,
9191 addr64_t vaddr,
9192 uint64_t size,
9193 unsigned int option)
9194 {
9195 kern_return_t kr = KERN_FAILURE;
9196
9197 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
9198 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
9199
9200 kr = pmap_unnest_options_internal(grand, vaddr, size, option);
9201
9202 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);
9203
9204 return kr;
9205 }
9206
9207 boolean_t
9208 pmap_adjust_unnest_parameters(
9209 __unused pmap_t p,
9210 __unused vm_map_offset_t *s,
9211 __unused vm_map_offset_t *e)
9212 {
9213 return TRUE; /* to get to log_unnest_badness()... */
9214 }
9215
9216 /*
9217 * disable no-execute capability on
9218 * the specified pmap
9219 */
9220 #if DEVELOPMENT || DEBUG
9221 void
9222 pmap_disable_NX(
9223 pmap_t pmap)
9224 {
9225 pmap->nx_enabled = FALSE;
9226 }
9227 #else
9228 void
9229 pmap_disable_NX(
9230 __unused pmap_t pmap)
9231 {
9232 }
9233 #endif
9234
9235 void
9236 pt_fake_zone_init(
9237 int zone_index)
9238 {
9239 pt_fake_zone_index = zone_index;
9240 }
9241
9242 void
9243 pt_fake_zone_info(
9244 int *count,
9245 vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
9246 uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
9247 {
9248 *count = inuse_pmap_pages_count;
9249 *cur_size = PAGE_SIZE * (inuse_pmap_pages_count);
9250 *max_size = PAGE_SIZE * (inuse_pmap_pages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
9251 *elem_size = PAGE_SIZE;
9252 *alloc_size = PAGE_SIZE;
9253 *sum_size = (alloc_pmap_pages_count) * PAGE_SIZE;
9254
9255 *collectable = 1;
9256 *exhaustable = 0;
9257 *caller_acct = 1;
9258 }
9259
9260 /*
9261 * flush a range of hardware TLB entries.
9262 * NOTE: assumes the smallest TLB entry in use will be for
9263 * an ARM small page (4K).
9264 */
9265
9266 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
9267
9268 #if __ARM_RANGE_TLBI__
9269 #define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
9270 #define ARM64_FULL_TLB_FLUSH_THRESHOLD ARM64_16K_TLB_RANGE_PAGES
9271 #else
9272 #define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
9273 #endif // __ARM_RANGE_TLBI__
9274
9275 static void
9276 flush_mmu_tlb_region_asid_async(
9277 vm_offset_t va,
9278 unsigned length,
9279 pmap_t pmap)
9280 {
9281 #if (__ARM_VMSA__ == 7)
9282 vm_offset_t end = va + length;
9283 uint32_t asid;
9284
9285 asid = pmap->hw_asid;
9286
9287 if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
9288 boolean_t flush_all = FALSE;
9289
9290 if ((asid == 0) || (pmap->nested == TRUE)) {
9291 flush_all = TRUE;
9292 }
9293 if (flush_all) {
9294 flush_mmu_tlb_async();
9295 } else {
9296 flush_mmu_tlb_asid_async(asid);
9297 }
9298
9299 return;
9300 }
9301 if (pmap->nested == TRUE) {
9302 #if !__ARM_MP_EXT__
9303 flush_mmu_tlb();
9304 #else
9305 va = arm_trunc_page(va);
9306 while (va < end) {
9307 flush_mmu_tlb_mva_entries_async(va);
9308 va += ARM_SMALL_PAGE_SIZE;
9309 }
9310 #endif
9311 return;
9312 }
9313 va = arm_trunc_page(va) | (asid & 0xff);
9314 flush_mmu_tlb_entries_async(va, end);
9315
9316 #else
9317 unsigned npages = length >> pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
9318 uint32_t asid;
9319
9320 asid = pmap->hw_asid;
9321
9322 if (npages > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
9323 boolean_t flush_all = FALSE;
9324
9325 if ((asid == 0) || (pmap->nested == TRUE)) {
9326 flush_all = TRUE;
9327 }
9328 if (flush_all) {
9329 flush_mmu_tlb_async();
9330 } else {
9331 flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
9332 }
9333 return;
9334 }
9335 #if __ARM_RANGE_TLBI__
9336 if (npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
9337 va = generate_rtlbi_param(npages, asid, va);
9338 if (pmap->nested == TRUE) {
9339 flush_mmu_tlb_allrange_async(va);
9340 } else {
9341 flush_mmu_tlb_range_async(va);
9342 }
9343 return;
9344 }
9345 #endif
9346 vm_offset_t end = tlbi_asid(asid) | tlbi_addr(va + length);
9347 va = tlbi_asid(asid) | tlbi_addr(va);
9348 if (pmap->nested == TRUE) {
9349 flush_mmu_tlb_allentries_async(va, end);
9350 } else {
9351 flush_mmu_tlb_entries_async(va, end);
9352 }
9353
9354 #endif
9355 }
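/*
 * Worked example of the thresholds used above, assuming a 16KB-page arm64
 * configuration without __ARM_RANGE_TLBI__: flushing 2MB is 2MB / 16KB =
 * 128 pages, which does not exceed ARM64_FULL_TLB_FLUSH_THRESHOLD (256), so
 * the tail of the routine issues per-entry TLBIs; flushing 16MB is 1024
 * pages, which exceeds the threshold and collapses into a single ASID-wide
 * flush (or a full TLB flush for the kernel/nested case).
 */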
9356
9357 MARK_AS_PMAP_TEXT static void
9358 flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap)
9359 {
9360 #if (__ARM_VMSA__ == 7)
9361 flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
9362 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
9363 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
9364 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
9365 #else
9366 flush_mmu_tlb_entry_async(tlbi_addr(va & ~pt_attr_twig_offmask(pmap_get_pt_attr(pmap))) | tlbi_asid(pmap->hw_asid));
9367 #endif
9368 }
9369
9370 MARK_AS_PMAP_TEXT static void
9371 flush_mmu_tlb_full_asid_async(pmap_t pmap)
9372 {
9373 #if (__ARM_VMSA__ == 7)
9374 flush_mmu_tlb_asid_async(pmap->hw_asid);
9375 #else /* (__ARM_VMSA__ == 7) */
9376 flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
9377 #endif /* (__ARM_VMSA__ == 7) */
9378 }
9379
9380 void
9381 flush_mmu_tlb_region(
9382 vm_offset_t va,
9383 unsigned length)
9384 {
9385 flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
9386 sync_tlb_flush();
9387 }
9388
9389 static pmap_io_range_t*
9390 pmap_find_io_attr(pmap_paddr_t paddr)
9391 {
9392 pmap_io_range_t find_range = {.addr = paddr & ~PAGE_MASK, .len = PAGE_SIZE};
9393 unsigned int begin = 0, end = num_io_rgns - 1;
9394 if ((num_io_rgns == 0) || (paddr < io_attr_table[begin].addr) ||
9395 (paddr >= (io_attr_table[end].addr + io_attr_table[end].len))) {
9396 return NULL;
9397 }
9398
9399 for (;;) {
9400 unsigned int middle = (begin + end) / 2;
9401 int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
9402 if (cmp == 0) {
9403 return &io_attr_table[middle];
9404 } else if (begin == end) {
9405 break;
9406 } else if (cmp > 0) {
9407 begin = middle + 1;
9408 } else {
9409 end = middle;
9410 }
9411 }
9412
9413 return NULL;
9414 }
9415
9416 unsigned int
9417 pmap_cache_attributes(
9418 ppnum_t pn)
9419 {
9420 pmap_paddr_t paddr;
9421 int pai;
9422 unsigned int result;
9423 pp_attr_t pp_attr_current;
9424
9425 paddr = ptoa(pn);
9426
9427 assert(vm_last_phys > vm_first_phys); // Check that pmap has been bootstrapped
9428
9429 if (!pa_valid(paddr)) {
9430 pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
9431 return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
9432 }
9433
9434 result = VM_WIMG_DEFAULT;
9435
9436 pai = (int)pa_index(paddr);
9437
9438 pp_attr_current = pp_attr_table[pai];
9439 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
9440 result = pp_attr_current & PP_ATTR_WIMG_MASK;
9441 }
9442 return result;
9443 }
9444
9445 MARK_AS_PMAP_TEXT static void
9446 pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
9447 {
9448 if ((wimg_bits_prev != wimg_bits_new)
9449 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
9450 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
9451 && (wimg_bits_new != VM_WIMG_COPYBACK))
9452 || ((wimg_bits_prev == VM_WIMG_WTHRU)
9453 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
9454 pmap_sync_page_attributes_phys(pn);
9455 }
9456
9457 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
9458 pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
9459 }
9460 }
9461
9462 MARK_AS_PMAP_TEXT static __unused void
9463 pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
9464 {
9465 pmap_paddr_t paddr = ptoa(pn);
9466 int pai = (int)pa_index(paddr);
9467
9468 if (__improbable(!pa_valid(paddr))) {
9469 panic("%s called on non-managed page 0x%08x", __func__, pn);
9470 }
9471
9472 LOCK_PVH(pai);
9473
9474
9475 pmap_update_cache_attributes_locked(pn, new_cacheattr);
9476
9477 UNLOCK_PVH(pai);
9478
9479 pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
9480 }
9481
9482 void *
9483 pmap_map_compressor_page(ppnum_t pn)
9484 {
9485 #if __ARM_PTE_PHYSMAP__
9486 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
9487 if (cacheattr != VM_WIMG_DEFAULT) {
9488 pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
9489 }
9490 #endif
9491 return (void*)phystokv(ptoa(pn));
9492 }
9493
9494 void
9495 pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
9496 {
9497 #if __ARM_PTE_PHYSMAP__
9498 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
9499 if (cacheattr != VM_WIMG_DEFAULT) {
9500 pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
9501 }
9502 #endif
9503 }
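/*
 * Illustrative sketch (not compiled): the compressor is expected to bracket
 * direct use of a physical page with the pair above, so that any
 * non-default cacheability is temporarily switched to VM_WIMG_DEFAULT for
 * the physical-aperture access.  The body of the access is elided.
 */
#if 0
void *kva = pmap_map_compressor_page(pn);
/* ... compress or decompress using kva ... */
pmap_unmap_compressor_page(pn, kva);
#endif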
9504
9505 MARK_AS_PMAP_TEXT static boolean_t
9506 pmap_batch_set_cache_attributes_internal(
9507 ppnum_t pn,
9508 unsigned int cacheattr,
9509 unsigned int page_cnt,
9510 unsigned int page_index,
9511 boolean_t doit,
9512 unsigned int *res)
9513 {
9514 pmap_paddr_t paddr;
9515 int pai;
9516 pp_attr_t pp_attr_current;
9517 pp_attr_t pp_attr_template;
9518 unsigned int wimg_bits_prev, wimg_bits_new;
9519
9520 if (cacheattr & VM_WIMG_USE_DEFAULT) {
9521 cacheattr = VM_WIMG_DEFAULT;
9522 }
9523
9524 if ((doit == FALSE) && (*res == 0)) {
9525 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
9526 *res = page_cnt;
9527 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
9528 if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt << PAGE_SHIFT) == FALSE) {
9529 return FALSE;
9530 }
9531 }
9532
9533 paddr = ptoa(pn);
9534
9535 if (!pa_valid(paddr)) {
9536 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn);
9537 }
9538
9539 pai = (int)pa_index(paddr);
9540
9541 if (doit) {
9542 LOCK_PVH(pai);
9543 }
9544
9545 do {
9546 pp_attr_current = pp_attr_table[pai];
9547 wimg_bits_prev = VM_WIMG_DEFAULT;
9548 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
9549 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
9550 }
9551
9552 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
9553
9554 if (!doit) {
9555 break;
9556 }
9557
9558 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
9559 * to avoid losing simultaneous updates to other bits like refmod. */
9560 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
9561
9562 wimg_bits_new = VM_WIMG_DEFAULT;
9563 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
9564 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
9565 }
9566
9567 if (doit) {
9568 if (wimg_bits_new != wimg_bits_prev) {
9569 pmap_update_cache_attributes_locked(pn, cacheattr);
9570 }
9571 UNLOCK_PVH(pai);
9572 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
9573 pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
9574 }
9575 } else {
9576 if (wimg_bits_new == VM_WIMG_COPYBACK) {
9577 return FALSE;
9578 }
9579 if (wimg_bits_prev == wimg_bits_new) {
9580 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
9581 *res = *res - 1;
9582 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
9583 if (!platform_cache_batch_wimg(wimg_bits_new, (*res) << PAGE_SHIFT)) {
9584 return FALSE;
9585 }
9586 }
9587 return TRUE;
9588 }
9589
9590 if (page_cnt == (page_index + 1)) {
9591 wimg_bits_prev = VM_WIMG_COPYBACK;
9592 if (((wimg_bits_prev != wimg_bits_new))
9593 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
9594 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
9595 && (wimg_bits_new != VM_WIMG_COPYBACK))
9596 || ((wimg_bits_prev == VM_WIMG_WTHRU)
9597 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
9598 platform_cache_flush_wimg(wimg_bits_new);
9599 }
9600 }
9601
9602 return TRUE;
9603 }
9604
9605 boolean_t
9606 pmap_batch_set_cache_attributes(
9607 ppnum_t pn,
9608 unsigned int cacheattr,
9609 unsigned int page_cnt,
9610 unsigned int page_index,
9611 boolean_t doit,
9612 unsigned int *res)
9613 {
9614 return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
9615 }
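/*
 * Illustrative sketch (not compiled) of the two-pass protocol implied by
 * the doit/res parameters above: a hypothetical caller first dry-runs every
 * page with doit == FALSE to decide whether a batched cache operation is
 * possible, then re-walks the list with doit == TRUE to apply the new
 * attribute (the final page of the second pass triggers the batched flush).
 */
#if 0
unsigned int res = 0;
boolean_t batchable = TRUE;

for (i = 0; i < page_cnt && batchable; i++) {
	batchable = pmap_batch_set_cache_attributes(pn[i], cacheattr, page_cnt, i, FALSE, &res);
}
for (i = 0; i < page_cnt; i++) {
	(void) pmap_batch_set_cache_attributes(pn[i], cacheattr, page_cnt, i, TRUE, &res);
}
#endif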
9616
9617 MARK_AS_PMAP_TEXT static void
9618 pmap_set_cache_attributes_priv(
9619 ppnum_t pn,
9620 unsigned int cacheattr,
9621 boolean_t external __unused)
9622 {
9623 pmap_paddr_t paddr;
9624 int pai;
9625 pp_attr_t pp_attr_current;
9626 pp_attr_t pp_attr_template;
9627 unsigned int wimg_bits_prev, wimg_bits_new;
9628
9629 paddr = ptoa(pn);
9630
9631 if (!pa_valid(paddr)) {
9632 return; /* Not a managed page. */
9633 }
9634
9635 if (cacheattr & VM_WIMG_USE_DEFAULT) {
9636 cacheattr = VM_WIMG_DEFAULT;
9637 }
9638
9639 pai = (int)pa_index(paddr);
9640
9641 LOCK_PVH(pai);
9642
9643
9644 do {
9645 pp_attr_current = pp_attr_table[pai];
9646 wimg_bits_prev = VM_WIMG_DEFAULT;
9647 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
9648 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
9649 }
9650
9651 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
9652
9653 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
9654 * to avoid losing simultaneous updates to other bits like refmod. */
9655 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
9656
9657 wimg_bits_new = VM_WIMG_DEFAULT;
9658 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
9659 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
9660 }
9661
9662 if (wimg_bits_new != wimg_bits_prev) {
9663 pmap_update_cache_attributes_locked(pn, cacheattr);
9664 }
9665
9666 UNLOCK_PVH(pai);
9667
9668 pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
9669 }
9670
9671 MARK_AS_PMAP_TEXT static void
9672 pmap_set_cache_attributes_internal(
9673 ppnum_t pn,
9674 unsigned int cacheattr)
9675 {
9676 pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
9677 }
9678
9679 void
9680 pmap_set_cache_attributes(
9681 ppnum_t pn,
9682 unsigned int cacheattr)
9683 {
9684 pmap_set_cache_attributes_internal(pn, cacheattr);
9685 }
9686
9687 MARK_AS_PMAP_TEXT void
9688 pmap_update_cache_attributes_locked(
9689 ppnum_t ppnum,
9690 unsigned attributes)
9691 {
9692 pmap_paddr_t phys = ptoa(ppnum);
9693 pv_entry_t *pve_p;
9694 pt_entry_t *pte_p;
9695 pv_entry_t **pv_h;
9696 pt_entry_t tmplate;
9697 unsigned int pai;
9698 boolean_t tlb_flush_needed = FALSE;
9699
9700 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);
9701
9702 #if __ARM_PTE_PHYSMAP__
9703 vm_offset_t kva = phystokv(phys);
9704 pte_p = pmap_pte(kernel_pmap, kva);
9705
9706 tmplate = *pte_p;
9707 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
9708 tmplate |= wimg_to_pte(attributes);
9709 #if (__ARM_VMSA__ > 7)
9710 if (tmplate & ARM_PTE_HINT_MASK) {
9711 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
9712 __FUNCTION__, pte_p, (void *)kva, tmplate);
9713 }
9714 #endif
9715 WRITE_PTE_STRONG(pte_p, tmplate);
9716 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
9717 tlb_flush_needed = TRUE;
9718 #endif
9719
9720 pai = (unsigned int)pa_index(phys);
9721
9722 pv_h = pai_to_pvh(pai);
9723
9724 pte_p = PT_ENTRY_NULL;
9725 pve_p = PV_ENTRY_NULL;
9726 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
9727 pte_p = pvh_ptep(pv_h);
9728 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
9729 pve_p = pvh_list(pv_h);
9730 pte_p = PT_ENTRY_NULL;
9731 }
9732
9733 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
9734 vm_map_address_t va;
9735 pmap_t pmap;
9736
9737 if (pve_p != PV_ENTRY_NULL) {
9738 pte_p = pve_get_ptep(pve_p);
9739 }
9740 #ifdef PVH_FLAG_IOMMU
9741 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
9742 goto cache_skip_pve;
9743 }
9744 #endif
9745 pmap = ptep_get_pmap(pte_p);
9746 va = ptep_get_va(pte_p);
9747
9748 tmplate = *pte_p;
9749 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
9750 tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes);
9751
9752 WRITE_PTE_STRONG(pte_p, tmplate);
9753 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
9754 tlb_flush_needed = TRUE;
9755
9756 #ifdef PVH_FLAG_IOMMU
9757 cache_skip_pve:
9758 #endif
9759 pte_p = PT_ENTRY_NULL;
9760 if (pve_p != PV_ENTRY_NULL) {
9761 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
9762 }
9763 }
9764 if (tlb_flush_needed) {
9765 sync_tlb_flush();
9766 }
9767
9768 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);
9769 }
9770
9771 #if (__ARM_VMSA__ == 7)
9772 vm_map_address_t
9773 pmap_create_sharedpage(
9774 void)
9775 {
9776 pmap_paddr_t pa;
9777 kern_return_t kr;
9778
9779 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
9780 memset((char *) phystokv(pa), 0, PAGE_SIZE);
9781
9782 kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
9783 assert(kr == KERN_SUCCESS);
9784
9785 return (vm_map_address_t)phystokv(pa);
9786 }
9787 #else
9788 static void
9789 pmap_update_tt3e(
9790 pmap_t pmap,
9791 vm_address_t address,
9792 tt_entry_t template)
9793 {
9794 tt_entry_t *ptep, pte;
9795
9796 ptep = pmap_tt3e(pmap, address);
9797 if (ptep == NULL) {
9798 panic("%s: no ptep?\n", __FUNCTION__);
9799 }
9800
9801 pte = *ptep;
9802 pte = tte_to_pa(pte) | template;
9803 WRITE_PTE_STRONG(ptep, pte);
9804 }
9805
9806 /* Note absence of non-global bit */
9807 #define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
9808 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
9809 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
9810 | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
9811
9812 vm_map_address_t
9813 pmap_create_sharedpage(
9814 void
9815 )
9816 {
9817 kern_return_t kr;
9818 pmap_paddr_t pa = 0;
9819
9820
9821 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
9822
9823 memset((char *) phystokv(pa), 0, PAGE_SIZE);
9824
9825 #ifdef CONFIG_XNUPOST
9826 /*
9827 * The kernel pmap maintains a user accessible mapping of the commpage
9828 * to test PAN.
9829 */
9830 kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
9831 assert(kr == KERN_SUCCESS);
9832
9833 /*
9834 * This mapping should not be global (as we only expect to reference it
9835 * during testing).
9836 */
9837 pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);
9838
9839 #if KASAN
9840 kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
9841 #endif
9842 #endif /* CONFIG_XNUPOST */
9843
9844 /*
9845 * In order to avoid burning extra pages on mapping the shared page, we
9846 * create a dedicated pmap for the shared page. We forcibly nest the
9847 * translation tables from this pmap into other pmaps. The level we
9848 * will nest at depends on the MMU configuration (page size, TTBR range,
9849 * etc).
9850 *
9851 * Note that this is NOT "the nested pmap" (which is used to nest the
9852 * shared cache).
9853 *
9854 * Note that we update parameters of the entry for our unique needs (NG
9855 * entry, etc.).
9856 */
9857 sharedpage_pmap = pmap_create_options(NULL, 0x0, 0);
9858 assert(sharedpage_pmap != NULL);
9859
9860 /* The user 64-bit mapping... */
9861 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
9862 assert(kr == KERN_SUCCESS);
9863 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
9864
9865 /* ...and the user 32-bit mapping. */
9866 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
9867 assert(kr == KERN_SUCCESS);
9868 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
9869
9870 /* For manipulation in kernel, go straight to physical page */
9871 return (vm_map_address_t)phystokv(pa);
9872 }
9873
9874 /*
9875 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
9876 * with user controlled TTEs.
9877 */
9878 #if (ARM_PGSHIFT == 14)
9879 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= MACH_VM_MAX_ADDRESS);
9880 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
9881 #elif (ARM_PGSHIFT == 12)
9882 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
9883 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
9884 #else
9885 #error Nested shared page mapping is unsupported on this config
9886 #endif
9887
9888 MARK_AS_PMAP_TEXT static kern_return_t
9889 pmap_insert_sharedpage_internal(
9890 pmap_t pmap)
9891 {
9892 kern_return_t kr = KERN_SUCCESS;
9893 vm_offset_t sharedpage_vaddr;
9894 pt_entry_t *ttep, *src_ttep;
9895 int options = 0;
9896
9897 VALIDATE_PMAP(pmap);
9898
9899 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
9900 #error We assume a single page.
9901 #endif
9902
9903 if (pmap_is_64bit(pmap)) {
9904 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
9905 } else {
9906 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
9907 }
9908
9909 PMAP_LOCK(pmap);
9910
9911 /*
9912 * For 4KB pages, we can force the commpage to nest at the level one
9913 * page table, as each entry is 1GB (i.e., there will be no overlap
9914 * with regular userspace mappings). For 16KB pages, each level one
9915 * entry is 64GB, so we must go to the second level entry (32MB) in
9916 * order to nest.
9917 */
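/*
 * Granule arithmetic behind the comment above: with 4KB pages each table
 * holds 512 8-byte entries, so an L3 entry maps 4KB, an L2 entry 2MB and
 * an L1 entry 1GB; with 16KB pages each table holds 2048 entries, so an
 * L3 entry maps 16KB, an L2 entry 32MB and an L1 entry 64GB.
 */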
9918 #if (ARM_PGSHIFT == 12)
9919 (void)options;
9920
9921 /* Just slam in the L1 entry. */
9922 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
9923
9924 if (*ttep != ARM_PTE_EMPTY) {
9925 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
9926 }
9927
9928 src_ttep = pmap_tt1e(sharedpage_pmap, sharedpage_vaddr);
9929 #elif (ARM_PGSHIFT == 14)
9930 /* Allocate for the L2 entry if necessary, and slam it into place. */
9931 /*
9932 * As long as we are using a three-level page table, the first level
9933 * should always exist, so we don't need to check for it.
9934 */
9935 while (*pmap_tt1e(pmap, sharedpage_vaddr) == ARM_PTE_EMPTY) {
9936 PMAP_UNLOCK(pmap);
9937
9938 kr = pmap_expand(pmap, sharedpage_vaddr, options, PMAP_TT_L2_LEVEL);
9939
9940 if (kr != KERN_SUCCESS) {
9941 panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
9942 }
9945
9946 PMAP_LOCK(pmap);
9947 }
9948
9949 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
9950
9951 if (*ttep != ARM_PTE_EMPTY) {
9952 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
9953 }
9954
9955 src_ttep = pmap_tt2e(sharedpage_pmap, sharedpage_vaddr);
9956 #endif
9957
9958 *ttep = *src_ttep;
9959 FLUSH_PTE_STRONG(ttep);
9960
9961 /* TODO: Should we flush in the 64-bit case? */
9962 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
9963
9964 #if (ARM_PGSHIFT == 12)
9965 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
9966 #elif (ARM_PGSHIFT == 14)
9967 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
9968 #endif
9969 sync_tlb_flush();
9970
9971 PMAP_UNLOCK(pmap);
9972
9973 return kr;
9974 }
9975
9976 static void
9977 pmap_unmap_sharedpage(
9978 pmap_t pmap)
9979 {
9980 pt_entry_t *ttep;
9981 vm_offset_t sharedpage_vaddr;
9982
9983 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
9984 #error We assume a single page.
9985 #endif
9986
9987 if (pmap_is_64bit(pmap)) {
9988 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
9989 } else {
9990 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
9991 }
9992
9993 #if (ARM_PGSHIFT == 12)
9994 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
9995
9996 if (ttep == NULL) {
9997 return;
9998 }
9999
10000 /* It had better be mapped to the shared page */
10001 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt1e(sharedpage_pmap, sharedpage_vaddr)) {
10002 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
10003 }
10004 #elif (ARM_PGSHIFT == 14)
10005 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
10006
10007 if (ttep == NULL) {
10008 return;
10009 }
10010
10011 /* It had better be mapped to the shared page */
10012 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt2e(sharedpage_pmap, sharedpage_vaddr)) {
10013 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
10014 }
10015 #endif
10016
10017 *ttep = ARM_TTE_EMPTY;
10018 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
10019
10020 #if (ARM_PGSHIFT == 12)
10021 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
10022 #elif (ARM_PGSHIFT == 14)
10023 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
10024 #endif
10025 sync_tlb_flush();
10026 }
10027
10028 void
10029 pmap_insert_sharedpage(
10030 pmap_t pmap)
10031 {
10032 pmap_insert_sharedpage_internal(pmap);
10033 }
10034
10035 static boolean_t
10036 pmap_is_64bit(
10037 pmap_t pmap)
10038 {
10039 return pmap->is_64bit;
10040 }
10041
10042 #endif
10043
10044 /* ARMTODO -- provide an implementation that accounts for
10045 * holes in the physical map, if any.
10046 */
10047 boolean_t
10048 pmap_valid_page(
10049 ppnum_t pn)
10050 {
10051 return pa_valid(ptoa(pn));
10052 }
10053
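/*
 * A "bootloader page" is physical memory that lies outside the kernel's
 * managed range but falls within an I/O region marked as a carveout.
 */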
10054 boolean_t
10055 pmap_bootloader_page(
10056 ppnum_t pn)
10057 {
10058 pmap_paddr_t paddr = ptoa(pn);
10059
10060 if (pa_valid(paddr)) {
10061 return FALSE;
10062 }
10063 pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
10064 return (io_rgn != NULL) && (io_rgn->wimg & PMAP_IO_RANGE_CARVEOUT);
10065 }
10066
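/*
 * Scan [va_start, va_end) one twig (leaf table) at a time and return TRUE
 * only if no PTE in the range is populated. The pmap lock is taken only
 * for non-kernel pmaps outside the debugger context.
 */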
10067 MARK_AS_PMAP_TEXT static boolean_t
10068 pmap_is_empty_internal(
10069 pmap_t pmap,
10070 vm_map_offset_t va_start,
10071 vm_map_offset_t va_end)
10072 {
10073 vm_map_offset_t block_start, block_end;
10074 tt_entry_t *tte_p;
10075
10076 if (pmap == NULL) {
10077 return TRUE;
10078 }
10079
10080 VALIDATE_PMAP(pmap);
10081
10082 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
10083 unsigned int initial_not_in_kdp = not_in_kdp;
10084
10085 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
10086 PMAP_LOCK(pmap);
10087 }
10088
10089 #if (__ARM_VMSA__ == 7)
10090 if (tte_index(pmap, pt_attr, va_end) >= pmap->tte_index_max) {
10091 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
10092 PMAP_UNLOCK(pmap);
10093 }
10094 return TRUE;
10095 }
10096 #endif
10097
10098 /* TODO: This will be faster if we increment ttep at each level. */
10099 block_start = va_start;
10100
10101 while (block_start < va_end) {
10102 pt_entry_t *bpte_p, *epte_p;
10103 pt_entry_t *pte_p;
10104
10105 block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
10106 if (block_end > va_end) {
10107 block_end = va_end;
10108 }
10109
10110 tte_p = pmap_tte(pmap, block_start);
10111 if ((tte_p != PT_ENTRY_NULL)
10112 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
10113 pte_p = (pt_entry_t *) ttetokv(*tte_p);
10114 bpte_p = &pte_p[pte_index(pmap, pt_attr, block_start)];
10115 epte_p = &pte_p[pte_index(pmap, pt_attr, block_end)];
10116
10117 for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
10118 if (*pte_p != ARM_PTE_EMPTY) {
10119 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
10120 PMAP_UNLOCK(pmap);
10121 }
10122 return FALSE;
10123 }
10124 }
10125 }
10126 block_start = block_end;
10127 }
10128
10129 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
10130 PMAP_UNLOCK(pmap);
10131 }
10132
10133 return TRUE;
10134 }
10135
10136 boolean_t
10137 pmap_is_empty(
10138 pmap_t pmap,
10139 vm_map_offset_t va_start,
10140 vm_map_offset_t va_end)
10141 {
10142 return pmap_is_empty_internal(pmap, va_start, va_end);
10143 }
10144
10145 vm_map_offset_t
10146 pmap_max_offset(
10147 boolean_t is64,
10148 unsigned int option)
10149 {
10150 return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
10151 }
10152
10153 vm_map_offset_t
10154 pmap_max_64bit_offset(
10155 __unused unsigned int option)
10156 {
10157 vm_map_offset_t max_offset_ret = 0;
10158
10159 #if defined(__arm64__)
10160 const vm_map_offset_t min_max_offset = SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000; // end of shared region + 512MB for various purposes
10161 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
10162 max_offset_ret = arm64_pmap_max_offset_default;
10163 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
10164 max_offset_ret = min_max_offset;
10165 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
10166 max_offset_ret = MACH_VM_MAX_ADDRESS;
10167 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
10168 if (arm64_pmap_max_offset_default) {
10169 max_offset_ret = arm64_pmap_max_offset_default;
10170 } else if (max_mem > 0xC0000000) {
10171 max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
10172 } else if (max_mem > 0x40000000) {
10173 max_offset_ret = min_max_offset + 0x38000000; // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
10174 } else {
10175 max_offset_ret = min_max_offset;
10176 }
10177 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
10178 if (arm64_pmap_max_offset_default) {
10179 // Allow the boot-arg to override jumbo size
10180 max_offset_ret = arm64_pmap_max_offset_default;
10181 } else {
10182 max_offset_ret = MACH_VM_MAX_ADDRESS; // Max offset is 64GB for pmaps with special "jumbo" blessing
10183 }
10184 } else {
10185 panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
10186 }
10187
10188 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
10189 assert(max_offset_ret >= min_max_offset);
10190 #else
10191 panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
10192 #endif
10193
10194 return max_offset_ret;
10195 }
10196
10197 vm_map_offset_t
10198 pmap_max_32bit_offset(
10199 unsigned int option)
10200 {
10201 vm_map_offset_t max_offset_ret = 0;
10202
10203 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
10204 max_offset_ret = arm_pmap_max_offset_default;
10205 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
10206 max_offset_ret = 0x80000000;
10207 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
10208 max_offset_ret = VM_MAX_ADDRESS;
10209 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
10210 if (arm_pmap_max_offset_default) {
10211 max_offset_ret = arm_pmap_max_offset_default;
10212 } else if (max_mem > 0x20000000) {
10213 max_offset_ret = 0x80000000;
10214 } else {
10215 max_offset_ret = 0x80000000;
10216 }
10217 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
10218 max_offset_ret = 0x80000000;
10219 } else {
10220 panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
10221 }
10222
10223 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
10224 return max_offset_ret;
10225 }
10226
10227 #if CONFIG_DTRACE
10228 /*
10229 * Constrain DTrace copyin/copyout actions
10230 */
10231 extern kern_return_t dtrace_copyio_preflight(addr64_t);
10232 extern kern_return_t dtrace_copyio_postflight(addr64_t);
10233
10234 kern_return_t
10235 dtrace_copyio_preflight(
10236 __unused addr64_t va)
10237 {
10238 if (current_map() == kernel_map) {
10239 return KERN_FAILURE;
10240 } else {
10241 return KERN_SUCCESS;
10242 }
10243 }
10244
10245 kern_return_t
10246 dtrace_copyio_postflight(
10247 __unused addr64_t va)
10248 {
10249 return KERN_SUCCESS;
10250 }
10251 #endif /* CONFIG_DTRACE */
10252
10253
10254 void
10255 pmap_flush_context_init(__unused pmap_flush_context *pfc)
10256 {
10257 }
10258
10259
10260 void
10261 pmap_flush(
10262 __unused pmap_flush_context *cpus_to_flush)
10263 {
10264 /* not implemented yet */
10265 return;
10266 }
10267
10268
10269 static void __unused
10270 pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
10271 {
10272 }
10273
10274 static void __unused
10275 pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
10276 {
10277 }
10278
10279
10280
10281 #define PMAP_RESIDENT_INVALID ((mach_vm_size_t)-1)
10282
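/*
 * Count resident and compressed bytes for a page-aligned range that falls
 * within a single leaf page table. Returns PMAP_RESIDENT_INVALID if the
 * pmap is NULL or no TTE covers the range; the caller loops twig by twig.
 */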
10283 MARK_AS_PMAP_TEXT static mach_vm_size_t
10284 pmap_query_resident_internal(
10285 pmap_t pmap,
10286 vm_map_address_t start,
10287 vm_map_address_t end,
10288 mach_vm_size_t *compressed_bytes_p)
10289 {
10290 mach_vm_size_t resident_bytes = 0;
10291 mach_vm_size_t compressed_bytes = 0;
10292
10293 pt_entry_t *bpte, *epte;
10294 pt_entry_t *pte_p;
10295 tt_entry_t *tte_p;
10296
10297 if (pmap == NULL) {
10298 return PMAP_RESIDENT_INVALID;
10299 }
10300
10301 VALIDATE_PMAP(pmap);
10302
10303 /* Ensure that this request is valid, and addresses exactly one TTE. */
10304 if (__improbable((start % ARM_PGBYTES) || (end % ARM_PGBYTES))) {
10305 panic("%s: address range %p, %p not page-aligned", __func__, (void*)start, (void*)end);
10306 }
10307
10308 if (__improbable((end < start) || ((end - start) > (PTE_PGENTRIES * ARM_PGBYTES)))) {
10309 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
10310 }
10311
10312 PMAP_LOCK(pmap);
10313 tte_p = pmap_tte(pmap, start);
10314 if (tte_p == (tt_entry_t *) NULL) {
10315 PMAP_UNLOCK(pmap);
10316 return PMAP_RESIDENT_INVALID;
10317 }
10318 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
10319 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
10320 pte_p = (pt_entry_t *) ttetokv(*tte_p);
10321 bpte = &pte_p[pte_index(pmap, pt_attr, start)];
10322 epte = &pte_p[pte_index(pmap, pt_attr, end)];
10323
10324 for (; bpte < epte; bpte++) {
10325 if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
10326 compressed_bytes += ARM_PGBYTES;
10327 } else if (pa_valid(pte_to_pa(*bpte))) {
10328 resident_bytes += ARM_PGBYTES;
10329 }
10330 }
10331 }
10332 PMAP_UNLOCK(pmap);
10333
10334 if (compressed_bytes_p) {
10335 pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
10336 *compressed_bytes_p += compressed_bytes;
10337 pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
10338 }
10339
10340 return resident_bytes;
10341 }
10342
10343 mach_vm_size_t
10344 pmap_query_resident(
10345 pmap_t pmap,
10346 vm_map_address_t start,
10347 vm_map_address_t end,
10348 mach_vm_size_t *compressed_bytes_p)
10349 {
10350 mach_vm_size_t total_resident_bytes;
10351 mach_vm_size_t compressed_bytes;
10352 vm_map_address_t va;
10353
10354
10355 if (pmap == PMAP_NULL) {
10356 if (compressed_bytes_p) {
10357 *compressed_bytes_p = 0;
10358 }
10359 return 0;
10360 }
10361
10362 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
10363
10364 total_resident_bytes = 0;
10365 compressed_bytes = 0;
10366
10367 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
10368 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
10369 VM_KERNEL_ADDRHIDE(end));
10370
10371 va = start;
10372 while (va < end) {
10373 vm_map_address_t l;
10374 mach_vm_size_t resident_bytes;
10375
10376 l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
10377
10378 if (l > end) {
10379 l = end;
10380 }
10381 resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
10382 if (resident_bytes == PMAP_RESIDENT_INVALID) {
10383 break;
10384 }
10385
10386 total_resident_bytes += resident_bytes;
10387
10388 va = l;
10389 }
10390
10391 if (compressed_bytes_p) {
10392 *compressed_bytes_p = compressed_bytes;
10393 }
10394
10395 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
10396 total_resident_bytes);
10397
10398 return total_resident_bytes;
10399 }
10400
10401 #if MACH_ASSERT
10402 static void
10403 pmap_check_ledgers(
10404 pmap_t pmap)
10405 {
10406 int pid;
10407 char *procname;
10408
10409 if (pmap->pmap_pid == 0) {
10410 /*
10411 * This pmap was not or is no longer fully associated
10412 * with a task (e.g. the old pmap after a fork()/exec() or
10413 * spawn()). Its "ledger" still points at a task that is
10414 * now using a different (and active) address space, so
10415 * we can't check that all the pmap ledgers are balanced here.
10416 *
10417 * If the "pid" is set, that means that we went through
10418 * pmap_set_process() in task_terminate_internal(), so
10419 * this task's ledger should not have been re-used and
10420 * all the pmap ledgers should be back to 0.
10421 */
10422 return;
10423 }
10424
10425 pid = pmap->pmap_pid;
10426 procname = pmap->pmap_procname;
10427
10428 vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);
10429
10430 PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
10431 #if 00
10432 PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
10433 #endif
10434 PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
10435 PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
10436 PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
10437 PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
10438 PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
10439 }
10440 #endif /* MACH_ASSERT */
10441
10442 void
10443 pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
10444 {
10445 }
10446
10447
10448 #if CONFIG_PGTRACE
10449 #define PROF_START uint64_t t, nanot;\
10450 t = mach_absolute_time();
10451
10452 #define PROF_END absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
10453 kprintf("%s: took %llu ns\n", __func__, nanot);
10454
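/*
 * The pgtrace lock is taken with interrupts disabled; if the try-lock
 * fails, interrupts are briefly re-enabled before retrying so the CPU
 * does not spin with interrupts masked. The saved interrupt state is
 * stored through the boolean pointer and restored by the unlock macro.
 */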
10455 #define PMAP_PGTRACE_LOCK(p) \
10456 do { \
10457 *(p) = ml_set_interrupts_enabled(false); \
10458 if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
10459 ml_set_interrupts_enabled(*(p)); \
10460 } while (true)
10461
10462 #define PMAP_PGTRACE_UNLOCK(p) \
10463 do { \
10464 simple_unlock(&(pmap_pgtrace.lock)); \
10465 ml_set_interrupts_enabled(*(p)); \
10466 } while (0)
10467
10468 #define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
10469 do { \
10470 *(pte_p) = (pte_entry); \
10471 FLUSH_PTE(pte_p); \
10472 } while (0)
10473
10474 #define PGTRACE_MAX_MAP 16 // maximum number of VAs supported for the same PA
10475
10476 typedef enum {
10477 UNDEFINED,
10478 PA_UNDEFINED,
10479 VA_UNDEFINED,
10480 DEFINED
10481 } pmap_pgtrace_page_state_t;
10482
10483 typedef struct {
10484 queue_chain_t chain;
10485
10486 /*
10487 * pa - physical address being traced
10488 * maps - list of va maps currently cloning this pa
10489 * map_pool - pool of unused map entries
10490 * map_waste - retired map entries awaiting recycling
10491 * state - tracking state of this page
10492 */
10493 pmap_paddr_t pa;
10494 queue_head_t maps;
10495 queue_head_t map_pool;
10496 queue_head_t map_waste;
10497 pmap_pgtrace_page_state_t state;
10498 } pmap_pgtrace_page_t;
10499
10500 static struct {
10501 /*
10502 * pages - list of tracing page info
10503 */
10504 queue_head_t pages;
10505 decl_simple_lock_data(, lock);
10506 } pmap_pgtrace = {};
10507
10508 static void
10509 pmap_pgtrace_init(void)
10510 {
10511 queue_init(&(pmap_pgtrace.pages));
10512 simple_lock_init(&(pmap_pgtrace.lock), 0);
10513
10514 boolean_t enabled;
10515
10516 if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
10517 pgtrace_enabled = enabled;
10518 }
10519 }
10520
10521 // find a page with given pa - pmap_pgtrace should be locked
10522 inline static pmap_pgtrace_page_t *
10523 pmap_pgtrace_find_page(pmap_paddr_t pa)
10524 {
10525 queue_head_t *q = &(pmap_pgtrace.pages);
10526 pmap_pgtrace_page_t *p;
10527
10528 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
10529 if (p->state == UNDEFINED) {
10530 continue;
10531 }
10532 if (p->state == PA_UNDEFINED) {
10533 continue;
10534 }
10535 if (p->pa == pa) {
10536 return p;
10537 }
10538 }
10539
10540 return NULL;
10541 }
10542
10543 // enter clone of given pmap, va page and range - pmap should be locked
10544 static bool
10545 pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
10546 {
10547 bool ints;
10548 queue_head_t *q = &(pmap_pgtrace.pages);
10549 pmap_paddr_t pa_page;
10550 pt_entry_t *ptep, *cptep;
10551 pmap_pgtrace_page_t *p;
10552 bool found = false;
10553
10554 PMAP_ASSERT_LOCKED(pmap);
10555 assert(va_page == arm_trunc_page(va_page));
10556
10557 PMAP_PGTRACE_LOCK(&ints);
10558
10559 ptep = pmap_pte(pmap, va_page);
10560
10561 // target pte should exist
10562 if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
10563 PMAP_PGTRACE_UNLOCK(&ints);
10564 return false;
10565 }
10566
10567 queue_head_t *mapq;
10568 queue_head_t *mappool;
10569 pmap_pgtrace_map_t *map = NULL;
10570
10571 pa_page = pte_to_pa(*ptep);
10572
10573 // find if we have a page info defined for this
10574 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
10575 mapq = &(p->maps);
10576 mappool = &(p->map_pool);
10577
10578 switch (p->state) {
10579 case PA_UNDEFINED:
10580 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10581 if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
10582 p->pa = pa_page;
10583 map->range.start = start;
10584 map->range.end = end;
10585 found = true;
10586 break;
10587 }
10588 }
10589 break;
10590
10591 case VA_UNDEFINED:
10592 if (p->pa != pa_page) {
10593 break;
10594 }
10595 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10596 if (map->cloned == false) {
10597 map->pmap = pmap;
10598 map->ova = va_page;
10599 map->range.start = start;
10600 map->range.end = end;
10601 found = true;
10602 break;
10603 }
10604 }
10605 break;
10606
10607 case DEFINED:
10608 if (p->pa != pa_page) {
10609 break;
10610 }
10611 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10612 if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
10613 kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
10614 break;
10615 } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
10616 kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
10617 break;
10618 } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
10619 // range should be already defined as well
10620 found = true;
10621 break;
10622 }
10623 }
10624 break;
10625
10626 default:
10627 panic("invalid state p->state=%x\n", p->state);
10628 }
10629
10630 if (found == true) {
10631 break;
10632 }
10633 }
10634
10635 // do not clone if no page info found
10636 if (found == false) {
10637 PMAP_PGTRACE_UNLOCK(&ints);
10638 return false;
10639 }
10640
10641 // copy pre, target and post ptes to clone ptes
10642 for (int i = 0; i < 3; i++) {
10643 ptep = pmap_pte(pmap, va_page + (i - 1) * ARM_PGBYTES);
10644 cptep = pmap_pte(kernel_pmap, map->cva[i]);
10645 assert(cptep != NULL);
10646 if (ptep == NULL) {
10647 PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
10648 } else {
10649 PGTRACE_WRITE_PTE(cptep, *ptep);
10650 }
10651 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
10652 }
10653
10654 // get ptes for original and clone
10655 ptep = pmap_pte(pmap, va_page);
10656 cptep = pmap_pte(kernel_pmap, map->cva[1]);
10657
10658 // invalidate original pte and mark it as a pgtrace page
10659 PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
10660 PMAP_UPDATE_TLBS(pmap, map->ova, map->ova + ARM_PGBYTES, false);
10661
10662 map->cloned = true;
10663 p->state = DEFINED;
10664
10665 kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);
10666
10667 PMAP_PGTRACE_UNLOCK(&ints);
10668
10669 return true;
10670 }
10671
10672 // Remove the trace bit and re-validate the pte if applicable. The pmap must be locked.
10673 static void
10674 pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
10675 {
10676 bool ints, found = false;
10677 pmap_pgtrace_page_t *p;
10678 pt_entry_t *ptep;
10679
10680 PMAP_PGTRACE_LOCK(&ints);
10681
10682 // we must have this page info
10683 p = pmap_pgtrace_find_page(pa);
10684 if (p == NULL) {
10685 goto unlock_exit;
10686 }
10687
10688 // find matching map
10689 queue_head_t *mapq = &(p->maps);
10690 queue_head_t *mappool = &(p->map_pool);
10691 pmap_pgtrace_map_t *map;
10692
10693 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10694 if (map->pmap == pmap && map->ova == va) {
10695 found = true;
10696 break;
10697 }
10698 }
10699
10700 if (!found) {
10701 goto unlock_exit;
10702 }
10703
10704 if (map->cloned == true) {
10705 // Restore the pte to its original state
10706 ptep = pmap_pte(pmap, map->ova);
10707 assert(ptep);
10708 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
10709 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);
10710
10711 // revert clone pages
10712 for (int i = 0; i < 3; i++) {
10713 ptep = pmap_pte(kernel_pmap, map->cva[i]);
10714 assert(ptep != NULL);
10715 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
10716 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
10717 }
10718 }
10719
10720 queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
10721 map->pmap = NULL;
10722 map->ova = (vm_map_offset_t)NULL;
10723 map->cloned = false;
10724 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
10725
10726 kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);
10727
10728 unlock_exit:
10729 PMAP_PGTRACE_UNLOCK(&ints);
10730 }
10731
10732 // remove all clones of given pa - pmap must be locked
10733 static void
10734 pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
10735 {
10736 bool ints;
10737 pmap_pgtrace_page_t *p;
10738 pt_entry_t *ptep;
10739
10740 PMAP_PGTRACE_LOCK(&ints);
10741
10742 // we must have this page info
10743 p = pmap_pgtrace_find_page(pa);
10744 if (p == NULL) {
10745 PMAP_PGTRACE_UNLOCK(&ints);
10746 return;
10747 }
10748
10749 queue_head_t *mapq = &(p->maps);
10750 queue_head_t *mappool = &(p->map_pool);
10751 queue_head_t *mapwaste = &(p->map_waste);
10752 pmap_pgtrace_map_t *map;
10753
10754 // move maps to waste
10755 while (!queue_empty(mapq)) {
10756 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
10757 queue_enter_first(mapwaste, map, pmap_pgtrace_map_t*, chain);
10758 }
10759
10760 PMAP_PGTRACE_UNLOCK(&ints);
10761
10762 // sanitize maps in waste
10763 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
10764 if (map->cloned == true) {
10765 PMAP_LOCK(map->pmap);
10766
10767 // restore the original pte
10768 ptep = pmap_pte(map->pmap, map->ova);
10769 assert(ptep);
10770 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
10771 PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova + ARM_PGBYTES, false);
10772
10773 // revert clone ptes
10774 for (int i = 0; i < 3; i++) {
10775 ptep = pmap_pte(kernel_pmap, map->cva[i]);
10776 assert(ptep != NULL);
10777 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
10778 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
10779 }
10780
10781 PMAP_UNLOCK(map->pmap);
10782 }
10783
10784 map->pmap = NULL;
10785 map->ova = (vm_map_offset_t)NULL;
10786 map->cloned = false;
10787 }
10788
10789 PMAP_PGTRACE_LOCK(&ints);
10790
10791 // recycle maps back to map_pool
10792 while (!queue_empty(mapwaste)) {
10793 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
10794 queue_enter_first(mappool, map, pmap_pgtrace_map_t*, chain);
10795 }
10796
10797 PMAP_PGTRACE_UNLOCK(&ints);
10798 }
10799
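// Compute the VA range to scan for the given pmap: the TTBR1 (kernel)
// region derived from TCR.T1SZ for kernel_pmap, or the TTBR0 (user)
// region derived from TCR.T0SZ otherwise.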
10800 inline static void
10801 pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
10802 {
10803 uint64_t tsz;
10804 vm_map_offset_t end;
10805
10806 if (pmap == kernel_pmap) {
10807 tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
10808 *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64 - tsz)) << (64 - tsz));
10809 *endp = VM_MAX_KERNEL_ADDRESS;
10810 } else {
10811 tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
10812 if (tsz == 64) {
10813 end = 0;
10814 } else {
10815 end = ((uint64_t)1 << (64 - tsz)) - 1;
10816 }
10817
10818 *startp = 0;
10819 *endp = end;
10820 }
10821
10822 assert(*endp > *startp);
10823
10824 return;
10825 }
10826
10827 // if pa is mapped in the given pmap, clone each such mapping
10828 static uint64_t
10829 pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
10830 {
10831 uint64_t ret = 0;
10832 vm_map_offset_t min, max;
10833 vm_map_offset_t cur_page, end_page;
10834 pt_entry_t *ptep;
10835 tt_entry_t *ttep;
10836 tt_entry_t tte;
10837 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
10838
10839 pmap_pgtrace_get_search_space(pmap, &min, &max);
10840
10841 cur_page = arm_trunc_page(min);
10842 end_page = arm_trunc_page(max);
10843 while (cur_page <= end_page) {
10844 vm_map_offset_t add = 0;
10845
10846 PMAP_LOCK(pmap);
10847
10848 // skip uninteresting address space
10849 if (pmap == kernel_pmap &&
10850 ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
10851 (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
10852 add = ARM_PGBYTES;
10853 goto unlock_continue;
10854 }
10855
10856 // check whether we can skip l1
10857 ttep = pmap_tt1e(pmap, cur_page);
10858 assert(ttep);
10859 tte = *ttep;
10860 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
10861 add = ARM_TT_L1_SIZE;
10862 goto unlock_continue;
10863 }
10864
10865 // how about l2
10866 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, cur_page)];
10867
10868 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
10869 add = ARM_TT_L2_SIZE;
10870 goto unlock_continue;
10871 }
10872
10873 // ptep finally
10874 ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, cur_page)]);
10875 if (ptep == PT_ENTRY_NULL) {
10876 add = ARM_TT_L3_SIZE;
10877 goto unlock_continue;
10878 }
10879
10880 if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
10881 if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
10882 ret++;
10883 }
10884 }
10885
10886 add = ARM_PGBYTES;
10887
10888 unlock_continue:
10889 PMAP_UNLOCK(pmap);
10890
10891 // overflow
10892 if (cur_page + add < cur_page) {
10893 break;
10894 }
10895
10896 cur_page += add;
10897 }
10898
10899
10900 return ret;
10901 }
10902
10903 // search pv table and clone vas of given pa
10904 static uint64_t
10905 pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
10906 {
10907 uint64_t ret = 0;
10908 unsigned long pai;
10909 pv_entry_t **pvh;
10910 pt_entry_t *ptep;
10911 pmap_t pmap;
10912
10913 typedef struct {
10914 queue_chain_t chain;
10915 pmap_t pmap;
10916 vm_map_offset_t va;
10917 } pmap_va_t;
10918
10919 queue_head_t pmapvaq;
10920 pmap_va_t *pmapva;
10921
10922 queue_init(&pmapvaq);
10923
10924 pai = pa_index(pa);
10925 LOCK_PVH(pai);
10926 pvh = pai_to_pvh(pai);
10927
10928 // collect pmap/va pair from pvh
10929 if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
10930 ptep = pvh_ptep(pvh);
10931 pmap = ptep_get_pmap(ptep);
10932
10933 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
10934 pmapva->pmap = pmap;
10935 pmapva->va = ptep_get_va(ptep);
10936
10937 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
10938 } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
10939 pv_entry_t *pvep;
10940
10941 pvep = pvh_list(pvh);
10942 while (pvep) {
10943 ptep = pve_get_ptep(pvep);
10944 pmap = ptep_get_pmap(ptep);
10945
10946 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
10947 pmapva->pmap = pmap;
10948 pmapva->va = ptep_get_va(ptep);
10949
10950 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
10951
10952 pvep = PVE_NEXT_PTR(pve_next(pvep));
10953 }
10954 }
10955
10956 UNLOCK_PVH(pai);
10957
10958 // clone them while making sure mapping still exists
10959 queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
10960 PMAP_LOCK(pmapva->pmap);
10961 ptep = pmap_pte(pmapva->pmap, pmapva->va);
10962 if (pte_to_pa(*ptep) == pa) {
10963 if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
10964 ret++;
10965 }
10966 }
10967 PMAP_UNLOCK(pmapva->pmap);
10968
10969 kfree(pmapva, sizeof(pmap_va_t));
10970 }
10971
10972 return ret;
10973 }
10974
10975 // allocate a page info
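// Each page info is created with PGTRACE_MAX_MAP map entries in its pool;
// every entry reserves three consecutive kernel VAs (previous, target and
// next page clones) and pre-expands the kernel page tables for them, so
// that later cloning can be done under the pgtrace spinlock.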
10976 static pmap_pgtrace_page_t *
10977 pmap_pgtrace_alloc_page(void)
10978 {
10979 pmap_pgtrace_page_t *p;
10980 queue_head_t *mapq;
10981 queue_head_t *mappool;
10982 queue_head_t *mapwaste;
10983 pmap_pgtrace_map_t *map;
10984
10985 p = kalloc(sizeof(pmap_pgtrace_page_t));
10986 assert(p);
10987
10988 p->state = UNDEFINED;
10989
10990 mapq = &(p->maps);
10991 mappool = &(p->map_pool);
10992 mapwaste = &(p->map_waste);
10993 queue_init(mapq);
10994 queue_init(mappool);
10995 queue_init(mapwaste);
10996
10997 for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
10998 vm_map_offset_t newcva;
10999 pt_entry_t *cptep;
11000 kern_return_t kr;
11001 vm_map_entry_t entry;
11002
11003 // get a clone va
11004 vm_object_reference(kernel_object);
11005 kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3 * ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
11006 if (kr != KERN_SUCCESS) {
11007 panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
11008 }
11009 VME_OBJECT_SET(entry, kernel_object);
11010 VME_OFFSET_SET(entry, newcva);
11011 vm_map_unlock(kernel_map);
11012
11013 // fill default clone page info and add to pool
11014 map = kalloc(sizeof(pmap_pgtrace_map_t));
11015 for (int j = 0; j < 3; j++) {
11016 vm_map_offset_t addr = newcva + j * ARM_PGBYTES;
11017
11018 // pre-expand the pmap while preemption is enabled
11019 kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
11020 if (kr != KERN_SUCCESS) {
11021 panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
11022 }
11023
11024 cptep = pmap_pte(kernel_pmap, addr);
11025 assert(cptep != NULL);
11026
11027 map->cva[j] = addr;
11028 map->cva_spte[j] = *cptep;
11029 }
11030 map->range.start = map->range.end = 0;
11031 map->cloned = false;
11032 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
11033 }
11034
11035 return p;
11036 }
11037
11038 // free a page info
11039 static void
11040 pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
11041 {
11042 queue_head_t *mapq;
11043 queue_head_t *mappool;
11044 queue_head_t *mapwaste;
11045 pmap_pgtrace_map_t *map;
11046
11047 assert(p);
11048
11049 mapq = &(p->maps);
11050 mappool = &(p->map_pool);
11051 mapwaste = &(p->map_waste);
11052
11053 while (!queue_empty(mapq)) {
11054 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
11055 kfree(map, sizeof(pmap_pgtrace_map_t));
11056 }
11057
11058 while (!queue_empty(mappool)) {
11059 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
11060 kfree(map, sizeof(pmap_pgtrace_map_t));
11061 }
11062
11063 while (!queue_empty(mapwaste)) {
11064 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
11065 kfree(map, sizeof(pmap_pgtrace_map_t));
11066 }
11067
11068 kfree(p, sizeof(pmap_pgtrace_page_t));
11069 }
11070
11071 // construct page infos with the given address range
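// If pmap is NULL, start and end are treated as physical addresses;
// otherwise they are VAs in the given pmap. Returns the number of
// mappings cloned so far (more may be cloned later as pages are entered).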
11072 int
11073 pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
11074 {
11075 int ret = 0;
11076 pt_entry_t *ptep;
11077 queue_head_t *q = &(pmap_pgtrace.pages);
11078 bool ints;
11079 vm_map_offset_t cur_page, end_page;
11080
11081 if (start > end) {
11082 kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
11083 return -1;
11084 }
11085
11086 PROF_START
11087
11088 // add each page in given range
11089 cur_page = arm_trunc_page(start);
11090 end_page = arm_trunc_page(end);
11091 while (cur_page <= end_page) {
11092 pmap_paddr_t pa_page = 0;
11093 uint64_t num_cloned = 0;
11094 pmap_pgtrace_page_t *p = NULL, *newp;
11095 bool free_newp = true;
11096 pmap_pgtrace_page_state_t state;
11097
11098 // do all allocations outside of spinlocks
11099 newp = pmap_pgtrace_alloc_page();
11100
11101 // keep lock order: pmap, then kernel_pmap, then the pgtrace lock
11102 if (pmap != NULL) {
11103 PMAP_LOCK(pmap);
11104 }
11105 if (pmap != kernel_pmap) {
11106 PMAP_LOCK(kernel_pmap);
11107 }
11108
11109 // addresses are physical if pmap is null
11110 if (pmap == NULL) {
11111 ptep = NULL;
11112 pa_page = cur_page;
11113 state = VA_UNDEFINED;
11114 } else {
11115 ptep = pmap_pte(pmap, cur_page);
11116 if (ptep != NULL) {
11117 pa_page = pte_to_pa(*ptep);
11118 state = DEFINED;
11119 } else {
11120 state = PA_UNDEFINED;
11121 }
11122 }
11123
11124 // search if we have a page info already
11125 PMAP_PGTRACE_LOCK(&ints);
11126 if (state != PA_UNDEFINED) {
11127 p = pmap_pgtrace_find_page(pa_page);
11128 }
11129
11130 // add pre-allocated page info if nothing found
11131 if (p == NULL) {
11132 queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
11133 p = newp;
11134 free_newp = false;
11135 }
11136
11137 // now p points to the page info we want
11138 p->state = state;
11139
11140 queue_head_t *mapq = &(p->maps);
11141 queue_head_t *mappool = &(p->map_pool);
11142 pmap_pgtrace_map_t *map;
11143 vm_map_offset_t start_offset, end_offset;
11144
11145 // calculate trace offsets in the page
11146 if (cur_page > start) {
11147 start_offset = 0;
11148 } else {
11149 start_offset = start - cur_page;
11150 }
11151 if (cur_page == end_page) {
11152 end_offset = end - end_page;
11153 } else {
11154 end_offset = ARM_PGBYTES - 1;
11155 }
11156
11157 kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);
11158
11159 // fill map info
11160 assert(!queue_empty(mappool));
11161 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
11162 if (p->state == PA_UNDEFINED) {
11163 map->pmap = pmap;
11164 map->ova = cur_page;
11165 map->range.start = start_offset;
11166 map->range.end = end_offset;
11167 } else if (p->state == VA_UNDEFINED) {
11168 p->pa = pa_page;
11169 map->range.start = start_offset;
11170 map->range.end = end_offset;
11171 } else if (p->state == DEFINED) {
11172 p->pa = pa_page;
11173 map->pmap = pmap;
11174 map->ova = cur_page;
11175 map->range.start = start_offset;
11176 map->range.end = end_offset;
11177 } else {
11178 panic("invalid p->state=%d\n", p->state);
11179 }
11180
11181 // not cloned yet
11182 map->cloned = false;
11183 queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);
11184
11185 // release locks in reverse order
11186 PMAP_PGTRACE_UNLOCK(&ints);
11187 if (pmap != kernel_pmap) {
11188 PMAP_UNLOCK(kernel_pmap);
11189 }
11190 if (pmap != NULL) {
11191 PMAP_UNLOCK(pmap);
11192 }
11193
11194 // now clone it
11195 if (pa_valid(pa_page)) {
11196 num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
11197 }
11198 if (pmap == NULL) {
11199 num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
11200 } else {
11201 num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
11202 }
11203
11204 // free pre-allocations if we didn't add it to the q
11205 if (free_newp) {
11206 pmap_pgtrace_free_page(newp);
11207 }
11208
11209 if (num_cloned == 0) {
11210 kprintf("%s: no mapping found for pa_page=%llx; it will be cloned when a mapping is entered\n", __func__, pa_page);
11211 }
11212
11213 ret += num_cloned;
11214
11215 // overflow
11216 if (cur_page + ARM_PGBYTES < cur_page) {
11217 break;
11218 } else {
11219 cur_page += ARM_PGBYTES;
11220 }
11221 }
11222
11223 PROF_END
11224
11225 return ret;
11226 }
11227
11228 // delete page infos for given address range
11229 int
11230 pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
11231 {
11232 int ret = 0;
11233 bool ints;
11234 queue_head_t *q = &(pmap_pgtrace.pages);
11235 pmap_pgtrace_page_t *p;
11236 vm_map_offset_t cur_page, end_page;
11237
11238 kprintf("%s start=%llx end=%llx\n", __func__, start, end);
11239
11240 PROF_START
11241
11242 pt_entry_t *ptep;
11243 pmap_paddr_t pa_page;
11244
11245 // remove page info from start to end
11246 cur_page = arm_trunc_page(start);
11247 end_page = arm_trunc_page(end);
11248 while (cur_page <= end_page) {
11249 p = NULL;
11250
11251 if (pmap == NULL) {
11252 pa_page = cur_page;
11253 } else {
11254 PMAP_LOCK(pmap);
11255 ptep = pmap_pte(pmap, cur_page);
11256 if (ptep == NULL) {
11257 PMAP_UNLOCK(pmap);
11258 goto cont;
11259 }
11260 pa_page = pte_to_pa(*ptep);
11261 PMAP_UNLOCK(pmap);
11262 }
11263
11264 // remove all clones and validate
11265 pmap_pgtrace_remove_all_clone(pa_page);
11266
11267 // find page info and delete
11268 PMAP_PGTRACE_LOCK(&ints);
11269 p = pmap_pgtrace_find_page(pa_page);
11270 if (p != NULL) {
11271 queue_remove(q, p, pmap_pgtrace_page_t *, chain);
11272 ret++;
11273 }
11274 PMAP_PGTRACE_UNLOCK(&ints);
11275
11276 // free outside of locks
11277 if (p != NULL) {
11278 pmap_pgtrace_free_page(p);
11279 }
11280
11281 cont:
11282 // overflow
11283 if (cur_page + ARM_PGBYTES < cur_page) {
11284 break;
11285 } else {
11286 cur_page += ARM_PGBYTES;
11287 }
11288 }
11289
11290 PROF_END
11291
11292 return ret;
11293 }
11294
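// Handle a fault on a page marked ARM_PTE_PGTRACE: decode the faulting
// instruction, emulate it against the clone mapping, log the access if it
// falls inside the traced range, and advance the saved PC past the
// instruction. Returns KERN_FAILURE if the fault is not pgtrace-related.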
11295 kern_return_t
11296 pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
11297 {
11298 pt_entry_t *ptep;
11299 pgtrace_run_result_t res;
11300 pmap_pgtrace_page_t *p;
11301 bool ints, found = false;
11302 pmap_paddr_t pa;
11303
11304 // Quick check if we are interested
11305 ptep = pmap_pte(pmap, va);
11306 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
11307 return KERN_FAILURE;
11308 }
11309
11310 PMAP_PGTRACE_LOCK(&ints);
11311
11312 // Check again since access is serialized
11313 ptep = pmap_pte(pmap, va);
11314 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
11315 PMAP_PGTRACE_UNLOCK(&ints);
11316 return KERN_FAILURE;
11317 } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
11318 // Somehow this CPU's TLB has not been updated yet
11319 kprintf("%s: somehow this CPU's TLB has not been updated?\n", __func__);
11320 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);
11321
11322 PMAP_PGTRACE_UNLOCK(&ints);
11323 return KERN_SUCCESS;
11324 }
11325
11326 // Find if this pa is what we are tracing
11327 pa = pte_to_pa(*ptep);
11328
11329 p = pmap_pgtrace_find_page(arm_trunc_page(pa));
11330 if (p == NULL) {
11331 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
11332 }
11333
11334 // find if pmap and va are also matching
11335 queue_head_t *mapq = &(p->maps);
11336 queue_head_t *mapwaste = &(p->map_waste);
11337 pmap_pgtrace_map_t *map;
11338
11339 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
11340 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
11341 found = true;
11342 break;
11343 }
11344 }
11345
11346 // if not found, search the map waste list, as its entries are still valid
11347 if (!found) {
11348 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
11349 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
11350 found = true;
11351 break;
11352 }
11353 }
11354 }
11355
11356 if (!found) {
11357 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
11358 }
11359
11360 // Decode and run it on the clone map
11361 bzero(&res, sizeof(res));
11362 pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss), // instruction
11363 va, map->cva, // fault va and clone page vas
11364 ss, &res);
11365
11366 // write a log if in range
11367 vm_map_offset_t offset = va - map->ova;
11368 if (map->range.start <= offset && offset <= map->range.end) {
11369 pgtrace_write_log(res);
11370 }
11371
11372 PMAP_PGTRACE_UNLOCK(&ints);
11373
11374 // Advance the saved PC to the next instruction
11375 add_saved_state_pc(ss, sizeof(uint32_t));
11376
11377 return KERN_SUCCESS;
11378 }
11379 #endif
11380
11381 boolean_t
11382 pmap_enforces_execute_only(
11383 #if (__ARM_VMSA__ == 7)
11384 __unused
11385 #endif
11386 pmap_t pmap)
11387 {
11388 #if (__ARM_VMSA__ > 7)
11389 return pmap != kernel_pmap;
11390 #else
11391 return FALSE;
11392 #endif
11393 }
11394
11395 MARK_AS_PMAP_TEXT void
11396 pmap_set_jit_entitled_internal(
11397 __unused pmap_t pmap)
11398 {
11399 return;
11400 }
11401
11402 void
11403 pmap_set_jit_entitled(
11404 pmap_t pmap)
11405 {
11406 pmap_set_jit_entitled_internal(pmap);
11407 }
11408
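/*
 * Report the disposition of the mapping at va as a combination of
 * PMAP_QUERY_PAGE_* bits in *disp_p: present, compressed (optionally
 * alternate-accounted), altacct, reusable or internal.
 */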
11409 MARK_AS_PMAP_TEXT static kern_return_t
11410 pmap_query_page_info_internal(
11411 pmap_t pmap,
11412 vm_map_offset_t va,
11413 int *disp_p)
11414 {
11415 pmap_paddr_t pa;
11416 int disp;
11417 int pai;
11418 pt_entry_t *pte;
11419 pv_entry_t **pv_h, *pve_p;
11420
11421 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
11422 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11423 *disp_p = 0;
11424 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11425 return KERN_INVALID_ARGUMENT;
11426 }
11427
11428 disp = 0;
11429
11430 VALIDATE_PMAP(pmap);
11431 PMAP_LOCK(pmap);
11432
11433 pte = pmap_pte(pmap, va);
11434 if (pte == PT_ENTRY_NULL) {
11435 goto done;
11436 }
11437
11438 pa = pte_to_pa(*pte);
11439 if (pa == 0) {
11440 if (ARM_PTE_IS_COMPRESSED(*pte, pte)) {
11441 disp |= PMAP_QUERY_PAGE_COMPRESSED;
11442 if (*pte & ARM_PTE_COMPRESSED_ALT) {
11443 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
11444 }
11445 }
11446 } else {
11447 disp |= PMAP_QUERY_PAGE_PRESENT;
11448 pai = (int) pa_index(pa);
11449 if (!pa_valid(pa)) {
11450 goto done;
11451 }
11452 LOCK_PVH(pai);
11453 pv_h = pai_to_pvh(pai);
11454 pve_p = PV_ENTRY_NULL;
11455 if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
11456 pve_p = pvh_list(pv_h);
11457 while (pve_p != PV_ENTRY_NULL &&
11458 pve_get_ptep(pve_p) != pte) {
11459 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
11460 }
11461 }
11462 if (IS_ALTACCT_PAGE(pai, pve_p)) {
11463 disp |= PMAP_QUERY_PAGE_ALTACCT;
11464 } else if (IS_REUSABLE_PAGE(pai)) {
11465 disp |= PMAP_QUERY_PAGE_REUSABLE;
11466 } else if (IS_INTERNAL_PAGE(pai)) {
11467 disp |= PMAP_QUERY_PAGE_INTERNAL;
11468 }
11469 UNLOCK_PVH(pai);
11470 }
11471
11472 done:
11473 PMAP_UNLOCK(pmap);
11474 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11475 *disp_p = disp;
11476 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11477 return KERN_SUCCESS;
11478 }
11479
11480 kern_return_t
11481 pmap_query_page_info(
11482 pmap_t pmap,
11483 vm_map_offset_t va,
11484 int *disp_p)
11485 {
11486 return pmap_query_page_info_internal(pmap, va, disp_p);
11487 }
11488
11489 MARK_AS_PMAP_TEXT kern_return_t
11490 pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
11491 {
11492
11493 return KERN_SUCCESS;
11494 }
11495
11496 kern_return_t
11497 pmap_return(boolean_t do_panic, boolean_t do_recurse)
11498 {
11499 return pmap_return_internal(do_panic, do_recurse);
11500 }
11501
11502
11503
11504
11505 MARK_AS_PMAP_TEXT static void
11506 pmap_footprint_suspend_internal(
11507 vm_map_t map,
11508 boolean_t suspend)
11509 {
11510 #if DEVELOPMENT || DEBUG
11511 if (suspend) {
11512 current_thread()->pmap_footprint_suspended = TRUE;
11513 map->pmap->footprint_was_suspended = TRUE;
11514 } else {
11515 current_thread()->pmap_footprint_suspended = FALSE;
11516 }
11517 #else /* DEVELOPMENT || DEBUG */
11518 (void) map;
11519 (void) suspend;
11520 #endif /* DEVELOPMENT || DEBUG */
11521 }
11522
11523 void
11524 pmap_footprint_suspend(
11525 vm_map_t map,
11526 boolean_t suspend)
11527 {
11528 pmap_footprint_suspend_internal(map, suspend);
11529 }
11530
11531 #if defined(__arm64__) && (DEVELOPMENT || DEBUG)
11532
11533 struct page_table_dump_header {
11534 uint64_t pa;
11535 uint64_t num_entries;
11536 uint64_t start_va;
11537 uint64_t end_va;
11538 };
11539
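/*
 * Copy the page-table hierarchy rooted at ttp into the buffer: each table
 * visited is emitted as a page_table_dump_header followed by a verbatim
 * copy of its entries, recursing into table-type entries. Returns the
 * number of bytes written, or 0 if the buffer is too small.
 */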
11540 static size_t
11541 pmap_dump_page_tables_recurse(pmap_t pmap,
11542 const tt_entry_t *ttp,
11543 unsigned int cur_level,
11544 uint64_t start_va,
11545 void *bufp,
11546 void *buf_end)
11547 {
11548 size_t bytes_used = 0;
11549 uint64_t num_entries = ARM_PGBYTES / sizeof(*ttp);
11550 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
11551
11552 uint64_t size = pt_attr->pta_level_info[cur_level].size;
11553 uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
11554 uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
11555 uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
11556
11557 if (cur_level == arm64_root_pgtable_level) {
11558 num_entries = arm64_root_pgtable_num_ttes;
11559 }
11560
11561 uint64_t tt_size = num_entries * sizeof(tt_entry_t);
11562 const tt_entry_t *tt_end = &ttp[num_entries];
11563
11564 if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
11565 return 0;
11566 }
11567
11568 struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
11569 header->pa = ml_static_vtop((vm_offset_t)ttp);
11570 header->num_entries = num_entries;
11571 header->start_va = start_va;
11572 header->end_va = start_va + (num_entries * size);
11573
11574 bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
11575 bytes_used += (sizeof(*header) + tt_size);
11576 uint64_t current_va = start_va;
11577
11578 for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
11579 tt_entry_t tte = *ttep;
11580
11581 if (!(tte & valid_mask)) {
11582 continue;
11583 }
11584
11585 if ((tte & type_mask) == type_block) {
11586 continue;
11587 } else {
11588 if (cur_level >= PMAP_TT_MAX_LEVEL) {
11589 panic("%s: corrupt entry %#llx at %p, "
11590 "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
11591 __FUNCTION__, tte, ttep,
11592 ttp, cur_level, bufp, buf_end);
11593 }
11594
11595 const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
11596
11597 size_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1, current_va, (uint8_t*)bufp + bytes_used, buf_end);
11598
11599 if (recurse_result == 0) {
11600 return 0;
11601 }
11602
11603 bytes_used += recurse_result;
11604 }
11605 }
11606
11607 return bytes_used;
11608 }
11609
11610 size_t
11611 pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end)
11612 {
11613 if (not_in_kdp) {
11614 panic("pmap_dump_page_tables must only be called from kernel debugger context");
11615 }
11616 return pmap_dump_page_tables_recurse(pmap, pmap->tte, arm64_root_pgtable_level, pmap->min, bufp, buf_end);
11617 }
11618
11619 #else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
11620
11621 size_t
11622 pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)
11623 {
11624 return (size_t)-1;
11625 }
11626
11627 #endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */