[apple/xnu.git] / osfmk / arm / pmap.c
1 /*
2 * Copyright (c) 2011-2019 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <string.h>
29 #include <mach_assert.h>
30 #include <mach_ldebug.h>
31
32 #include <mach/shared_region.h>
33 #include <mach/vm_param.h>
34 #include <mach/vm_prot.h>
35 #include <mach/vm_map.h>
36 #include <mach/machine/vm_param.h>
37 #include <mach/machine/vm_types.h>
38
39 #include <mach/boolean.h>
40 #include <kern/bits.h>
41 #include <kern/thread.h>
42 #include <kern/sched.h>
43 #include <kern/zalloc.h>
44 #include <kern/kalloc.h>
45 #include <kern/ledger.h>
46 #include <kern/spl.h>
47 #include <kern/trustcache.h>
48
49 #include <os/overflow.h>
50
51 #include <vm/pmap.h>
52 #include <vm/vm_map.h>
53 #include <vm/vm_kern.h>
54 #include <vm/vm_protos.h>
55 #include <vm/vm_object.h>
56 #include <vm/vm_page.h>
57 #include <vm/vm_pageout.h>
58 #include <vm/cpm.h>
59
60 #include <libkern/img4/interface.h>
61 #include <libkern/section_keywords.h>
62
63 #include <machine/atomic.h>
64 #include <machine/thread.h>
65 #include <machine/lowglobals.h>
66
67 #include <arm/caches_internal.h>
68 #include <arm/cpu_data.h>
69 #include <arm/cpu_data_internal.h>
70 #include <arm/cpu_capabilities.h>
71 #include <arm/cpu_number.h>
72 #include <arm/machine_cpu.h>
73 #include <arm/misc_protos.h>
74 #include <arm/trap.h>
75
76 #if (__ARM_VMSA__ > 7)
77 #include <arm64/proc_reg.h>
78 #include <pexpert/arm64/boot.h>
79 #if CONFIG_PGTRACE
80 #include <stdint.h>
81 #include <arm64/pgtrace.h>
82 #if CONFIG_PGTRACE_NONKEXT
83 #include <arm64/pgtrace_decoder.h>
84 #endif // CONFIG_PGTRACE_NONKEXT
85 #endif
86 #endif
87
88 #include <pexpert/device_tree.h>
89
90 #include <san/kasan.h>
91 #include <sys/cdefs.h>
92
93 #if defined(HAS_APPLE_PAC)
94 #include <ptrauth.h>
95 #endif
96
97 #define PMAP_TT_L0_LEVEL 0x0
98 #define PMAP_TT_L1_LEVEL 0x1
99 #define PMAP_TT_L2_LEVEL 0x2
100 #define PMAP_TT_L3_LEVEL 0x3
101 #if (__ARM_VMSA__ == 7)
102 #define PMAP_TT_MAX_LEVEL PMAP_TT_L2_LEVEL
103 #else
104 #define PMAP_TT_MAX_LEVEL PMAP_TT_L3_LEVEL
105 #endif
106 #define PMAP_TT_LEAF_LEVEL PMAP_TT_MAX_LEVEL
107 #define PMAP_TT_TWIG_LEVEL (PMAP_TT_MAX_LEVEL - 1)
108
109 static bool alloc_asid(pmap_t pmap);
110 static void free_asid(pmap_t pmap);
111 static void flush_mmu_tlb_region_asid_async(vm_offset_t va, unsigned length, pmap_t pmap);
112 static void flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap);
113 static void flush_mmu_tlb_full_asid_async(pmap_t pmap);
114 static pt_entry_t wimg_to_pte(unsigned int wimg);
115
116 struct page_table_ops {
117 bool (*alloc_id)(pmap_t pmap);
118 void (*free_id)(pmap_t pmap);
119 void (*flush_tlb_region_async)(vm_offset_t va, unsigned length, pmap_t pmap);
120 void (*flush_tlb_tte_async)(vm_offset_t va, pmap_t pmap);
121 void (*flush_tlb_async)(pmap_t pmap);
122 pt_entry_t (*wimg_to_pte)(unsigned int wimg);
123 };
124
125 static const struct page_table_ops native_pt_ops =
126 {
127 .alloc_id = alloc_asid,
128 .free_id = free_asid,
129 .flush_tlb_region_async = flush_mmu_tlb_region_asid_async,
130 .flush_tlb_tte_async = flush_mmu_tlb_tte_asid_async,
131 .flush_tlb_async = flush_mmu_tlb_full_asid_async,
132 .wimg_to_pte = wimg_to_pte,
133 };
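/*
 * Illustrative sketch, not part of the original source: page_table_ops is a
 * per-format vtable, so callers reach the ASID and TLB-maintenance routines
 * through the pmap rather than calling flush_mmu_tlb_* directly, e.g.:
 *
 *     const struct page_table_ops *ops = pmap_get_pt_ops(pmap);
 *     ops->flush_tlb_region_async(va, (unsigned)(end - va), pmap);
 *     sync_tlb_flush();
 *
 * For a native pmap these calls resolve to the functions named in
 * native_pt_ops above (flush_mmu_tlb_region_asid_async() and friends).
 */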
134
135 #if (__ARM_VMSA__ > 7)
136 const struct page_table_level_info pmap_table_level_info_16k[] =
137 {
138 [0] = {
139 .size = ARM_16K_TT_L0_SIZE,
140 .offmask = ARM_16K_TT_L0_OFFMASK,
141 .shift = ARM_16K_TT_L0_SHIFT,
142 .index_mask = ARM_16K_TT_L0_INDEX_MASK,
143 .valid_mask = ARM_TTE_VALID,
144 .type_mask = ARM_TTE_TYPE_MASK,
145 .type_block = ARM_TTE_TYPE_BLOCK
146 },
147 [1] = {
148 .size = ARM_16K_TT_L1_SIZE,
149 .offmask = ARM_16K_TT_L1_OFFMASK,
150 .shift = ARM_16K_TT_L1_SHIFT,
151 .index_mask = ARM_16K_TT_L1_INDEX_MASK,
152 .valid_mask = ARM_TTE_VALID,
153 .type_mask = ARM_TTE_TYPE_MASK,
154 .type_block = ARM_TTE_TYPE_BLOCK
155 },
156 [2] = {
157 .size = ARM_16K_TT_L2_SIZE,
158 .offmask = ARM_16K_TT_L2_OFFMASK,
159 .shift = ARM_16K_TT_L2_SHIFT,
160 .index_mask = ARM_16K_TT_L2_INDEX_MASK,
161 .valid_mask = ARM_TTE_VALID,
162 .type_mask = ARM_TTE_TYPE_MASK,
163 .type_block = ARM_TTE_TYPE_BLOCK
164 },
165 [3] = {
166 .size = ARM_16K_TT_L3_SIZE,
167 .offmask = ARM_16K_TT_L3_OFFMASK,
168 .shift = ARM_16K_TT_L3_SHIFT,
169 .index_mask = ARM_16K_TT_L3_INDEX_MASK,
170 .valid_mask = ARM_PTE_TYPE_VALID,
171 .type_mask = ARM_PTE_TYPE_MASK,
172 .type_block = ARM_TTE_TYPE_L3BLOCK
173 }
174 };
175
176 const struct page_table_level_info pmap_table_level_info_4k[] =
177 {
178 [0] = {
179 .size = ARM_4K_TT_L0_SIZE,
180 .offmask = ARM_4K_TT_L0_OFFMASK,
181 .shift = ARM_4K_TT_L0_SHIFT,
182 .index_mask = ARM_4K_TT_L0_INDEX_MASK,
183 .valid_mask = ARM_TTE_VALID,
184 .type_mask = ARM_TTE_TYPE_MASK,
185 .type_block = ARM_TTE_TYPE_BLOCK
186 },
187 [1] = {
188 .size = ARM_4K_TT_L1_SIZE,
189 .offmask = ARM_4K_TT_L1_OFFMASK,
190 .shift = ARM_4K_TT_L1_SHIFT,
191 .index_mask = ARM_4K_TT_L1_INDEX_MASK,
192 .valid_mask = ARM_TTE_VALID,
193 .type_mask = ARM_TTE_TYPE_MASK,
194 .type_block = ARM_TTE_TYPE_BLOCK
195 },
196 [2] = {
197 .size = ARM_4K_TT_L2_SIZE,
198 .offmask = ARM_4K_TT_L2_OFFMASK,
199 .shift = ARM_4K_TT_L2_SHIFT,
200 .index_mask = ARM_4K_TT_L2_INDEX_MASK,
201 .valid_mask = ARM_TTE_VALID,
202 .type_mask = ARM_TTE_TYPE_MASK,
203 .type_block = ARM_TTE_TYPE_BLOCK
204 },
205 [3] = {
206 .size = ARM_4K_TT_L3_SIZE,
207 .offmask = ARM_4K_TT_L3_OFFMASK,
208 .shift = ARM_4K_TT_L3_SHIFT,
209 .index_mask = ARM_4K_TT_L3_INDEX_MASK,
210 .valid_mask = ARM_PTE_TYPE_VALID,
211 .type_mask = ARM_PTE_TYPE_MASK,
212 .type_block = ARM_TTE_TYPE_L3BLOCK
213 }
214 };
215
216 struct page_table_attr {
217 const struct page_table_level_info * const pta_level_info;
218 const struct page_table_ops * const pta_ops;
219 const uintptr_t ap_ro;
220 const uintptr_t ap_rw;
221 const uintptr_t ap_rona;
222 const uintptr_t ap_rwna;
223 const uintptr_t ap_xn;
224 const uintptr_t ap_x;
225 const unsigned int pta_root_level;
226 const unsigned int pta_max_level;
227 };
228
229 const struct page_table_attr pmap_pt_attr_4k = {
230 .pta_level_info = pmap_table_level_info_4k,
231 .pta_root_level = PMAP_TT_L1_LEVEL,
232 .pta_max_level = PMAP_TT_L3_LEVEL,
233 .pta_ops = &native_pt_ops,
234 .ap_ro = ARM_PTE_AP(AP_RORO),
235 .ap_rw = ARM_PTE_AP(AP_RWRW),
236 .ap_rona = ARM_PTE_AP(AP_RONA),
237 .ap_rwna = ARM_PTE_AP(AP_RWNA),
238 .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
239 .ap_x = ARM_PTE_PNX,
240 };
241
242 const struct page_table_attr pmap_pt_attr_16k = {
243 .pta_level_info = pmap_table_level_info_16k,
244 .pta_root_level = PMAP_TT_L1_LEVEL,
245 .pta_max_level = PMAP_TT_L3_LEVEL,
246 .pta_ops = &native_pt_ops,
247 .ap_ro = ARM_PTE_AP(AP_RORO),
248 .ap_rw = ARM_PTE_AP(AP_RWRW),
249 .ap_rona = ARM_PTE_AP(AP_RONA),
250 .ap_rwna = ARM_PTE_AP(AP_RWNA),
251 .ap_xn = ARM_PTE_PNX | ARM_PTE_NX,
252 .ap_x = ARM_PTE_PNX,
253 };
254
255 #if __ARM_16K_PG__
256 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_16k;
257 #else /* !__ARM_16K_PG__ */
258 const struct page_table_attr * const native_pt_attr = &pmap_pt_attr_4k;
259 #endif /* !__ARM_16K_PG__ */
260
261
262 #else /* (__ARM_VMSA__ > 7) */
263 /*
264 * We don't support pmap parameterization for VMSA7, so use an opaque
265 * page_table_attr structure.
266 */
267 const struct page_table_attr * const native_pt_attr = NULL;
268 #endif /* (__ARM_VMSA__ > 7) */
269
270 typedef struct page_table_attr pt_attr_t;
271
272 /* Macro for getting pmap attributes; not a function for const propagation. */
273 #if ARM_PARAMETERIZED_PMAP
274 /* The page table attributes are linked to the pmap */
275 #define pmap_get_pt_attr(pmap) ((pmap)->pmap_pt_attr)
276 #define pmap_get_pt_ops(pmap) ((pmap)->pmap_pt_attr->pta_ops)
277 #else /* !ARM_PARAMETERIZED_PMAP */
278 /* The page table attributes are fixed (to allow for const propagation) */
279 #define pmap_get_pt_attr(pmap) (native_pt_attr)
280 #define pmap_get_pt_ops(pmap) (&native_pt_ops)
281 #endif /* !ARM_PARAMETERIZED_PMAP */
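/*
 * Illustrative sketch, not part of the original source: the usual pattern is
 * to capture the attribute pointer once per operation and derive page-table
 * geometry from it, e.g.:
 *
 *     const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
 *     vm_map_address_t twig_start = va & ~pt_attr_twig_offmask(pt_attr);
 *
 * In the !ARM_PARAMETERIZED_PMAP configuration the macro expands to the
 * fixed native_pt_attr/native_pt_ops, so the accessors below can be
 * constant-folded by the compiler.
 */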
282
283 #if (__ARM_VMSA__ > 7)
284 static inline uint64_t
285 pt_attr_ln_size(const pt_attr_t * const pt_attr, unsigned int level)
286 {
287 return pt_attr->pta_level_info[level].size;
288 }
289
290 __unused static inline uint64_t
291 pt_attr_ln_shift(const pt_attr_t * const pt_attr, unsigned int level)
292 {
293 return pt_attr->pta_level_info[level].shift;
294 }
295
296 __unused static inline uint64_t
297 pt_attr_ln_offmask(const pt_attr_t * const pt_attr, unsigned int level)
298 {
299 return pt_attr->pta_level_info[level].offmask;
300 }
301
302 static inline unsigned int
303 pt_attr_twig_level(const pt_attr_t * const pt_attr)
304 {
305 return pt_attr->pta_max_level - 1;
306 }
307
308 static inline unsigned int
309 pt_attr_root_level(const pt_attr_t * const pt_attr)
310 {
311 return pt_attr->pta_root_level;
312 }
313
314 static __unused inline uint64_t
315 pt_attr_leaf_size(const pt_attr_t * const pt_attr)
316 {
317 return pt_attr->pta_level_info[pt_attr->pta_max_level].size;
318 }
319
320 static __unused inline uint64_t
321 pt_attr_leaf_offmask(const pt_attr_t * const pt_attr)
322 {
323 return pt_attr->pta_level_info[pt_attr->pta_max_level].offmask;
324 }
325
326 static inline uint64_t
327 pt_attr_leaf_shift(const pt_attr_t * const pt_attr)
328 {
329 return pt_attr->pta_level_info[pt_attr->pta_max_level].shift;
330 }
331
332 static __unused inline uint64_t
333 pt_attr_leaf_index_mask(const pt_attr_t * const pt_attr)
334 {
335 return pt_attr->pta_level_info[pt_attr->pta_max_level].index_mask;
336 }
337
338 static inline uint64_t
339 pt_attr_twig_size(const pt_attr_t * const pt_attr)
340 {
341 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].size;
342 }
343
344 static inline uint64_t
345 pt_attr_twig_offmask(const pt_attr_t * const pt_attr)
346 {
347 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].offmask;
348 }
349
350 static inline uint64_t
351 pt_attr_twig_shift(const pt_attr_t * const pt_attr)
352 {
353 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].shift;
354 }
355
356 static __unused inline uint64_t
357 pt_attr_twig_index_mask(const pt_attr_t * const pt_attr)
358 {
359 return pt_attr->pta_level_info[pt_attr->pta_max_level - 1].index_mask;
360 }
361
362 static inline uint64_t
363 pt_attr_leaf_table_size(const pt_attr_t * const pt_attr)
364 {
365 return pt_attr_twig_size(pt_attr);
366 }
367
368 static inline uint64_t
369 pt_attr_leaf_table_offmask(const pt_attr_t * const pt_attr)
370 {
371 return pt_attr_twig_offmask(pt_attr);
372 }
373
374 static inline uintptr_t
375 pt_attr_leaf_rw(const pt_attr_t * const pt_attr)
376 {
377 return pt_attr->ap_rw;
378 }
379
380 static inline uintptr_t
381 pt_attr_leaf_ro(const pt_attr_t * const pt_attr)
382 {
383 return pt_attr->ap_ro;
384 }
385
386 static inline uintptr_t
387 pt_attr_leaf_rona(const pt_attr_t * const pt_attr)
388 {
389 return pt_attr->ap_rona;
390 }
391
392 static inline uintptr_t
393 pt_attr_leaf_rwna(const pt_attr_t * const pt_attr)
394 {
395 return pt_attr->ap_rwna;
396 }
397
398 static inline uintptr_t
399 pt_attr_leaf_xn(const pt_attr_t * const pt_attr)
400 {
401 return pt_attr->ap_xn;
402 }
403
404 static inline uintptr_t
405 pt_attr_leaf_x(const pt_attr_t * const pt_attr)
406 {
407 return pt_attr->ap_x;
408 }
409
410 #else /* (__ARM_VMSA__ > 7) */
411
412 static inline unsigned int
413 pt_attr_twig_level(__unused const pt_attr_t * const pt_attr)
414 {
415 return PMAP_TT_L1_LEVEL;
416 }
417
418 static inline uint64_t
419 pt_attr_twig_size(__unused const pt_attr_t * const pt_attr)
420 {
421 return ARM_TT_TWIG_SIZE;
422 }
423
424 static inline uint64_t
425 pt_attr_twig_offmask(__unused const pt_attr_t * const pt_attr)
426 {
427 return ARM_TT_TWIG_OFFMASK;
428 }
429
430 static inline uint64_t
431 pt_attr_twig_shift(__unused const pt_attr_t * const pt_attr)
432 {
433 return ARM_TT_TWIG_SHIFT;
434 }
435
436 static __unused inline uint64_t
437 pt_attr_twig_index_mask(__unused const pt_attr_t * const pt_attr)
438 {
439 return ARM_TT_TWIG_INDEX_MASK;
440 }
441
442 __unused static inline uint64_t
443 pt_attr_leaf_size(__unused const pt_attr_t * const pt_attr)
444 {
445 return ARM_TT_LEAF_SIZE;
446 }
447
448 __unused static inline uint64_t
449 pt_attr_leaf_offmask(__unused const pt_attr_t * const pt_attr)
450 {
451 return ARM_TT_LEAF_OFFMASK;
452 }
453
454 static inline uint64_t
455 pt_attr_leaf_shift(__unused const pt_attr_t * const pt_attr)
456 {
457 return ARM_TT_LEAF_SHIFT;
458 }
459
460 static __unused inline uint64_t
461 pt_attr_leaf_index_mask(__unused const pt_attr_t * const pt_attr)
462 {
463 return ARM_TT_LEAF_INDEX_MASK;
464 }
465
466 static inline uint64_t
467 pt_attr_leaf_table_size(__unused const pt_attr_t * const pt_attr)
468 {
469 return ARM_TT_L1_PT_SIZE;
470 }
471
472 static inline uint64_t
473 pt_attr_leaf_table_offmask(__unused const pt_attr_t * const pt_attr)
474 {
475 return ARM_TT_L1_PT_OFFMASK;
476 }
477
478 static inline uintptr_t
479 pt_attr_leaf_rw(__unused const pt_attr_t * const pt_attr)
480 {
481 return ARM_PTE_AP(AP_RWRW);
482 }
483
484 static inline uintptr_t
485 pt_attr_leaf_ro(__unused const pt_attr_t * const pt_attr)
486 {
487 return ARM_PTE_AP(AP_RORO);
488 }
489
490 static inline uintptr_t
491 pt_attr_leaf_rona(__unused const pt_attr_t * const pt_attr)
492 {
493 return ARM_PTE_AP(AP_RONA);
494 }
495
496 static inline uintptr_t
497 pt_attr_leaf_rwna(__unused const pt_attr_t * const pt_attr)
498 {
499 return ARM_PTE_AP(AP_RWNA);
500 }
501
502 static inline uintptr_t
503 pt_attr_leaf_xn(__unused const pt_attr_t * const pt_attr)
504 {
505 return ARM_PTE_NX;
506 }
507
508 #endif /* (__ARM_VMSA__ > 7) */
509
510 static inline void
511 pmap_sync_tlb(bool strong __unused)
512 {
513 sync_tlb_flush();
514 }
515
516 #if MACH_ASSERT
517 int vm_footprint_suspend_allowed = 1;
518
519 extern int pmap_ledgers_panic;
520 extern int pmap_ledgers_panic_leeway;
521
522 int pmap_stats_assert = 1;
523 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...) \
524 MACRO_BEGIN \
525 if (pmap_stats_assert && (pmap)->pmap_stats_assert) \
526 assertf(cond, fmt, ##__VA_ARGS__); \
527 MACRO_END
528 #else /* MACH_ASSERT */
529 #define PMAP_STATS_ASSERTF(cond, pmap, fmt, ...)
530 #endif /* MACH_ASSERT */
531
532 #if DEVELOPMENT || DEBUG
533 #define PMAP_FOOTPRINT_SUSPENDED(pmap) \
534 (current_thread()->pmap_footprint_suspended)
535 #else /* DEVELOPMENT || DEBUG */
536 #define PMAP_FOOTPRINT_SUSPENDED(pmap) (FALSE)
537 #endif /* DEVELOPMENT || DEBUG */
538
539
540
541 #define pmap_ledger_debit(p, e, a) ledger_debit((p)->ledger, e, a)
542 #define pmap_ledger_credit(p, e, a) ledger_credit((p)->ledger, e, a)
543
544
545 #if DEVELOPMENT || DEBUG
546 int panic_on_unsigned_execute = 0;
547 #endif /* DEVELOPMENT || DEBUG */
548
549
550 /* Virtual memory region for early allocation */
551 #if (__ARM_VMSA__ == 7)
552 #define VREGION1_HIGH_WINDOW (0)
553 #else
554 #define VREGION1_HIGH_WINDOW (PE_EARLY_BOOT_VA)
555 #endif
556 #define VREGION1_START ((VM_MAX_KERNEL_ADDRESS & CPUWINDOWS_BASE_MASK) - VREGION1_HIGH_WINDOW)
557 #define VREGION1_SIZE (trunc_page(VM_MAX_KERNEL_ADDRESS - (VREGION1_START)))
558
559 extern unsigned int not_in_kdp;
560
561 extern vm_offset_t first_avail;
562
563 extern pmap_paddr_t avail_start;
564 extern pmap_paddr_t avail_end;
565
566 extern vm_offset_t virtual_space_start; /* Next available kernel VA */
567 extern vm_offset_t virtual_space_end; /* End of kernel address space */
568 extern vm_offset_t static_memory_end;
569
570 extern int maxproc, hard_maxproc;
571
572 #if (__ARM_VMSA__ > 7)
573 /* The number of address bits one TTBR can cover. */
574 #define PGTABLE_ADDR_BITS (64ULL - T0SZ_BOOT)
575
576 /*
577 * The bounds on our TTBRs. These are for sanity checking that
578 * an address is accessible by a TTBR before we attempt to map it.
579 */
580 #define ARM64_TTBR0_MIN_ADDR (0ULL)
581 #define ARM64_TTBR0_MAX_ADDR (0ULL + (1ULL << PGTABLE_ADDR_BITS) - 1)
582 #define ARM64_TTBR1_MIN_ADDR (0ULL - (1ULL << PGTABLE_ADDR_BITS))
583 #define ARM64_TTBR1_MAX_ADDR (~0ULL)
584
585 /* The level of the root of a page table. */
586 const uint64_t arm64_root_pgtable_level = (3 - ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) / (ARM_PGSHIFT - TTE_SHIFT)));
587
588 /* The number of entries in the root TT of a page table. */
589 const uint64_t arm64_root_pgtable_num_ttes = (2 << ((PGTABLE_ADDR_BITS - 1 - ARM_PGSHIFT) % (ARM_PGSHIFT - TTE_SHIFT)));
590 #else
591 const uint64_t arm64_root_pgtable_level = 0;
592 const uint64_t arm64_root_pgtable_num_ttes = 0;
593 #endif
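/*
 * Worked example, illustrative and not part of the original source, assuming
 * 4K pages (ARM_PGSHIFT == 12), 8-byte TTEs (TTE_SHIFT == 3) and
 * T0SZ_BOOT == 25, i.e. PGTABLE_ADDR_BITS == 39:
 *
 *     bits resolved per level     = ARM_PGSHIFT - TTE_SHIFT           = 9
 *     arm64_root_pgtable_level    = 3 - ((39 - 1 - 12) / 9) = 3 - 2   = 1
 *     arm64_root_pgtable_num_ttes = 2 << ((39 - 1 - 12) % 9) = 2 << 8 = 512
 *
 * i.e. the root is an L1 table of 512 entries, each covering 1GB, spanning
 * the full 39-bit (512GB) TTBR range.
 */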
594
595 struct pmap kernel_pmap_store MARK_AS_PMAP_DATA;
596 SECURITY_READ_ONLY_LATE(pmap_t) kernel_pmap = &kernel_pmap_store;
597
598 struct vm_object pmap_object_store __attribute__((aligned(VM_PACKED_POINTER_ALIGNMENT))); /* store pt pages */
599 vm_object_t pmap_object = &pmap_object_store;
600
601 static struct zone *pmap_zone; /* zone of pmap structures */
602
603 decl_simple_lock_data(, pmaps_lock MARK_AS_PMAP_DATA);
604 decl_simple_lock_data(, tt1_lock MARK_AS_PMAP_DATA);
605 unsigned int pmap_stamp MARK_AS_PMAP_DATA;
606 queue_head_t map_pmap_list MARK_AS_PMAP_DATA;
607
608 decl_simple_lock_data(, pt_pages_lock MARK_AS_PMAP_DATA);
609 queue_head_t pt_page_list MARK_AS_PMAP_DATA; /* pt page ptd entries list */
610
611 decl_simple_lock_data(, pmap_pages_lock MARK_AS_PMAP_DATA);
612
613 typedef struct page_free_entry {
614 struct page_free_entry *next;
615 } page_free_entry_t;
616
617 #define PAGE_FREE_ENTRY_NULL ((page_free_entry_t *) 0)
618
619 page_free_entry_t *pmap_pages_reclaim_list MARK_AS_PMAP_DATA; /* Reclaimed pt page list */
620 unsigned int pmap_pages_request_count MARK_AS_PMAP_DATA; /* Pending requests to reclaim pt page */
621 unsigned long long pmap_pages_request_acum MARK_AS_PMAP_DATA;
622
623
624 typedef struct tt_free_entry {
625 struct tt_free_entry *next;
626 } tt_free_entry_t;
627
628 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
629
630 tt_free_entry_t *free_page_size_tt_list MARK_AS_PMAP_DATA;
631 unsigned int free_page_size_tt_count MARK_AS_PMAP_DATA;
632 unsigned int free_page_size_tt_max MARK_AS_PMAP_DATA;
633 #define FREE_PAGE_SIZE_TT_MAX 4
634 tt_free_entry_t *free_two_page_size_tt_list MARK_AS_PMAP_DATA;
635 unsigned int free_two_page_size_tt_count MARK_AS_PMAP_DATA;
636 unsigned int free_two_page_size_tt_max MARK_AS_PMAP_DATA;
637 #define FREE_TWO_PAGE_SIZE_TT_MAX 4
638 tt_free_entry_t *free_tt_list MARK_AS_PMAP_DATA;
639 unsigned int free_tt_count MARK_AS_PMAP_DATA;
640 unsigned int free_tt_max MARK_AS_PMAP_DATA;
641
642 #define TT_FREE_ENTRY_NULL ((tt_free_entry_t *) 0)
643
644 boolean_t pmap_gc_allowed MARK_AS_PMAP_DATA = TRUE;
645 boolean_t pmap_gc_forced MARK_AS_PMAP_DATA = FALSE;
646 boolean_t pmap_gc_allowed_by_time_throttle = TRUE;
647
648 unsigned int inuse_user_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf user pagetable pages, in units of PAGE_SIZE */
649 unsigned int inuse_user_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf user pagetable pages, in units of PAGE_SIZE */
650 unsigned int inuse_user_tteroot_count MARK_AS_PMAP_DATA = 0; /* root user pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
651 unsigned int inuse_kernel_ttepages_count MARK_AS_PMAP_DATA = 0; /* non-root, non-leaf kernel pagetable pages, in units of PAGE_SIZE */
652 unsigned int inuse_kernel_ptepages_count MARK_AS_PMAP_DATA = 0; /* leaf kernel pagetable pages, in units of PAGE_SIZE */
653 unsigned int inuse_kernel_tteroot_count MARK_AS_PMAP_DATA = 0; /* root kernel pagetables, in units of PMAP_ROOT_ALLOC_SIZE */
654 unsigned int inuse_pmap_pages_count = 0; /* debugging */
655
656 SECURITY_READ_ONLY_LATE(tt_entry_t *) invalid_tte = 0;
657 SECURITY_READ_ONLY_LATE(pmap_paddr_t) invalid_ttep = 0;
658
659 SECURITY_READ_ONLY_LATE(tt_entry_t *) cpu_tte = 0; /* set by arm_vm_init() - keep out of bss */
660 SECURITY_READ_ONLY_LATE(pmap_paddr_t) cpu_ttep = 0; /* set by arm_vm_init() - phys tte addr */
661
662 #if DEVELOPMENT || DEBUG
663 int nx_enabled = 1; /* enable no-execute protection */
664 int allow_data_exec = 0; /* No apps may execute data */
665 int allow_stack_exec = 0; /* No apps may execute from the stack */
666 unsigned long pmap_asid_flushes MARK_AS_PMAP_DATA = 0;
667 #else /* DEVELOPMENT || DEBUG */
668 const int nx_enabled = 1; /* enable no-execute protection */
669 const int allow_data_exec = 0; /* No apps may execute data */
670 const int allow_stack_exec = 0; /* No apps may execute from the stack */
671 #endif /* DEVELOPMENT || DEBUG */
672
673 /*
674 * pv_entry_t - structure to track the active mappings for a given page
675 */
676 typedef struct pv_entry {
677 struct pv_entry *pve_next; /* next alias */
678 pt_entry_t *pve_ptep; /* page table entry */
679 }
680 #if __arm__ && (__BIGGEST_ALIGNMENT__ > 4)
681 /* For the newer ARMv7k ABI, where 64-bit types are 64-bit aligned but pointers
682 * are 32-bit:
683 * since pt_desc is 64-bit aligned and we often cast from pv_entry to
684 * pt_desc, pv_entry must be 8-byte aligned as well.
685 */
686 __attribute__ ((aligned(8))) pv_entry_t;
687 #else
688 pv_entry_t;
689 #endif
690
691 #define PV_ENTRY_NULL ((pv_entry_t *) 0)
692
693 /*
694 * PMAP LEDGERS:
695 * We use the least significant bit of the "pve_next" pointer in a "pv_entry"
696 * as a marker for pages mapped through an "alternate accounting" mapping.
697 * These macros set, clear and test for this marker and extract the actual
698 * value of the "pve_next" pointer.
699 */
700 #define PVE_NEXT_ALTACCT ((uintptr_t) 0x1)
701 #define PVE_NEXT_SET_ALTACCT(pve_next_p) \
702 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) | \
703 PVE_NEXT_ALTACCT)
704 #define PVE_NEXT_CLR_ALTACCT(pve_next_p) \
705 *(pve_next_p) = (struct pv_entry *) (((uintptr_t) *(pve_next_p)) & \
706 ~PVE_NEXT_ALTACCT)
707 #define PVE_NEXT_IS_ALTACCT(pve_next) \
708 ((((uintptr_t) (pve_next)) & PVE_NEXT_ALTACCT) ? TRUE : FALSE)
709 #define PVE_NEXT_PTR(pve_next) \
710 ((struct pv_entry *)(((uintptr_t) (pve_next)) & \
711 ~PVE_NEXT_ALTACCT))
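/*
 * Illustrative sketch, not part of the original source: pv_entry_t is at
 * least 4-byte aligned, so bit 0 of "pve_next" is free to carry the
 * "alternate accounting" marker; consumers must strip it before following
 * the link:
 *
 *     pv_entry_t *next = PVE_NEXT_PTR(pve->pve_next);    // usable pointer
 *     if (PVE_NEXT_IS_ALTACCT(pve->pve_next)) {
 *             // this mapping is charged to the alternate ledger
 *     }
 */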
712 #if MACH_ASSERT
713 static void pmap_check_ledgers(pmap_t pmap);
714 #else
715 static inline void
716 pmap_check_ledgers(__unused pmap_t pmap)
717 {
718 }
719 #endif /* MACH_ASSERT */
720
721 SECURITY_READ_ONLY_LATE(pv_entry_t * *) pv_head_table; /* array of pv entry pointers */
722
723 pv_entry_t *pv_free_list MARK_AS_PMAP_DATA;
724 pv_entry_t *pv_kern_free_list MARK_AS_PMAP_DATA;
725 decl_simple_lock_data(, pv_free_list_lock MARK_AS_PMAP_DATA);
726 decl_simple_lock_data(, pv_kern_free_list_lock MARK_AS_PMAP_DATA);
727
728 decl_simple_lock_data(, phys_backup_lock);
729
730 /*
731 * pt_desc - structure to keep info on pages assigned to page tables
732 */
733 #if (__ARM_VMSA__ == 7)
734 #define PT_INDEX_MAX 1
735 #else
736 #if (ARM_PGSHIFT == 14)
737 #define PT_INDEX_MAX 1
738 #else
739 #define PT_INDEX_MAX 4
740 #endif
741 #endif
742
743 #define PT_DESC_REFCOUNT 0x4000U
744 #define PT_DESC_IOMMU_REFCOUNT 0x8000U
745
746 typedef struct pt_desc {
747 queue_chain_t pt_page;
748 union {
749 struct pmap *pmap;
750 };
751 /*
752 * Locate this struct towards the end of the pt_desc; our long term
753 * goal is to make this a VLA to avoid wasting memory if we don't need
754 * multiple entries.
755 */
756 struct {
757 /*
758 * For non-leaf pagetables, should always be PT_DESC_REFCOUNT
759 * For leaf pagetables, should reflect the number of non-empty PTEs
760 * For IOMMU pages, should always be PT_DESC_IOMMU_REFCOUNT
761 */
762 unsigned short refcnt;
763 /*
764 * For non-leaf pagetables, should be 0
765 * For leaf pagetables, should reflect the number of wired entries
766 * For IOMMU pages, may optionally reflect a driver-defined refcount (IOMMU operations are implicitly wired)
767 */
768 unsigned short wiredcnt;
769 vm_offset_t va;
770 } ptd_info[PT_INDEX_MAX];
771 } pt_desc_t;
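/*
 * Illustrative examples of the refcnt/wiredcnt conventions described above
 * (not part of the original source):
 *
 *     non-leaf (e.g. L2) table page:        refcnt == PT_DESC_REFCOUNT
 *     leaf table with 3 valid PTEs,
 *       one of which is wired:              refcnt == 3, wiredcnt == 1
 *     page owned by an IOMMU:               refcnt == PT_DESC_IOMMU_REFCOUNT
 */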
772
773
774 #define PTD_ENTRY_NULL ((pt_desc_t *) 0)
775
776 SECURITY_READ_ONLY_LATE(pt_desc_t *) ptd_root_table;
777
778 pt_desc_t *ptd_free_list MARK_AS_PMAP_DATA = PTD_ENTRY_NULL;
779 SECURITY_READ_ONLY_LATE(boolean_t) ptd_preboot = TRUE;
780 unsigned int ptd_free_count MARK_AS_PMAP_DATA = 0;
781 decl_simple_lock_data(, ptd_free_list_lock MARK_AS_PMAP_DATA);
782
783 /*
784 * physical page attribute
785 */
786 typedef u_int16_t pp_attr_t;
787
788 #define PP_ATTR_WIMG_MASK 0x003F
789 #define PP_ATTR_WIMG(x) ((x) & PP_ATTR_WIMG_MASK)
790
791 #define PP_ATTR_REFERENCED 0x0040
792 #define PP_ATTR_MODIFIED 0x0080
793
794 #define PP_ATTR_INTERNAL 0x0100
795 #define PP_ATTR_REUSABLE 0x0200
796 #define PP_ATTR_ALTACCT 0x0400
797 #define PP_ATTR_NOENCRYPT 0x0800
798
799 #define PP_ATTR_REFFAULT 0x1000
800 #define PP_ATTR_MODFAULT 0x2000
801
802
803 SECURITY_READ_ONLY_LATE(pp_attr_t*) pp_attr_table;
804
805 typedef struct pmap_io_range {
806 uint64_t addr;
807 uint64_t len;
808 #define PMAP_IO_RANGE_STRONG_SYNC (1UL << 31) // Strong DSB required for pages in this range
809 uint32_t wimg; // lower 16 bits treated as pp_attr_t, upper 16 bits contain additional mapping flags
810 uint32_t signature; // 4CC
811 } __attribute__((packed)) pmap_io_range_t;
812
813 SECURITY_READ_ONLY_LATE(pmap_io_range_t*) io_attr_table;
814
815 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_first_phys = (pmap_paddr_t) 0;
816 SECURITY_READ_ONLY_LATE(pmap_paddr_t) vm_last_phys = (pmap_paddr_t) 0;
817
818 SECURITY_READ_ONLY_LATE(unsigned int) num_io_rgns = 0;
819
820 SECURITY_READ_ONLY_LATE(boolean_t) pmap_initialized = FALSE; /* Has pmap_init completed? */
821
822 SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_min;
823 SECURITY_READ_ONLY_LATE(uint64_t) pmap_nesting_size_max;
824
825 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm_pmap_max_offset_default = 0x0;
826 #if defined(__arm64__)
827 SECURITY_READ_ONLY_LATE(vm_map_offset_t) arm64_pmap_max_offset_default = 0x0;
828 #endif
829
830 #define PMAP_MAX_SW_ASID ((MAX_ASID + MAX_HW_ASID - 1) / MAX_HW_ASID)
831 _Static_assert(PMAP_MAX_SW_ASID <= (UINT8_MAX + 1),
832 "VASID bits can't be represented by an 8-bit integer");
833
834 decl_simple_lock_data(, asid_lock MARK_AS_PMAP_DATA);
835 static bitmap_t asid_bitmap[BITMAP_LEN(MAX_ASID)] MARK_AS_PMAP_DATA;
836
837
838 #if (__ARM_VMSA__ > 7)
839 SECURITY_READ_ONLY_LATE(pmap_t) sharedpage_pmap;
840 #endif
841
842
843 #define pa_index(pa) \
844 (atop((pa) - vm_first_phys))
845
846 #define pai_to_pvh(pai) \
847 (&pv_head_table[pai])
848
849 #define pa_valid(x) \
850 ((x) >= vm_first_phys && (x) < vm_last_phys)
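/*
 * Illustrative sketch, not part of the original source: a managed physical
 * address is mapped to its per-page metadata as follows:
 *
 *     if (pa_valid(pa)) {
 *             unsigned int pai = (unsigned int)pa_index(pa);
 *             pv_entry_t **pvh = pai_to_pvh(pai);      // PV head slot
 *             pp_attr_t attr   = pp_attr_table[pai];   // attribute bits
 *     }
 *
 * Addresses outside [vm_first_phys, vm_last_phys) are unmanaged (e.g. I/O)
 * and have no pv_head_table / pp_attr_table entry.
 */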
851
852 /* PTE Define Macros */
853
854 #define pte_is_wired(pte) \
855 (((pte) & ARM_PTE_WIRED_MASK) == ARM_PTE_WIRED)
856
857 #define pte_set_wired(ptep, wired) \
858 do { \
859 SInt16 *ptd_wiredcnt_ptr; \
860 ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(ptep)->ptd_info[ARM_PT_DESC_INDEX(ptep)].wiredcnt); \
861 if (wired) { \
862 *ptep |= ARM_PTE_WIRED; \
863 OSAddAtomic16(1, ptd_wiredcnt_ptr); \
864 } else { \
865 *ptep &= ~ARM_PTE_WIRED; \
866 OSAddAtomic16(-1, ptd_wiredcnt_ptr); \
867 } \
868 } while(0)
869
870 #define pte_was_writeable(pte) \
871 (((pte) & ARM_PTE_WRITEABLE) == ARM_PTE_WRITEABLE)
872
873 #define pte_set_was_writeable(pte, was_writeable) \
874 do { \
875 if ((was_writeable)) { \
876 (pte) |= ARM_PTE_WRITEABLE; \
877 } else { \
878 (pte) &= ~ARM_PTE_WRITEABLE; \
879 } \
880 } while(0)
881
882 /* PVE Define Macros */
883
884 #define pve_next(pve) \
885 ((pve)->pve_next)
886
887 #define pve_link_field(pve) \
888 (&pve_next(pve))
889
890 #define pve_link(pp, e) \
891 ((pve_next(e) = pve_next(pp)), (pve_next(pp) = (e)))
892
893 #define pve_unlink(pp, e) \
894 (pve_next(pp) = pve_next(e))
895
896 /* bits held in the ptep pointer field */
897
898 #define pve_get_ptep(pve) \
899 ((pve)->pve_ptep)
900
901 #define pve_set_ptep(pve, ptep_new) \
902 do { \
903 (pve)->pve_ptep = (ptep_new); \
904 } while (0)
905
906 /* PTEP Define Macros */
907
908 /* mask for page descriptor index */
909 #define ARM_TT_PT_INDEX_MASK ARM_PGMASK
910
911 #if (__ARM_VMSA__ == 7)
912 #define ARM_PT_DESC_INDEX_MASK 0x00000
913 #define ARM_PT_DESC_INDEX_SHIFT 0
914
915 /*
916 * Shift value used for reconstructing the virtual address for a PTE.
917 */
918 #define ARM_TT_PT_ADDR_SHIFT (10U)
919
920 #define ptep_get_va(ptep) \
921 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
922
923 #define ptep_get_pmap(ptep) \
924 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
925
926 #else
927
928 #if (ARM_PGSHIFT == 12)
929 #define ARM_PT_DESC_INDEX_MASK ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0x00000ULL : 0x03000ULL)
930 #define ARM_PT_DESC_INDEX_SHIFT ((PAGE_SHIFT_CONST == ARM_PGSHIFT )? 0 : 12)
931 /*
932 * Shift value used for reconstructing the virtual address for a PTE.
933 */
934 #define ARM_TT_PT_ADDR_SHIFT (9ULL)
935 #else
936
937 #define ARM_PT_DESC_INDEX_MASK (0x00000)
938 #define ARM_PT_DESC_INDEX_SHIFT (0)
939 /*
940 * Shift value used for reconstructing the virtual address for a PTE.
941 */
942 #define ARM_TT_PT_ADDR_SHIFT (11ULL)
943 #endif
944
945
946 #define ARM_PT_DESC_INDEX(ptep) \
947 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
948
949 #define ptep_get_va(ptep) \
950 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->ptd_info[ARM_PT_DESC_INDEX(ptep)].va)+ ((((unsigned)(ptep)) & ARM_TT_PT_INDEX_MASK)<<ARM_TT_PT_ADDR_SHIFT))
951
952 #define ptep_get_pmap(ptep) \
953 ((((pt_desc_t *) (pvh_list(pai_to_pvh(pa_index(ml_static_vtop((((vm_offset_t)(ptep) & ~ARM_PGMASK))))))))->pmap))
954
955 #endif
956
957 #define ARM_PT_DESC_INDEX(ptep) \
958 (((unsigned)(ptep) & ARM_PT_DESC_INDEX_MASK) >> ARM_PT_DESC_INDEX_SHIFT)
959
960 #define ptep_get_ptd(ptep) \
961 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)(ptep)))))))
962
963
964 /* PVH Define Macros */
965
966 /* pvhead type */
967 #define PVH_TYPE_NULL 0x0UL
968 #define PVH_TYPE_PVEP 0x1UL
969 #define PVH_TYPE_PTEP 0x2UL
970 #define PVH_TYPE_PTDP 0x3UL
971
972 #define PVH_TYPE_MASK (0x3UL)
973
974 #ifdef __arm64__
975
976 /* All flags listed below are stored in the PV head pointer unless otherwise noted */
977 #define PVH_FLAG_IOMMU 0x4UL /* Stored in each PTE, or in PV head for single-PTE PV heads */
978 #define PVH_FLAG_IOMMU_TABLE (1ULL << 63) /* Stored in each PTE, or in PV head for single-PTE PV heads */
979 #define PVH_FLAG_CPU (1ULL << 62)
980 #define PVH_LOCK_BIT 61
981 #define PVH_FLAG_LOCK (1ULL << PVH_LOCK_BIT)
982 #define PVH_FLAG_EXEC (1ULL << 60)
983 #define PVH_FLAG_LOCKDOWN (1ULL << 59)
984 #define PVH_HIGH_FLAGS (PVH_FLAG_CPU | PVH_FLAG_LOCK | PVH_FLAG_EXEC | PVH_FLAG_LOCKDOWN)
985
986 #else /* !__arm64__ */
987
988 #define PVH_LOCK_BIT 31
989 #define PVH_FLAG_LOCK (1UL << PVH_LOCK_BIT)
990 #define PVH_HIGH_FLAGS PVH_FLAG_LOCK
991
992 #endif
993
994 #define PVH_LIST_MASK (~PVH_TYPE_MASK)
995
996 #define pvh_test_type(h, b) \
997 ((*(vm_offset_t *)(h) & (PVH_TYPE_MASK)) == (b))
998
999 #define pvh_ptep(h) \
1000 ((pt_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
1001
1002 #define pvh_list(h) \
1003 ((pv_entry_t *)((*(vm_offset_t *)(h) & PVH_LIST_MASK) | PVH_HIGH_FLAGS))
1004
1005 #define pvh_get_flags(h) \
1006 (*(vm_offset_t *)(h) & PVH_HIGH_FLAGS)
1007
1008 #define pvh_set_flags(h, f) \
1009 do { \
1010 os_atomic_store((vm_offset_t *)(h), (*(vm_offset_t *)(h) & ~PVH_HIGH_FLAGS) | (f), \
1011 relaxed); \
1012 } while (0)
1013
1014 #define pvh_update_head(h, e, t) \
1015 do { \
1016 assert(*(vm_offset_t *)(h) & PVH_FLAG_LOCK); \
1017 os_atomic_store((vm_offset_t *)(h), (vm_offset_t)(e) | (t) | PVH_FLAG_LOCK, \
1018 relaxed); \
1019 } while (0)
1020
1021 #define pvh_update_head_unlocked(h, e, t) \
1022 do { \
1023 assert(!(*(vm_offset_t *)(h) & PVH_FLAG_LOCK)); \
1024 *(vm_offset_t *)(h) = ((vm_offset_t)(e) | (t)) & ~PVH_FLAG_LOCK; \
1025 } while (0)
1026
1027 #define pvh_add(h, e) \
1028 do { \
1029 assert(!pvh_test_type((h), PVH_TYPE_PTEP)); \
1030 pve_next(e) = pvh_list(h); \
1031 pvh_update_head((h), (e), PVH_TYPE_PVEP); \
1032 } while (0)
1033
1034 #define pvh_remove(h, p, e) \
1035 do { \
1036 assert(!PVE_NEXT_IS_ALTACCT(pve_next((e)))); \
1037 if ((p) == (h)) { \
1038 if (PVE_NEXT_PTR(pve_next((e))) == PV_ENTRY_NULL) { \
1039 pvh_update_head((h), PV_ENTRY_NULL, PVH_TYPE_NULL); \
1040 } else { \
1041 pvh_update_head((h), PVE_NEXT_PTR(pve_next((e))), PVH_TYPE_PVEP); \
1042 } \
1043 } else { \
1044 /* \
1045 * PMAP LEDGERS: \
1046 * preserve the "alternate accounting" bit \
1047 * when updating "p" (the previous entry's \
1048 * "pve_next"). \
1049 */ \
1050 boolean_t __is_altacct; \
1051 __is_altacct = PVE_NEXT_IS_ALTACCT(*(p)); \
1052 *(p) = PVE_NEXT_PTR(pve_next((e))); \
1053 if (__is_altacct) { \
1054 PVE_NEXT_SET_ALTACCT((p)); \
1055 } else { \
1056 PVE_NEXT_CLR_ALTACCT((p)); \
1057 } \
1058 } \
1059 } while (0)
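/*
 * Illustrative sketch, not part of the original source: each pv_head_table
 * entry packs a type tag in its low two bits and flag bits (lock, exec, ...)
 * in its high bits, with the pointer payload in between, so readers dispatch
 * on the type first:
 *
 *     vm_offset_t *pvh = (vm_offset_t *)pai_to_pvh(pai);
 *     if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
 *             pt_entry_t *ptep = pvh_ptep(pvh);        // single mapping
 *     } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
 *             pv_entry_t *pvep = pvh_list(pvh);        // list of mappings
 *     }
 *
 * pvh_ptep()/pvh_list() OR PVH_HIGH_FLAGS back in; on arm64 those bits
 * overlap the all-ones top bits of a kernel VA, so this yields a usable
 * kernel pointer again.
 */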
1060
1061
1062 /* PPATTR Define Macros */
1063
1064 #define ppattr_set_bits(h, b) \
1065 do { \
1066 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) | (b), (pp_attr_t *)(h))); \
1067 } while (0)
1068
1069 #define ppattr_clear_bits(h, b) \
1070 do { \
1071 while (!OSCompareAndSwap16(*(pp_attr_t *)(h), *(pp_attr_t *)(h) & ~(b), (pp_attr_t *)(h))); \
1072 } while (0)
1073
1074 #define ppattr_test_bits(h, b) \
1075 ((*(pp_attr_t *)(h) & (b)) == (b))
1076
1077 #define pa_set_bits(x, b) \
1078 do { \
1079 if (pa_valid(x)) \
1080 ppattr_set_bits(&pp_attr_table[pa_index(x)], \
1081 (b)); \
1082 } while (0)
1083
1084 #define pa_test_bits(x, b) \
1085 (pa_valid(x) ? ppattr_test_bits(&pp_attr_table[pa_index(x)],\
1086 (b)) : FALSE)
1087
1088 #define pa_clear_bits(x, b) \
1089 do { \
1090 if (pa_valid(x)) \
1091 ppattr_clear_bits(&pp_attr_table[pa_index(x)], \
1092 (b)); \
1093 } while (0)
1094
1095 #define pa_set_modify(x) \
1096 pa_set_bits(x, PP_ATTR_MODIFIED)
1097
1098 #define pa_clear_modify(x) \
1099 pa_clear_bits(x, PP_ATTR_MODIFIED)
1100
1101 #define pa_set_reference(x) \
1102 pa_set_bits(x, PP_ATTR_REFERENCED)
1103
1104 #define pa_clear_reference(x) \
1105 pa_clear_bits(x, PP_ATTR_REFERENCED)
1106
1107
1108 #define IS_INTERNAL_PAGE(pai) \
1109 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1110 #define SET_INTERNAL_PAGE(pai) \
1111 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1112 #define CLR_INTERNAL_PAGE(pai) \
1113 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_INTERNAL)
1114
1115 #define IS_REUSABLE_PAGE(pai) \
1116 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1117 #define SET_REUSABLE_PAGE(pai) \
1118 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1119 #define CLR_REUSABLE_PAGE(pai) \
1120 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REUSABLE)
1121
1122 #define IS_ALTACCT_PAGE(pai, pve_p) \
1123 (((pve_p) == NULL) \
1124 ? ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT) \
1125 : PVE_NEXT_IS_ALTACCT(pve_next((pve_p))))
1126 #define SET_ALTACCT_PAGE(pai, pve_p) \
1127 if ((pve_p) == NULL) { \
1128 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1129 } else { \
1130 PVE_NEXT_SET_ALTACCT(&pve_next((pve_p))); \
1131 }
1132 #define CLR_ALTACCT_PAGE(pai, pve_p) \
1133 if ((pve_p) == NULL) { \
1134 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_ALTACCT); \
1135 } else { \
1136 PVE_NEXT_CLR_ALTACCT(&pve_next((pve_p))); \
1137 }
1138
1139 #define IS_REFFAULT_PAGE(pai) \
1140 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1141 #define SET_REFFAULT_PAGE(pai) \
1142 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1143 #define CLR_REFFAULT_PAGE(pai) \
1144 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_REFFAULT)
1145
1146 #define IS_MODFAULT_PAGE(pai) \
1147 ppattr_test_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1148 #define SET_MODFAULT_PAGE(pai) \
1149 ppattr_set_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1150 #define CLR_MODFAULT_PAGE(pai) \
1151 ppattr_clear_bits(&pp_attr_table[pai], PP_ATTR_MODFAULT)
1152
1153 #define tte_get_ptd(tte) \
1154 ((struct pt_desc *)(pvh_list(pai_to_pvh(pa_index((vm_offset_t)((tte) & ~PAGE_MASK))))))
1155
1156
1157 #if (__ARM_VMSA__ == 7)
1158
1159 #define tte_index(pmap, pt_attr, addr) \
1160 ttenum((addr))
1161
1162 #define pte_index(pmap, pt_attr, addr) \
1163 ptenum((addr))
1164
1165 #else
1166
1167 #define ttn_index(pmap, pt_attr, addr, pt_level) \
1168 (((addr) & (pt_attr)->pta_level_info[(pt_level)].index_mask) >> (pt_attr)->pta_level_info[(pt_level)].shift)
1169
1170 #define tt0_index(pmap, pt_attr, addr) \
1171 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L0_LEVEL)
1172
1173 #define tt1_index(pmap, pt_attr, addr) \
1174 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L1_LEVEL)
1175
1176 #define tt2_index(pmap, pt_attr, addr) \
1177 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L2_LEVEL)
1178
1179 #define tt3_index(pmap, pt_attr, addr) \
1180 ttn_index((pmap), (pt_attr), (addr), PMAP_TT_L3_LEVEL)
1181
1182 #define tte_index(pmap, pt_attr, addr) \
1183 tt2_index((pmap), (pt_attr), (addr))
1184
1185 #define pte_index(pmap, pt_attr, addr) \
1186 tt3_index((pmap), (pt_attr), (addr))
1187
1188 #endif
1189
1190 /*
1191 * Lock on pmap system
1192 */
1193
1194 lck_grp_t pmap_lck_grp;
1195
1196 #define PMAP_LOCK_INIT(pmap) { \
1197 simple_lock_init(&(pmap)->lock, 0); \
1198 }
1199
1200 #define PMAP_LOCK(pmap) { \
1201 pmap_simple_lock(&(pmap)->lock); \
1202 }
1203
1204 #define PMAP_UNLOCK(pmap) { \
1205 pmap_simple_unlock(&(pmap)->lock); \
1206 }
1207
1208 #if MACH_ASSERT
1209 #define PMAP_ASSERT_LOCKED(pmap) { \
1210 simple_lock_assert(&(pmap)->lock, LCK_ASSERT_OWNED); \
1211 }
1212 #else
1213 #define PMAP_ASSERT_LOCKED(pmap)
1214 #endif
1215
1216 #if defined(__arm64__)
1217 #define PVH_LOCK_WORD 1 /* Assumes little-endian */
1218 #else
1219 #define PVH_LOCK_WORD 0
1220 #endif
1221
1222 #define ASSERT_PVH_LOCKED(index) \
1223 do { \
1224 assert((vm_offset_t)(pv_head_table[index]) & PVH_FLAG_LOCK); \
1225 } while (0)
1226
1227 #define LOCK_PVH(index) \
1228 do { \
1229 pmap_lock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
1230 } while (0)
1231
1232 #define UNLOCK_PVH(index) \
1233 do { \
1234 ASSERT_PVH_LOCKED(index); \
1235 pmap_unlock_bit((uint32_t*)(&pv_head_table[index]) + PVH_LOCK_WORD, PVH_LOCK_BIT - (PVH_LOCK_WORD * 32)); \
1236 } while (0)
1237
1238 #define PMAP_UPDATE_TLBS(pmap, s, e, strong) { \
1239 pmap_get_pt_ops(pmap)->flush_tlb_region_async(s, (unsigned)(e - s), pmap); \
1240 pmap_sync_tlb(strong); \
1241 }
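/*
 * Illustrative sketch, not part of the original source: a typical unmap path
 * clears the PTE, publishes the update, then invalidates the TLB for the
 * affected VA range before dropping the pmap lock:
 *
 *     WRITE_PTE_STRONG(pte_p, ARM_PTE_TYPE_FAULT);
 *     PMAP_UPDATE_TLBS(pmap, va, va + PAGE_SIZE, false);
 *
 * The "strong" flag is threaded through from callers that touched
 * PMAP_IO_RANGE_STRONG_SYNC ranges (see need_strong_sync in
 * pmap_pages_reclaim()); in this revision pmap_sync_tlb() issues
 * sync_tlb_flush() either way.
 */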
1242
1243 #define FLUSH_PTE_RANGE(spte, epte) \
1244 __builtin_arm_dmb(DMB_ISH);
1245
1246 #define FLUSH_PTE(pte_p) \
1247 __builtin_arm_dmb(DMB_ISH);
1248
1249 #define FLUSH_PTE_STRONG(pte_p) \
1250 __builtin_arm_dsb(DSB_ISH);
1251
1252 #define FLUSH_PTE_RANGE_STRONG(spte, epte) \
1253 __builtin_arm_dsb(DSB_ISH);
1254
1255 #define WRITE_PTE_FAST(pte_p, pte_entry) \
1256 __unreachable_ok_push \
1257 if (TEST_PAGE_RATIO_4) { \
1258 if (((unsigned)(pte_p)) & 0x1f) { \
1259 panic("%s: WRITE_PTE_FAST is unaligned, " \
1260 "pte_p=%p, pte_entry=%p", \
1261 __FUNCTION__, \
1262 pte_p, (void*)pte_entry); \
1263 } \
1264 if (((pte_entry) & ~ARM_PTE_COMPRESSED_MASK) == ARM_PTE_EMPTY) { \
1265 *(pte_p) = (pte_entry); \
1266 *((pte_p)+1) = (pte_entry); \
1267 *((pte_p)+2) = (pte_entry); \
1268 *((pte_p)+3) = (pte_entry); \
1269 } else { \
1270 *(pte_p) = (pte_entry); \
1271 *((pte_p)+1) = (pte_entry) | 0x1000; \
1272 *((pte_p)+2) = (pte_entry) | 0x2000; \
1273 *((pte_p)+3) = (pte_entry) | 0x3000; \
1274 } \
1275 } else { \
1276 *(pte_p) = (pte_entry); \
1277 } \
1278 __unreachable_ok_pop
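/*
 * Illustrative note, not part of the original source: with TEST_PAGE_RATIO_4
 * (16K VM pages on 4K hardware pages) one logical PTE write above fans out
 * to four consecutive hardware PTEs whose output addresses step by 0x1000.
 * For example, mapping a 16K page at physical 0x80004000 stores PTEs whose
 * output addresses are 0x80004000, 0x80005000, 0x80006000 and 0x80007000,
 * while empty/compressed markers are replicated unchanged.
 */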
1279
1280 #define WRITE_PTE(pte_p, pte_entry) \
1281 WRITE_PTE_FAST(pte_p, pte_entry); \
1282 FLUSH_PTE(pte_p);
1283
1284 #define WRITE_PTE_STRONG(pte_p, pte_entry) \
1285 WRITE_PTE_FAST(pte_p, pte_entry); \
1286 FLUSH_PTE_STRONG(pte_p);
1287
1288 /*
1289 * Other useful macros.
1290 */
1291 #define current_pmap() \
1292 (vm_map_pmap(current_thread()->map))
1293
1294
1295 #define VALIDATE_USER_PMAP(x)
1296 #define VALIDATE_PMAP(x)
1297 #define VALIDATE_LEDGER(x)
1298
1299
1300 #if DEVELOPMENT || DEBUG
1301
1302 /*
1303 * Trace levels are controlled by a bitmask in which each
1304 * level can be enabled/disabled by the (1<<level) position
1305 * in the boot-arg.
1306 * Level 1: pmap lifecycle (create/destroy/switch)
1307 * Level 2: mapping lifecycle (enter/remove/protect/nest/unnest)
1308 * Level 3: internal state management (tte/attributes/fast-fault)
1309 */
1310
1311 SECURITY_READ_ONLY_LATE(unsigned int) pmap_trace_mask = 0;
1312
1313 #define PMAP_TRACE(level, ...) \
1314 if (__improbable((1 << (level)) & pmap_trace_mask)) { \
1315 KDBG_RELEASE(__VA_ARGS__); \
1316 }
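/*
 * Illustrative example, not part of the original source: to trace pmap
 * lifecycle (level 1) and mapping lifecycle (level 2) events, boot with
 * pmap_trace_mask set to (1 << 1) | (1 << 2) == 0x6 via its boot-arg;
 * adding level 3 (internal state management) gives a mask of 0xe.
 */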
1317 #else
1318
1319 #define PMAP_TRACE(level, ...)
1320
1321 #endif
1322
1323
1324 /*
1325 * Internal function prototypes (forward declarations).
1326 */
1327
1328 static void pv_init(
1329 void);
1330
1331 static boolean_t pv_alloc(
1332 pmap_t pmap,
1333 unsigned int pai,
1334 pv_entry_t **pvepp);
1335
1336 static void pv_free(
1337 pv_entry_t *pvep);
1338
1339 static void pv_list_free(
1340 pv_entry_t *pvehp,
1341 pv_entry_t *pvetp,
1342 unsigned int cnt);
1343
1344 static void ptd_bootstrap(
1345 pt_desc_t *ptdp, unsigned int ptd_cnt);
1346
1347 static inline pt_desc_t *ptd_alloc_unlinked(bool reclaim);
1348
1349 static pt_desc_t *ptd_alloc(pmap_t pmap, bool reclaim);
1350
1351 static void ptd_deallocate(pt_desc_t *ptdp);
1352
1353 static void ptd_init(
1354 pt_desc_t *ptdp, pmap_t pmap, vm_map_address_t va, unsigned int ttlevel, pt_entry_t * pte_p);
1355
1356 static void pmap_zone_init(
1357 void);
1358
1359 static void pmap_set_reference(
1360 ppnum_t pn);
1361
1362 ppnum_t pmap_vtophys(
1363 pmap_t pmap, addr64_t va);
1364
1365 void pmap_switch_user_ttb(
1366 pmap_t pmap);
1367
1368 static kern_return_t pmap_expand(
1369 pmap_t, vm_map_address_t, unsigned int options, unsigned int level);
1370
1371 static int pmap_remove_range(
1372 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *);
1373
1374 static int pmap_remove_range_options(
1375 pmap_t, vm_map_address_t, pt_entry_t *, pt_entry_t *, uint32_t *, bool *, int);
1376
1377 static tt_entry_t *pmap_tt1_allocate(
1378 pmap_t, vm_size_t, unsigned int);
1379
1380 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1381
1382 static void pmap_tt1_deallocate(
1383 pmap_t, tt_entry_t *, vm_size_t, unsigned int);
1384
1385 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1386
1387 static kern_return_t pmap_tt_allocate(
1388 pmap_t, tt_entry_t **, unsigned int, unsigned int);
1389
1390 #define PMAP_TT_ALLOCATE_NOWAIT 0x1
1391
1392 static void pmap_tte_deallocate(
1393 pmap_t, tt_entry_t *, unsigned int);
1394
1395 #ifdef __ARM64_PMAP_SUBPAGE_L1__
1396 #if (__ARM_VMSA__ <= 7)
1397 #error This is not supported for old-style page tables
1398 #endif
1399 #define PMAP_ROOT_ALLOC_SIZE (((ARM_TT_L1_INDEX_MASK >> ARM_TT_L1_SHIFT) + 1) * sizeof(tt_entry_t))
1400 #else
1401 #define PMAP_ROOT_ALLOC_SIZE (ARM_PGBYTES)
1402 #endif
1403
1404 const unsigned int arm_hardware_page_size = ARM_PGBYTES;
1405 const unsigned int arm_pt_desc_size = sizeof(pt_desc_t);
1406 const unsigned int arm_pt_root_size = PMAP_ROOT_ALLOC_SIZE;
1407
1408 #define PMAP_TT_DEALLOCATE_NOBLOCK 0x1
1409
1410 #if (__ARM_VMSA__ > 7)
1411
1412 static inline tt_entry_t *pmap_tt1e(
1413 pmap_t, vm_map_address_t);
1414
1415 static inline tt_entry_t *pmap_tt2e(
1416 pmap_t, vm_map_address_t);
1417
1418 static inline pt_entry_t *pmap_tt3e(
1419 pmap_t, vm_map_address_t);
1420
1421 static inline pt_entry_t *pmap_ttne(
1422 pmap_t, unsigned int, vm_map_address_t);
1423
1424 static void pmap_unmap_sharedpage(
1425 pmap_t pmap);
1426
1427 static boolean_t
1428 pmap_is_64bit(pmap_t);
1429
1430
1431 #endif
1432 static inline tt_entry_t *pmap_tte(
1433 pmap_t, vm_map_address_t);
1434
1435 static inline pt_entry_t *pmap_pte(
1436 pmap_t, vm_map_address_t);
1437
1438 static void pmap_update_cache_attributes_locked(
1439 ppnum_t, unsigned);
1440
1441 boolean_t arm_clear_fast_fault(
1442 ppnum_t ppnum,
1443 vm_prot_t fault_type);
1444
1445 static pmap_paddr_t pmap_pages_reclaim(
1446 void);
1447
1448 static kern_return_t pmap_pages_alloc(
1449 pmap_paddr_t *pa,
1450 unsigned size,
1451 unsigned option);
1452
1453 #define PMAP_PAGES_ALLOCATE_NOWAIT 0x1
1454 #define PMAP_PAGES_RECLAIM_NOWAIT 0x2
1455
1456 static void pmap_pages_free(
1457 pmap_paddr_t pa,
1458 unsigned size);
1459
1460 static void pmap_pin_kernel_pages(vm_offset_t kva, size_t nbytes);
1461
1462 static void pmap_unpin_kernel_pages(vm_offset_t kva, size_t nbytes);
1463
1464 static void pmap_trim_self(pmap_t pmap);
1465 static void pmap_trim_subord(pmap_t subord);
1466
1467
1468 #define PMAP_SUPPORT_PROTOTYPES(__return_type, __function_name, __function_args, __function_index) \
1469 static __return_type __function_name##_internal __function_args
1470
1471 PMAP_SUPPORT_PROTOTYPES(
1472 kern_return_t,
1473 arm_fast_fault, (pmap_t pmap,
1474 vm_map_address_t va,
1475 vm_prot_t fault_type,
1476 bool was_af_fault,
1477 bool from_user), ARM_FAST_FAULT_INDEX);
1478
1479
1480 PMAP_SUPPORT_PROTOTYPES(
1481 boolean_t,
1482 arm_force_fast_fault, (ppnum_t ppnum,
1483 vm_prot_t allow_mode,
1484 int options), ARM_FORCE_FAST_FAULT_INDEX);
1485
1486 PMAP_SUPPORT_PROTOTYPES(
1487 kern_return_t,
1488 mapping_free_prime, (void), MAPPING_FREE_PRIME_INDEX);
1489
1490 PMAP_SUPPORT_PROTOTYPES(
1491 kern_return_t,
1492 mapping_replenish, (void), MAPPING_REPLENISH_INDEX);
1493
1494 PMAP_SUPPORT_PROTOTYPES(
1495 boolean_t,
1496 pmap_batch_set_cache_attributes, (ppnum_t pn,
1497 unsigned int cacheattr,
1498 unsigned int page_cnt,
1499 unsigned int page_index,
1500 boolean_t doit,
1501 unsigned int *res), PMAP_BATCH_SET_CACHE_ATTRIBUTES_INDEX);
1502
1503 PMAP_SUPPORT_PROTOTYPES(
1504 void,
1505 pmap_change_wiring, (pmap_t pmap,
1506 vm_map_address_t v,
1507 boolean_t wired), PMAP_CHANGE_WIRING_INDEX);
1508
1509 PMAP_SUPPORT_PROTOTYPES(
1510 pmap_t,
1511 pmap_create_options, (ledger_t ledger,
1512 vm_map_size_t size,
1513 unsigned int flags), PMAP_CREATE_INDEX);
1514
1515 PMAP_SUPPORT_PROTOTYPES(
1516 void,
1517 pmap_destroy, (pmap_t pmap), PMAP_DESTROY_INDEX);
1518
1519 PMAP_SUPPORT_PROTOTYPES(
1520 kern_return_t,
1521 pmap_enter_options, (pmap_t pmap,
1522 vm_map_address_t v,
1523 ppnum_t pn,
1524 vm_prot_t prot,
1525 vm_prot_t fault_type,
1526 unsigned int flags,
1527 boolean_t wired,
1528 unsigned int options), PMAP_ENTER_OPTIONS_INDEX);
1529
1530 PMAP_SUPPORT_PROTOTYPES(
1531 vm_offset_t,
1532 pmap_extract, (pmap_t pmap,
1533 vm_map_address_t va), PMAP_EXTRACT_INDEX);
1534
1535 PMAP_SUPPORT_PROTOTYPES(
1536 ppnum_t,
1537 pmap_find_phys, (pmap_t pmap,
1538 addr64_t va), PMAP_FIND_PHYS_INDEX);
1539
1540 #if (__ARM_VMSA__ > 7)
1541 PMAP_SUPPORT_PROTOTYPES(
1542 kern_return_t,
1543 pmap_insert_sharedpage, (pmap_t pmap), PMAP_INSERT_SHAREDPAGE_INDEX);
1544 #endif
1545
1546
1547 PMAP_SUPPORT_PROTOTYPES(
1548 boolean_t,
1549 pmap_is_empty, (pmap_t pmap,
1550 vm_map_offset_t va_start,
1551 vm_map_offset_t va_end), PMAP_IS_EMPTY_INDEX);
1552
1553
1554 PMAP_SUPPORT_PROTOTYPES(
1555 unsigned int,
1556 pmap_map_cpu_windows_copy, (ppnum_t pn,
1557 vm_prot_t prot,
1558 unsigned int wimg_bits), PMAP_MAP_CPU_WINDOWS_COPY_INDEX);
1559
1560 PMAP_SUPPORT_PROTOTYPES(
1561 kern_return_t,
1562 pmap_nest, (pmap_t grand,
1563 pmap_t subord,
1564 addr64_t vstart,
1565 addr64_t nstart,
1566 uint64_t size), PMAP_NEST_INDEX);
1567
1568 PMAP_SUPPORT_PROTOTYPES(
1569 void,
1570 pmap_page_protect_options, (ppnum_t ppnum,
1571 vm_prot_t prot,
1572 unsigned int options), PMAP_PAGE_PROTECT_OPTIONS_INDEX);
1573
1574 PMAP_SUPPORT_PROTOTYPES(
1575 void,
1576 pmap_protect_options, (pmap_t pmap,
1577 vm_map_address_t start,
1578 vm_map_address_t end,
1579 vm_prot_t prot,
1580 unsigned int options,
1581 void *args), PMAP_PROTECT_OPTIONS_INDEX);
1582
1583 PMAP_SUPPORT_PROTOTYPES(
1584 kern_return_t,
1585 pmap_query_page_info, (pmap_t pmap,
1586 vm_map_offset_t va,
1587 int *disp_p), PMAP_QUERY_PAGE_INFO_INDEX);
1588
1589 PMAP_SUPPORT_PROTOTYPES(
1590 mach_vm_size_t,
1591 pmap_query_resident, (pmap_t pmap,
1592 vm_map_address_t start,
1593 vm_map_address_t end,
1594 mach_vm_size_t * compressed_bytes_p), PMAP_QUERY_RESIDENT_INDEX);
1595
1596 PMAP_SUPPORT_PROTOTYPES(
1597 void,
1598 pmap_reference, (pmap_t pmap), PMAP_REFERENCE_INDEX);
1599
1600 PMAP_SUPPORT_PROTOTYPES(
1601 int,
1602 pmap_remove_options, (pmap_t pmap,
1603 vm_map_address_t start,
1604 vm_map_address_t end,
1605 int options), PMAP_REMOVE_OPTIONS_INDEX);
1606
1607 PMAP_SUPPORT_PROTOTYPES(
1608 kern_return_t,
1609 pmap_return, (boolean_t do_panic,
1610 boolean_t do_recurse), PMAP_RETURN_INDEX);
1611
1612 PMAP_SUPPORT_PROTOTYPES(
1613 void,
1614 pmap_set_cache_attributes, (ppnum_t pn,
1615 unsigned int cacheattr), PMAP_SET_CACHE_ATTRIBUTES_INDEX);
1616
1617 PMAP_SUPPORT_PROTOTYPES(
1618 void,
1619 pmap_update_compressor_page, (ppnum_t pn,
1620 unsigned int prev_cacheattr, unsigned int new_cacheattr), PMAP_UPDATE_COMPRESSOR_PAGE_INDEX);
1621
1622 PMAP_SUPPORT_PROTOTYPES(
1623 void,
1624 pmap_set_nested, (pmap_t pmap), PMAP_SET_NESTED_INDEX);
1625
1626 #if MACH_ASSERT
1627 PMAP_SUPPORT_PROTOTYPES(
1628 void,
1629 pmap_set_process, (pmap_t pmap,
1630 int pid,
1631 char *procname), PMAP_SET_PROCESS_INDEX);
1632 #endif
1633
1634 PMAP_SUPPORT_PROTOTYPES(
1635 void,
1636 pmap_unmap_cpu_windows_copy, (unsigned int index), PMAP_UNMAP_CPU_WINDOWS_COPY_INDEX);
1637
1638 PMAP_SUPPORT_PROTOTYPES(
1639 kern_return_t,
1640 pmap_unnest_options, (pmap_t grand,
1641 addr64_t vaddr,
1642 uint64_t size,
1643 unsigned int option), PMAP_UNNEST_OPTIONS_INDEX);
1644
1645
1646 PMAP_SUPPORT_PROTOTYPES(
1647 void,
1648 phys_attribute_set, (ppnum_t pn,
1649 unsigned int bits), PHYS_ATTRIBUTE_SET_INDEX);
1650
1651
1652 PMAP_SUPPORT_PROTOTYPES(
1653 void,
1654 phys_attribute_clear, (ppnum_t pn,
1655 unsigned int bits,
1656 int options,
1657 void *arg), PHYS_ATTRIBUTE_CLEAR_INDEX);
1658
1659 PMAP_SUPPORT_PROTOTYPES(
1660 void,
1661 pmap_switch, (pmap_t pmap), PMAP_SWITCH_INDEX);
1662
1663 PMAP_SUPPORT_PROTOTYPES(
1664 void,
1665 pmap_switch_user_ttb, (pmap_t pmap), PMAP_SWITCH_USER_TTB_INDEX);
1666
1667 PMAP_SUPPORT_PROTOTYPES(
1668 void,
1669 pmap_clear_user_ttb, (void), PMAP_CLEAR_USER_TTB_INDEX);
1670
1671
1672 PMAP_SUPPORT_PROTOTYPES(
1673 void,
1674 pmap_set_jit_entitled, (pmap_t pmap), PMAP_SET_JIT_ENTITLED_INDEX);
1675
1676 PMAP_SUPPORT_PROTOTYPES(
1677 void,
1678 pmap_trim, (pmap_t grand,
1679 pmap_t subord,
1680 addr64_t vstart,
1681 addr64_t nstart,
1682 uint64_t size), PMAP_TRIM_INDEX);
1683
1684
1685
1686
1687
1688
1689 void pmap_footprint_suspend(vm_map_t map,
1690 boolean_t suspend);
1691 PMAP_SUPPORT_PROTOTYPES(
1692 void,
1693 pmap_footprint_suspend, (vm_map_t map,
1694 boolean_t suspend),
1695 PMAP_FOOTPRINT_SUSPEND_INDEX);
1696
1697
1698 #if CONFIG_PGTRACE
1699 boolean_t pgtrace_enabled = 0;
1700
1701 typedef struct {
1702 queue_chain_t chain;
1703
1704 /*
1705 * pmap - pmap for below addresses
1706 * ova - original va page address
1707 * cva - clone va addresses for pre, target and post pages
1708 * cva_spte - clone saved ptes
1709 * range - trace range in this map
1710 * cloned - has been cloned or not
1711 */
1712 pmap_t pmap;
1713 vm_map_offset_t ova;
1714 vm_map_offset_t cva[3];
1715 pt_entry_t cva_spte[3];
1716 struct {
1717 pmap_paddr_t start;
1718 pmap_paddr_t end;
1719 } range;
1720 bool cloned;
1721 } pmap_pgtrace_map_t;
1722
1723 static void pmap_pgtrace_init(void);
1724 static bool pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end);
1725 static void pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa_page, vm_map_offset_t va_page);
1726 static void pmap_pgtrace_remove_all_clone(pmap_paddr_t pa);
1727 #endif
1728
1729 #if (__ARM_VMSA__ > 7)
1730 /*
1731 * The low global vector page is mapped at a fixed alias.
1732 * Since the page size is 16k for H8 and newer, we map the globals to a
1733 * 16k-aligned address. Readers of the globals (e.g. lldb, panic server) need
1734 * to check both addresses anyway for backward compatibility. So for now
1735 * we leave H6 and H7 where they were.
1736 */
1737 #if (ARM_PGSHIFT == 14)
1738 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x4000)
1739 #else
1740 #define LOWGLOBAL_ALIAS (LOW_GLOBAL_BASE_ADDRESS + 0x2000)
1741 #endif
1742
1743 #else
1744 #define LOWGLOBAL_ALIAS (0xFFFF1000)
1745 #endif
1746
1747 long long alloc_tteroot_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1748 long long alloc_ttepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1749 long long alloc_ptepages_count __attribute__((aligned(8))) MARK_AS_PMAP_DATA = 0LL;
1750 long long alloc_pmap_pages_count __attribute__((aligned(8))) = 0LL;
1751
1752 int pt_fake_zone_index = -1; /* index of pmap fake zone */
1753
1754
1755
1756 /*
1757 * Initializes the per-CPU data structure used by the pmap for the given CPU.
1758 */
1759 MARK_AS_PMAP_TEXT static void
1760 pmap_cpu_data_init_internal(unsigned int cpu_number)
1761 {
1762 pmap_cpu_data_t * pmap_cpu_data = pmap_get_cpu_data();
1763
1764 pmap_cpu_data->cpu_number = cpu_number;
1765 }
1766
1767 void
1768 pmap_cpu_data_init(void)
1769 {
1770 pmap_cpu_data_init_internal(cpu_number());
1771 }
1772
1773 static void
1774 pmap_cpu_data_array_init(void)
1775 {
1776
1777 pmap_cpu_data_init();
1778 }
1779
1780 pmap_cpu_data_t *
1781 pmap_get_cpu_data(void)
1782 {
1783 pmap_cpu_data_t * pmap_cpu_data = NULL;
1784
1785 pmap_cpu_data = &getCpuDatap()->cpu_pmap_cpu_data;
1786
1787 return pmap_cpu_data;
1788 }
1789
1790
1791
1792 /* TODO */
1793 pmap_paddr_t
1794 pmap_pages_reclaim(
1795 void)
1796 {
1797 boolean_t found_page;
1798 unsigned i;
1799 pt_desc_t *ptdp;
1800
1801 /*
1802 * pmap_pages_reclaim() returns a page by freeing an active pt page.
1803 * To be eligible, a pt page must be assigned to a user pmap, must not contain
1804 * any wired pte entries, and must contain at least one valid pte entry.
1805 *
1806 * In a loop, check for a page in the reclaimed pt page list.
1807 * If one is present, unlink that page and return its physical address.
1808 * Otherwise, scan the pt page list for an eligible pt page to reclaim.
1809 * If one is found, invoke pmap_remove_range() on its pmap and address range,
1810 * then deallocate that pt page. This ends up adding the pt page to the
1811 * reclaimed pt page list.
1812 * If no eligible page is found in the pt page list, panic.
1813 */
1814
1815 pmap_simple_lock(&pmap_pages_lock);
1816 pmap_pages_request_count++;
1817 pmap_pages_request_acum++;
1818
1819 while (1) {
1820 if (pmap_pages_reclaim_list != (page_free_entry_t *)NULL) {
1821 page_free_entry_t *page_entry;
1822
1823 page_entry = pmap_pages_reclaim_list;
1824 pmap_pages_reclaim_list = pmap_pages_reclaim_list->next;
1825 pmap_simple_unlock(&pmap_pages_lock);
1826
1827 return (pmap_paddr_t)ml_static_vtop((vm_offset_t)page_entry);
1828 }
1829
1830 pmap_simple_unlock(&pmap_pages_lock);
1831
1832 pmap_simple_lock(&pt_pages_lock);
1833 ptdp = (pt_desc_t *)queue_first(&pt_page_list);
1834 found_page = FALSE;
1835
1836 while (!queue_end(&pt_page_list, (queue_entry_t)ptdp)) {
1837 if ((ptdp->pmap->nested == FALSE)
1838 && (pmap_simple_lock_try(&ptdp->pmap->lock))) {
1839 assert(ptdp->pmap != kernel_pmap);
1840 unsigned refcnt_acc = 0;
1841 unsigned wiredcnt_acc = 0;
1842
1843 for (i = 0; i < PT_INDEX_MAX; i++) {
1844 if (ptdp->ptd_info[i].refcnt == PT_DESC_REFCOUNT) {
1845 /* Do not attempt to free a page that contains an L2 table */
1846 refcnt_acc = 0;
1847 break;
1848 }
1849 refcnt_acc += ptdp->ptd_info[i].refcnt;
1850 wiredcnt_acc += ptdp->ptd_info[i].wiredcnt;
1851 }
1852 if ((wiredcnt_acc == 0) && (refcnt_acc != 0)) {
1853 found_page = TRUE;
1854 /* Leave ptdp->pmap locked here. We're about to reclaim
1855 * a tt page from it, so we don't want anyone else messing
1856 * with it while we do that. */
1857 break;
1858 }
1859 pmap_simple_unlock(&ptdp->pmap->lock);
1860 }
1861 ptdp = (pt_desc_t *)queue_next((queue_t)ptdp);
1862 }
1863 if (!found_page) {
1864 panic("%s: No eligible page in pt_page_list", __FUNCTION__);
1865 } else {
1866 int remove_count = 0;
1867 bool need_strong_sync = false;
1868 vm_map_address_t va;
1869 pmap_t pmap;
1870 pt_entry_t *bpte, *epte;
1871 pt_entry_t *pte_p;
1872 tt_entry_t *tte_p;
1873 uint32_t rmv_spte = 0;
1874
1875 pmap_simple_unlock(&pt_pages_lock);
1876 pmap = ptdp->pmap;
1877 PMAP_ASSERT_LOCKED(pmap); // pmap lock should be held from loop above
1878
1879 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
1880
1881 for (i = 0; i < PT_INDEX_MAX; i++) {
1882 va = ptdp->ptd_info[i].va;
1883
1884 /* If the VA is bogus, this may represent an unallocated region
1885 * or one which is in transition (already being freed or expanded).
1886 * Don't try to remove mappings here. */
1887 if (va == (vm_offset_t)-1) {
1888 continue;
1889 }
1890
1891 tte_p = pmap_tte(pmap, va);
1892 if ((tte_p != (tt_entry_t *) NULL)
1893 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
1894 pte_p = (pt_entry_t *) ttetokv(*tte_p);
1895 bpte = &pte_p[pte_index(pmap, pt_attr, va)];
1896 epte = bpte + PAGE_SIZE / sizeof(pt_entry_t);
1897 /*
1898 * Use PMAP_OPTIONS_REMOVE to clear any
1899 * "compressed" markers and update the
1900 * "compressed" counter in pmap->stats.
1901 * This means that we lose accounting for
1902 * any compressed pages in this range
1903 * but the alternative is to not be able
1904 * to account for their future decompression,
1905 * which could cause the counter to drift
1906 * more and more.
1907 */
1908 remove_count += pmap_remove_range_options(
1909 pmap, va, bpte, epte,
1910 &rmv_spte, &need_strong_sync, PMAP_OPTIONS_REMOVE);
1911 if (ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt != 0) {
1912 panic("%s: ptdp %p, count %d", __FUNCTION__, ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
1913 }
1914
1915 pmap_tte_deallocate(pmap, tte_p, PMAP_TT_TWIG_LEVEL);
1916
1917 if (remove_count > 0) {
1918 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, (unsigned int)pt_attr_leaf_table_size(pt_attr), pmap);
1919 } else {
1920 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(va, pmap);
1921 }
1922 }
1923 }
1924 // Undo the lock we grabbed when we found ptdp above
1925 PMAP_UNLOCK(pmap);
1926 pmap_sync_tlb(need_strong_sync);
1927 }
1928 pmap_simple_lock(&pmap_pages_lock);
1929 }
1930 }
1931
1932
1933 static kern_return_t
1934 pmap_pages_alloc(
1935 pmap_paddr_t *pa,
1936 unsigned size,
1937 unsigned option)
1938 {
1939 vm_page_t m = VM_PAGE_NULL, m_prev;
1940
1941 if (option & PMAP_PAGES_RECLAIM_NOWAIT) {
1942 assert(size == PAGE_SIZE);
1943 *pa = pmap_pages_reclaim();
1944 return KERN_SUCCESS;
1945 }
1946 if (size == PAGE_SIZE) {
1947 while ((m = vm_page_grab()) == VM_PAGE_NULL) {
1948 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
1949 return KERN_RESOURCE_SHORTAGE;
1950 }
1951
1952 VM_PAGE_WAIT();
1953 }
1954 vm_page_lock_queues();
1955 vm_page_wire(m, VM_KERN_MEMORY_PTE, TRUE);
1956 vm_page_unlock_queues();
1957 }
1958 if (size == 2 * PAGE_SIZE) {
1959 while (cpm_allocate(size, &m, 0, 1, TRUE, 0) != KERN_SUCCESS) {
1960 if (option & PMAP_PAGES_ALLOCATE_NOWAIT) {
1961 return KERN_RESOURCE_SHORTAGE;
1962 }
1963
1964 VM_PAGE_WAIT();
1965 }
1966 }
1967
1968 *pa = (pmap_paddr_t)ptoa(VM_PAGE_GET_PHYS_PAGE(m));
1969
1970 vm_object_lock(pmap_object);
1971 while (m != VM_PAGE_NULL) {
1972 vm_page_insert_wired(m, pmap_object, (vm_object_offset_t) ((ptoa(VM_PAGE_GET_PHYS_PAGE(m))) - gPhysBase), VM_KERN_MEMORY_PTE);
1973 m_prev = m;
1974 m = NEXT_PAGE(m_prev);
1975 *(NEXT_PAGE_PTR(m_prev)) = VM_PAGE_NULL;
1976 }
1977 vm_object_unlock(pmap_object);
1978
1979 OSAddAtomic(size >> PAGE_SHIFT, &inuse_pmap_pages_count);
1980 OSAddAtomic64(size >> PAGE_SHIFT, &alloc_pmap_pages_count);
1981
1982 return KERN_SUCCESS;
1983 }
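/*
 * Illustrative sketch (editor's note, not compiled): callers that must not
 * block typically try a plain NOWAIT allocation first and fall back to
 * reclaiming an active pt page only when the VM free list is exhausted, as
 * pv_alloc() and ptd_alloc_unlinked() do below:
 *
 *     pmap_paddr_t pa;
 *     kern_return_t kr = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
 *     if (kr == KERN_RESOURCE_SHORTAGE) {
 *         // PMAP_PAGES_RECLAIM_NOWAIT diverts to pmap_pages_reclaim(),
 *         // which frees (and hands back) an active user pt page.
 *         kr = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
 *     }
 */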
1984
1985
1986 static void
1987 pmap_pages_free(
1988 pmap_paddr_t pa,
1989 unsigned size)
1990 {
1991 pmap_simple_lock(&pmap_pages_lock);
1992
1993 if (pmap_pages_request_count != 0) {
1994 page_free_entry_t *page_entry;
1995
1996 pmap_pages_request_count--;
1997 page_entry = (page_free_entry_t *)phystokv(pa);
1998 page_entry->next = pmap_pages_reclaim_list;
1999 pmap_pages_reclaim_list = page_entry;
2000 pmap_simple_unlock(&pmap_pages_lock);
2001
2002 return;
2003 }
2004
2005 pmap_simple_unlock(&pmap_pages_lock);
2006
2007 vm_page_t m;
2008 pmap_paddr_t pa_max;
2009
2010 OSAddAtomic(-(size >> PAGE_SHIFT), &inuse_pmap_pages_count);
2011
2012 for (pa_max = pa + size; pa < pa_max; pa = pa + PAGE_SIZE) {
2013 vm_object_lock(pmap_object);
2014 m = vm_page_lookup(pmap_object, (pa - gPhysBase));
2015 assert(m != VM_PAGE_NULL);
2016 assert(VM_PAGE_WIRED(m));
2017 vm_page_lock_queues();
2018 vm_page_free(m);
2019 vm_page_unlock_queues();
2020 vm_object_unlock(pmap_object);
2021 }
2022 }
2023
2024 static inline void
2025 PMAP_ZINFO_PALLOC(
2026 pmap_t pmap, int bytes)
2027 {
2028 pmap_ledger_credit(pmap, task_ledgers.tkm_private, bytes);
2029 }
2030
2031 static inline void
2032 PMAP_ZINFO_PFREE(
2033 pmap_t pmap,
2034 int bytes)
2035 {
2036 pmap_ledger_debit(pmap, task_ledgers.tkm_private, bytes);
2037 }
2038
2039 static inline void
2040 pmap_tt_ledger_credit(
2041 pmap_t pmap,
2042 vm_size_t size)
2043 {
2044 if (pmap != kernel_pmap) {
2045 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, size);
2046 pmap_ledger_credit(pmap, task_ledgers.page_table, size);
2047 }
2048 }
2049
2050 static inline void
2051 pmap_tt_ledger_debit(
2052 pmap_t pmap,
2053 vm_size_t size)
2054 {
2055 if (pmap != kernel_pmap) {
2056 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, size);
2057 pmap_ledger_debit(pmap, task_ledgers.page_table, size);
2058 }
2059 }
2060
2061 static bool
2062 alloc_asid(pmap_t pmap)
2063 {
2064 int vasid;
2065 uint16_t hw_asid;
2066
2067 pmap_simple_lock(&asid_lock);
2068 vasid = bitmap_first(&asid_bitmap[0], MAX_ASID);
2069 if (vasid < 0) {
2070 pmap_simple_unlock(&asid_lock);
2071 return false;
2072 }
2073 assert(vasid < MAX_ASID);
2074 bitmap_clear(&asid_bitmap[0], (unsigned int)vasid);
2075 pmap_simple_unlock(&asid_lock);
2076 // bitmap_first() returns highest-order bits first, but a 0-based scheme works
2077 // slightly better with the collision detection scheme used by pmap_switch_internal().
2078 vasid = MAX_ASID - 1 - vasid;
2079 hw_asid = vasid % MAX_HW_ASID;
2080 pmap->sw_asid = vasid / MAX_HW_ASID;
2081 hw_asid += 1; // Account for ASID 0, which is reserved for the kernel
2082 #if __ARM_KERNEL_PROTECT__
2083 hw_asid <<= 1; // We're really handing out 2 hardware ASIDs, one for EL0 and one for EL1 access
2084 #endif
2085 pmap->hw_asid = hw_asid;
2086 return true;
2087 }
2088
2089 static void
2090 free_asid(pmap_t pmap)
2091 {
2092 unsigned int vasid;
2093 uint16_t hw_asid = pmap->hw_asid;
2094 assert(hw_asid != 0); // Should not try to free kernel ASID
2095
2096 #if __ARM_KERNEL_PROTECT__
2097 hw_asid >>= 1;
2098 #endif
2099 hw_asid -= 1;
2100
2101 vasid = ((unsigned int)pmap->sw_asid * MAX_HW_ASID) + hw_asid;
2102 vasid = MAX_ASID - 1 - vasid;
2103
2104 pmap_simple_lock(&asid_lock);
2105 assert(!bitmap_test(&asid_bitmap[0], vasid));
2106 bitmap_set(&asid_bitmap[0], vasid);
2107 pmap_simple_unlock(&asid_lock);
2108 }
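/*
 * Editor's sketch (not compiled): the vasid <-> (hw_asid, sw_asid) encoding
 * used by alloc_asid()/free_asid() above reduces to the arithmetic below;
 * MAX_ASID, MAX_HW_ASID and the __ARM_KERNEL_PROTECT__ shift are the
 * configuration-dependent pieces, and the EL0/EL1 doubling is omitted here.
 *
 *     // encode (alloc_asid)
 *     vasid   = MAX_ASID - 1 - bitmap_index;  // bitmap_first() scans from the top
 *     hw_asid = (vasid % MAX_HW_ASID) + 1;    // +1: hardware ASID 0 stays reserved for the kernel
 *     sw_asid = vasid / MAX_HW_ASID;          // "generation", used for collision detection
 *
 *     // decode (free_asid) is the exact inverse
 *     vasid        = (sw_asid * MAX_HW_ASID) + (hw_asid - 1);
 *     bitmap_index = MAX_ASID - 1 - vasid;
 */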
2109
2110
2111 #ifndef PMAP_PV_LOAD_FACTOR
2112 #define PMAP_PV_LOAD_FACTOR 1
2113 #endif
2114
2115 #define PV_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
2116 #define PV_KERN_LOW_WATER_MARK_DEFAULT (0x200 * PMAP_PV_LOAD_FACTOR)
2117 #define PV_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
2118 #define PV_KERN_ALLOC_CHUNK_INITIAL (0x200 * PMAP_PV_LOAD_FACTOR)
2119 #define PV_ALLOC_INITIAL_TARGET (PV_ALLOC_CHUNK_INITIAL * 5)
2120 #define PV_KERN_ALLOC_INITIAL_TARGET (PV_KERN_ALLOC_CHUNK_INITIAL)
2121
2122 uint32_t pv_free_count MARK_AS_PMAP_DATA = 0;
2123 uint32_t pv_page_count MARK_AS_PMAP_DATA = 0;
2124 uint32_t pv_kern_free_count MARK_AS_PMAP_DATA = 0;
2125
2126 uint32_t pv_low_water_mark MARK_AS_PMAP_DATA;
2127 uint32_t pv_kern_low_water_mark MARK_AS_PMAP_DATA;
2128 uint32_t pv_alloc_chunk MARK_AS_PMAP_DATA;
2129 uint32_t pv_kern_alloc_chunk MARK_AS_PMAP_DATA;
2130
2131 thread_t mapping_replenish_thread;
2132 event_t mapping_replenish_event;
2133 event_t pmap_user_pv_throttle_event;
2134 volatile uint32_t mappingrecurse = 0;
2135
2136 uint64_t pmap_pv_throttle_stat;
2137 uint64_t pmap_pv_throttled_waiters;
2138
2139 unsigned pmap_mapping_thread_wakeups;
2140 unsigned pmap_kernel_reserve_replenish_stat MARK_AS_PMAP_DATA;
2141 unsigned pmap_user_reserve_replenish_stat MARK_AS_PMAP_DATA;
2142 unsigned pmap_kern_reserve_alloc_stat MARK_AS_PMAP_DATA;
2143
2144
2145 static void
2146 pv_init(
2147 void)
2148 {
2149 simple_lock_init(&pv_free_list_lock, 0);
2150 simple_lock_init(&pv_kern_free_list_lock, 0);
2151 pv_free_list = PV_ENTRY_NULL;
2152 pv_free_count = 0x0U;
2153 pv_kern_free_list = PV_ENTRY_NULL;
2154 pv_kern_free_count = 0x0U;
2155 }
2156
2157 static inline void PV_ALLOC(pv_entry_t **pv_ep);
2158 static inline void PV_KERN_ALLOC(pv_entry_t **pv_e);
2159 static inline void PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);
2160 static inline void PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt);
2161
2162 static inline void pmap_pv_throttle(pmap_t p);
2163
2164 static boolean_t
2165 pv_alloc(
2166 pmap_t pmap,
2167 unsigned int pai,
2168 pv_entry_t **pvepp)
2169 {
2170 if (pmap != NULL) {
2171 PMAP_ASSERT_LOCKED(pmap);
2172 }
2173 ASSERT_PVH_LOCKED(pai);
2174 PV_ALLOC(pvepp);
2175 if (PV_ENTRY_NULL == *pvepp) {
2176 if ((pmap == NULL) || (kernel_pmap == pmap)) {
2177 PV_KERN_ALLOC(pvepp);
2178
2179 if (PV_ENTRY_NULL == *pvepp) {
2180 pv_entry_t *pv_e;
2181 pv_entry_t *pv_eh;
2182 pv_entry_t *pv_et;
2183 int pv_cnt;
2184 unsigned j;
2185 pmap_paddr_t pa;
2186 kern_return_t ret;
2187
2188 UNLOCK_PVH(pai);
2189 if (pmap != NULL) {
2190 PMAP_UNLOCK(pmap);
2191 }
2192
2193 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT);
2194
2195 if (ret == KERN_RESOURCE_SHORTAGE) {
2196 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
2197 }
2198
2199 if (ret != KERN_SUCCESS) {
2200 panic("%s: failed to alloc page for kernel, ret=%d, "
2201 "pmap=%p, pai=%u, pvepp=%p",
2202 __FUNCTION__, ret,
2203 pmap, pai, pvepp);
2204 }
2205
2206 pv_page_count++;
2207
2208 pv_e = (pv_entry_t *)phystokv(pa);
2209 pv_cnt = 0;
2210 pv_eh = pv_et = PV_ENTRY_NULL;
2211 *pvepp = pv_e;
2212 pv_e++;
2213
2214 for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2215 pv_e->pve_next = pv_eh;
2216 pv_eh = pv_e;
2217
2218 if (pv_et == PV_ENTRY_NULL) {
2219 pv_et = pv_e;
2220 }
2221 pv_cnt++;
2222 pv_e++;
2223 }
2224 PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
2225 if (pmap != NULL) {
2226 PMAP_LOCK(pmap);
2227 }
2228 LOCK_PVH(pai);
2229 return FALSE;
2230 }
2231 } else {
2232 UNLOCK_PVH(pai);
2233 PMAP_UNLOCK(pmap);
2234 pmap_pv_throttle(pmap);
2235 {
2236 pv_entry_t *pv_e;
2237 pv_entry_t *pv_eh;
2238 pv_entry_t *pv_et;
2239 int pv_cnt;
2240 unsigned j;
2241 pmap_paddr_t pa;
2242 kern_return_t ret;
2243
2244 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
2245
2246 if (ret != KERN_SUCCESS) {
2247 panic("%s: failed to alloc page, ret=%d, "
2248 "pmap=%p, pai=%u, pvepp=%p",
2249 __FUNCTION__, ret,
2250 pmap, pai, pvepp);
2251 }
2252
2253 pv_page_count++;
2254
2255 pv_e = (pv_entry_t *)phystokv(pa);
2256 pv_cnt = 0;
2257 pv_eh = pv_et = PV_ENTRY_NULL;
2258 *pvepp = pv_e;
2259 pv_e++;
2260
2261 for (j = 1; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2262 pv_e->pve_next = pv_eh;
2263 pv_eh = pv_e;
2264
2265 if (pv_et == PV_ENTRY_NULL) {
2266 pv_et = pv_e;
2267 }
2268 pv_cnt++;
2269 pv_e++;
2270 }
2271 PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
2272 }
2273 PMAP_LOCK(pmap);
2274 LOCK_PVH(pai);
2275 return FALSE;
2276 }
2277 }
2278 assert(PV_ENTRY_NULL != *pvepp);
2279 return TRUE;
2280 }
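/*
 * Editor's sketch (not compiled): both branches of pv_alloc() above carve a
 * fresh page into pv_entry_t elements the same way -- the first element
 * satisfies the caller through *pvepp, and the remaining
 * (PAGE_SIZE / sizeof(pv_entry_t)) - 1 elements are threaded into a local
 * chain (pv_eh = head, pv_et = tail) that is spliced onto the appropriate
 * free list in one shot:
 *
 *     pv_e = (pv_entry_t *)phystokv(pa);
 *     *pvepp = pv_e++;                         // caller gets the first entry
 *     for (j = 1; j < PAGE_SIZE / sizeof(pv_entry_t); j++, pv_e++) {
 *         pv_e->pve_next = pv_eh;              // push onto the local chain
 *         pv_eh = pv_e;
 *         if (pv_et == PV_ENTRY_NULL) {
 *             pv_et = pv_e;                    // remember the tail
 *         }
 *         pv_cnt++;
 *     }
 *     PV_FREE_LIST(pv_eh, pv_et, pv_cnt);      // or PV_KERN_FREE_LIST() for the kernel pool
 */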
2281
2282 static void
2283 pv_free(
2284 pv_entry_t *pvep)
2285 {
2286 PV_FREE_LIST(pvep, pvep, 1);
2287 }
2288
2289 static void
2290 pv_list_free(
2291 pv_entry_t *pvehp,
2292 pv_entry_t *pvetp,
2293 unsigned int cnt)
2294 {
2295 PV_FREE_LIST(pvehp, pvetp, cnt);
2296 }
2297
2298 static inline void
2299 pv_water_mark_check(void)
2300 {
2301 if (__improbable((pv_free_count < pv_low_water_mark) || (pv_kern_free_count < pv_kern_low_water_mark))) {
2302 if (!mappingrecurse && os_atomic_cmpxchg(&mappingrecurse, 0, 1, acq_rel)) {
2303 thread_wakeup(&mapping_replenish_event);
2304 }
2305 }
2306 }
2307
2308 static inline void
2309 PV_ALLOC(pv_entry_t **pv_ep)
2310 {
2311 assert(*pv_ep == PV_ENTRY_NULL);
2312 pmap_simple_lock(&pv_free_list_lock);
2313 /*
2314 * If the kernel reserve is low, don't hand out entries from the general free list;
2315 * this forces non-kernel mappings to allocate synchronously, possibly subject to a throttle.
2316 */
2317 if ((pv_kern_free_count >= pv_kern_low_water_mark) && ((*pv_ep = pv_free_list) != 0)) {
2318 pv_free_list = (pv_entry_t *)(*pv_ep)->pve_next;
2319 (*pv_ep)->pve_next = PV_ENTRY_NULL;
2320 pv_free_count--;
2321 }
2322
2323 pmap_simple_unlock(&pv_free_list_lock);
2324 }
2325
2326 static inline void
2327 PV_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt)
2328 {
2329 pmap_simple_lock(&pv_free_list_lock);
2330 pv_et->pve_next = (pv_entry_t *)pv_free_list;
2331 pv_free_list = pv_eh;
2332 pv_free_count += pv_cnt;
2333 pmap_simple_unlock(&pv_free_list_lock);
2334 }
2335
2336 static inline void
2337 PV_KERN_ALLOC(pv_entry_t **pv_e)
2338 {
2339 assert(*pv_e == PV_ENTRY_NULL);
2340 pmap_simple_lock(&pv_kern_free_list_lock);
2341
2342 if ((*pv_e = pv_kern_free_list) != 0) {
2343 pv_kern_free_list = (pv_entry_t *)(*pv_e)->pve_next;
2344 (*pv_e)->pve_next = PV_ENTRY_NULL;
2345 pv_kern_free_count--;
2346 pmap_kern_reserve_alloc_stat++;
2347 }
2348
2349 pmap_simple_unlock(&pv_kern_free_list_lock);
2350 }
2351
2352 static inline void
2353 PV_KERN_FREE_LIST(pv_entry_t *pv_eh, pv_entry_t *pv_et, int pv_cnt)
2354 {
2355 pmap_simple_lock(&pv_kern_free_list_lock);
2356 pv_et->pve_next = pv_kern_free_list;
2357 pv_kern_free_list = pv_eh;
2358 pv_kern_free_count += pv_cnt;
2359 pmap_simple_unlock(&pv_kern_free_list_lock);
2360 }
2361
2362 static inline void
2363 pmap_pv_throttle(__unused pmap_t p)
2364 {
2365 assert(p != kernel_pmap);
2366 /* Apply throttle on non-kernel mappings */
2367 if (pv_kern_free_count < (pv_kern_low_water_mark / 2)) {
2368 pmap_pv_throttle_stat++;
2369 /* This doesn't need to be strictly accurate, merely a hint
2370 * to eliminate the timeout when the reserve is replenished.
2371 */
2372 pmap_pv_throttled_waiters++;
2373 assert_wait_timeout(&pmap_user_pv_throttle_event, THREAD_UNINT, 1, 1000 * NSEC_PER_USEC);
2374 thread_block(THREAD_CONTINUE_NULL);
2375 }
2376 }
2377
2378 /*
2379 * Creates a target number of free pv_entry_t objects for the kernel free list
2380 * and the general free list.
2381 */
2382 MARK_AS_PMAP_TEXT static kern_return_t
2383 mapping_free_prime_internal(void)
2384 {
2385 unsigned j;
2386 pmap_paddr_t pa;
2387 kern_return_t ret;
2388 pv_entry_t *pv_e;
2389 pv_entry_t *pv_eh;
2390 pv_entry_t *pv_et;
2391 int pv_cnt;
2392 int alloc_options = 0;
2393 int needed_pv_cnt = 0;
2394 int target_pv_free_cnt = 0;
2395
2396 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_called = FALSE;
2397 SECURITY_READ_ONLY_LATE(static boolean_t) mapping_free_prime_internal_done = FALSE;
2398
2399 if (mapping_free_prime_internal_done) {
2400 return KERN_FAILURE;
2401 }
2402
2403 if (!mapping_free_prime_internal_called) {
2404 mapping_free_prime_internal_called = TRUE;
2405
2406 pv_low_water_mark = PV_LOW_WATER_MARK_DEFAULT;
2407
2408 /* Alterable via sysctl */
2409 pv_kern_low_water_mark = PV_KERN_LOW_WATER_MARK_DEFAULT;
2410
2411 pv_kern_alloc_chunk = PV_KERN_ALLOC_CHUNK_INITIAL;
2412 pv_alloc_chunk = PV_ALLOC_CHUNK_INITIAL;
2413 }
2414
2415 pv_cnt = 0;
2416 pv_eh = pv_et = PV_ENTRY_NULL;
2417 target_pv_free_cnt = PV_ALLOC_INITIAL_TARGET;
2418
2419 /*
2420 * We don't take the lock to read pv_free_count, as we should not be
2421 * invoking this from a multithreaded context.
2422 */
2423 needed_pv_cnt = target_pv_free_cnt - pv_free_count;
2424
2425 if (needed_pv_cnt > target_pv_free_cnt) {
2426 needed_pv_cnt = 0;
2427 }
2428
2429 while (pv_cnt < needed_pv_cnt) {
2430 ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);
2431
2432 assert(ret == KERN_SUCCESS);
2433
2434 pv_page_count++;
2435
2436 pv_e = (pv_entry_t *)phystokv(pa);
2437
2438 for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2439 pv_e->pve_next = pv_eh;
2440 pv_eh = pv_e;
2441
2442 if (pv_et == PV_ENTRY_NULL) {
2443 pv_et = pv_e;
2444 }
2445 pv_cnt++;
2446 pv_e++;
2447 }
2448 }
2449
2450 if (pv_cnt) {
2451 PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
2452 }
2453
2454 pv_cnt = 0;
2455 pv_eh = pv_et = PV_ENTRY_NULL;
2456 target_pv_free_cnt = PV_KERN_ALLOC_INITIAL_TARGET;
2457
2458 /*
2459 * We don't take the lock to read pv_kern_free_count, as we should not
2460 * be invoking this from a multithreaded context.
2461 */
2462 needed_pv_cnt = target_pv_free_cnt - pv_kern_free_count;
2463
2464 if (needed_pv_cnt > target_pv_free_cnt) {
2465 needed_pv_cnt = 0;
2466 }
2467
2468 while (pv_cnt < needed_pv_cnt) {
2469 ret = pmap_pages_alloc(&pa, PAGE_SIZE, alloc_options);
2470
2471 assert(ret == KERN_SUCCESS);
2472 pv_page_count++;
2473
2474 pv_e = (pv_entry_t *)phystokv(pa);
2475
2476 for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2477 pv_e->pve_next = pv_eh;
2478 pv_eh = pv_e;
2479
2480 if (pv_et == PV_ENTRY_NULL) {
2481 pv_et = pv_e;
2482 }
2483 pv_cnt++;
2484 pv_e++;
2485 }
2486 }
2487
2488 if (pv_cnt) {
2489 PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
2490 }
2491
2492 mapping_free_prime_internal_done = TRUE;
2493 return KERN_SUCCESS;
2494 }
2495
2496 void
2497 mapping_free_prime(void)
2498 {
2499 kern_return_t kr = KERN_FAILURE;
2500
2501 kr = mapping_free_prime_internal();
2502
2503 if (kr != KERN_SUCCESS) {
2504 panic("%s: failed, kr=%d",
2505 __FUNCTION__, kr);
2506 }
2507 }
2508
2509 void mapping_replenish(void);
2510
2511 void
2512 mapping_adjust(void)
2513 {
2514 kern_return_t mres;
2515
2516 mres = kernel_thread_start_priority((thread_continue_t)mapping_replenish, NULL, MAXPRI_KERNEL, &mapping_replenish_thread);
2517 if (mres != KERN_SUCCESS) {
2518 panic("%s: mapping_replenish thread creation failed",
2519 __FUNCTION__);
2520 }
2521 thread_deallocate(mapping_replenish_thread);
2522 }
2523
2524 /*
2525 * Fills the kernel and general PV free lists back up to their low watermarks.
2526 */
2527 MARK_AS_PMAP_TEXT static kern_return_t
2528 mapping_replenish_internal(void)
2529 {
2530 pv_entry_t *pv_e;
2531 pv_entry_t *pv_eh;
2532 pv_entry_t *pv_et;
2533 int pv_cnt;
2534 unsigned j;
2535 pmap_paddr_t pa;
2536 kern_return_t ret = KERN_SUCCESS;
2537
2538 while (pv_kern_free_count < pv_kern_low_water_mark) {
2539 pv_cnt = 0;
2540 pv_eh = pv_et = PV_ENTRY_NULL;
2541
2542 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
2543 assert(ret == KERN_SUCCESS);
2544
2545 pv_page_count++;
2546
2547 pv_e = (pv_entry_t *)phystokv(pa);
2548
2549 for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2550 pv_e->pve_next = pv_eh;
2551 pv_eh = pv_e;
2552
2553 if (pv_et == PV_ENTRY_NULL) {
2554 pv_et = pv_e;
2555 }
2556 pv_cnt++;
2557 pv_e++;
2558 }
2559 pmap_kernel_reserve_replenish_stat += pv_cnt;
2560 PV_KERN_FREE_LIST(pv_eh, pv_et, pv_cnt);
2561 }
2562
2563 while (pv_free_count < pv_low_water_mark) {
2564 pv_cnt = 0;
2565 pv_eh = pv_et = PV_ENTRY_NULL;
2566
2567 ret = pmap_pages_alloc(&pa, PAGE_SIZE, 0);
2568 assert(ret == KERN_SUCCESS);
2569
2570 pv_page_count++;
2571
2572 pv_e = (pv_entry_t *)phystokv(pa);
2573
2574 for (j = 0; j < (PAGE_SIZE / sizeof(pv_entry_t)); j++) {
2575 pv_e->pve_next = pv_eh;
2576 pv_eh = pv_e;
2577
2578 if (pv_et == PV_ENTRY_NULL) {
2579 pv_et = pv_e;
2580 }
2581 pv_cnt++;
2582 pv_e++;
2583 }
2584 pmap_user_reserve_replenish_stat += pv_cnt;
2585 PV_FREE_LIST(pv_eh, pv_et, pv_cnt);
2586 }
2587
2588 return ret;
2589 }
2590
2591 /*
2592 * Continuation function that keeps the PV free lists from running out of free
2593 * elements.
2594 */
2595 __attribute__((noreturn))
2596 void
2597 mapping_replenish(void)
2598 {
2599 kern_return_t kr;
2600
2601 /* We qualify for VM privileges...*/
2602 current_thread()->options |= TH_OPT_VMPRIV;
2603
2604 for (;;) {
2605 kr = mapping_replenish_internal();
2606
2607 if (kr != KERN_SUCCESS) {
2608 panic("%s: failed, kr=%d", __FUNCTION__, kr);
2609 }
2610
2611 /*
2612 * Wake threads throttled while the kernel reserve was being replenished.
2613 */
2614 if (pmap_pv_throttled_waiters) {
2615 pmap_pv_throttled_waiters = 0;
2616 thread_wakeup(&pmap_user_pv_throttle_event);
2617 }
2618
2619 /* Check if the kernel pool has been depleted since the
2620 * first pass, to reduce refill latency.
2621 */
2622 if (pv_kern_free_count < pv_kern_low_water_mark) {
2623 continue;
2624 }
2625 /* Block sans continuation to avoid yielding kernel stack */
2626 assert_wait(&mapping_replenish_event, THREAD_UNINT);
2627 mappingrecurse = 0;
2628 thread_block(THREAD_CONTINUE_NULL);
2629 pmap_mapping_thread_wakeups++;
2630 }
2631 }
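/*
 * Editor's note (illustrative, not compiled): the replenish thread above is
 * driven by pv_water_mark_check(), which mapping consumers call after taking
 * entries from the free lists. The handshake looks like:
 *
 *     // consumer side (pv_water_mark_check)
 *     if ((free count below a low-water mark) &&
 *         os_atomic_cmpxchg(&mappingrecurse, 0, 1, acq_rel)) {
 *         thread_wakeup(&mapping_replenish_event);
 *     }
 *
 *     // replenish side (above): refill both lists, clear mappingrecurse,
 *     // then assert_wait()/thread_block() on mapping_replenish_event.
 *
 * The mappingrecurse flag simply collapses redundant wakeups while a refill
 * is already pending.
 */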
2632
2633
2634 static void
2635 ptd_bootstrap(
2636 pt_desc_t *ptdp,
2637 unsigned int ptd_cnt)
2638 {
2639 simple_lock_init(&ptd_free_list_lock, 0);
2640 while (ptd_cnt != 0) {
2641 (*(void **)ptdp) = (void *)ptd_free_list;
2642 ptd_free_list = ptdp;
2643 ptdp++;
2644 ptd_cnt--;
2645 ptd_free_count++;
2646 }
2647 ptd_preboot = FALSE;
2648 }
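/*
 * Editor's note (illustrative, not compiled): the ptd free list stores its
 * link pointer inside the free pt_desc_t itself -- the first word of a free
 * descriptor is reinterpreted as a "next" pointer, so no separate list node
 * is needed. Push and pop therefore look like:
 *
 *     // push ptdp onto the free list (ptd_bootstrap/ptd_deallocate)
 *     *(void **)ptdp = (void *)ptd_free_list;
 *     ptd_free_list = ptdp;
 *     ptd_free_count++;
 *
 *     // pop the head (ptd_alloc_unlinked, below)
 *     ptdp = ptd_free_list;
 *     ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
 *     ptd_free_count--;
 */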
2649
2650 static pt_desc_t*
2651 ptd_alloc_unlinked(bool reclaim)
2652 {
2653 pt_desc_t *ptdp;
2654 unsigned i;
2655
2656 if (!ptd_preboot) {
2657 pmap_simple_lock(&ptd_free_list_lock);
2658 }
2659
2660 if (ptd_free_count == 0) {
2661 unsigned int ptd_cnt;
2662 pt_desc_t *ptdp_next;
2663
2664 if (ptd_preboot) {
2665 ptdp = (pt_desc_t *)avail_start;
2666 avail_start += ARM_PGBYTES;
2667 ptdp_next = ptdp;
2668 ptd_cnt = ARM_PGBYTES / sizeof(pt_desc_t);
2669 } else {
2670 pmap_paddr_t pa;
2671 kern_return_t ret;
2672
2673 pmap_simple_unlock(&ptd_free_list_lock);
2674
2675 if (pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_ALLOCATE_NOWAIT) != KERN_SUCCESS) {
2676 if (reclaim) {
2677 ret = pmap_pages_alloc(&pa, PAGE_SIZE, PMAP_PAGES_RECLAIM_NOWAIT);
2678 assert(ret == KERN_SUCCESS);
2679 } else {
2680 return NULL;
2681 }
2682 }
2683 ptdp = (pt_desc_t *)phystokv(pa);
2684
2685 pmap_simple_lock(&ptd_free_list_lock);
2686 ptdp_next = ptdp;
2687 ptd_cnt = PAGE_SIZE / sizeof(pt_desc_t);
2688 }
2689
2690 while (ptd_cnt != 0) {
2691 (*(void **)ptdp_next) = (void *)ptd_free_list;
2692 ptd_free_list = ptdp_next;
2693 ptdp_next++;
2694 ptd_cnt--;
2695 ptd_free_count++;
2696 }
2697 }
2698
2699 if ((ptdp = ptd_free_list) != PTD_ENTRY_NULL) {
2700 ptd_free_list = (pt_desc_t *)(*(void **)ptdp);
2701 ptd_free_count--;
2702 } else {
2703 panic("%s: out of ptd entries",
2704 __FUNCTION__);
2705 }
2706
2707 if (!ptd_preboot) {
2708 pmap_simple_unlock(&ptd_free_list_lock);
2709 }
2710
2711 ptdp->pt_page.next = NULL;
2712 ptdp->pt_page.prev = NULL;
2713 ptdp->pmap = NULL;
2714
2715 for (i = 0; i < PT_INDEX_MAX; i++) {
2716 ptdp->ptd_info[i].va = (vm_offset_t)-1;
2717 ptdp->ptd_info[i].refcnt = 0;
2718 ptdp->ptd_info[i].wiredcnt = 0;
2719 }
2720
2721 return ptdp;
2722 }
2723
2724 static inline pt_desc_t*
2725 ptd_alloc(pmap_t pmap, bool reclaim)
2726 {
2727 pt_desc_t *ptdp = ptd_alloc_unlinked(reclaim);
2728
2729 if (ptdp == NULL) {
2730 return NULL;
2731 }
2732
2733 ptdp->pmap = pmap;
2734 if (pmap != kernel_pmap) {
2735 /* We should never try to reclaim kernel pagetable pages in
2736 * pmap_pages_reclaim(), so don't enter them into the list. */
2737 pmap_simple_lock(&pt_pages_lock);
2738 queue_enter(&pt_page_list, ptdp, pt_desc_t *, pt_page);
2739 pmap_simple_unlock(&pt_pages_lock);
2740 }
2741
2742 pmap_tt_ledger_credit(pmap, sizeof(*ptdp));
2743 return ptdp;
2744 }
2745
2746 static void
2747 ptd_deallocate(pt_desc_t *ptdp)
2748 {
2749 pmap_t pmap = ptdp->pmap;
2750
2751 if (ptd_preboot) {
2752 panic("%s: early boot, "
2753 "ptdp=%p",
2754 __FUNCTION__,
2755 ptdp);
2756 }
2757
2758 if (ptdp->pt_page.next != NULL) {
2759 pmap_simple_lock(&pt_pages_lock);
2760 queue_remove(&pt_page_list, ptdp, pt_desc_t *, pt_page);
2761 pmap_simple_unlock(&pt_pages_lock);
2762 }
2763 pmap_simple_lock(&ptd_free_list_lock);
2764 (*(void **)ptdp) = (void *)ptd_free_list;
2765 ptd_free_list = (pt_desc_t *)ptdp;
2766 ptd_free_count++;
2767 pmap_simple_unlock(&ptd_free_list_lock);
2768 if (pmap != NULL) {
2769 pmap_tt_ledger_debit(pmap, sizeof(*ptdp));
2770 }
2771 }
2772
2773 static void
2774 ptd_init(
2775 pt_desc_t *ptdp,
2776 pmap_t pmap,
2777 vm_map_address_t va,
2778 unsigned int level,
2779 pt_entry_t *pte_p)
2780 {
2781 if (ptdp->pmap != pmap) {
2782 panic("%s: pmap mismatch, "
2783 "ptdp=%p, pmap=%p, va=%p, level=%u, pte_p=%p",
2784 __FUNCTION__,
2785 ptdp, pmap, (void*)va, level, pte_p);
2786 }
2787
2788 #if (__ARM_VMSA__ == 7)
2789 assert(level == 2);
2790 ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(ARM_TT_L1_PT_OFFMASK);
2791 #else
2792 assert(level > pt_attr_root_level(pmap_get_pt_attr(pmap)));
2793 ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].va = (vm_offset_t) va & ~(pt_attr_ln_offmask(pmap_get_pt_attr(pmap), level - 1));
2794 #endif
2795 if (level < PMAP_TT_MAX_LEVEL) {
2796 ptdp->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt = PT_DESC_REFCOUNT;
2797 }
2798 }
2799
2800
2801 boolean_t
2802 pmap_valid_address(
2803 pmap_paddr_t addr)
2804 {
2805 return pa_valid(addr);
2806 }
2807
2808 #if (__ARM_VMSA__ == 7)
2809
2810 /*
2811 * Given an offset and a map, compute the address of the
2812 * corresponding translation table entry.
2813 */
2814 static inline tt_entry_t *
2815 pmap_tte(pmap_t pmap,
2816 vm_map_address_t addr)
2817 {
2818 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
2819
2820 if (!(tte_index(pmap, pt_attr, addr) < pmap->tte_index_max)) {
2821 return (tt_entry_t *)NULL;
2822 }
2823 return &pmap->tte[tte_index(pmap, pt_attr, addr)];
2824 }
2825
2826
2827 /*
2828 * Given an offset and a map, compute the address of the
2829 * pte. If the address is invalid with respect to the map
2830 * then PT_ENTRY_NULL is returned (and the map may need to grow).
2831 *
2832 * This is only used internally.
2833 */
2834 static inline pt_entry_t *
2835 pmap_pte(
2836 pmap_t pmap,
2837 vm_map_address_t addr)
2838 {
2839 pt_entry_t *ptp;
2840 tt_entry_t *ttp;
2841 tt_entry_t tte;
2842
2843 ttp = pmap_tte(pmap, addr);
2844 if (ttp == (tt_entry_t *)NULL) {
2845 return PT_ENTRY_NULL;
2846 }
2847 tte = *ttp;
2848 #if MACH_ASSERT
2849 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
2850 panic("%s: Attempt to demote L1 block, tte=0x%lx, "
2851 "pmap=%p, addr=%p",
2852 __FUNCTION__, (unsigned long)tte,
2853 pmap, (void*)addr);
2854 }
2855 #endif
2856 if ((tte & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE) {
2857 return PT_ENTRY_NULL;
2858 }
2859 ptp = (pt_entry_t *) ttetokv(tte) + ptenum(addr);
2860 return ptp;
2861 }
2862
2863 __unused static inline tt_entry_t *
2864 pmap_ttne(pmap_t pmap,
2865 unsigned int target_level,
2866 vm_map_address_t addr)
2867 {
2868 tt_entry_t * ret_ttep = NULL;
2869
2870 switch (target_level) {
2871 case 1:
2872 ret_ttep = pmap_tte(pmap, addr);
2873 break;
2874 case 2:
2875 ret_ttep = (tt_entry_t *)pmap_pte(pmap, addr);
2876 break;
2877 default:
2878 panic("%s: bad level, "
2879 "pmap=%p, target_level=%u, addr=%p",
2880 __FUNCTION__,
2881 pmap, target_level, (void *)addr);
2882 }
2883
2884 return ret_ttep;
2885 }
2886
2887 #else
2888
2889 static inline tt_entry_t *
2890 pmap_ttne(pmap_t pmap,
2891 unsigned int target_level,
2892 vm_map_address_t addr)
2893 {
2894 tt_entry_t * ttp = NULL;
2895 tt_entry_t * ttep = NULL;
2896 tt_entry_t tte = ARM_TTE_EMPTY;
2897 unsigned int cur_level;
2898
2899 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
2900
2901 ttp = pmap->tte;
2902
2903 assert(target_level <= pt_attr->pta_max_level);
2904
2905 for (cur_level = pt_attr->pta_root_level; cur_level <= target_level; cur_level++) {
2906 ttep = &ttp[ttn_index(pmap, pt_attr, addr, cur_level)];
2907
2908 if (cur_level == target_level) {
2909 break;
2910 }
2911
2912 tte = *ttep;
2913
2914 #if MACH_ASSERT
2915 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) == (ARM_TTE_TYPE_BLOCK | ARM_TTE_VALID)) {
2916 panic("%s: Attempt to demote L%u block, tte=0x%llx, "
2917 "pmap=%p, target_level=%u, addr=%p",
2918 __FUNCTION__, cur_level, tte,
2919 pmap, target_level, (void*)addr);
2920 }
2921 #endif
2922 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
2923 return TT_ENTRY_NULL;
2924 }
2925
2926 ttp = (tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
2927 }
2928
2929 return ttep;
2930 }
2931
2932 /*
2933 * Given an offset and a map, compute the address of the level 1 translation table entry.
2934 * If the translation is invalid then TT_ENTRY_NULL is returned.
2935 */
2936 static inline tt_entry_t *
2937 pmap_tt1e(pmap_t pmap,
2938 vm_map_address_t addr)
2939 {
2940 return pmap_ttne(pmap, PMAP_TT_L1_LEVEL, addr);
2941 }
2942
2943 /*
2944 * Given an offset and a map, compute the address of the level 2 translation table entry.
2945 * If the translation is invalid then TT_ENTRY_NULL is returned.
2946 */
2947 static inline tt_entry_t *
2948 pmap_tt2e(pmap_t pmap,
2949 vm_map_address_t addr)
2950 {
2951 return pmap_ttne(pmap, PMAP_TT_L2_LEVEL, addr);
2952 }
2953
2954
2955 /*
2956 * Given an offset and a map, compute the address of the level 3 translation table entry.
2957 * If the translation is invalid then PT_ENTRY_NULL is returned.
2958 */
2959 static inline pt_entry_t *
2960 pmap_tt3e(
2961 pmap_t pmap,
2962 vm_map_address_t addr)
2963 {
2964 return (pt_entry_t*)pmap_ttne(pmap, PMAP_TT_L3_LEVEL, addr);
2965 }
2966
2967 static inline tt_entry_t *
2968 pmap_tte(
2969 pmap_t pmap,
2970 vm_map_address_t addr)
2971 {
2972 return pmap_tt2e(pmap, addr);
2973 }
2974
2975 static inline pt_entry_t *
2976 pmap_pte(
2977 pmap_t pmap,
2978 vm_map_address_t addr)
2979 {
2980 return pmap_tt3e(pmap, addr);
2981 }
2982
2983 #endif
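/*
 * Illustrative usage (editor's sketch, not compiled): pmap_pte() performs a
 * read-only walk to the leaf level and returns PT_ENTRY_NULL rather than
 * growing the map, so callers must handle both a missing table and an empty
 * (fault) entry; pte_to_pa() is assumed here as the usual PTE-to-physical
 * helper from proc_reg.h:
 *
 *     pt_entry_t *ptep = pmap_pte(pmap, va);
 *     if ((ptep == PT_ENTRY_NULL) || (*ptep == ARM_PTE_TYPE_FAULT)) {
 *         // no leaf mapping for va; the table may need to be expanded first
 *     } else {
 *         pmap_paddr_t pa = pte_to_pa(*ptep) | (va & PAGE_MASK);
 *     }
 */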
2984
2985
2986
2987
2988
2989
2990 /*
2991 * Map memory at initialization. The physical addresses being
2992 * mapped are not managed and are never unmapped.
2993 *
2994 * For now, VM is already on; we only need to map the
2995 * specified memory.
2996 */
2997 vm_map_address_t
2998 pmap_map(
2999 vm_map_address_t virt,
3000 vm_offset_t start,
3001 vm_offset_t end,
3002 vm_prot_t prot,
3003 unsigned int flags)
3004 {
3005 kern_return_t kr;
3006 vm_size_t ps;
3007
3008 ps = PAGE_SIZE;
3009 while (start < end) {
3010 kr = pmap_enter(kernel_pmap, virt, (ppnum_t)atop(start),
3011 prot, VM_PROT_NONE, flags, FALSE);
3012
3013 if (kr != KERN_SUCCESS) {
3014 panic("%s: failed pmap_enter, "
3015 "virt=%p, start_addr=%p, end_addr=%p, prot=%#x, flags=%#x",
3016 __FUNCTION__,
3017 (void *) virt, (void *) start, (void *) end, prot, flags);
3018 }
3019
3020 virt += ps;
3021 start += ps;
3022 }
3023 return virt;
3024 }
3025
3026 vm_map_address_t
3027 pmap_map_bd_with_options(
3028 vm_map_address_t virt,
3029 vm_offset_t start,
3030 vm_offset_t end,
3031 vm_prot_t prot,
3032 int32_t options)
3033 {
3034 pt_entry_t tmplate;
3035 pt_entry_t *ptep;
3036 vm_map_address_t vaddr;
3037 vm_offset_t paddr;
3038 pt_entry_t mem_attr;
3039
3040 switch (options & PMAP_MAP_BD_MASK) {
3041 case PMAP_MAP_BD_WCOMB:
3042 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
3043 #if (__ARM_VMSA__ > 7)
3044 mem_attr |= ARM_PTE_SH(SH_OUTER_MEMORY);
3045 #else
3046 mem_attr |= ARM_PTE_SH;
3047 #endif
3048 break;
3049 case PMAP_MAP_BD_POSTED:
3050 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
3051 break;
3052 case PMAP_MAP_BD_POSTED_REORDERED:
3053 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
3054 break;
3055 case PMAP_MAP_BD_POSTED_COMBINED_REORDERED:
3056 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
3057 break;
3058 default:
3059 mem_attr = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
3060 break;
3061 }
3062
3063 tmplate = pa_to_pte(start) | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA) |
3064 mem_attr | ARM_PTE_TYPE | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF;
3065 #if __ARM_KERNEL_PROTECT__
3066 tmplate |= ARM_PTE_NG;
3067 #endif /* __ARM_KERNEL_PROTECT__ */
3068
3069 vaddr = virt;
3070 paddr = start;
3071 while (paddr < end) {
3072 ptep = pmap_pte(kernel_pmap, vaddr);
3073 if (ptep == PT_ENTRY_NULL) {
3074 panic("%s: no PTE for vaddr=%p, "
3075 "virt=%p, start=%p, end=%p, prot=0x%x, options=0x%x",
3076 __FUNCTION__, (void*)vaddr,
3077 (void*)virt, (void*)start, (void*)end, prot, options);
3078 }
3079
3080 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
3081 WRITE_PTE_STRONG(ptep, tmplate);
3082
3083 pte_increment_pa(tmplate);
3084 vaddr += PAGE_SIZE;
3085 paddr += PAGE_SIZE;
3086 }
3087
3088 if (end >= start) {
3089 flush_mmu_tlb_region(virt, (unsigned)(end - start));
3090 }
3091
3092 return vaddr;
3093 }
3094
3095 /*
3096 * Back-door routine for mapping kernel VM at initialization.
3097 * Useful for mapping memory outside the range
3098 * [vm_first_phys, vm_last_phys] (i.e., devices).
3099 * Otherwise like pmap_map.
3100 */
3101 vm_map_address_t
3102 pmap_map_bd(
3103 vm_map_address_t virt,
3104 vm_offset_t start,
3105 vm_offset_t end,
3106 vm_prot_t prot)
3107 {
3108 pt_entry_t tmplate;
3109 pt_entry_t *ptep;
3110 vm_map_address_t vaddr;
3111 vm_offset_t paddr;
3112
3113 /* not cacheable and not buffered */
3114 tmplate = pa_to_pte(start)
3115 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
3116 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
3117 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
3118 #if __ARM_KERNEL_PROTECT__
3119 tmplate |= ARM_PTE_NG;
3120 #endif /* __ARM_KERNEL_PROTECT__ */
3121
3122 vaddr = virt;
3123 paddr = start;
3124 while (paddr < end) {
3125 ptep = pmap_pte(kernel_pmap, vaddr);
3126 if (ptep == PT_ENTRY_NULL) {
3127 panic("pmap_map_bd");
3128 }
3129 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
3130 WRITE_PTE_STRONG(ptep, tmplate);
3131
3132 pte_increment_pa(tmplate);
3133 vaddr += PAGE_SIZE;
3134 paddr += PAGE_SIZE;
3135 }
3136
3137 if (end >= start) {
3138 flush_mmu_tlb_region(virt, (unsigned)(end - start));
3139 }
3140
3141 return vaddr;
3142 }
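/*
 * Illustrative usage (editor's sketch; the va/pa variables are placeholders):
 * early boot code that needs a device register window maps it through these
 * back-door paths, selecting memory attributes via pmap_map_bd_with_options():
 *
 *     // strongly-ordered/device-style mapping (default, uncached attribute)
 *     next_va = pmap_map_bd(va, pa_start, pa_start + PAGE_SIZE,
 *         VM_PROT_READ | VM_PROT_WRITE);
 *
 *     // write-combined mapping, e.g. for a framebuffer-like region
 *     next_va = pmap_map_bd_with_options(va, pa_start, pa_start + PAGE_SIZE,
 *         VM_PROT_READ | VM_PROT_WRITE, PMAP_MAP_BD_WCOMB);
 *
 * Both return the VA just past the last page mapped, so consecutive calls can
 * feed the return value into the next call's virt argument.
 */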
3143
3144 /*
3145 * Back-door routine for mapping kernel VM at initialization.
3146 * Useful for mapping specific physical addresses in early
3147 * boot (i.e., before kernel_map is initialized).
3148 *
3149 * Maps are in the VM_HIGH_KERNEL_WINDOW area.
3150 */
3151
3152 vm_map_address_t
3153 pmap_map_high_window_bd(
3154 vm_offset_t pa_start,
3155 vm_size_t len,
3156 vm_prot_t prot)
3157 {
3158 pt_entry_t *ptep, pte;
3159 #if (__ARM_VMSA__ == 7)
3160 vm_map_address_t va_start = VM_HIGH_KERNEL_WINDOW;
3161 vm_map_address_t va_max = VM_MAX_KERNEL_ADDRESS;
3162 #else
3163 vm_map_address_t va_start = VREGION1_START;
3164 vm_map_address_t va_max = VREGION1_START + VREGION1_SIZE;
3165 #endif
3166 vm_map_address_t va_end;
3167 vm_map_address_t va;
3168 vm_size_t offset;
3169
3170 offset = pa_start & PAGE_MASK;
3171 pa_start -= offset;
3172 len += offset;
3173
3174 if (len > (va_max - va_start)) {
3175 panic("%s: area too large, "
3176 "pa_start=%p, len=%p, prot=0x%x",
3177 __FUNCTION__,
3178 (void*)pa_start, (void*)len, prot);
3179 }
3180
3181 scan:
3182 for (; va_start < va_max; va_start += PAGE_SIZE) {
3183 ptep = pmap_pte(kernel_pmap, va_start);
3184 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
3185 if (*ptep == ARM_PTE_TYPE_FAULT) {
3186 break;
3187 }
3188 }
3189 if (va_start > va_max) {
3190 panic("%s: insufficient pages, "
3191 "pa_start=%p, len=%p, prot=0x%x",
3192 __FUNCTION__,
3193 (void*)pa_start, (void*)len, prot);
3194 }
3195
3196 for (va_end = va_start + PAGE_SIZE; va_end < va_start + len; va_end += PAGE_SIZE) {
3197 ptep = pmap_pte(kernel_pmap, va_end);
3198 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
3199 if (*ptep != ARM_PTE_TYPE_FAULT) {
3200 va_start = va_end + PAGE_SIZE;
3201 goto scan;
3202 }
3203 }
3204
3205 for (va = va_start; va < va_end; va += PAGE_SIZE, pa_start += PAGE_SIZE) {
3206 ptep = pmap_pte(kernel_pmap, va);
3207 pte = pa_to_pte(pa_start)
3208 | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX
3209 | ARM_PTE_AP((prot & VM_PROT_WRITE) ? AP_RWNA : AP_RONA)
3210 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
3211 #if (__ARM_VMSA__ > 7)
3212 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
3213 #else
3214 pte |= ARM_PTE_SH;
3215 #endif
3216 #if __ARM_KERNEL_PROTECT__
3217 pte |= ARM_PTE_NG;
3218 #endif /* __ARM_KERNEL_PROTECT__ */
3219 WRITE_PTE_STRONG(ptep, pte);
3220 }
3221 PMAP_UPDATE_TLBS(kernel_pmap, va_start, va_start + len, false);
3222 #if KASAN
3223 kasan_notify_address(va_start, len);
3224 #endif
3225 return va_start;
3226 }
3227
3228 #define PMAP_ALIGN(addr, align) ((addr) + ((align) - 1) & ~((align) - 1))
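/*
 * Editor's note: `+` binds tighter than `&`, so the macro expands to
 * (((addr) + ((align) - 1)) & ~((align) - 1)), i.e. the usual round-up to a
 * power-of-two boundary. For example, PMAP_ALIGN(0x1001, 0x40) yields 0x1040
 * and PMAP_ALIGN(0x1000, 0x40) yields 0x1000; align must be a power of two.
 */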
3229
3230 static vm_size_t
3231 pmap_compute_io_rgns(void)
3232 {
3233 DTEntry entry;
3234 pmap_io_range_t *ranges;
3235 uint64_t rgn_end;
3236 void *prop = NULL;
3237 int err;
3238 unsigned int prop_size;
3239
3240 err = DTLookupEntry(NULL, "/defaults", &entry);
3241 assert(err == kSuccess);
3242
3243 if (kSuccess != DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size)) {
3244 return 0;
3245 }
3246
3247 ranges = prop;
3248 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
3249 if (ranges[i].addr & PAGE_MASK) {
3250 panic("pmap I/O region %u addr 0x%llx is not page-aligned", i, ranges[i].addr);
3251 }
3252 if (ranges[i].len & PAGE_MASK) {
3253 panic("pmap I/O region %u length 0x%llx is not page-aligned", i, ranges[i].len);
3254 }
3255 if (os_add_overflow(ranges[i].addr, ranges[i].len, &rgn_end)) {
3256 panic("pmap I/O region %u addr 0x%llx length 0x%llx wraps around", i, ranges[i].addr, ranges[i].len);
3257 }
3258 if (((ranges[i].addr <= gPhysBase) && (rgn_end > gPhysBase)) ||
3259 ((ranges[i].addr < avail_end) && (rgn_end >= avail_end)) ||
3260 ((ranges[i].addr > gPhysBase) && (rgn_end < avail_end))) {
3261 panic("pmap I/O region %u addr 0x%llx length 0x%llx overlaps physical memory", i, ranges[i].addr, ranges[i].len);
3262 }
3263
3264 ++num_io_rgns;
3265 }
3266
3267 return num_io_rgns * sizeof(*ranges);
3268 }
3269
3270 /*
3271 * return < 0 for a < b
3272 * 0 for a == b
3273 * > 0 for a > b
3274 */
3275 typedef int (*cmpfunc_t)(const void *a, const void *b);
3276
3277 extern void
3278 qsort(void *a, size_t n, size_t es, cmpfunc_t cmp);
3279
3280 static int
3281 cmp_io_rgns(const void *a, const void *b)
3282 {
3283 const pmap_io_range_t *range_a = a;
3284 const pmap_io_range_t *range_b = b;
3285 if ((range_b->addr + range_b->len) <= range_a->addr) {
3286 return 1;
3287 } else if ((range_a->addr + range_a->len) <= range_b->addr) {
3288 return -1;
3289 } else {
3290 return 0;
3291 }
3292 }
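/*
 * Editor's note: this comparator orders ranges by address but deliberately
 * reports any two overlapping ranges as equal. For example, with
 * range_a = {.addr = 0x1000, .len = 0x1000} and
 * range_b = {.addr = 0x1800, .len = 0x100}, neither end test fires and the
 * result is 0. Presumably that is what allows a later lookup to binary-search
 * the sorted io_attr_table with a one-byte probe range and land on the region
 * containing a given physical address.
 */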
3293
3294 static void
3295 pmap_load_io_rgns(void)
3296 {
3297 DTEntry entry;
3298 pmap_io_range_t *ranges;
3299 void *prop = NULL;
3300 int err;
3301 unsigned int prop_size;
3302
3303 if (num_io_rgns == 0) {
3304 return;
3305 }
3306
3307 err = DTLookupEntry(NULL, "/defaults", &entry);
3308 assert(err == kSuccess);
3309
3310 err = DTGetProperty(entry, "pmap-io-ranges", &prop, &prop_size);
3311 assert(err == kSuccess);
3312
3313 ranges = prop;
3314 for (unsigned int i = 0; i < (prop_size / sizeof(*ranges)); ++i) {
3315 io_attr_table[i] = ranges[i];
3316 }
3317
3318 qsort(io_attr_table, num_io_rgns, sizeof(*ranges), cmp_io_rgns);
3319 }
3320
3321 #if __arm64__
3322 /*
3323 * pmap_get_arm64_prot
3324 *
3325 * Return the effective ARMv8 VMSA block protections, including
3326 * table AP/PXN/XN overrides, of a pmap entry.
3327 *
3328 */
3329
3330 uint64_t
3331 pmap_get_arm64_prot(
3332 pmap_t pmap,
3333 vm_offset_t addr)
3334 {
3335 tt_entry_t tte = 0;
3336 unsigned int level = 0;
3337 uint64_t tte_type = 0;
3338 uint64_t effective_prot_bits = 0;
3339 uint64_t aggregate_tte = 0;
3340 uint64_t table_ap_bits = 0, table_xn = 0, table_pxn = 0;
3341 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
3342
3343 for (level = pt_attr->pta_root_level; level <= pt_attr->pta_max_level; level++) {
3344 tte = *pmap_ttne(pmap, level, addr);
3345
3346 if (!(tte & ARM_TTE_VALID)) {
3347 return 0;
3348 }
3349
3350 tte_type = tte & ARM_TTE_TYPE_MASK;
3351
3352 if ((tte_type == ARM_TTE_TYPE_BLOCK) ||
3353 (level == pt_attr->pta_max_level)) {
3354 /* Block or page mapping; both have the same protection bit layout. */
3355 break;
3356 } else if (tte_type == ARM_TTE_TYPE_TABLE) {
3357 /* All of the table bits we care about are overrides, so just OR them together. */
3358 aggregate_tte |= tte;
3359 }
3360 }
3361
3362 table_ap_bits = ((aggregate_tte >> ARM_TTE_TABLE_APSHIFT) & AP_MASK);
3363 table_xn = (aggregate_tte & ARM_TTE_TABLE_XN);
3364 table_pxn = (aggregate_tte & ARM_TTE_TABLE_PXN);
3365
3366 /* Start with the PTE bits. */
3367 effective_prot_bits = tte & (ARM_PTE_APMASK | ARM_PTE_NX | ARM_PTE_PNX);
3368
3369 /* Table AP bits mask out block/page AP bits */
3370 effective_prot_bits &= ~(ARM_PTE_AP(table_ap_bits));
3371
3372 /* XN/PXN bits can be OR'd in. */
3373 effective_prot_bits |= (table_xn ? ARM_PTE_NX : 0);
3374 effective_prot_bits |= (table_pxn ? ARM_PTE_PNX : 0);
3375
3376 return effective_prot_bits;
3377 }
3378 #endif /* __arm64__ */
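/*
 * Illustrative usage (editor's sketch, not compiled): the value returned by
 * pmap_get_arm64_prot() is a raw mask of ARM_PTE_APMASK / ARM_PTE_NX /
 * ARM_PTE_PNX bits with table overrides folded in, so a caller decodes it
 * with the same macros used to build PTEs. AP_RWRW is assumed here to be the
 * EL0-writable AP encoding from proc_reg.h:
 *
 *     uint64_t prot = pmap_get_arm64_prot(pmap, addr);
 *     bool user_executable   = !(prot & ARM_PTE_NX);
 *     bool kernel_executable = !(prot & ARM_PTE_PNX);
 *     bool writable          = ((prot & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) ||
 *                              ((prot & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWRW));
 */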
3379
3380
3381 /*
3382 * Bootstrap the system enough to run with virtual memory.
3383 *
3384 * The early VM initialization code has already allocated
3385 * the first CPU's translation table and made entries for
3386 * all the one-to-one mappings to be found there.
3387 *
3388 * We must set up the kernel pmap structures and the
3389 * physical-to-virtual translation lookup tables for the
3390 * physical memory to be managed (between avail_start and
3391 * avail_end).
3392 *
3393 * Map the kernel's code and data, and allocate the system page table.
3394 * Page_size must already be set.
3395 *
3396 * Parameters:
3397 * first_avail first available physical page -
3398 * after kernel page tables
3399 * avail_start PA of first managed physical page
3400 * avail_end PA of last managed physical page
3401 */
3402
3403 void
3404 pmap_bootstrap(
3405 vm_offset_t vstart)
3406 {
3407 pmap_paddr_t pmap_struct_start;
3408 vm_size_t pv_head_size;
3409 vm_size_t ptd_root_table_size;
3410 vm_size_t pp_attr_table_size;
3411 vm_size_t io_attr_table_size;
3412 unsigned int npages;
3413 vm_map_offset_t maxoffset;
3414
3415 lck_grp_init(&pmap_lck_grp, "pmap", LCK_GRP_ATTR_NULL);
3416
3417
3418 #if DEVELOPMENT || DEBUG
3419 if (PE_parse_boot_argn("pmap_trace", &pmap_trace_mask, sizeof(pmap_trace_mask))) {
3420 kprintf("Kernel traces for pmap operations enabled\n");
3421 }
3422 #endif
3423
3424 /*
3425 * Initialize the kernel pmap.
3426 */
3427 pmap_stamp = 1;
3428 #if ARM_PARAMETERIZED_PMAP
3429 kernel_pmap->pmap_pt_attr = native_pt_attr;
3430 #endif /* ARM_PARAMETERIZED_PMAP */
3431 #if HAS_APPLE_PAC
3432 kernel_pmap->disable_jop = 0;
3433 #endif /* HAS_APPLE_PAC */
3434 kernel_pmap->tte = cpu_tte;
3435 kernel_pmap->ttep = cpu_ttep;
3436 #if (__ARM_VMSA__ > 7)
3437 kernel_pmap->min = ARM64_TTBR1_MIN_ADDR;
3438 #else
3439 kernel_pmap->min = VM_MIN_KERNEL_AND_KEXT_ADDRESS;
3440 #endif
3441 kernel_pmap->max = VM_MAX_KERNEL_ADDRESS;
3442 os_atomic_init(&kernel_pmap->ref_count, 1);
3443 kernel_pmap->gc_status = 0;
3444 kernel_pmap->nx_enabled = TRUE;
3445 #ifdef __arm64__
3446 kernel_pmap->is_64bit = TRUE;
3447 #else
3448 kernel_pmap->is_64bit = FALSE;
3449 #endif
3450 kernel_pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);
3451
3452 kernel_pmap->nested_region_grand_addr = 0x0ULL;
3453 kernel_pmap->nested_region_subord_addr = 0x0ULL;
3454 kernel_pmap->nested_region_size = 0x0ULL;
3455 kernel_pmap->nested_region_asid_bitmap = NULL;
3456 kernel_pmap->nested_region_asid_bitmap_size = 0x0UL;
3457
3458 #if (__ARM_VMSA__ == 7)
3459 kernel_pmap->tte_index_max = 4 * NTTES;
3460 #endif
3461 kernel_pmap->prev_tte = (tt_entry_t *) NULL;
3462 kernel_pmap->hw_asid = 0;
3463 kernel_pmap->sw_asid = 0;
3464
3465 PMAP_LOCK_INIT(kernel_pmap);
3466 #if (__ARM_VMSA__ == 7)
3467 simple_lock_init(&kernel_pmap->tt1_lock, 0);
3468 kernel_pmap->cpu_ref = 0;
3469 #endif
3470 memset((void *) &kernel_pmap->stats, 0, sizeof(kernel_pmap->stats));
3471
3472 /* allocate space for and initialize the bookkeeping structures */
3473 io_attr_table_size = pmap_compute_io_rgns();
3474 npages = (unsigned int)atop(mem_size);
3475 pp_attr_table_size = npages * sizeof(pp_attr_t);
3476 pv_head_size = round_page(sizeof(pv_entry_t *) * npages);
3477 // allocate enough initial PTDs to map twice the available physical memory
3478 ptd_root_table_size = sizeof(pt_desc_t) * (mem_size / ((PAGE_SIZE / sizeof(pt_entry_t)) * ARM_PGBYTES)) * 2;
3479
3480 pmap_struct_start = avail_start;
3481
3482 pp_attr_table = (pp_attr_t *) phystokv(avail_start);
3483 avail_start = PMAP_ALIGN(avail_start + pp_attr_table_size, __alignof(pp_attr_t));
3484 io_attr_table = (pmap_io_range_t *) phystokv(avail_start);
3485 avail_start = PMAP_ALIGN(avail_start + io_attr_table_size, __alignof(pv_entry_t*));
3486 pv_head_table = (pv_entry_t **) phystokv(avail_start);
3487 avail_start = PMAP_ALIGN(avail_start + pv_head_size, __alignof(pt_desc_t));
3488 ptd_root_table = (pt_desc_t *)phystokv(avail_start);
3489 avail_start = round_page(avail_start + ptd_root_table_size);
3490
3491 memset((char *)phystokv(pmap_struct_start), 0, avail_start - pmap_struct_start);
3492
3493 pmap_load_io_rgns();
3494 ptd_bootstrap(ptd_root_table, (unsigned int)(ptd_root_table_size / sizeof(pt_desc_t)));
3495
3496 pmap_cpu_data_array_init();
3497
3498 vm_first_phys = gPhysBase;
3499 vm_last_phys = trunc_page(avail_end);
3500
3501 simple_lock_init(&pmaps_lock, 0);
3502 simple_lock_init(&asid_lock, 0);
3503 simple_lock_init(&tt1_lock, 0);
3504 queue_init(&map_pmap_list);
3505 queue_enter(&map_pmap_list, kernel_pmap, pmap_t, pmaps);
3506 free_page_size_tt_list = TT_FREE_ENTRY_NULL;
3507 free_page_size_tt_count = 0;
3508 free_page_size_tt_max = 0;
3509 free_two_page_size_tt_list = TT_FREE_ENTRY_NULL;
3510 free_two_page_size_tt_count = 0;
3511 free_two_page_size_tt_max = 0;
3512 free_tt_list = TT_FREE_ENTRY_NULL;
3513 free_tt_count = 0;
3514 free_tt_max = 0;
3515
3516 simple_lock_init(&pt_pages_lock, 0);
3517 queue_init(&pt_page_list);
3518
3519 simple_lock_init(&pmap_pages_lock, 0);
3520 pmap_pages_request_count = 0;
3521 pmap_pages_request_acum = 0;
3522 pmap_pages_reclaim_list = PAGE_FREE_ENTRY_NULL;
3523
3524 virtual_space_start = vstart;
3525 virtual_space_end = VM_MAX_KERNEL_ADDRESS;
3526
3527 bitmap_full(&asid_bitmap[0], MAX_ASID);
3528
3529
3530
3531 if (PE_parse_boot_argn("arm_maxoffset", &maxoffset, sizeof(maxoffset))) {
3532 maxoffset = trunc_page(maxoffset);
3533 if ((maxoffset >= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MIN))
3534 && (maxoffset <= pmap_max_offset(FALSE, ARM_PMAP_MAX_OFFSET_MAX))) {
3535 arm_pmap_max_offset_default = maxoffset;
3536 }
3537 }
3538 #if defined(__arm64__)
3539 if (PE_parse_boot_argn("arm64_maxoffset", &maxoffset, sizeof(maxoffset))) {
3540 maxoffset = trunc_page(maxoffset);
3541 if ((maxoffset >= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MIN))
3542 && (maxoffset <= pmap_max_offset(TRUE, ARM_PMAP_MAX_OFFSET_MAX))) {
3543 arm64_pmap_max_offset_default = maxoffset;
3544 }
3545 }
3546 #endif
3547
3548 #if DEVELOPMENT || DEBUG
3549 PE_parse_boot_argn("panic_on_unsigned_execute", &panic_on_unsigned_execute, sizeof(panic_on_unsigned_execute));
3550 #endif /* DEVELOPMENT || DEBUG */
3551
3552 pmap_nesting_size_min = ARM_NESTING_SIZE_MIN;
3553 pmap_nesting_size_max = ARM_NESTING_SIZE_MAX;
3554
3555 simple_lock_init(&phys_backup_lock, 0);
3556
3557
3558 #if MACH_ASSERT
3559 PE_parse_boot_argn("pmap_stats_assert",
3560 &pmap_stats_assert,
3561 sizeof(pmap_stats_assert));
3562 PE_parse_boot_argn("vm_footprint_suspend_allowed",
3563 &vm_footprint_suspend_allowed,
3564 sizeof(vm_footprint_suspend_allowed));
3565 #endif /* MACH_ASSERT */
3566
3567 #if KASAN
3568 /* Shadow the CPU copy windows, as they fall outside of the physical aperture */
3569 kasan_map_shadow(CPUWINDOWS_BASE, CPUWINDOWS_TOP - CPUWINDOWS_BASE, true);
3570 #endif /* KASAN */
3571 }
3572
3573
3574 void
3575 pmap_virtual_space(
3576 vm_offset_t *startp,
3577 vm_offset_t *endp
3578 )
3579 {
3580 *startp = virtual_space_start;
3581 *endp = virtual_space_end;
3582 }
3583
3584
3585 boolean_t
3586 pmap_virtual_region(
3587 unsigned int region_select,
3588 vm_map_offset_t *startp,
3589 vm_map_size_t *size
3590 )
3591 {
3592 boolean_t ret = FALSE;
3593 #if __ARM64_PMAP_SUBPAGE_L1__ && __ARM_16K_PG__
3594 if (region_select == 0) {
3595 /*
3596 * In this config, the bootstrap mappings should occupy their own L2
3597 * TTs, as they should be immutable after boot. Having the associated
3598 * TTEs and PTEs in their own pages allows us to lock down those pages,
3599 * while allowing the rest of the kernel address range to be remapped.
3600 */
3601 #if (__ARM_VMSA__ > 7)
3602 *startp = LOW_GLOBAL_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK;
3603 #else
3604 #error Unsupported configuration
3605 #endif
3606 *size = ((VM_MAX_KERNEL_ADDRESS - *startp) & ~PAGE_MASK);
3607 ret = TRUE;
3608 }
3609 #else
3610 #if (__ARM_VMSA__ > 7)
3611 unsigned long low_global_vr_mask = 0;
3612 vm_map_size_t low_global_vr_size = 0;
3613 #endif
3614
3615 if (region_select == 0) {
3616 #if (__ARM_VMSA__ == 7)
3617 *startp = gVirtBase & 0xFFC00000;
3618 *size = ((virtual_space_start - (gVirtBase & 0xFFC00000)) + ~0xFFC00000) & 0xFFC00000;
3619 #else
3620 /* Round to avoid overlapping with the V=P area; round to at least the L2 block size. */
3621 if (!TEST_PAGE_SIZE_4K) {
3622 *startp = gVirtBase & 0xFFFFFFFFFE000000;
3623 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFE000000)) + ~0xFFFFFFFFFE000000) & 0xFFFFFFFFFE000000;
3624 } else {
3625 *startp = gVirtBase & 0xFFFFFFFFFF800000;
3626 *size = ((virtual_space_start - (gVirtBase & 0xFFFFFFFFFF800000)) + ~0xFFFFFFFFFF800000) & 0xFFFFFFFFFF800000;
3627 }
3628 #endif
3629 ret = TRUE;
3630 }
3631 if (region_select == 1) {
3632 *startp = VREGION1_START;
3633 *size = VREGION1_SIZE;
3634 ret = TRUE;
3635 }
3636 #if (__ARM_VMSA__ > 7)
3637 /* We need to reserve a range that is at least the size of an L2 block mapping for the low globals */
3638 if (!TEST_PAGE_SIZE_4K) {
3639 low_global_vr_mask = 0xFFFFFFFFFE000000;
3640 low_global_vr_size = 0x2000000;
3641 } else {
3642 low_global_vr_mask = 0xFFFFFFFFFF800000;
3643 low_global_vr_size = 0x800000;
3644 }
3645
3646 if (((gVirtBase & low_global_vr_mask) != LOW_GLOBAL_BASE_ADDRESS) && (region_select == 2)) {
3647 *startp = LOW_GLOBAL_BASE_ADDRESS;
3648 *size = low_global_vr_size;
3649 ret = TRUE;
3650 }
3651
3652 if (region_select == 3) {
3653 /* In this config, we allow the bootstrap mappings to occupy the same
3654 * page table pages as the heap.
3655 */
3656 *startp = VM_MIN_KERNEL_ADDRESS;
3657 *size = LOW_GLOBAL_BASE_ADDRESS - *startp;
3658 ret = TRUE;
3659 }
3660 #endif
3661 #endif
3662 return ret;
3663 }
3664
3665 unsigned int
3666 pmap_free_pages(
3667 void)
3668 {
3669 return (unsigned int)atop(avail_end - first_avail);
3670 }
3671
3672
3673 boolean_t
3674 pmap_next_page_hi(
3675 ppnum_t * pnum,
3676 __unused boolean_t might_free)
3677 {
3678 return pmap_next_page(pnum);
3679 }
3680
3681
3682 boolean_t
3683 pmap_next_page(
3684 ppnum_t *pnum)
3685 {
3686 if (first_avail != avail_end) {
3687 *pnum = (ppnum_t)atop(first_avail);
3688 first_avail += PAGE_SIZE;
3689 return TRUE;
3690 }
3691 return FALSE;
3692 }
3693
3694
3695 /*
3696 * Initialize the pmap module.
3697 * Called by vm_init, to initialize any structures that the pmap
3698 * system needs to map virtual memory.
3699 */
3700 void
3701 pmap_init(
3702 void)
3703 {
3704 /*
3705 * Protect page zero in the kernel map.
3706 * (can be overridden by permanent translation
3707 * table entries at page zero - see arm_vm_init).
3708 */
3709 vm_protect(kernel_map, 0, PAGE_SIZE, TRUE, VM_PROT_NONE);
3710
3711 pmap_initialized = TRUE;
3712
3713 pmap_zone_init();
3714
3715
3716 /*
3717 * Initialize the pmap object (for tracking the vm_page_t
3718 * structures for pages we allocate to be page tables in
3719 * pmap_expand()).
3720 */
3721 _vm_object_allocate(mem_size, pmap_object);
3722 pmap_object->copy_strategy = MEMORY_OBJECT_COPY_NONE;
3723
3724 pv_init();
3725
3726 /*
3727 * The values of [hard_]maxproc may have been scaled; make sure
3728 * they do not exceed the value of MAX_ASID.
3729 */
3730 if (maxproc > MAX_ASID) {
3731 maxproc = MAX_ASID;
3732 }
3733 if (hard_maxproc > MAX_ASID) {
3734 hard_maxproc = MAX_ASID;
3735 }
3736
3737 #if CONFIG_PGTRACE
3738 pmap_pgtrace_init();
3739 #endif
3740 }
3741
3742 boolean_t
3743 pmap_verify_free(
3744 ppnum_t ppnum)
3745 {
3746 pv_entry_t **pv_h;
3747 int pai;
3748 pmap_paddr_t phys = ptoa(ppnum);
3749
3750 assert(phys != vm_page_fictitious_addr);
3751
3752 if (!pa_valid(phys)) {
3753 return FALSE;
3754 }
3755
3756 pai = (int)pa_index(phys);
3757 pv_h = pai_to_pvh(pai);
3758
3759 return pvh_test_type(pv_h, PVH_TYPE_NULL);
3760 }
3761
3762 #if MACH_ASSERT
3763 void
3764 pmap_assert_free(ppnum_t ppnum)
3765 {
3766 assertf(pmap_verify_free(ppnum), "page = 0x%x", ppnum);
3767 (void)ppnum;
3768 }
3769 #endif
3770
3771
3772 /*
3773 * Initialize zones used by pmap.
3774 */
3775 static void
3776 pmap_zone_init(
3777 void)
3778 {
3779 /*
3780 * Create the zone of physical maps
3781 * and the physical-to-virtual entries.
3782 */
3783 pmap_zone = zinit((vm_size_t) sizeof(struct pmap), (vm_size_t) sizeof(struct pmap) * 256,
3784 PAGE_SIZE, "pmap");
3785 }
3786
3787 __dead2
3788 void
3789 pmap_ledger_alloc_init(size_t size)
3790 {
3791 panic("%s: unsupported, "
3792 "size=%lu",
3793 __func__, size);
3794 }
3795
3796 __dead2
3797 ledger_t
3798 pmap_ledger_alloc(void)
3799 {
3800 panic("%s: unsupported",
3801 __func__);
3802 }
3803
3804 __dead2
3805 void
3806 pmap_ledger_free(ledger_t ledger)
3807 {
3808 panic("%s: unsupported, "
3809 "ledger=%p",
3810 __func__, ledger);
3811 }
3812
3813 /*
3814 * Create and return a physical map.
3815 *
3816 * If the size specified for the map
3817 * is zero, the map is an actual physical
3818 * map, and may be referenced by the
3819 * hardware.
3820 *
3821 * If the size specified is non-zero,
3822 * the map will be used in software only, and
3823 * is bounded by that size.
3824 */
3825 MARK_AS_PMAP_TEXT static pmap_t
3826 pmap_create_options_internal(
3827 ledger_t ledger,
3828 vm_map_size_t size,
3829 unsigned int flags)
3830 {
3831 unsigned i;
3832 unsigned tte_index_max;
3833 pmap_t p;
3834 bool is_64bit = flags & PMAP_CREATE_64BIT;
3835 #if defined(HAS_APPLE_PAC)
3836 bool disable_jop = flags & PMAP_CREATE_DISABLE_JOP;
3837 #endif /* defined(HAS_APPLE_PAC) */
3838
3839 /*
3840 * A software use-only map doesn't even need a pmap.
3841 */
3842 if (size != 0) {
3843 return PMAP_NULL;
3844 }
3845
3846 /*
3847 * Allocate a pmap struct from the pmap_zone. Then allocate
3848 * the translation table of the right size for the pmap.
3849 */
3850 if ((p = (pmap_t) zalloc(pmap_zone)) == PMAP_NULL) {
3851 return PMAP_NULL;
3852 }
3853
3854 if (flags & PMAP_CREATE_64BIT) {
3855 p->min = MACH_VM_MIN_ADDRESS;
3856 p->max = MACH_VM_MAX_ADDRESS;
3857 } else {
3858 p->min = VM_MIN_ADDRESS;
3859 p->max = VM_MAX_ADDRESS;
3860 }
3861
3862 #if defined(HAS_APPLE_PAC)
3863 p->disable_jop = disable_jop;
3864 #endif /* defined(HAS_APPLE_PAC) */
3865
3866 p->nested_region_true_start = 0;
3867 p->nested_region_true_end = ~0;
3868
3869 os_atomic_init(&p->ref_count, 1);
3870 p->gc_status = 0;
3871 p->stamp = os_atomic_inc(&pmap_stamp, relaxed);
3872 p->nx_enabled = TRUE;
3873 p->is_64bit = is_64bit;
3874 p->nested = FALSE;
3875 p->nested_pmap = PMAP_NULL;
3876
3877 #if ARM_PARAMETERIZED_PMAP
3878 p->pmap_pt_attr = native_pt_attr;
3879 #endif /* ARM_PARAMETERIZED_PMAP */
3880
3881 if (!pmap_get_pt_ops(p)->alloc_id(p)) {
3882 goto id_alloc_fail;
3883 }
3884
3885
3886
3887 p->ledger = ledger;
3888
3889 PMAP_LOCK_INIT(p);
3890 #if (__ARM_VMSA__ == 7)
3891 simple_lock_init(&p->tt1_lock, 0);
3892 p->cpu_ref = 0;
3893 #endif
3894 memset((void *) &p->stats, 0, sizeof(p->stats));
3895
3896 p->tt_entry_free = (tt_entry_t *)0;
3897
3898 p->tte = pmap_tt1_allocate(p, PMAP_ROOT_ALLOC_SIZE, 0);
3899 if (!(p->tte)) {
3900 goto tt1_alloc_fail;
3901 }
3902
3903 p->ttep = ml_static_vtop((vm_offset_t)p->tte);
3904 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(p), VM_KERNEL_ADDRHIDE(p->min), VM_KERNEL_ADDRHIDE(p->max), p->ttep);
3905
3906 #if (__ARM_VMSA__ == 7)
3907 tte_index_max = p->tte_index_max = NTTES;
3908 #else
3909 tte_index_max = (PMAP_ROOT_ALLOC_SIZE / sizeof(tt_entry_t));
3910 #endif
3911 p->prev_tte = (tt_entry_t *) NULL;
3912
3913 /* nullify the translation table */
3914 for (i = 0; i < tte_index_max; i++) {
3915 p->tte[i] = ARM_TTE_TYPE_FAULT;
3916 }
3917
3918 FLUSH_PTE_RANGE(p->tte, p->tte + tte_index_max);
3919
3920 /*
3921 * initialize the rest of the structure
3922 */
3923 p->nested_region_grand_addr = 0x0ULL;
3924 p->nested_region_subord_addr = 0x0ULL;
3925 p->nested_region_size = 0x0ULL;
3926 p->nested_region_asid_bitmap = NULL;
3927 p->nested_region_asid_bitmap_size = 0x0UL;
3928
3929 p->nested_has_no_bounds_ref = false;
3930 p->nested_no_bounds_refcnt = 0;
3931 p->nested_bounds_set = false;
3932
3933
3934 #if MACH_ASSERT
3935 p->pmap_stats_assert = TRUE;
3936 p->pmap_pid = 0;
3937 strlcpy(p->pmap_procname, "<nil>", sizeof(p->pmap_procname));
3938 #endif /* MACH_ASSERT */
3939 #if DEVELOPMENT || DEBUG
3940 p->footprint_was_suspended = FALSE;
3941 #endif /* DEVELOPMENT || DEBUG */
3942
3943 pmap_simple_lock(&pmaps_lock);
3944 queue_enter(&map_pmap_list, p, pmap_t, pmaps);
3945 pmap_simple_unlock(&pmaps_lock);
3946
3947 return p;
3948
3949 tt1_alloc_fail:
3950 pmap_get_pt_ops(p)->free_id(p);
3951 id_alloc_fail:
3952 zfree(pmap_zone, p);
3953 return PMAP_NULL;
3954 }
3955
3956 pmap_t
3957 pmap_create_options(
3958 ledger_t ledger,
3959 vm_map_size_t size,
3960 unsigned int flags)
3961 {
3962 pmap_t pmap;
3963
3964 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_START, size, flags);
3965
3966 ledger_reference(ledger);
3967
3968 pmap = pmap_create_options_internal(ledger, size, flags);
3969
3970 if (pmap == PMAP_NULL) {
3971 ledger_dereference(ledger);
3972 }
3973
3974 PMAP_TRACE(1, PMAP_CODE(PMAP__CREATE) | DBG_FUNC_END, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
3975
3976 return pmap;
3977 }
3978
3979 #if MACH_ASSERT
3980 MARK_AS_PMAP_TEXT static void
3981 pmap_set_process_internal(
3982 __unused pmap_t pmap,
3983 __unused int pid,
3984 __unused char *procname)
3985 {
3986 #if MACH_ASSERT
3987 if (pmap == NULL) {
3988 return;
3989 }
3990
3991 VALIDATE_PMAP(pmap);
3992
3993 pmap->pmap_pid = pid;
3994 strlcpy(pmap->pmap_procname, procname, sizeof(pmap->pmap_procname));
3995 if (pmap_ledgers_panic_leeway) {
3996 /*
3997 * XXX FBDP
3998 * Some processes somehow trigger some issues that make
3999 * the pmap stats and ledgers go off track, causing
4000 * some assertion failures and ledger panics.
4001 * Turn off the sanity checks if we allow some ledger leeway
4002 * because of that. We'll still do a final check in
4003 * pmap_check_ledgers() for discrepancies larger than the
4004 * allowed leeway after the address space has been fully
4005 * cleaned up.
4006 */
4007 pmap->pmap_stats_assert = FALSE;
4008 ledger_disable_panic_on_negative(pmap->ledger,
4009 task_ledgers.phys_footprint);
4010 ledger_disable_panic_on_negative(pmap->ledger,
4011 task_ledgers.internal);
4012 ledger_disable_panic_on_negative(pmap->ledger,
4013 task_ledgers.internal_compressed);
4014 ledger_disable_panic_on_negative(pmap->ledger,
4015 task_ledgers.iokit_mapped);
4016 ledger_disable_panic_on_negative(pmap->ledger,
4017 task_ledgers.alternate_accounting);
4018 ledger_disable_panic_on_negative(pmap->ledger,
4019 task_ledgers.alternate_accounting_compressed);
4020 }
4021 #endif /* MACH_ASSERT */
4022 }
4023 #endif /* MACH_ASSERT*/
4024
4025 #if MACH_ASSERT
4026 void
4027 pmap_set_process(
4028 pmap_t pmap,
4029 int pid,
4030 char *procname)
4031 {
4032 pmap_set_process_internal(pmap, pid, procname);
4033 }
4034 #endif /* MACH_ASSERT */
4035
4036 /*
4037 * We maintain stats and ledgers so that a task's physical footprint is:
4038 * phys_footprint = ((internal - alternate_accounting)
4039 * + (internal_compressed - alternate_accounting_compressed)
4040 * + iokit_mapped
4041 * + purgeable_nonvolatile
4042 * + purgeable_nonvolatile_compressed
4043 * + page_table)
4044 * where "alternate_accounting" includes "iokit" and "purgeable" memory.
4045 */
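/*
 * A worked example with hypothetical page counts (all figures illustrative):
 * if a task has internal = 100, alternate_accounting = 10,
 * internal_compressed = 40, alternate_accounting_compressed = 5,
 * iokit_mapped = 10, purgeable_nonvolatile = 0,
 * purgeable_nonvolatile_compressed = 0 and page_table = 8, then
 *	phys_footprint = (100 - 10) + (40 - 5) + 10 + 0 + 0 + 8 = 143 pages.
 * The ledgers themselves are kept in bytes, so each term is scaled by the
 * page size (see the machine_ptob()/PAGE_SIZE conversions in the ledger
 * debits and credits below) before being applied.
 */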
4046
4047
4048 /*
4049 * Retire the given physical map from service.
4050 * Should only be called if the map contains
4051 * no valid mappings.
4052 */
4053 MARK_AS_PMAP_TEXT static void
4054 pmap_destroy_internal(
4055 pmap_t pmap)
4056 {
4057 if (pmap == PMAP_NULL) {
4058 return;
4059 }
4060
4061 VALIDATE_PMAP(pmap);
4062
4063 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
4064
4065 int32_t ref_count = os_atomic_dec(&pmap->ref_count, relaxed);
4066 if (ref_count > 0) {
4067 return;
4068 } else if (ref_count < 0) {
4069 panic("pmap %p: refcount underflow", pmap);
4070 } else if (pmap == kernel_pmap) {
4071 panic("pmap %p: attempt to destroy kernel pmap", pmap);
4072 }
4073
4074 pt_entry_t *ttep;
4075
4076 #if (__ARM_VMSA__ > 7)
4077 pmap_unmap_sharedpage(pmap);
4078 #endif /* (__ARM_VMSA__ > 7) */
4079
4080 pmap_simple_lock(&pmaps_lock);
4081 while (pmap->gc_status & PMAP_GC_INFLIGHT) {
4082 pmap->gc_status |= PMAP_GC_WAIT;
4083 assert_wait((event_t) &pmap->gc_status, THREAD_UNINT);
4084 pmap_simple_unlock(&pmaps_lock);
4085 (void) thread_block(THREAD_CONTINUE_NULL);
4086 pmap_simple_lock(&pmaps_lock);
4087 }
4088 queue_remove(&map_pmap_list, pmap, pmap_t, pmaps);
4089 pmap_simple_unlock(&pmaps_lock);
4090
4091 #if (__ARM_VMSA__ == 7)
4092 if (pmap->cpu_ref != 0) {
4093 panic("%s: cpu_ref=%u, "
4094 "pmap=%p",
4095 __FUNCTION__, pmap->cpu_ref,
4096 pmap);
4097 }
4098 #endif /* (__ARM_VMSA__ == 7) */
4099
4100 pmap_trim_self(pmap);
4101
4102 /*
4103 * Free the memory maps, then the
4104 * pmap structure.
4105 */
4106 #if (__ARM_VMSA__ == 7)
4107 unsigned int i = 0;
4108
4109 PMAP_LOCK(pmap);
4110 for (i = 0; i < pmap->tte_index_max; i++) {
4111 ttep = &pmap->tte[i];
4112 if ((*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
4113 pmap_tte_deallocate(pmap, ttep, PMAP_TT_L1_LEVEL);
4114 }
4115 }
4116 PMAP_UNLOCK(pmap);
4117 #else /* (__ARM_VMSA__ == 7) */
4118 vm_map_address_t c;
4119 unsigned int level;
4120
4121 for (level = pt_attr->pta_max_level - 1; level >= pt_attr->pta_root_level; level--) {
4122 for (c = pmap->min; c < pmap->max; c += pt_attr_ln_size(pt_attr, level)) {
4123 ttep = pmap_ttne(pmap, level, c);
4124
4125 if ((ttep != PT_ENTRY_NULL) && (*ttep & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
4126 PMAP_LOCK(pmap);
4127 pmap_tte_deallocate(pmap, ttep, level);
4128 PMAP_UNLOCK(pmap);
4129 }
4130 }
4131 }
4132 #endif /* (__ARM_VMSA__ == 7) */
4133
4134
4135
4136 if (pmap->tte) {
4137 #if (__ARM_VMSA__ == 7)
4138 pmap_tt1_deallocate(pmap, pmap->tte, pmap->tte_index_max * sizeof(tt_entry_t), 0);
4139 pmap->tte_index_max = 0;
4140 #else /* (__ARM_VMSA__ == 7) */
4141 pmap_tt1_deallocate(pmap, pmap->tte, PMAP_ROOT_ALLOC_SIZE, 0);
4142 #endif /* (__ARM_VMSA__ == 7) */
4143 pmap->tte = (tt_entry_t *) NULL;
4144 pmap->ttep = 0;
4145 }
4146
4147 #if (__ARM_VMSA__ == 7)
4148 if (pmap->prev_tte) {
4149 pmap_tt1_deallocate(pmap, pmap->prev_tte, PMAP_ROOT_ALLOC_SIZE, 0);
4150 pmap->prev_tte = (tt_entry_t *) NULL;
4151 }
4152 #endif /* (__ARM_VMSA__ == 7) */
4153
4154 assert((tt_free_entry_t*)pmap->tt_entry_free == NULL);
4155
4156 pmap_get_pt_ops(pmap)->flush_tlb_async(pmap);
4157 sync_tlb_flush();
4158
4159 /* return its asid to the pool */
4160 pmap_get_pt_ops(pmap)->free_id(pmap);
4161 pmap_check_ledgers(pmap);
4162
4163 if (pmap->nested_region_asid_bitmap) {
4164 kfree(pmap->nested_region_asid_bitmap, pmap->nested_region_asid_bitmap_size * sizeof(unsigned int));
4165 }
4166
4167 zfree(pmap_zone, pmap);
4168 }
4169
4170 void
4171 pmap_destroy(
4172 pmap_t pmap)
4173 {
4174 ledger_t ledger;
4175
4176 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
4177
4178 ledger = pmap->ledger;
4179
4180 pmap_destroy_internal(pmap);
4181
4182 ledger_dereference(ledger);
4183
4184 PMAP_TRACE(1, PMAP_CODE(PMAP__DESTROY) | DBG_FUNC_END);
4185 }
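/*
 * A minimal lifecycle sketch (illustrative only; "task_ledger" stands in for
 * a ledger obtained from the owning task, and error handling is elided):
 *
 *	pmap_t p = pmap_create_options(task_ledger, 0, PMAP_CREATE_64BIT);
 *	if (p != PMAP_NULL) {
 *		pmap_reference(p);
 *		...
 *		pmap_destroy(p);
 *		pmap_destroy(p);
 *	}
 *
 * The size argument must be 0 for a hardware-referenced map (a non-zero size
 * yields PMAP_NULL), pmap_reference() takes an extra reference, and each
 * pmap_destroy() drops one; the translation tables and ASID are only torn
 * down once the reference count reaches zero.
 */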
4186
4187
4188 /*
4189 * Add a reference to the specified pmap.
4190 */
4191 MARK_AS_PMAP_TEXT static void
4192 pmap_reference_internal(
4193 pmap_t pmap)
4194 {
4195 if (pmap != PMAP_NULL) {
4196 VALIDATE_PMAP(pmap);
4197 os_atomic_inc(&pmap->ref_count, relaxed);
4198 }
4199 }
4200
4201 void
4202 pmap_reference(
4203 pmap_t pmap)
4204 {
4205 pmap_reference_internal(pmap);
4206 }
4207
4208 static tt_entry_t *
4209 pmap_tt1_allocate(
4210 pmap_t pmap,
4211 vm_size_t size,
4212 unsigned option)
4213 {
4214 tt_entry_t *tt1 = NULL;
4215 tt_free_entry_t *tt1_free;
4216 pmap_paddr_t pa;
4217 vm_address_t va;
4218 vm_address_t va_end;
4219 kern_return_t ret;
4220
4221 pmap_simple_lock(&tt1_lock);
4222 if ((size == PAGE_SIZE) && (free_page_size_tt_count != 0)) {
4223 free_page_size_tt_count--;
4224 tt1 = (tt_entry_t *)free_page_size_tt_list;
4225 free_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
4226 } else if ((size == 2 * PAGE_SIZE) && (free_two_page_size_tt_count != 0)) {
4227 free_two_page_size_tt_count--;
4228 tt1 = (tt_entry_t *)free_two_page_size_tt_list;
4229 free_two_page_size_tt_list = ((tt_free_entry_t *)tt1)->next;
4230 } else if ((size < PAGE_SIZE) && (free_tt_count != 0)) {
4231 free_tt_count--;
4232 tt1 = (tt_entry_t *)free_tt_list;
4233 free_tt_list = (tt_free_entry_t *)((tt_free_entry_t *)tt1)->next;
4234 }
4235
4236 pmap_simple_unlock(&tt1_lock);
4237
4238 if (tt1 != NULL) {
4239 pmap_tt_ledger_credit(pmap, size);
4240 return (tt_entry_t *)tt1;
4241 }
4242
4243 ret = pmap_pages_alloc(&pa, (unsigned)((size < PAGE_SIZE)? PAGE_SIZE : size), ((option & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0));
4244
4245 if (ret == KERN_RESOURCE_SHORTAGE) {
4246 return (tt_entry_t *)0;
4247 }
4248
4249
4250 if (size < PAGE_SIZE) {
4251 va = phystokv(pa) + size;
4252 tt_free_entry_t *local_free_list = (tt_free_entry_t*)va;
4253 tt_free_entry_t *next_free = NULL;
4254 for (va_end = phystokv(pa) + PAGE_SIZE; va < va_end; va = va + size) {
4255 tt1_free = (tt_free_entry_t *)va;
4256 tt1_free->next = next_free;
4257 next_free = tt1_free;
4258 }
4259 pmap_simple_lock(&tt1_lock);
4260 local_free_list->next = free_tt_list;
4261 free_tt_list = next_free;
4262 free_tt_count += ((PAGE_SIZE / size) - 1);
4263 if (free_tt_count > free_tt_max) {
4264 free_tt_max = free_tt_count;
4265 }
4266 pmap_simple_unlock(&tt1_lock);
4267 }
4268
4269 /* Always report root allocations in units of PMAP_ROOT_ALLOC_SIZE, which can be obtained via the arm_pt_root_size sysctl.
4270 * Depending on the device, this can vary between 512 bytes and 16KB. */
4271 OSAddAtomic((uint32_t)(size / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
4272 OSAddAtomic64(size / PMAP_ROOT_ALLOC_SIZE, &alloc_tteroot_count);
4273 pmap_tt_ledger_credit(pmap, size);
4274
4275 return (tt_entry_t *) phystokv(pa);
4276 }
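/*
 * For example (figures illustrative), with a 4KB PAGE_SIZE and a 1KB root
 * table request: a full 4KB page is allocated, the chunk at the start of the
 * page is returned to the caller, and the remaining three 1KB chunks are
 * threaded onto free_tt_list for later pmap_tt1_allocate() calls, so
 * free_tt_count grows by (PAGE_SIZE / size) - 1 = 3. The root-table counters
 * above advance by size / PMAP_ROOT_ALLOC_SIZE units for the fresh
 * allocation, independent of how the backing page is carved up.
 */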
4277
4278 static void
4279 pmap_tt1_deallocate(
4280 pmap_t pmap,
4281 tt_entry_t *tt,
4282 vm_size_t size,
4283 unsigned option)
4284 {
4285 tt_free_entry_t *tt_entry;
4286
4287 tt_entry = (tt_free_entry_t *)tt;
4288 assert(not_in_kdp);
4289 pmap_simple_lock(&tt1_lock);
4290
4291 if (size < PAGE_SIZE) {
4292 free_tt_count++;
4293 if (free_tt_count > free_tt_max) {
4294 free_tt_max = free_tt_count;
4295 }
4296 tt_entry->next = free_tt_list;
4297 free_tt_list = tt_entry;
4298 }
4299
4300 if (size == PAGE_SIZE) {
4301 free_page_size_tt_count++;
4302 if (free_page_size_tt_count > free_page_size_tt_max) {
4303 free_page_size_tt_max = free_page_size_tt_count;
4304 }
4305 tt_entry->next = free_page_size_tt_list;
4306 free_page_size_tt_list = tt_entry;
4307 }
4308
4309 if (size == 2 * PAGE_SIZE) {
4310 free_two_page_size_tt_count++;
4311 if (free_two_page_size_tt_count > free_two_page_size_tt_max) {
4312 free_two_page_size_tt_max = free_two_page_size_tt_count;
4313 }
4314 tt_entry->next = free_two_page_size_tt_list;
4315 free_two_page_size_tt_list = tt_entry;
4316 }
4317
4318 if (option & PMAP_TT_DEALLOCATE_NOBLOCK) {
4319 pmap_simple_unlock(&tt1_lock);
4320 pmap_tt_ledger_debit(pmap, size);
4321 return;
4322 }
4323
4324 while (free_page_size_tt_count > FREE_PAGE_SIZE_TT_MAX) {
4325 free_page_size_tt_count--;
4326 tt = (tt_entry_t *)free_page_size_tt_list;
4327 free_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
4328
4329 pmap_simple_unlock(&tt1_lock);
4330
4331 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), PAGE_SIZE);
4332
4333 OSAddAtomic(-(int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
4334
4335 pmap_simple_lock(&tt1_lock);
4336 }
4337
4338 while (free_two_page_size_tt_count > FREE_TWO_PAGE_SIZE_TT_MAX) {
4339 free_two_page_size_tt_count--;
4340 tt = (tt_entry_t *)free_two_page_size_tt_list;
4341 free_two_page_size_tt_list = ((tt_free_entry_t *)tt)->next;
4342
4343 pmap_simple_unlock(&tt1_lock);
4344
4345 pmap_pages_free(ml_static_vtop((vm_offset_t)tt), 2 * PAGE_SIZE);
4346
4347 OSAddAtomic(-2 * (int32_t)(PAGE_SIZE / PMAP_ROOT_ALLOC_SIZE), (pmap == kernel_pmap ? &inuse_kernel_tteroot_count : &inuse_user_tteroot_count));
4348
4349 pmap_simple_lock(&tt1_lock);
4350 }
4351 pmap_simple_unlock(&tt1_lock);
4352 pmap_tt_ledger_debit(pmap, size);
4353 }
4354
4355 static kern_return_t
4356 pmap_tt_allocate(
4357 pmap_t pmap,
4358 tt_entry_t **ttp,
4359 unsigned int level,
4360 unsigned int options)
4361 {
4362 pmap_paddr_t pa;
4363 *ttp = NULL;
4364
4365 PMAP_LOCK(pmap);
4366 if ((tt_free_entry_t *)pmap->tt_entry_free != NULL) {
4367 tt_free_entry_t *tt_free_next;
4368
4369 tt_free_next = ((tt_free_entry_t *)pmap->tt_entry_free)->next;
4370 *ttp = (tt_entry_t *)pmap->tt_entry_free;
4371 pmap->tt_entry_free = (tt_entry_t *)tt_free_next;
4372 }
4373 PMAP_UNLOCK(pmap);
4374
4375 if (*ttp == NULL) {
4376 pt_desc_t *ptdp;
4377
4378 /*
4379 * Allocate a VM page for the level x page table entries.
4380 */
4381 while (pmap_pages_alloc(&pa, PAGE_SIZE, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
4382 if (options & PMAP_OPTIONS_NOWAIT) {
4383 return KERN_RESOURCE_SHORTAGE;
4384 }
4385 VM_PAGE_WAIT();
4386 }
4387
4388 while ((ptdp = ptd_alloc(pmap, false)) == NULL) {
4389 if (options & PMAP_OPTIONS_NOWAIT) {
4390 pmap_pages_free(pa, PAGE_SIZE);
4391 return KERN_RESOURCE_SHORTAGE;
4392 }
4393 VM_PAGE_WAIT();
4394 }
4395
4396 if (level < PMAP_TT_MAX_LEVEL) {
4397 OSAddAtomic64(1, &alloc_ttepages_count);
4398 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
4399 } else {
4400 OSAddAtomic64(1, &alloc_ptepages_count);
4401 OSAddAtomic(1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
4402 }
4403
4404 pmap_tt_ledger_credit(pmap, PAGE_SIZE);
4405
4406 PMAP_ZINFO_PALLOC(pmap, PAGE_SIZE);
4407
4408 pvh_update_head_unlocked(pai_to_pvh(pa_index(pa)), ptdp, PVH_TYPE_PTDP);
4409
4410 __unreachable_ok_push
4411 if (TEST_PAGE_RATIO_4) {
4412 vm_address_t va;
4413 vm_address_t va_end;
4414
4415 PMAP_LOCK(pmap);
4416
4417 for (va_end = phystokv(pa) + PAGE_SIZE, va = phystokv(pa) + ARM_PGBYTES; va < va_end; va = va + ARM_PGBYTES) {
4418 ((tt_free_entry_t *)va)->next = (tt_free_entry_t *)pmap->tt_entry_free;
4419 pmap->tt_entry_free = (tt_entry_t *)va;
4420 }
4421 PMAP_UNLOCK(pmap);
4422 }
4423 __unreachable_ok_pop
4424
4425 *ttp = (tt_entry_t *)phystokv(pa);
4426 }
4427
4428
4429 return KERN_SUCCESS;
4430 }
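/*
 * Concretely, when TEST_PAGE_RATIO_4 is in effect (commonly a 16KB VM page
 * backed by 4KB hardware pages, so PAGE_SIZE == 4 * ARM_PGBYTES; sizes given
 * for illustration), a single trip through pmap_pages_alloc() yields four
 * table-sized chunks: the first is returned through *ttp and the remaining
 * three are threaded onto pmap->tt_entry_free, so the next three
 * pmap_tt_allocate() calls on this pmap are satisfied from that free list
 * without allocating.
 */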
4431
4432
4433 static void
4434 pmap_tt_deallocate(
4435 pmap_t pmap,
4436 tt_entry_t *ttp,
4437 unsigned int level)
4438 {
4439 pt_desc_t *ptdp;
4440 unsigned pt_acc_cnt;
4441 unsigned i, max_pt_index = PAGE_RATIO;
4442 vm_offset_t free_page = 0;
4443
4444 PMAP_LOCK(pmap);
4445
4446 ptdp = ptep_get_ptd((vm_offset_t)ttp);
4447
4448 ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].va = (vm_offset_t)-1;
4449
4450 if ((level < PMAP_TT_MAX_LEVEL) && (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt == PT_DESC_REFCOUNT)) {
4451 ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
4452 }
4453
4454 if (ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt != 0) {
4455 panic("pmap_tt_deallocate(): ptdp %p, count %d\n", ptdp, ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt);
4456 }
4457
4458 ptdp->ptd_info[ARM_PT_DESC_INDEX(ttp)].refcnt = 0;
4459
4460 for (i = 0, pt_acc_cnt = 0; i < max_pt_index; i++) {
4461 pt_acc_cnt += ptdp->ptd_info[i].refcnt;
4462 }
4463
4464 if (pt_acc_cnt == 0) {
4465 tt_free_entry_t *tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
4466 unsigned pt_free_entry_cnt = 1;
4467
4468 while (pt_free_entry_cnt < max_pt_index && tt_free_list) {
4469 tt_free_entry_t *tt_free_list_next;
4470
4471 tt_free_list_next = tt_free_list->next;
4472 if ((((vm_offset_t)tt_free_list_next) - ((vm_offset_t)ttp & ~PAGE_MASK)) < PAGE_SIZE) {
4473 pt_free_entry_cnt++;
4474 }
4475 tt_free_list = tt_free_list_next;
4476 }
4477 if (pt_free_entry_cnt == max_pt_index) {
4478 tt_free_entry_t *tt_free_list_cur;
4479
4480 free_page = (vm_offset_t)ttp & ~PAGE_MASK;
4481 tt_free_list = (tt_free_entry_t *)&pmap->tt_entry_free;
4482 tt_free_list_cur = (tt_free_entry_t *)&pmap->tt_entry_free;
4483
4484 while (tt_free_list_cur) {
4485 tt_free_entry_t *tt_free_list_next;
4486
4487 tt_free_list_next = tt_free_list_cur->next;
4488 if ((((vm_offset_t)tt_free_list_next) - free_page) < PAGE_SIZE) {
4489 tt_free_list->next = tt_free_list_next->next;
4490 } else {
4491 tt_free_list = tt_free_list_next;
4492 }
4493 tt_free_list_cur = tt_free_list_next;
4494 }
4495 } else {
4496 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
4497 pmap->tt_entry_free = ttp;
4498 }
4499 } else {
4500 ((tt_free_entry_t *)ttp)->next = (tt_free_entry_t *)pmap->tt_entry_free;
4501 pmap->tt_entry_free = ttp;
4502 }
4503
4504 PMAP_UNLOCK(pmap);
4505
4506 if (free_page != 0) {
4507 ptd_deallocate(ptep_get_ptd((vm_offset_t)free_page));
4508 *(pt_desc_t **)pai_to_pvh(pa_index(ml_static_vtop(free_page))) = NULL;
4509 pmap_pages_free(ml_static_vtop(free_page), PAGE_SIZE);
4510 if (level < PMAP_TT_MAX_LEVEL) {
4511 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ttepages_count : &inuse_user_ttepages_count));
4512 } else {
4513 OSAddAtomic(-1, (pmap == kernel_pmap ? &inuse_kernel_ptepages_count : &inuse_user_ptepages_count));
4514 }
4515 PMAP_ZINFO_PFREE(pmap, PAGE_SIZE);
4516 pmap_tt_ledger_debit(pmap, PAGE_SIZE);
4517 }
4518 }
4519
4520 static void
4521 pmap_tte_remove(
4522 pmap_t pmap,
4523 tt_entry_t *ttep,
4524 unsigned int level)
4525 {
4526 tt_entry_t tte = *ttep;
4527
4528 if (tte == 0) {
4529 panic("pmap_tte_deallocate(): null tt_entry ttep==%p\n", ttep);
4530 }
4531
4532 if (((level + 1) == PMAP_TT_MAX_LEVEL) && (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt != 0)) {
4533 panic("pmap_tte_deallocate(): pmap=%p ttep=%p ptd=%p refcnt=0x%x \n", pmap, ttep,
4534 tte_get_ptd(tte), (tte_get_ptd(tte)->ptd_info[ARM_PT_DESC_INDEX(ttetokv(*ttep))].refcnt));
4535 }
4536
4537 #if (__ARM_VMSA__ == 7)
4538 {
4539 tt_entry_t *ttep_4M = (tt_entry_t *) ((vm_offset_t)ttep & 0xFFFFFFF0);
4540 unsigned i;
4541
4542 for (i = 0; i < 4; i++, ttep_4M++) {
4543 *ttep_4M = (tt_entry_t) 0;
4544 }
4545 FLUSH_PTE_RANGE_STRONG(ttep_4M - 4, ttep_4M);
4546 }
4547 #else
4548 *ttep = (tt_entry_t) 0;
4549 FLUSH_PTE_STRONG(ttep);
4550 #endif
4551 }
4552
4553 static void
4554 pmap_tte_deallocate(
4555 pmap_t pmap,
4556 tt_entry_t *ttep,
4557 unsigned int level)
4558 {
4559 pmap_paddr_t pa;
4560 tt_entry_t tte;
4561
4562 PMAP_ASSERT_LOCKED(pmap);
4563
4564 tte = *ttep;
4565
4566 #if MACH_ASSERT
4567 if (tte_get_ptd(tte)->pmap != pmap) {
4568 panic("pmap_tte_deallocate(): ptd=%p ptd->pmap=%p pmap=%p \n",
4569 tte_get_ptd(tte), tte_get_ptd(tte)->pmap, pmap);
4570 }
4571 #endif
4572
4573 pmap_tte_remove(pmap, ttep, level);
4574
4575 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
4576 #if MACH_ASSERT
4577 {
4578 pt_entry_t *pte_p = ((pt_entry_t *) (ttetokv(tte) & ~ARM_PGMASK));
4579 unsigned i;
4580
4581 for (i = 0; i < (ARM_PGBYTES / sizeof(*pte_p)); i++, pte_p++) {
4582 if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
4583 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx compressed\n",
4584 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
4585 } else if (((*pte_p) & ARM_PTE_TYPE_MASK) != ARM_PTE_TYPE_FAULT) {
4586 panic("pmap_tte_deallocate: tte=0x%llx pmap=%p, pte_p=%p, pte=0x%llx\n",
4587 (uint64_t)tte, pmap, pte_p, (uint64_t)(*pte_p));
4588 }
4589 }
4590 }
4591 #endif
4592 PMAP_UNLOCK(pmap);
4593
4594 /* Clear any page offset: we mean to free the whole page, but armv7 TTEs may only be
4595 * aligned on 1K boundaries. We clear the surrounding "chunk" of 4 TTEs above. */
4596 pa = tte_to_pa(tte) & ~ARM_PGMASK;
4597 pmap_tt_deallocate(pmap, (tt_entry_t *) phystokv(pa), level + 1);
4598 PMAP_LOCK(pmap);
4599 }
4600 }
4601
4602 /*
4603 * Remove a range of hardware page-table entries.
4604 * The entries given are the first (inclusive)
4605 * and last (exclusive) entries for the VM pages.
4606 * The virtual address is the va for the first pte.
4607 *
4608 * The pmap must be locked.
4609 * If the pmap is not the kernel pmap, the range must lie
4610 * entirely within one pte-page. This is NOT checked.
4611 * Assumes that the pte-page exists.
4612 *
4613 * Returns the number of PTEs changed, and sets *rmv_cnt
4614 * to the number of SPTEs changed.
4615 */
4616 static int
4617 pmap_remove_range(
4618 pmap_t pmap,
4619 vm_map_address_t va,
4620 pt_entry_t *bpte,
4621 pt_entry_t *epte,
4622 uint32_t *rmv_cnt)
4623 {
4624 bool need_strong_sync = false;
4625 int num_changed = pmap_remove_range_options(pmap, va, bpte, epte, rmv_cnt,
4626 &need_strong_sync, PMAP_OPTIONS_REMOVE);
4627 if (num_changed > 0) {
4628 PMAP_UPDATE_TLBS(pmap, va, va + (PAGE_SIZE * (epte - bpte)), need_strong_sync);
4629 }
4630 return num_changed;
4631 }
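/*
 * The flush span above follows directly from the PTE range: removing, say,
 * four leaf entries (epte - bpte == 4) with a 4KB PAGE_SIZE invalidates
 * [va, va + 16KB). (Page size chosen only for illustration.)
 */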
4632
4633
4634 #ifdef PVH_FLAG_EXEC
4635
4636 /*
4637 * Update the access protection bits of the physical aperture mapping for a page.
4638 * This is useful, for example, in guaranteeing that a verified executable page
4639 * has no writable mappings anywhere in the system, including the physical
4640 * aperture. flush_tlb_async can be set to true to avoid unnecessary TLB
4641 * synchronization overhead in cases where the call to this function is
4642 * guaranteed to be followed by other TLB operations.
4643 */
4644 static void
4645 pmap_set_ptov_ap(unsigned int pai __unused, unsigned int ap __unused, boolean_t flush_tlb_async __unused)
4646 {
4647 #if __ARM_PTE_PHYSMAP__
4648 ASSERT_PVH_LOCKED(pai);
4649 vm_offset_t kva = phystokv(vm_first_phys + (pmap_paddr_t)ptoa(pai));
4650 pt_entry_t *pte_p = pmap_pte(kernel_pmap, kva);
4651
4652 pt_entry_t tmplate = *pte_p;
4653 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(ap)) {
4654 return;
4655 }
4656 tmplate = (tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(ap);
4657 #if (__ARM_VMSA__ > 7)
4658 if (tmplate & ARM_PTE_HINT_MASK) {
4659 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
4660 __func__, pte_p, (void *)kva, tmplate);
4661 }
4662 #endif
4663 WRITE_PTE_STRONG(pte_p, tmplate);
4664 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
4665 if (!flush_tlb_async) {
4666 sync_tlb_flush();
4667 }
4668 #endif
4669 }
4670
4671 #endif /* defined(PVH_FLAG_EXEC) */
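/*
 * A typical use, as in pmap_remove_pv() below (sketch only; the PVH lock for
 * "pai" must already be held, per the ASSERT_PVH_LOCKED() above):
 *
 *	if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
 *		pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
 *	}
 *
 * i.e. once the last mapping of a formerly-executable page is gone, its
 * physical aperture mapping reverts to kernel read-write. Passing TRUE for
 * flush_tlb_async leaves the TLB synchronization to a caller that will issue
 * sync_tlb_flush() itself, as pmap_page_protect_options_internal() does.
 */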
4672
4673 static void
4674 pmap_remove_pv(
4675 pmap_t pmap,
4676 pt_entry_t *cpte,
4677 int pai,
4678 int *num_internal,
4679 int *num_alt_internal,
4680 int *num_reusable,
4681 int *num_external)
4682 {
4683 pv_entry_t **pv_h, **pve_pp;
4684 pv_entry_t *pve_p;
4685
4686 ASSERT_PVH_LOCKED(pai);
4687 pv_h = pai_to_pvh(pai);
4688 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
4689
4690
4691 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
4692 if (__builtin_expect((cpte != pvh_ptep(pv_h)), 0)) {
4693 panic("%s: cpte=%p does not match pv_h=%p (%p), pai=0x%x\n", __func__, cpte, pv_h, pvh_ptep(pv_h), pai);
4694 }
4695 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
4696 assert(IS_INTERNAL_PAGE(pai));
4697 (*num_internal)++;
4698 (*num_alt_internal)++;
4699 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
4700 } else if (IS_INTERNAL_PAGE(pai)) {
4701 if (IS_REUSABLE_PAGE(pai)) {
4702 (*num_reusable)++;
4703 } else {
4704 (*num_internal)++;
4705 }
4706 } else {
4707 (*num_external)++;
4708 }
4709 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
4710 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
4711 pve_pp = pv_h;
4712 pve_p = pvh_list(pv_h);
4713
4714 while (pve_p != PV_ENTRY_NULL &&
4715 (pve_get_ptep(pve_p) != cpte)) {
4716 pve_pp = pve_link_field(pve_p);
4717 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
4718 }
4719
4720 if (__builtin_expect((pve_p == PV_ENTRY_NULL), 0)) {
4721 panic("%s: cpte=%p (pai=0x%x) not in pv_h=%p\n", __func__, cpte, pai, pv_h);
4722 }
4723
4724 #if MACH_ASSERT
4725 if ((pmap != NULL) && (kern_feature_override(KF_PMAPV_OVRD) == FALSE)) {
4726 pv_entry_t *check_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
4727 while (check_pve_p != PV_ENTRY_NULL) {
4728 if (pve_get_ptep(check_pve_p) == cpte) {
4729 panic("%s: duplicate pve entry cpte=%p pmap=%p, pv_h=%p, pve_p=%p, pai=0x%x",
4730 __func__, cpte, pmap, pv_h, pve_p, pai);
4731 }
4732 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
4733 }
4734 }
4735 #endif
4736
4737 if (IS_ALTACCT_PAGE(pai, pve_p)) {
4738 assert(IS_INTERNAL_PAGE(pai));
4739 (*num_internal)++;
4740 (*num_alt_internal)++;
4741 CLR_ALTACCT_PAGE(pai, pve_p);
4742 } else if (IS_INTERNAL_PAGE(pai)) {
4743 if (IS_REUSABLE_PAGE(pai)) {
4744 (*num_reusable)++;
4745 } else {
4746 (*num_internal)++;
4747 }
4748 } else {
4749 (*num_external)++;
4750 }
4751
4752 pvh_remove(pv_h, pve_pp, pve_p);
4753 pv_free(pve_p);
4754 if (!pvh_test_type(pv_h, PVH_TYPE_NULL)) {
4755 pvh_set_flags(pv_h, pvh_flags);
4756 }
4757 } else {
4758 panic("%s: unexpected PV head %p, cpte=%p pmap=%p pv_h=%p pai=0x%x",
4759 __func__, *pv_h, cpte, pmap, pv_h, pai);
4760 }
4761
4762 #ifdef PVH_FLAG_EXEC
4763 if ((pvh_flags & PVH_FLAG_EXEC) && pvh_test_type(pv_h, PVH_TYPE_NULL)) {
4764 pmap_set_ptov_ap(pai, AP_RWNA, FALSE);
4765 }
4766 #endif
4767 }
4768
4769 static int
4770 pmap_remove_range_options(
4771 pmap_t pmap,
4772 vm_map_address_t va,
4773 pt_entry_t *bpte,
4774 pt_entry_t *epte,
4775 uint32_t *rmv_cnt,
4776 bool *need_strong_sync __unused,
4777 int options)
4778 {
4779 pt_entry_t *cpte;
4780 int num_removed, num_unwired;
4781 int num_pte_changed;
4782 int pai = 0;
4783 pmap_paddr_t pa;
4784 int num_external, num_internal, num_reusable;
4785 int num_alt_internal;
4786 uint64_t num_compressed, num_alt_compressed;
4787
4788 PMAP_ASSERT_LOCKED(pmap);
4789
4790 num_removed = 0;
4791 num_unwired = 0;
4792 num_pte_changed = 0;
4793 num_external = 0;
4794 num_internal = 0;
4795 num_reusable = 0;
4796 num_compressed = 0;
4797 num_alt_internal = 0;
4798 num_alt_compressed = 0;
4799
4800 for (cpte = bpte; cpte < epte;
4801 cpte += PAGE_SIZE / ARM_PGBYTES, va += PAGE_SIZE) {
4802 pt_entry_t spte;
4803 boolean_t managed = FALSE;
4804
4805 spte = *cpte;
4806
4807 #if CONFIG_PGTRACE
4808 if (pgtrace_enabled) {
4809 pmap_pgtrace_remove_clone(pmap, pte_to_pa(spte), va);
4810 }
4811 #endif
4812
4813 while (!managed) {
4814 if (pmap != kernel_pmap &&
4815 (options & PMAP_OPTIONS_REMOVE) &&
4816 (ARM_PTE_IS_COMPRESSED(spte, cpte))) {
4817 /*
4818 * "pmap" must be locked at this point,
4819 * so this should not race with another
4820 * pmap_remove_range() or pmap_enter().
4821 */
4822
4823 /* one less "compressed"... */
4824 num_compressed++;
4825 if (spte & ARM_PTE_COMPRESSED_ALT) {
4826 /* ... but it used to be "ALTACCT" */
4827 num_alt_compressed++;
4828 }
4829
4830 /* clear marker */
4831 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
4832 /*
4833 * "refcnt" also accounts for
4834 * our "compressed" markers,
4835 * so let's update it here.
4836 */
4837 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
4838 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
4839 }
4840 spte = *cpte;
4841 }
4842 /*
4843 * It may be possible for the pte to transition from managed
4844 * to unmanaged in this timeframe; for now, elide the assert.
4845 * We should break out as a consequence of checking pa_valid.
4846 */
4847 //assert(!ARM_PTE_IS_COMPRESSED(spte));
4848 pa = pte_to_pa(spte);
4849 if (!pa_valid(pa)) {
4850 break;
4851 }
4852 pai = (int)pa_index(pa);
4853 LOCK_PVH(pai);
4854 spte = *cpte;
4855 pa = pte_to_pa(spte);
4856 if (pai == (int)pa_index(pa)) {
4857 managed = TRUE;
4858 break; // Leave pai locked as we will unlock it after we free the PV entry
4859 }
4860 UNLOCK_PVH(pai);
4861 }
4862
4863 if (ARM_PTE_IS_COMPRESSED(*cpte, cpte)) {
4864 /*
4865 * There used to be a valid mapping here but it
4866 * has already been removed when the page was
4867 * sent to the VM compressor, so nothing left to
4868 * remove now...
4869 */
4870 continue;
4871 }
4872
4873 /* remove the translation, do not flush the TLB */
4874 if (*cpte != ARM_PTE_TYPE_FAULT) {
4875 assertf(!ARM_PTE_IS_COMPRESSED(*cpte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
4876 assertf((*cpte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)*cpte);
4877 #if MACH_ASSERT
4878 if (managed && (pmap != kernel_pmap) && (ptep_get_va(cpte) != va)) {
4879 panic("pmap_remove_range_options(): cpte=%p ptd=%p pte=0x%llx va=0x%llx\n",
4880 cpte, ptep_get_ptd(cpte), (uint64_t)*cpte, (uint64_t)va);
4881 }
4882 #endif
4883 WRITE_PTE_FAST(cpte, ARM_PTE_TYPE_FAULT);
4884 num_pte_changed++;
4885 }
4886
4887 if ((spte != ARM_PTE_TYPE_FAULT) &&
4888 (pmap != kernel_pmap)) {
4889 assertf(!ARM_PTE_IS_COMPRESSED(spte, cpte), "unexpected compressed pte %p (=0x%llx)", cpte, (uint64_t)spte);
4890 assertf((spte & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE, "invalid pte %p (=0x%llx)", cpte, (uint64_t)spte);
4891 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(cpte)->ptd_info[ARM_PT_DESC_INDEX(cpte)].refcnt)) <= 0) {
4892 panic("pmap_remove_range_options: over-release of ptdp %p for pte %p\n", ptep_get_ptd(cpte), cpte);
4893 }
4894 if (rmv_cnt) {
4895 (*rmv_cnt)++;
4896 }
4897 }
4898
4899 if (pte_is_wired(spte)) {
4900 pte_set_wired(cpte, 0);
4901 num_unwired++;
4902 }
4903 /*
4904 * if not managed, we're done
4905 */
4906 if (!managed) {
4907 continue;
4908 }
4909 /*
4910 * find and remove the mapping from the chain for this
4911 * physical address.
4912 */
4913
4914 pmap_remove_pv(pmap, cpte, pai, &num_internal, &num_alt_internal, &num_reusable, &num_external);
4915
4916 UNLOCK_PVH(pai);
4917 num_removed++;
4918 }
4919
4920 /*
4921 * Update the counts
4922 */
4923 OSAddAtomic(-num_removed, (SInt32 *) &pmap->stats.resident_count);
4924 pmap_ledger_debit(pmap, task_ledgers.phys_mem, machine_ptob(num_removed));
4925
4926 if (pmap != kernel_pmap) {
4927 /* sanity checks... */
4928 #if MACH_ASSERT
4929 if (pmap->stats.internal < num_internal) {
4930 if ((!pmap_stats_assert ||
4931 !pmap->pmap_stats_assert)) {
4932 printf("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d\n",
4933 pmap->pmap_pid,
4934 pmap->pmap_procname,
4935 pmap,
4936 (uint64_t) va,
4937 bpte,
4938 epte,
4939 options,
4940 num_internal,
4941 num_removed,
4942 num_unwired,
4943 num_external,
4944 num_reusable,
4945 num_compressed,
4946 num_alt_internal,
4947 num_alt_compressed,
4948 num_pte_changed,
4949 pmap->stats.internal,
4950 pmap->stats.reusable);
4951 } else {
4952 panic("%d[%s] pmap_remove_range_options(%p,0x%llx,%p,%p,0x%x): num_internal=%d num_removed=%d num_unwired=%d num_external=%d num_reusable=%d num_compressed=%lld num_alt_internal=%d num_alt_compressed=%lld num_pte_changed=%d stats.internal=%d stats.reusable=%d",
4953 pmap->pmap_pid,
4954 pmap->pmap_procname,
4955 pmap,
4956 (uint64_t) va,
4957 bpte,
4958 epte,
4959 options,
4960 num_internal,
4961 num_removed,
4962 num_unwired,
4963 num_external,
4964 num_reusable,
4965 num_compressed,
4966 num_alt_internal,
4967 num_alt_compressed,
4968 num_pte_changed,
4969 pmap->stats.internal,
4970 pmap->stats.reusable);
4971 }
4972 }
4973 #endif /* MACH_ASSERT */
4974 PMAP_STATS_ASSERTF(pmap->stats.external >= num_external,
4975 pmap,
4976 "pmap=%p num_external=%d stats.external=%d",
4977 pmap, num_external, pmap->stats.external);
4978 PMAP_STATS_ASSERTF(pmap->stats.internal >= num_internal,
4979 pmap,
4980 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
4981 pmap,
4982 num_internal, pmap->stats.internal,
4983 num_reusable, pmap->stats.reusable);
4984 PMAP_STATS_ASSERTF(pmap->stats.reusable >= num_reusable,
4985 pmap,
4986 "pmap=%p num_internal=%d stats.internal=%d num_reusable=%d stats.reusable=%d",
4987 pmap,
4988 num_internal, pmap->stats.internal,
4989 num_reusable, pmap->stats.reusable);
4990 PMAP_STATS_ASSERTF(pmap->stats.compressed >= num_compressed,
4991 pmap,
4992 "pmap=%p num_compressed=%lld num_alt_compressed=%lld stats.compressed=%lld",
4993 pmap, num_compressed, num_alt_compressed,
4994 pmap->stats.compressed);
4995
4996 /* update pmap stats... */
4997 OSAddAtomic(-num_unwired, (SInt32 *) &pmap->stats.wired_count);
4998 if (num_external) {
4999 OSAddAtomic(-num_external, &pmap->stats.external);
5000 }
5001 if (num_internal) {
5002 OSAddAtomic(-num_internal, &pmap->stats.internal);
5003 }
5004 if (num_reusable) {
5005 OSAddAtomic(-num_reusable, &pmap->stats.reusable);
5006 }
5007 if (num_compressed) {
5008 OSAddAtomic64(-num_compressed, &pmap->stats.compressed);
5009 }
5010 /* ... and ledgers */
5011 pmap_ledger_debit(pmap, task_ledgers.wired_mem, machine_ptob(num_unwired));
5012 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(num_internal));
5013 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, machine_ptob(num_alt_internal));
5014 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, machine_ptob(num_alt_compressed));
5015 pmap_ledger_debit(pmap, task_ledgers.internal_compressed, machine_ptob(num_compressed));
5016 /* make needed adjustments to phys_footprint */
5017 pmap_ledger_debit(pmap, task_ledgers.phys_footprint,
5018 machine_ptob((num_internal -
5019 num_alt_internal) +
5020 (num_compressed -
5021 num_alt_compressed)));
5022 }
5023
5024 /* flush the ptable entries we have written */
5025 if (num_pte_changed > 0) {
5026 FLUSH_PTE_RANGE_STRONG(bpte, epte);
5027 }
5028
5029 return num_pte_changed;
5030 }
5031
5032
5033 /*
5034 * Remove the given range of addresses
5035 * from the specified map.
5036 *
5037 * It is assumed that the start and end are properly
5038 * rounded to the hardware page size.
5039 */
5040 void
5041 pmap_remove(
5042 pmap_t pmap,
5043 vm_map_address_t start,
5044 vm_map_address_t end)
5045 {
5046 pmap_remove_options(pmap, start, end, PMAP_OPTIONS_REMOVE);
5047 }
5048
5049 MARK_AS_PMAP_TEXT static int
5050 pmap_remove_options_internal(
5051 pmap_t pmap,
5052 vm_map_address_t start,
5053 vm_map_address_t end,
5054 int options)
5055 {
5056 int remove_count = 0;
5057 pt_entry_t *bpte, *epte;
5058 pt_entry_t *pte_p;
5059 tt_entry_t *tte_p;
5060 uint32_t rmv_spte = 0;
5061 bool need_strong_sync = false;
5062 bool flush_tte = false;
5063
5064 if (__improbable(end < start)) {
5065 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
5066 }
5067
5068 VALIDATE_PMAP(pmap);
5069
5070 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5071
5072 PMAP_LOCK(pmap);
5073
5074 tte_p = pmap_tte(pmap, start);
5075
5076 if (tte_p == (tt_entry_t *) NULL) {
5077 goto done;
5078 }
5079
5080 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5081 pte_p = (pt_entry_t *) ttetokv(*tte_p);
5082 bpte = &pte_p[ptenum(start)];
5083 epte = bpte + ((end - start) >> pt_attr_leaf_shift(pt_attr));
5084
5085 remove_count += pmap_remove_range_options(pmap, start, bpte, epte,
5086 &rmv_spte, &need_strong_sync, options);
5087
5088 if (rmv_spte && (ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
5089 (pmap != kernel_pmap) && (pmap->nested == FALSE)) {
5090 pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
5091 flush_tte = true;
5092 }
5093 }
5094
5095 done:
5096 PMAP_UNLOCK(pmap);
5097
5098 if (remove_count > 0) {
5099 PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
5100 } else if (flush_tte > 0) {
5101 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(start, pmap);
5102 sync_tlb_flush();
5103 }
5104 return remove_count;
5105 }
5106
5107 void
5108 pmap_remove_options(
5109 pmap_t pmap,
5110 vm_map_address_t start,
5111 vm_map_address_t end,
5112 int options)
5113 {
5114 int remove_count = 0;
5115 vm_map_address_t va;
5116
5117 if (pmap == PMAP_NULL) {
5118 return;
5119 }
5120
5121 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5122
5123 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_START,
5124 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
5125 VM_KERNEL_ADDRHIDE(end));
5126
5127 #if MACH_ASSERT
5128 if ((start | end) & PAGE_MASK) {
5129 panic("pmap_remove_options() pmap %p start 0x%llx end 0x%llx\n",
5130 pmap, (uint64_t)start, (uint64_t)end);
5131 }
5132 if ((end < start) || (start < pmap->min) || (end > pmap->max)) {
5133 panic("pmap_remove_options(): invalid address range, pmap=%p, start=0x%llx, end=0x%llx\n",
5134 pmap, (uint64_t)start, (uint64_t)end);
5135 }
5136 #endif
5137
5138 /*
5139 * Invalidate the translation buffer first
5140 */
5141 va = start;
5142 while (va < end) {
5143 vm_map_address_t l;
5144
5145 l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
5146 if (l > end) {
5147 l = end;
5148 }
5149
5150 remove_count += pmap_remove_options_internal(pmap, va, l, options);
5151
5152 va = l;
5153 }
5154
5155 PMAP_TRACE(2, PMAP_CODE(PMAP__REMOVE) | DBG_FUNC_END);
5156 }
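/*
 * The loop above never hands pmap_remove_options_internal() a range that
 * crosses a twig (second-from-leaf) table boundary. For example, with a 4KB
 * page geometry in which a twig entry covers 2MB (sizes illustrative), a
 * remove of [0x3ff000, 0x601000) is issued as three calls:
 * [0x3ff000, 0x400000), [0x400000, 0x600000) and [0x600000, 0x601000).
 */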
5157
5158
5159 /*
5160 * Remove phys addr if mapped in specified map
5161 */
5162 void
5163 pmap_remove_some_phys(
5164 __unused pmap_t map,
5165 __unused ppnum_t pn)
5166 {
5167 /* Implement to support working set code */
5168 }
5169
5170 void
5171 pmap_set_pmap(
5172 pmap_t pmap,
5173 #if !__ARM_USER_PROTECT__
5174 __unused
5175 #endif
5176 thread_t thread)
5177 {
5178 pmap_switch(pmap);
5179 #if __ARM_USER_PROTECT__
5180 if (pmap->tte_index_max == NTTES) {
5181 thread->machine.uptw_ttc = 2;
5182 } else {
5183 thread->machine.uptw_ttc = 1;
5184 }
5185 thread->machine.uptw_ttb = ((unsigned int) pmap->ttep) | TTBR_SETUP;
5186 thread->machine.asid = pmap->hw_asid;
5187 #endif
5188 }
5189
5190 static void
5191 pmap_flush_core_tlb_asid(pmap_t pmap)
5192 {
5193 #if (__ARM_VMSA__ == 7)
5194 flush_core_tlb_asid(pmap->hw_asid);
5195 #else
5196 flush_core_tlb_asid(((uint64_t) pmap->hw_asid) << TLBI_ASID_SHIFT);
5197 #endif
5198 }
5199
5200 MARK_AS_PMAP_TEXT static void
5201 pmap_switch_internal(
5202 pmap_t pmap)
5203 {
5204 VALIDATE_PMAP(pmap);
5205 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
5206 uint16_t asid_index = pmap->hw_asid;
5207 boolean_t do_asid_flush = FALSE;
5208
5209 #if __ARM_KERNEL_PROTECT__
5210 asid_index >>= 1;
5211 #endif
5212
5213 #if (__ARM_VMSA__ == 7)
5214 assert(not_in_kdp);
5215 pmap_simple_lock(&pmap->tt1_lock);
5216 #else
5217 pmap_t last_nested_pmap = cpu_data_ptr->cpu_nested_pmap;
5218 #endif
5219
5220 #if MAX_ASID > MAX_HW_ASID
5221 if (asid_index > 0) {
5222 asid_index -= 1;
5223 /* Paranoia. */
5224 assert(asid_index < (sizeof(cpu_data_ptr->cpu_asid_high_bits) / sizeof(*cpu_data_ptr->cpu_asid_high_bits)));
5225
5226 /* Extract the "virtual" bits of the ASIDs (which could cause us to alias). */
5227 uint8_t asid_high_bits = pmap->sw_asid;
5228 uint8_t last_asid_high_bits = cpu_data_ptr->cpu_asid_high_bits[asid_index];
5229
5230 if (asid_high_bits != last_asid_high_bits) {
5231 /*
5232 * If the virtual ASID of the new pmap does not match the virtual ASID
5233 * last seen on this CPU for the physical ASID (that was a mouthful),
5234 * then this switch runs the risk of aliasing. We need to flush the
5235 * TLB for this physical ASID in this case.
5236 */
5237 cpu_data_ptr->cpu_asid_high_bits[asid_index] = asid_high_bits;
5238 do_asid_flush = TRUE;
5239 }
5240 }
5241 #endif /* MAX_ASID > MAX_HW_ASID */
5242
5243 pmap_switch_user_ttb_internal(pmap);
5244
5245 #if (__ARM_VMSA__ > 7)
5246 /* If we're switching to a different nested pmap (i.e. shared region), we'll need
5247 * to flush the userspace mappings for that region. Those mappings are global
5248 * and will not be protected by the ASID. It should also be cheaper to flush the
5249 * entire local TLB rather than to do a broadcast MMU flush by VA region. */
5250 if ((pmap != kernel_pmap) && (last_nested_pmap != NULL) && (pmap->nested_pmap != last_nested_pmap)) {
5251 flush_core_tlb();
5252 } else
5253 #endif
5254 if (do_asid_flush) {
5255 pmap_flush_core_tlb_asid(pmap);
5256 #if DEVELOPMENT || DEBUG
5257 os_atomic_inc(&pmap_asid_flushes, relaxed);
5258 #endif
5259 }
5260
5261 #if (__ARM_VMSA__ == 7)
5262 pmap_simple_unlock(&pmap->tt1_lock);
5263 #endif
5264 }
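/*
 * Aliasing example for the MAX_ASID > MAX_HW_ASID case above (values
 * illustrative): if pmaps A and B have been assigned the same hardware ASID
 * but carry different sw_asid "high bits", then switching from A to B on a
 * CPU whose recorded high bits for that hardware ASID still hold A's value
 * sets do_asid_flush, so any stale translations tagged with that ASID are
 * discarded before B runs.
 */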
5265
5266 void
5267 pmap_switch(
5268 pmap_t pmap)
5269 {
5270 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
5271 pmap_switch_internal(pmap);
5272 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH) | DBG_FUNC_END);
5273 }
5274
5275 void
5276 pmap_page_protect(
5277 ppnum_t ppnum,
5278 vm_prot_t prot)
5279 {
5280 pmap_page_protect_options(ppnum, prot, 0, NULL);
5281 }
5282
5283 /*
5284 * Routine: pmap_page_protect_options
5285 *
5286 * Function:
5287 * Lower the permission for all mappings to a given
5288 * page.
5289 */
5290 MARK_AS_PMAP_TEXT static void
5291 pmap_page_protect_options_internal(
5292 ppnum_t ppnum,
5293 vm_prot_t prot,
5294 unsigned int options)
5295 {
5296 pmap_paddr_t phys = ptoa(ppnum);
5297 pv_entry_t **pv_h;
5298 pv_entry_t **pve_pp;
5299 pv_entry_t *pve_p;
5300 pv_entry_t *pveh_p;
5301 pv_entry_t *pvet_p;
5302 pt_entry_t *pte_p;
5303 pv_entry_t *new_pve_p;
5304 pt_entry_t *new_pte_p;
5305 vm_offset_t pvh_flags;
5306 int pai;
5307 boolean_t remove;
5308 boolean_t set_NX;
5309 boolean_t tlb_flush_needed = FALSE;
5310 unsigned int pvh_cnt = 0;
5311
5312 assert(ppnum != vm_page_fictitious_addr);
5313
5314 /* Only work with managed pages. */
5315 if (!pa_valid(phys)) {
5316 return;
5317 }
5318
5319 /*
5320 * Determine the new protection.
5321 */
5322 switch (prot) {
5323 case VM_PROT_ALL:
5324 return; /* nothing to do */
5325 case VM_PROT_READ:
5326 case VM_PROT_READ | VM_PROT_EXECUTE:
5327 remove = FALSE;
5328 break;
5329 default:
5330 remove = TRUE;
5331 break;
5332 }
5333
5334 pai = (int)pa_index(phys);
5335 LOCK_PVH(pai);
5336 pv_h = pai_to_pvh(pai);
5337 pvh_flags = pvh_get_flags(pv_h);
5338
5339
5340 pte_p = PT_ENTRY_NULL;
5341 pve_p = PV_ENTRY_NULL;
5342 pve_pp = pv_h;
5343 pveh_p = PV_ENTRY_NULL;
5344 pvet_p = PV_ENTRY_NULL;
5345 new_pve_p = PV_ENTRY_NULL;
5346 new_pte_p = PT_ENTRY_NULL;
5347 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
5348 pte_p = pvh_ptep(pv_h);
5349 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
5350 pve_p = pvh_list(pv_h);
5351 pveh_p = pve_p;
5352 }
5353
5354 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
5355 vm_map_address_t va;
5356 pmap_t pmap;
5357 pt_entry_t tmplate;
5358 boolean_t update = FALSE;
5359
5360 if (pve_p != PV_ENTRY_NULL) {
5361 pte_p = pve_get_ptep(pve_p);
5362 }
5363
5364 #ifdef PVH_FLAG_IOMMU
5365 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
5366 if (remove) {
5367 if (options & PMAP_OPTIONS_COMPRESSOR) {
5368 panic("pmap_page_protect: attempt to compress ppnum 0x%x owned by iommu 0x%llx, pve_p=%p",
5369 ppnum, (uint64_t)pte_p & ~PVH_FLAG_IOMMU, pve_p);
5370 }
5371 if (pve_p != PV_ENTRY_NULL) {
5372 pv_entry_t *temp_pve_p = PVE_NEXT_PTR(pve_next(pve_p));
5373 pvh_remove(pv_h, pve_pp, pve_p);
5374 pveh_p = pvh_list(pv_h);
5375 pve_next(pve_p) = new_pve_p;
5376 new_pve_p = pve_p;
5377 pve_p = temp_pve_p;
5378 continue;
5379 } else {
5380 new_pte_p = pte_p;
5381 break;
5382 }
5383 }
5384 goto protect_skip_pve;
5385 }
5386 #endif
5387 pmap = ptep_get_pmap(pte_p);
5388 va = ptep_get_va(pte_p);
5389
5390 if (pte_p == PT_ENTRY_NULL) {
5391 panic("pmap_page_protect: pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, va=0x%llx ppnum: 0x%x\n",
5392 pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)va, ppnum);
5393 } else if ((pmap == NULL) || (atop(pte_to_pa(*pte_p)) != ppnum)) {
5394 #if MACH_ASSERT
5395 if (kern_feature_override(KF_PMAPV_OVRD) == FALSE) {
5396 pv_entry_t *check_pve_p = pveh_p;
5397 while (check_pve_p != PV_ENTRY_NULL) {
5398 if ((check_pve_p != pve_p) && (pve_get_ptep(check_pve_p) == pte_p)) {
5399 panic("pmap_page_protect: duplicate pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
5400 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
5401 }
5402 check_pve_p = PVE_NEXT_PTR(pve_next(check_pve_p));
5403 }
5404 }
5405 #endif
5406 panic("pmap_page_protect: bad pve entry pte_p=%p pmap=%p prot=%d options=%u, pv_h=%p, pveh_p=%p, pve_p=%p, pte=0x%llx, va=0x%llx ppnum: 0x%x\n",
5407 pte_p, pmap, prot, options, pv_h, pveh_p, pve_p, (uint64_t)*pte_p, (uint64_t)va, ppnum);
5408 }
5409
5410 #if DEVELOPMENT || DEBUG
5411 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5412 #else
5413 if ((prot & VM_PROT_EXECUTE))
5414 #endif
5415 { set_NX = FALSE;} else {
5416 set_NX = TRUE;
5417 }
5418
5419 /* Remove the mapping if new protection is NONE */
5420 if (remove) {
5421 boolean_t is_altacct = FALSE;
5422
5423 if (IS_ALTACCT_PAGE(pai, pve_p)) {
5424 is_altacct = TRUE;
5425 } else {
5426 is_altacct = FALSE;
5427 }
5428
5429 if (pte_is_wired(*pte_p)) {
5430 pte_set_wired(pte_p, 0);
5431 if (pmap != kernel_pmap) {
5432 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
5433 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
5434 }
5435 }
5436
5437 if (*pte_p != ARM_PTE_TYPE_FAULT &&
5438 pmap != kernel_pmap &&
5439 (options & PMAP_OPTIONS_COMPRESSOR) &&
5440 IS_INTERNAL_PAGE(pai)) {
5441 assert(!ARM_PTE_IS_COMPRESSED(*pte_p, pte_p));
5442 /* mark this PTE as having been "compressed" */
5443 tmplate = ARM_PTE_COMPRESSED;
5444 if (is_altacct) {
5445 tmplate |= ARM_PTE_COMPRESSED_ALT;
5446 is_altacct = TRUE;
5447 }
5448 } else {
5449 tmplate = ARM_PTE_TYPE_FAULT;
5450 }
5451
5452 if ((*pte_p != ARM_PTE_TYPE_FAULT) &&
5453 tmplate == ARM_PTE_TYPE_FAULT &&
5454 (pmap != kernel_pmap)) {
5455 if (OSAddAtomic16(-1, (SInt16 *) &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt)) <= 0) {
5456 panic("pmap_page_protect_options(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
5457 }
5458 }
5459
5460 if (*pte_p != tmplate) {
5461 WRITE_PTE_STRONG(pte_p, tmplate);
5462 update = TRUE;
5463 }
5464 pvh_cnt++;
5465 pmap_ledger_debit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
5466 OSAddAtomic(-1, (SInt32 *) &pmap->stats.resident_count);
5467
5468 #if MACH_ASSERT
5469 /*
5470 * We only ever compress internal pages.
5471 */
5472 if (options & PMAP_OPTIONS_COMPRESSOR) {
5473 assert(IS_INTERNAL_PAGE(pai));
5474 }
5475 #endif
5476
5477 if (pmap != kernel_pmap) {
5478 if (IS_REUSABLE_PAGE(pai) &&
5479 IS_INTERNAL_PAGE(pai) &&
5480 !is_altacct) {
5481 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
5482 OSAddAtomic(-1, &pmap->stats.reusable);
5483 } else if (IS_INTERNAL_PAGE(pai)) {
5484 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
5485 OSAddAtomic(-1, &pmap->stats.internal);
5486 } else {
5487 PMAP_STATS_ASSERTF(pmap->stats.external > 0, pmap, "stats.external %d", pmap->stats.external);
5488 OSAddAtomic(-1, &pmap->stats.external);
5489 }
5490 if ((options & PMAP_OPTIONS_COMPRESSOR) &&
5491 IS_INTERNAL_PAGE(pai)) {
5492 /* adjust "compressed" stats */
5493 OSAddAtomic64(+1, &pmap->stats.compressed);
5494 PMAP_STATS_PEAK(pmap->stats.compressed);
5495 pmap->stats.compressed_lifetime++;
5496 }
5497
5498 if (IS_ALTACCT_PAGE(pai, pve_p)) {
5499 assert(IS_INTERNAL_PAGE(pai));
5500 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
5501 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
5502 if (options & PMAP_OPTIONS_COMPRESSOR) {
5503 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
5504 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
5505 }
5506
5507 /*
5508 * Clean up our marker before
5509 * we free this pv_entry.
5510 */
5511 CLR_ALTACCT_PAGE(pai, pve_p);
5512 } else if (IS_REUSABLE_PAGE(pai)) {
5513 assert(IS_INTERNAL_PAGE(pai));
5514 if (options & PMAP_OPTIONS_COMPRESSOR) {
5515 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
5516 /* was not in footprint, but is now */
5517 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
5518 }
5519 } else if (IS_INTERNAL_PAGE(pai)) {
5520 pmap_ledger_debit(pmap, task_ledgers.internal, PAGE_SIZE);
5521
5522 /*
5523 * Update all stats related to physical footprint, which only
5524 * deals with internal pages.
5525 */
5526 if (options & PMAP_OPTIONS_COMPRESSOR) {
5527 /*
5528 * This removal is only being done so we can send this page to
5529 * the compressor; therefore it mustn't affect total task footprint.
5530 */
5531 pmap_ledger_credit(pmap, task_ledgers.internal_compressed, PAGE_SIZE);
5532 } else {
5533 /*
5534 * This internal page isn't going to the compressor, so adjust stats to keep
5535 * phys_footprint up to date.
5536 */
5537 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
5538 }
5539 } else {
5540 /* external page: no impact on ledgers */
5541 }
5542 }
5543
5544 if (pve_p != PV_ENTRY_NULL) {
5545 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
5546 }
5547 } else {
5548 pt_entry_t spte;
5549 const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
5550
5551 spte = *pte_p;
5552
5553 if (pmap == kernel_pmap) {
5554 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
5555 } else {
5556 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
5557 }
5558
5559 pte_set_was_writeable(tmplate, false);
5560 /*
5561 * While the naive implementation of this would serve to add execute
5562 * permission, this is not how the VM uses this interface, or how
5563 * x86_64 implements it. So ignore requests to add execute permissions.
5564 */
5565 if (set_NX) {
5566 tmplate |= pt_attr_leaf_xn(pt_attr);
5567 }
5568
5569
5570 if (*pte_p != ARM_PTE_TYPE_FAULT &&
5571 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p) &&
5572 *pte_p != tmplate) {
5573 WRITE_PTE_STRONG(pte_p, tmplate);
5574 update = TRUE;
5575 }
5576 }
5577
5578 /* Invalidate TLBs for all CPUs using it */
5579 if (update) {
5580 tlb_flush_needed = TRUE;
5581 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
5582 }
5583
5584 #ifdef PVH_FLAG_IOMMU
5585 protect_skip_pve:
5586 #endif
5587 pte_p = PT_ENTRY_NULL;
5588 pvet_p = pve_p;
5589 if (pve_p != PV_ENTRY_NULL) {
5590 if (remove) {
5591 assert(pve_next(pve_p) == PVE_NEXT_PTR(pve_next(pve_p)));
5592 }
5593 pve_pp = pve_link_field(pve_p);
5594 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
5595 }
5596 }
5597
5598 #ifdef PVH_FLAG_EXEC
5599 if (remove && (pvh_get_flags(pv_h) & PVH_FLAG_EXEC)) {
5600 pmap_set_ptov_ap(pai, AP_RWNA, tlb_flush_needed);
5601 }
5602 #endif
5603 if (tlb_flush_needed) {
5604 sync_tlb_flush();
5605 }
5606
5607 /* if we removed a bunch of entries, take care of them now */
5608 if (remove) {
5609 if (new_pve_p != PV_ENTRY_NULL) {
5610 pvh_update_head(pv_h, new_pve_p, PVH_TYPE_PVEP);
5611 pvh_set_flags(pv_h, pvh_flags);
5612 } else if (new_pte_p != PT_ENTRY_NULL) {
5613 pvh_update_head(pv_h, new_pte_p, PVH_TYPE_PTEP);
5614 pvh_set_flags(pv_h, pvh_flags);
5615 } else {
5616 pvh_update_head(pv_h, PV_ENTRY_NULL, PVH_TYPE_NULL);
5617 }
5618 }
5619
5620 UNLOCK_PVH(pai);
5621
5622 if (remove && (pvet_p != PV_ENTRY_NULL)) {
5623 pv_list_free(pveh_p, pvet_p, pvh_cnt);
5624 }
5625 }
5626
5627 void
5628 pmap_page_protect_options(
5629 ppnum_t ppnum,
5630 vm_prot_t prot,
5631 unsigned int options,
5632 __unused void *arg)
5633 {
5634 pmap_paddr_t phys = ptoa(ppnum);
5635
5636 assert(ppnum != vm_page_fictitious_addr);
5637
5638 /* Only work with managed pages. */
5639 if (!pa_valid(phys)) {
5640 return;
5641 }
5642
5643 /*
5644 * Determine the new protection.
5645 */
5646 if (prot == VM_PROT_ALL) {
5647 return; /* nothing to do */
5648 }
5649
5650 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_START, ppnum, prot);
5651
5652 pmap_page_protect_options_internal(ppnum, prot, options);
5653
5654 PMAP_TRACE(2, PMAP_CODE(PMAP__PAGE_PROTECT) | DBG_FUNC_END);
5655 }
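/*
 * Typical invocations (illustrative): pmap_page_protect(ppnum, VM_PROT_READ)
 * downgrades every remaining mapping of the page to read-only (and sets XN
 * where NX is enforced), while pmap_page_protect(ppnum, VM_PROT_NONE)
 * removes the mappings outright, as selected by the remove/protect switch at
 * the top of pmap_page_protect_options_internal() above.
 */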
5656
5657 /*
5658 * Indicates if the pmap layer enforces some additional restrictions on the
5659 * given set of protections.
5660 */
5661 bool
5662 pmap_has_prot_policy(__unused vm_prot_t prot)
5663 {
5664 return FALSE;
5665 }
5666
5667 /*
5668 * Set the physical protection on the
5669 * specified range of this map as requested.
5670 * VERY IMPORTANT: Will not increase permissions.
5671 * VERY IMPORTANT: Only pmap_enter() is allowed to grant permissions.
5672 */
5673 void
5674 pmap_protect(
5675 pmap_t pmap,
5676 vm_map_address_t b,
5677 vm_map_address_t e,
5678 vm_prot_t prot)
5679 {
5680 pmap_protect_options(pmap, b, e, prot, 0, NULL);
5681 }
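/*
 * Because pmap_protect() never adds permissions, a request such as
 * pmap_protect(p, s, e, VM_PROT_READ | VM_PROT_WRITE) is a no-op (see the
 * VM_PROT_READ | VM_PROT_WRITE case in the switch below), whereas
 * pmap_protect(p, s, e, VM_PROT_READ) rewrites writable PTEs in [s, e) to a
 * read-only template and flushes the affected TLB entries.
 */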
5682
5683 MARK_AS_PMAP_TEXT static void
5684 pmap_protect_options_internal(
5685 pmap_t pmap,
5686 vm_map_address_t start,
5687 vm_map_address_t end,
5688 vm_prot_t prot,
5689 unsigned int options,
5690 __unused void *args)
5691 {
5692 const pt_attr_t *const pt_attr = pmap_get_pt_attr(pmap);
5693 tt_entry_t *tte_p;
5694 pt_entry_t *bpte_p, *epte_p;
5695 pt_entry_t *pte_p;
5696 boolean_t set_NX = TRUE;
5697 #if (__ARM_VMSA__ > 7)
5698 boolean_t set_XO = FALSE;
5699 #endif
5700 boolean_t should_have_removed = FALSE;
5701 bool need_strong_sync = false;
5702
5703 if (__improbable(end < start)) {
5704 panic("%s called with bogus range: %p, %p", __func__, (void*)start, (void*)end);
5705 }
5706
5707 #if DEVELOPMENT || DEBUG
5708 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5709 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5710 should_have_removed = TRUE;
5711 }
5712 } else
5713 #endif
5714 {
5715 /* Determine the new protection. */
5716 switch (prot) {
5717 #if (__ARM_VMSA__ > 7)
5718 case VM_PROT_EXECUTE:
5719 set_XO = TRUE;
5720 /* fall through */
5721 #endif
5722 case VM_PROT_READ:
5723 case VM_PROT_READ | VM_PROT_EXECUTE:
5724 break;
5725 case VM_PROT_READ | VM_PROT_WRITE:
5726 case VM_PROT_ALL:
5727 return; /* nothing to do */
5728 default:
5729 should_have_removed = TRUE;
5730 }
5731 }
5732
5733 if (should_have_removed) {
5734 panic("%s: should have been a remove operation, "
5735 "pmap=%p, start=%p, end=%p, prot=%#x, options=%#x, args=%p",
5736 __FUNCTION__,
5737 pmap, (void *)start, (void *)end, prot, options, args);
5738 }
5739
5740 #if DEVELOPMENT || DEBUG
5741 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
5742 #else
5743 if ((prot & VM_PROT_EXECUTE))
5744 #endif
5745 {
5746 set_NX = FALSE;
5747 } else {
5748 set_NX = TRUE;
5749 }
5750
5751 VALIDATE_PMAP(pmap);
5752 PMAP_LOCK(pmap);
5753 tte_p = pmap_tte(pmap, start);
5754
5755 if ((tte_p != (tt_entry_t *) NULL) && (*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
5756 bpte_p = (pt_entry_t *) ttetokv(*tte_p);
5757 bpte_p = &bpte_p[ptenum(start)];
5758 epte_p = bpte_p + arm_atop(end - start);
5759 pte_p = bpte_p;
5760
5761 for (pte_p = bpte_p;
5762 pte_p < epte_p;
5763 pte_p += PAGE_SIZE / ARM_PGBYTES) {
5764 pt_entry_t spte;
5765 #if DEVELOPMENT || DEBUG
5766 boolean_t force_write = FALSE;
5767 #endif
5768
5769 spte = *pte_p;
5770
5771 if ((spte == ARM_PTE_TYPE_FAULT) ||
5772 ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
5773 continue;
5774 }
5775
5776 pmap_paddr_t pa;
5777 int pai = 0;
5778 boolean_t managed = FALSE;
5779
5780 while (!managed) {
5781 /*
5782 * It may be possible for the pte to transition from managed
5783 * to unmanaged in this timeframe; for now, elide the assert.
5784 * We should break out as a consequence of checking pa_valid.
5785 */
5786 // assert(!ARM_PTE_IS_COMPRESSED(spte));
5787 pa = pte_to_pa(spte);
5788 if (!pa_valid(pa)) {
5789 break;
5790 }
5791 pai = (int)pa_index(pa);
5792 LOCK_PVH(pai);
5793 spte = *pte_p;
5794 pa = pte_to_pa(spte);
5795 if (pai == (int)pa_index(pa)) {
5796 managed = TRUE;
5797 break; // Leave the PVH locked as we will unlock it after we free the PTE
5798 }
5799 UNLOCK_PVH(pai);
5800 }
5801
5802 if ((spte == ARM_PTE_TYPE_FAULT) ||
5803 ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
5804 continue;
5805 }
5806
5807 pt_entry_t tmplate;
5808
5809 if (pmap == kernel_pmap) {
5810 #if DEVELOPMENT || DEBUG
5811 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
5812 force_write = TRUE;
5813 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
5814 } else
5815 #endif
5816 {
5817 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
5818 }
5819 } else {
5820 #if DEVELOPMENT || DEBUG
5821 if ((options & PMAP_OPTIONS_PROTECT_IMMEDIATE) && (prot & VM_PROT_WRITE)) {
5822 force_write = TRUE;
5823 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pt_attr));
5824 } else
5825 #endif
5826 {
5827 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pt_attr));
5828 }
5829 }
5830
5831 /*
5832 * XXX Removing "NX" would
5833 * grant "execute" access
5834 * immediately, bypassing any
5835 * checks VM might want to do
5836 * in its soft fault path.
5837 * pmap_protect() and co. are
5838 * not allowed to increase
5839 * access permissions.
5840 */
5841 if (set_NX) {
5842 tmplate |= pt_attr_leaf_xn(pt_attr);
5843 } else {
5844 #if (__ARM_VMSA__ > 7)
5845 if (pmap == kernel_pmap) {
5846 /* do NOT clear "PNX"! */
5847 tmplate |= ARM_PTE_NX;
5848 } else {
5849 /* do NOT clear "NX"! */
5850 tmplate |= pt_attr_leaf_x(pt_attr);
5851 if (set_XO) {
5852 tmplate &= ~ARM_PTE_APMASK;
5853 tmplate |= pt_attr_leaf_rona(pt_attr);
5854 }
5855 }
5856 #endif
5857 }
5858
5859 #if DEVELOPMENT || DEBUG
5860 if (force_write) {
5861 /*
5862 * TODO: Run CS/Monitor checks here.
5863 */
5864 if (managed) {
5865 /*
5866 * We are marking the page as writable,
5867 * so we consider it to be modified and
5868 * referenced.
5869 */
5870 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
5871 tmplate |= ARM_PTE_AF;
5872
5873 if (IS_REFFAULT_PAGE(pai)) {
5874 CLR_REFFAULT_PAGE(pai);
5875 }
5876
5877 if (IS_MODFAULT_PAGE(pai)) {
5878 CLR_MODFAULT_PAGE(pai);
5879 }
5880 }
5881 } else if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5882 /*
5883 * An immediate request for anything other than
5884 * write should still mark the page as
5885 * referenced if managed.
5886 */
5887 if (managed) {
5888 pa_set_bits(pa, PP_ATTR_REFERENCED);
5889 tmplate |= ARM_PTE_AF;
5890
5891 if (IS_REFFAULT_PAGE(pai)) {
5892 CLR_REFFAULT_PAGE(pai);
5893 }
5894 }
5895 }
5896 #endif
5897
/* We do not expect to take a write fast-fault on this entry. */
5899 pte_set_was_writeable(tmplate, false);
5900
5901 WRITE_PTE_FAST(pte_p, tmplate);
5902
5903 if (managed) {
5904 ASSERT_PVH_LOCKED(pai);
5905 UNLOCK_PVH(pai);
5906 }
5907 }
5908 FLUSH_PTE_RANGE_STRONG(bpte_p, epte_p);
5909 PMAP_UPDATE_TLBS(pmap, start, end, need_strong_sync);
5910 }
5911
5912 PMAP_UNLOCK(pmap);
5913 }
5914
5915 void
5916 pmap_protect_options(
5917 pmap_t pmap,
5918 vm_map_address_t b,
5919 vm_map_address_t e,
5920 vm_prot_t prot,
5921 unsigned int options,
5922 __unused void *args)
5923 {
5924 vm_map_address_t l, beg;
5925
5926 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
5927
5928 if ((b | e) & PAGE_MASK) {
5929 panic("pmap_protect_options() pmap %p start 0x%llx end 0x%llx\n",
5930 pmap, (uint64_t)b, (uint64_t)e);
5931 }
5932
5933 #if DEVELOPMENT || DEBUG
5934 if (options & PMAP_OPTIONS_PROTECT_IMMEDIATE) {
5935 if ((prot & VM_PROT_ALL) == VM_PROT_NONE) {
5936 pmap_remove_options(pmap, b, e, options);
5937 return;
5938 }
5939 } else
5940 #endif
5941 {
5942 /* Determine the new protection. */
5943 switch (prot) {
5944 case VM_PROT_EXECUTE:
5945 case VM_PROT_READ:
5946 case VM_PROT_READ | VM_PROT_EXECUTE:
5947 break;
5948 case VM_PROT_READ | VM_PROT_WRITE:
5949 case VM_PROT_ALL:
5950 return; /* nothing to do */
5951 default:
5952 pmap_remove_options(pmap, b, e, options);
5953 return;
5954 }
5955 }
5956
5957 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_START,
5958 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(b),
5959 VM_KERNEL_ADDRHIDE(e));
5960
5961 beg = b;
5962
5963 while (beg < e) {
5964 l = ((beg + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
5965
5966 if (l > e) {
5967 l = e;
5968 }
5969
5970 pmap_protect_options_internal(pmap, beg, l, prot, options, args);
5971
5972 beg = l;
5973 }
5974
5975 PMAP_TRACE(2, PMAP_CODE(PMAP__PROTECT) | DBG_FUNC_END);
5976 }
5977
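/*
 * Usage sketch (illustrative only, not compiled): a hypothetical caller
 * downgrading an already-mapped, page-aligned range to read-only.  The
 * function name and arguments below are assumptions for illustration.
 */
#if 0
static void
example_make_range_readonly(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
    /* pmap_protect_options() panics on unaligned addresses. */
    assert(((start | end) & PAGE_MASK) == 0);
    /*
     * Only permission reductions are honored; VM_PROT_NONE must go
     * through pmap_remove_options() instead.  Pass
     * VM_PROT_READ | VM_PROT_EXECUTE to preserve execute permission.
     */
    pmap_protect_options(pmap, start, end, VM_PROT_READ, 0, NULL);
}
#endif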
5978 /* Map a (possibly) autogenned block */
5979 kern_return_t
5980 pmap_map_block(
5981 pmap_t pmap,
5982 addr64_t va,
5983 ppnum_t pa,
5984 uint32_t size,
5985 vm_prot_t prot,
5986 int attr,
5987 __unused unsigned int flags)
5988 {
5989 kern_return_t kr;
5990 addr64_t original_va = va;
5991 uint32_t page;
5992
5993 for (page = 0; page < size; page++) {
5994 kr = pmap_enter(pmap, va, pa, prot, VM_PROT_NONE, attr, TRUE);
5995
5996 if (kr != KERN_SUCCESS) {
5997 /*
5998 * This will panic for now, as it is unclear that
5999 * removing the mappings is correct.
6000 */
6001 panic("%s: failed pmap_enter, "
6002 "pmap=%p, va=%#llx, pa=%u, size=%u, prot=%#x, flags=%#x",
6003 __FUNCTION__,
6004 pmap, va, pa, size, prot, flags);
6005
6006 pmap_remove(pmap, original_va, va - original_va);
6007 return kr;
6008 }
6009
6010 va += PAGE_SIZE;
6011 pa++;
6012 }
6013
6014 return KERN_SUCCESS;
6015 }
6016
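/*
 * Usage sketch (illustrative only, not compiled): mapping a physically
 * contiguous window with pmap_map_block().  Note that "size" is a page
 * count, not a byte count, and each page is entered wired.  The caller
 * and the VM_WIMG_IO choice below are assumptions for illustration.
 */
#if 0
static kern_return_t
example_map_device_window(pmap_t pmap, addr64_t va, ppnum_t first_page, uint32_t npages)
{
    return pmap_map_block(pmap, va, first_page, npages,
        VM_PROT_READ | VM_PROT_WRITE, VM_WIMG_IO, 0);
}
#endif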
6017 /*
6018 * Insert the given physical page (p) at
6019 * the specified virtual address (v) in the
6020 * target physical map with the protection requested.
6021 *
6022 * If specified, the page will be wired down, meaning
* that the related pte cannot be reclaimed.
6024 *
6025 * NB: This is the only routine which MAY NOT lazy-evaluate
6026 * or lose information. That is, this routine must actually
* insert this page into the given map eventually (i.e., it must
* eventually make forward progress).
6029 */
6030 kern_return_t
6031 pmap_enter(
6032 pmap_t pmap,
6033 vm_map_address_t v,
6034 ppnum_t pn,
6035 vm_prot_t prot,
6036 vm_prot_t fault_type,
6037 unsigned int flags,
6038 boolean_t wired)
6039 {
6040 return pmap_enter_options(pmap, v, pn, prot, fault_type, flags, wired, 0, NULL);
6041 }
6042
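/*
 * Usage sketch (illustrative only, not compiled): entering a single page.
 * pmap_enter() is simply pmap_enter_options() with options == 0; the
 * caller and protections below are assumptions for illustration.
 */
#if 0
static kern_return_t
example_enter_one_page(pmap_t user_pmap, vm_map_address_t va, ppnum_t pn)
{
    /* va must be page-aligned; a WX request on kernel_pmap would panic. */
    return pmap_enter(user_pmap, va, pn, VM_PROT_READ | VM_PROT_WRITE,
        VM_PROT_NONE, VM_WIMG_USE_DEFAULT, FALSE);
}
#endif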
6043
6044 static inline void
6045 pmap_enter_pte(pmap_t pmap, pt_entry_t *pte_p, pt_entry_t pte, vm_map_address_t v)
6046 {
6047 if (pmap != kernel_pmap && ((pte & ARM_PTE_WIRED) != (*pte_p & ARM_PTE_WIRED))) {
6048 SInt16 *ptd_wiredcnt_ptr = (SInt16 *)&(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
6049 if (pte & ARM_PTE_WIRED) {
6050 OSAddAtomic16(1, ptd_wiredcnt_ptr);
6051 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6052 OSAddAtomic(1, (SInt32 *) &pmap->stats.wired_count);
6053 } else {
6054 OSAddAtomic16(-1, ptd_wiredcnt_ptr);
6055 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6056 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
6057 }
6058 }
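/*
 * If the old PTE was valid (neither empty nor a compressed marker), a
 * stale translation may be cached, so use a strong (DSB-ordered) write
 * and flush the TLB for this VA.  Otherwise a plain write followed by
 * an ISB is sufficient.
 */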
6059 if (*pte_p != ARM_PTE_TYPE_FAULT &&
6060 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
6061 WRITE_PTE_STRONG(pte_p, pte);
6062 PMAP_UPDATE_TLBS(pmap, v, v + PAGE_SIZE, false);
6063 } else {
6064 WRITE_PTE(pte_p, pte);
6065 __builtin_arm_isb(ISB_SY);
6066 }
6067
6068 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), VM_KERNEL_ADDRHIDE(v + PAGE_SIZE), pte);
6069 }
6070
6071 MARK_AS_PMAP_TEXT static pt_entry_t
6072 wimg_to_pte(unsigned int wimg)
6073 {
6074 pt_entry_t pte;
6075
6076 switch (wimg & (VM_WIMG_MASK)) {
6077 case VM_WIMG_IO:
6078 case VM_WIMG_RT:
6079 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DISABLE);
6080 pte |= ARM_PTE_NX | ARM_PTE_PNX;
6081 break;
6082 case VM_WIMG_POSTED:
6083 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED);
6084 pte |= ARM_PTE_NX | ARM_PTE_PNX;
6085 break;
6086 case VM_WIMG_POSTED_REORDERED:
6087 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_REORDERED);
6088 pte |= ARM_PTE_NX | ARM_PTE_PNX;
6089 break;
6090 case VM_WIMG_POSTED_COMBINED_REORDERED:
6091 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_POSTED_COMBINED_REORDERED);
6092 pte |= ARM_PTE_NX | ARM_PTE_PNX;
6093 break;
6094 case VM_WIMG_WCOMB:
6095 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITECOMB);
6096 pte |= ARM_PTE_NX | ARM_PTE_PNX;
6097 break;
6098 case VM_WIMG_WTHRU:
6099 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITETHRU);
6100 #if (__ARM_VMSA__ > 7)
6101 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
6102 #else
6103 pte |= ARM_PTE_SH;
6104 #endif
6105 break;
6106 case VM_WIMG_COPYBACK:
6107 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
6108 #if (__ARM_VMSA__ > 7)
6109 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
6110 #else
6111 pte |= ARM_PTE_SH;
6112 #endif
6113 break;
6114 case VM_WIMG_INNERWBACK:
6115 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_INNERWRITEBACK);
6116 #if (__ARM_VMSA__ > 7)
6117 pte |= ARM_PTE_SH(SH_INNER_MEMORY);
6118 #else
6119 pte |= ARM_PTE_SH;
6120 #endif
6121 break;
6122 default:
6123 pte = ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
6124 #if (__ARM_VMSA__ > 7)
6125 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
6126 #else
6127 pte |= ARM_PTE_SH;
6128 #endif
6129 }
6130
6131 return pte;
6132 }
6133
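/*
 * Illustrative sketch (not compiled): how a WIMG attribute feeds into a
 * PTE template.  Device-like attributes (VM_WIMG_IO, VM_WIMG_RT, and the
 * POSTED variants) additionally force ARM_PTE_NX | ARM_PTE_PNX, so the
 * resulting mapping can never be executable.  The variable names below
 * are assumptions for illustration.
 */
#if 0
pt_entry_t device_template = pa_to_pte(pa) | ARM_PTE_TYPE |
    ARM_PTE_AP(AP_RWNA) | ARM_PTE_AF | wimg_to_pte(VM_WIMG_IO);
#endif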
6134 static boolean_t
6135 pmap_enter_pv(
6136 pmap_t pmap,
6137 pt_entry_t *pte_p,
6138 int pai,
6139 unsigned int options,
6140 pv_entry_t **pve_p,
6141 boolean_t *is_altacct)
6142 {
6143 pv_entry_t **pv_h;
6144 pv_h = pai_to_pvh(pai);
6145 boolean_t first_cpu_mapping;
6146
6147 ASSERT_PVH_LOCKED(pai);
6148
6149 vm_offset_t pvh_flags = pvh_get_flags(pv_h);
6150
6151
6152 #ifdef PVH_FLAG_CPU
6153 /* An IOMMU mapping may already be present for a page that hasn't yet
6154 * had a CPU mapping established, so we use PVH_FLAG_CPU to determine
6155 * if this is the first CPU mapping. We base internal/reusable
6156 * accounting on the options specified for the first CPU mapping.
6157 * PVH_FLAG_CPU, and thus this accounting, will then persist as long
6158 * as there are *any* mappings of the page. The accounting for a
6159 * page should not need to change until the page is recycled by the
6160 * VM layer, and we assert that there are no mappings when a page
6161 * is recycled. An IOMMU mapping of a freed/recycled page is
6162 * considered a security violation & potential DMA corruption path.*/
6163 first_cpu_mapping = ((pmap != NULL) && !(pvh_flags & PVH_FLAG_CPU));
6164 if (first_cpu_mapping) {
6165 pvh_flags |= PVH_FLAG_CPU;
6166 }
6167 #else
6168 first_cpu_mapping = pvh_test_type(pv_h, PVH_TYPE_NULL);
6169 #endif
6170
6171 if (first_cpu_mapping) {
6172 if (options & PMAP_OPTIONS_INTERNAL) {
6173 SET_INTERNAL_PAGE(pai);
6174 } else {
6175 CLR_INTERNAL_PAGE(pai);
6176 }
6177 if ((options & PMAP_OPTIONS_INTERNAL) &&
6178 (options & PMAP_OPTIONS_REUSABLE)) {
6179 SET_REUSABLE_PAGE(pai);
6180 } else {
6181 CLR_REUSABLE_PAGE(pai);
6182 }
6183 }
6184 if (pvh_test_type(pv_h, PVH_TYPE_NULL)) {
6185 pvh_update_head(pv_h, pte_p, PVH_TYPE_PTEP);
6186 if (pmap != NULL && pmap != kernel_pmap &&
6187 ((options & PMAP_OPTIONS_ALT_ACCT) ||
6188 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
6189 IS_INTERNAL_PAGE(pai)) {
6190 /*
6191 * Make a note to ourselves that this mapping is using alternative
6192 * accounting. We'll need this in order to know which ledger to
6193 * debit when the mapping is removed.
6194 *
6195 * The altacct bit must be set while the pv head is locked. Defer
6196 * the ledger accounting until after we've dropped the lock.
6197 */
6198 SET_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
6199 *is_altacct = TRUE;
6200 } else {
6201 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
6202 }
6203 } else {
6204 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
6205 pt_entry_t *pte1_p;
6206
6207 /*
6208 * convert pvh list from PVH_TYPE_PTEP to PVH_TYPE_PVEP
6209 */
6210 pte1_p = pvh_ptep(pv_h);
6211 pvh_set_flags(pv_h, pvh_flags);
6212 if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
6213 return FALSE;
6214 }
6215
6216 pve_set_ptep(*pve_p, pte1_p);
6217 (*pve_p)->pve_next = PV_ENTRY_NULL;
6218
6219 if (IS_ALTACCT_PAGE(pai, PV_ENTRY_NULL)) {
6220 /*
6221 * transfer "altacct" from
6222 * pp_attr to this pve
6223 */
6224 CLR_ALTACCT_PAGE(pai, PV_ENTRY_NULL);
6225 SET_ALTACCT_PAGE(pai, *pve_p);
6226 }
6227 pvh_update_head(pv_h, *pve_p, PVH_TYPE_PVEP);
6228 *pve_p = PV_ENTRY_NULL;
6229 } else if (!pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
6230 panic("%s: unexpected PV head %p, pte_p=%p pmap=%p pv_h=%p",
6231 __func__, *pv_h, pte_p, pmap, pv_h);
6232 }
6233 /*
6234 * Set up pv_entry for this new mapping and then
6235 * add it to the list for this physical page.
6236 */
6237 pvh_set_flags(pv_h, pvh_flags);
6238 if ((*pve_p == PV_ENTRY_NULL) && (!pv_alloc(pmap, pai, pve_p))) {
6239 return FALSE;
6240 }
6241
6242 pve_set_ptep(*pve_p, pte_p);
6243 (*pve_p)->pve_next = PV_ENTRY_NULL;
6244
6245 pvh_add(pv_h, *pve_p);
6246
6247 if (pmap != NULL && pmap != kernel_pmap &&
6248 ((options & PMAP_OPTIONS_ALT_ACCT) ||
6249 PMAP_FOOTPRINT_SUSPENDED(pmap)) &&
6250 IS_INTERNAL_PAGE(pai)) {
6251 /*
6252 * Make a note to ourselves that this
6253 * mapping is using alternative
6254 * accounting. We'll need this in order
6255 * to know which ledger to debit when
6256 * the mapping is removed.
6257 *
6258 * The altacct bit must be set while
6259 * the pv head is locked. Defer the
6260 * ledger accounting until after we've
6261 * dropped the lock.
6262 */
6263 SET_ALTACCT_PAGE(pai, *pve_p);
6264 *is_altacct = TRUE;
6265 }
6266
6267 *pve_p = PV_ENTRY_NULL;
6268 }
6269
6270 pvh_set_flags(pv_h, pvh_flags);
6271
6272 return TRUE;
6273 }
6274
6275 MARK_AS_PMAP_TEXT static kern_return_t
6276 pmap_enter_options_internal(
6277 pmap_t pmap,
6278 vm_map_address_t v,
6279 ppnum_t pn,
6280 vm_prot_t prot,
6281 vm_prot_t fault_type,
6282 unsigned int flags,
6283 boolean_t wired,
6284 unsigned int options)
6285 {
6286 pmap_paddr_t pa = ptoa(pn);
6287 pt_entry_t pte;
6288 pt_entry_t spte;
6289 pt_entry_t *pte_p;
6290 pv_entry_t *pve_p;
6291 boolean_t set_NX;
6292 boolean_t set_XO = FALSE;
6293 boolean_t refcnt_updated;
6294 boolean_t wiredcnt_updated;
6295 unsigned int wimg_bits;
6296 boolean_t was_compressed, was_alt_compressed;
6297 kern_return_t kr = KERN_SUCCESS;
6298
6299 VALIDATE_PMAP(pmap);
6300
6301 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6302
6303 if ((v) & PAGE_MASK) {
6304 panic("pmap_enter_options() pmap %p v 0x%llx\n",
6305 pmap, (uint64_t)v);
6306 }
6307
6308 if ((prot & VM_PROT_EXECUTE) && (prot & VM_PROT_WRITE) && (pmap == kernel_pmap)) {
6309 panic("pmap_enter_options(): WX request on kernel_pmap");
6310 }
6311
6312 #if DEVELOPMENT || DEBUG
6313 if ((prot & VM_PROT_EXECUTE) || !nx_enabled || !pmap->nx_enabled)
6314 #else
6315 if ((prot & VM_PROT_EXECUTE))
6316 #endif
{
set_NX = FALSE;
} else {
set_NX = TRUE;
}
6320
6321 #if (__ARM_VMSA__ > 7)
6322 if (prot == VM_PROT_EXECUTE) {
6323 set_XO = TRUE;
6324 }
6325 #endif
6326
6327 assert(pn != vm_page_fictitious_addr);
6328
6329 refcnt_updated = FALSE;
6330 wiredcnt_updated = FALSE;
6331 pve_p = PV_ENTRY_NULL;
6332 was_compressed = FALSE;
6333 was_alt_compressed = FALSE;
6334
6335 PMAP_LOCK(pmap);
6336
6337 /*
6338 * Expand pmap to include this pte. Assume that
6339 * pmap is always expanded to include enough hardware
6340 * pages to map one VM page.
6341 */
6342 while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
6343 /* Must unlock to expand the pmap. */
6344 PMAP_UNLOCK(pmap);
6345
6346 kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);
6347
6348 if (kr != KERN_SUCCESS) {
6349 return kr;
6350 }
6351
6352 PMAP_LOCK(pmap);
6353 }
6354
6355 if (options & PMAP_OPTIONS_NOENTER) {
6356 PMAP_UNLOCK(pmap);
6357 return KERN_SUCCESS;
6358 }
6359
6360 Pmap_enter_retry:
6361
6362 spte = *pte_p;
6363
6364 if (ARM_PTE_IS_COMPRESSED(spte, pte_p)) {
6365 /*
6366 * "pmap" should be locked at this point, so this should
6367 * not race with another pmap_enter() or pmap_remove_range().
6368 */
6369 assert(pmap != kernel_pmap);
6370
6371 /* one less "compressed" */
6372 OSAddAtomic64(-1, &pmap->stats.compressed);
6373 pmap_ledger_debit(pmap, task_ledgers.internal_compressed,
6374 PAGE_SIZE);
6375
6376 was_compressed = TRUE;
6377 if (spte & ARM_PTE_COMPRESSED_ALT) {
6378 was_alt_compressed = TRUE;
6379 pmap_ledger_debit(pmap, task_ledgers.alternate_accounting_compressed, PAGE_SIZE);
6380 } else {
6381 /* was part of the footprint */
6382 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6383 }
6384
6385 /* clear "compressed" marker */
/* XXX is this necessary, since we're about to overwrite it? */
6387 WRITE_PTE_FAST(pte_p, ARM_PTE_TYPE_FAULT);
6388 spte = ARM_PTE_TYPE_FAULT;
6389
6390 /*
6391 * We're replacing a "compressed" marker with a valid PTE,
6392 * so no change for "refcnt".
6393 */
6394 refcnt_updated = TRUE;
6395 }
6396
6397 if ((spte != ARM_PTE_TYPE_FAULT) && (pte_to_pa(spte) != pa)) {
6398 pmap_remove_range(pmap, v, pte_p, pte_p + 1, 0);
6399 }
6400
6401 pte = pa_to_pte(pa) | ARM_PTE_TYPE;
6402
6403 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
6404 * wired memory statistics for user pmaps, but kernel PTEs are assumed
6405 * to be wired in nearly all cases. For VM layer functionality, the wired
6406 * count in vm_page_t is sufficient. */
6407 if (wired && pmap != kernel_pmap) {
6408 pte |= ARM_PTE_WIRED;
6409 }
6410
6411 if (set_NX) {
6412 pte |= pt_attr_leaf_xn(pt_attr);
6413 } else {
6414 #if (__ARM_VMSA__ > 7)
6415 if (pmap == kernel_pmap) {
6416 pte |= ARM_PTE_NX;
6417 } else {
6418 pte |= pt_attr_leaf_x(pt_attr);
6419 }
6420 #endif
6421 }
6422
6423 if (pmap == kernel_pmap) {
6424 #if __ARM_KERNEL_PROTECT__
6425 pte |= ARM_PTE_NG;
6426 #endif /* __ARM_KERNEL_PROTECT__ */
6427 if (prot & VM_PROT_WRITE) {
6428 pte |= ARM_PTE_AP(AP_RWNA);
6429 pa_set_bits(pa, PP_ATTR_MODIFIED | PP_ATTR_REFERENCED);
6430 } else {
6431 pte |= ARM_PTE_AP(AP_RONA);
6432 pa_set_bits(pa, PP_ATTR_REFERENCED);
6433 }
6434 #if (__ARM_VMSA__ == 7)
6435 if ((_COMM_PAGE_BASE_ADDRESS <= v) && (v < _COMM_PAGE_BASE_ADDRESS + _COMM_PAGE_AREA_LENGTH)) {
6436 pte = (pte & ~(ARM_PTE_APMASK)) | ARM_PTE_AP(AP_RORO);
6437 }
6438 #endif
6439 } else {
6440 if (!pmap->nested) {
6441 pte |= ARM_PTE_NG;
6442 } else if ((pmap->nested_region_asid_bitmap)
6443 && (v >= pmap->nested_region_subord_addr)
6444 && (v < (pmap->nested_region_subord_addr + pmap->nested_region_size))) {
6445 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));
6446
6447 if ((pmap->nested_region_asid_bitmap)
6448 && testbit(index, (int *)pmap->nested_region_asid_bitmap)) {
6449 pte |= ARM_PTE_NG;
6450 }
6451 }
6452 #if MACH_ASSERT
6453 if (pmap->nested_pmap != NULL) {
6454 vm_map_address_t nest_vaddr;
6455 pt_entry_t *nest_pte_p;
6456
6457 nest_vaddr = v - pmap->nested_region_grand_addr + pmap->nested_region_subord_addr;
6458
6459 if ((nest_vaddr >= pmap->nested_region_subord_addr)
6460 && (nest_vaddr < (pmap->nested_region_subord_addr + pmap->nested_region_size))
6461 && ((nest_pte_p = pmap_pte(pmap->nested_pmap, nest_vaddr)) != PT_ENTRY_NULL)
6462 && (*nest_pte_p != ARM_PTE_TYPE_FAULT)
6463 && (!ARM_PTE_IS_COMPRESSED(*nest_pte_p, nest_pte_p))
6464 && (((*nest_pte_p) & ARM_PTE_NG) != ARM_PTE_NG)) {
6465 unsigned int index = (unsigned int)((v - pmap->nested_region_subord_addr) >> pt_attr_twig_shift(pt_attr));
6466
6467 if ((pmap->nested_pmap->nested_region_asid_bitmap)
6468 && !testbit(index, (int *)pmap->nested_pmap->nested_region_asid_bitmap)) {
6469 panic("pmap_enter(): Global attribute conflict nest_pte_p=%p pmap=%p v=0x%llx spte=0x%llx \n",
6470 nest_pte_p, pmap, (uint64_t)v, (uint64_t)*nest_pte_p);
6471 }
6472 }
6473 }
6474 #endif
6475 if (prot & VM_PROT_WRITE) {
6476 if (pa_valid(pa) && (!pa_test_bits(pa, PP_ATTR_MODIFIED))) {
6477 if (fault_type & VM_PROT_WRITE) {
6478 if (set_XO) {
6479 pte |= pt_attr_leaf_rwna(pt_attr);
6480 } else {
6481 pte |= pt_attr_leaf_rw(pt_attr);
6482 }
6483 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
6484 } else {
6485 if (set_XO) {
6486 pte |= pt_attr_leaf_rona(pt_attr);
6487 } else {
6488 pte |= pt_attr_leaf_ro(pt_attr);
6489 }
6490 pa_set_bits(pa, PP_ATTR_REFERENCED);
6491 pte_set_was_writeable(pte, true);
6492 }
6493 } else {
6494 if (set_XO) {
6495 pte |= pt_attr_leaf_rwna(pt_attr);
6496 } else {
6497 pte |= pt_attr_leaf_rw(pt_attr);
6498 }
6499 pa_set_bits(pa, PP_ATTR_REFERENCED);
6500 }
6501 } else {
6502 if (set_XO) {
6503 pte |= pt_attr_leaf_rona(pt_attr);
6504 } else {
pte |= pt_attr_leaf_ro(pt_attr);
6506 }
6507 pa_set_bits(pa, PP_ATTR_REFERENCED);
6508 }
6509 }
6510
6511 pte |= ARM_PTE_AF;
6512
6513 volatile uint16_t *refcnt = NULL;
6514 volatile uint16_t *wiredcnt = NULL;
6515 if (pmap != kernel_pmap) {
6516 refcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt);
6517 wiredcnt = &(ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].wiredcnt);
6518 /* Bump the wired count to keep the PTE page from being reclaimed. We need this because
6519 * we may drop the PVH and pmap locks later in pmap_enter() if we need to allocate
6520 * a new PV entry. */
6521 if (!wiredcnt_updated) {
6522 OSAddAtomic16(1, (volatile int16_t*)wiredcnt);
6523 wiredcnt_updated = TRUE;
6524 }
6525 if (!refcnt_updated) {
6526 OSAddAtomic16(1, (volatile int16_t*)refcnt);
6527 refcnt_updated = TRUE;
6528 }
6529 }
6530
6531 if (pa_valid(pa)) {
6532 int pai;
6533 boolean_t is_altacct, is_internal;
6534
6535 is_internal = FALSE;
6536 is_altacct = FALSE;
6537
6538 pai = (int)pa_index(pa);
6539
6540 LOCK_PVH(pai);
6541
6542 Pmap_enter_loop:
6543 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
6544 wimg_bits = (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
6545 } else {
6546 wimg_bits = pmap_cache_attributes(pn);
6547 }
6548
6549 /* We may be retrying this operation after dropping the PVH lock.
6550 * Cache attributes for the physical page may have changed while the lock
6551 * was dropped, so clear any cache attributes we may have previously set
6552 * in the PTE template. */
6553 pte &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
6554 pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
6555
6556
6557
6558 if (pte == *pte_p) {
6559 /*
* This pmap_enter operation has been completed by another thread;
* undo the refcnt on the page table and return.
6562 */
6563 UNLOCK_PVH(pai);
6564 goto Pmap_enter_cleanup;
6565 } else if (pte_to_pa(*pte_p) == pa) {
6566 pmap_enter_pte(pmap, pte_p, pte, v);
6567 UNLOCK_PVH(pai);
6568 goto Pmap_enter_cleanup;
6569 } else if (*pte_p != ARM_PTE_TYPE_FAULT) {
6570 /*
* The pte has been modified by another thread;
* hold the refcnt on the page table and retry the pmap_enter operation.
6573 */
6574 UNLOCK_PVH(pai);
6575 goto Pmap_enter_retry;
6576 }
6577 if (!pmap_enter_pv(pmap, pte_p, pai, options, &pve_p, &is_altacct)) {
6578 goto Pmap_enter_loop;
6579 }
6580
6581 pmap_enter_pte(pmap, pte_p, pte, v);
6582
6583 if (pmap != kernel_pmap) {
6584 if (IS_REUSABLE_PAGE(pai) &&
6585 !is_altacct) {
6586 assert(IS_INTERNAL_PAGE(pai));
6587 OSAddAtomic(+1, &pmap->stats.reusable);
6588 PMAP_STATS_PEAK(pmap->stats.reusable);
6589 } else if (IS_INTERNAL_PAGE(pai)) {
6590 OSAddAtomic(+1, &pmap->stats.internal);
6591 PMAP_STATS_PEAK(pmap->stats.internal);
6592 is_internal = TRUE;
6593 } else {
6594 OSAddAtomic(+1, &pmap->stats.external);
6595 PMAP_STATS_PEAK(pmap->stats.external);
6596 }
6597 }
6598
6599 UNLOCK_PVH(pai);
6600
6601 if (pmap != kernel_pmap) {
6602 pmap_ledger_credit(pmap, task_ledgers.phys_mem, PAGE_SIZE);
6603
6604 if (is_internal) {
6605 /*
6606 * Make corresponding adjustments to
6607 * phys_footprint statistics.
6608 */
6609 pmap_ledger_credit(pmap, task_ledgers.internal, PAGE_SIZE);
6610 if (is_altacct) {
6611 /*
6612 * If this page is internal and
6613 * in an IOKit region, credit
6614 * the task's total count of
6615 * dirty, internal IOKit pages.
6616 * It should *not* count towards
6617 * the task's total physical
6618 * memory footprint, because
6619 * this entire region was
6620 * already billed to the task
6621 * at the time the mapping was
6622 * created.
6623 *
6624 * Put another way, this is
6625 * internal++ and
6626 * alternate_accounting++, so
6627 * net effect on phys_footprint
6628 * is 0. That means: don't
6629 * touch phys_footprint here.
6630 */
6631 pmap_ledger_credit(pmap, task_ledgers.alternate_accounting, PAGE_SIZE);
6632 } else {
6633 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, PAGE_SIZE);
6634 }
6635 }
6636 }
6637
6638 OSAddAtomic(1, (SInt32 *) &pmap->stats.resident_count);
6639 if (pmap->stats.resident_count > pmap->stats.resident_max) {
6640 pmap->stats.resident_max = pmap->stats.resident_count;
6641 }
6642 } else {
6643 if (prot & VM_PROT_EXECUTE) {
6644 kr = KERN_FAILURE;
6645 goto Pmap_enter_cleanup;
6646 }
6647
6648 wimg_bits = pmap_cache_attributes(pn);
6649 if ((flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT))) {
6650 wimg_bits = (wimg_bits & (~VM_WIMG_MASK)) | (flags & (VM_WIMG_MASK | VM_WIMG_USE_DEFAULT));
6651 }
6652
6653 pte |= pmap_get_pt_ops(pmap)->wimg_to_pte(wimg_bits);
6654
6655 pmap_enter_pte(pmap, pte_p, pte, v);
6656 }
6657
6658 goto Pmap_enter_return;
6659
6660 Pmap_enter_cleanup:
6661
6662 if (refcnt != NULL) {
6663 assert(refcnt_updated);
6664 if (OSAddAtomic16(-1, (volatile int16_t*)refcnt) <= 0) {
6665 panic("pmap_enter(): over-release of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
6666 }
6667 }
6668
6669 Pmap_enter_return:
6670
6671 #if CONFIG_PGTRACE
6672 if (pgtrace_enabled) {
6673 // Clone and invalidate original mapping if eligible
6674 for (int i = 0; i < PAGE_RATIO; i++) {
6675 pmap_pgtrace_enter_clone(pmap, v + ARM_PGBYTES * i, 0, 0);
6676 }
6677 }
6678 #endif
6679
6680 if (pve_p != PV_ENTRY_NULL) {
6681 pv_free(pve_p);
6682 }
6683
6684 if (wiredcnt_updated && (OSAddAtomic16(-1, (volatile int16_t*)wiredcnt) <= 0)) {
6685 panic("pmap_enter(): over-unwire of ptdp %p for pte %p\n", ptep_get_ptd(pte_p), pte_p);
6686 }
6687
6688 PMAP_UNLOCK(pmap);
6689
6690 return kr;
6691 }
6692
6693 kern_return_t
6694 pmap_enter_options(
6695 pmap_t pmap,
6696 vm_map_address_t v,
6697 ppnum_t pn,
6698 vm_prot_t prot,
6699 vm_prot_t fault_type,
6700 unsigned int flags,
6701 boolean_t wired,
6702 unsigned int options,
6703 __unused void *arg)
6704 {
6705 kern_return_t kr = KERN_FAILURE;
6706
6707 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_START,
6708 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v), pn, prot);
6709
6710 kr = pmap_enter_options_internal(pmap, v, pn, prot, fault_type, flags, wired, options);
6711 pv_water_mark_check();
6712
6713 PMAP_TRACE(2, PMAP_CODE(PMAP__ENTER) | DBG_FUNC_END, kr);
6714
6715 return kr;
6716 }
6717
6718 /*
6719 * Routine: pmap_change_wiring
6720 * Function: Change the wiring attribute for a map/virtual-address
6721 * pair.
6722 * In/out conditions:
6723 * The mapping must already exist in the pmap.
6724 */
6725 MARK_AS_PMAP_TEXT static void
6726 pmap_change_wiring_internal(
6727 pmap_t pmap,
6728 vm_map_address_t v,
6729 boolean_t wired)
6730 {
6731 pt_entry_t *pte_p;
6732 pmap_paddr_t pa;
6733
6734 /* Don't bother tracking wiring for kernel PTEs. We use ARM_PTE_WIRED to track
6735 * wired memory statistics for user pmaps, but kernel PTEs are assumed
6736 * to be wired in nearly all cases. For VM layer functionality, the wired
6737 * count in vm_page_t is sufficient. */
6738 if (pmap == kernel_pmap) {
6739 return;
6740 }
6741 VALIDATE_USER_PMAP(pmap);
6742
6743 PMAP_LOCK(pmap);
6744 pte_p = pmap_pte(pmap, v);
6745 assert(pte_p != PT_ENTRY_NULL);
6746 pa = pte_to_pa(*pte_p);
6747
6748 while (pa_valid(pa)) {
6749 pmap_paddr_t new_pa;
6750
6751 LOCK_PVH((int)pa_index(pa));
6752 new_pa = pte_to_pa(*pte_p);
6753
6754 if (pa == new_pa) {
6755 break;
6756 }
6757
6758 UNLOCK_PVH((int)pa_index(pa));
6759 pa = new_pa;
6760 }
6761
6762 if (wired && !pte_is_wired(*pte_p)) {
6763 pte_set_wired(pte_p, wired);
6764 OSAddAtomic(+1, (SInt32 *) &pmap->stats.wired_count);
6765 pmap_ledger_credit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6766 } else if (!wired && pte_is_wired(*pte_p)) {
6767 PMAP_STATS_ASSERTF(pmap->stats.wired_count >= 1, pmap, "stats.wired_count %d", pmap->stats.wired_count);
6768 pte_set_wired(pte_p, wired);
6769 OSAddAtomic(-1, (SInt32 *) &pmap->stats.wired_count);
6770 pmap_ledger_debit(pmap, task_ledgers.wired_mem, PAGE_SIZE);
6771 }
6772
6773 if (pa_valid(pa)) {
6774 UNLOCK_PVH((int)pa_index(pa));
6775 }
6776
6777 PMAP_UNLOCK(pmap);
6778 }
6779
6780 void
6781 pmap_change_wiring(
6782 pmap_t pmap,
6783 vm_map_address_t v,
6784 boolean_t wired)
6785 {
6786 pmap_change_wiring_internal(pmap, v, wired);
6787 }
6788
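/*
 * Usage sketch (illustrative only, not compiled): wiring and unwiring an
 * existing user mapping.  The mapping must already exist -- the internal
 * routine asserts that pmap_pte() finds a PTE -- and kernel pmaps are
 * ignored entirely.  The caller below is an assumption for illustration.
 */
#if 0
static void
example_wire_page(pmap_t user_pmap, vm_map_address_t va)
{
    pmap_change_wiring(user_pmap, va, TRUE);   /* wire: wired_count++, ledger credit */
    pmap_change_wiring(user_pmap, va, FALSE);  /* unwire: wired_count--, ledger debit */
}
#endif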
6789 MARK_AS_PMAP_TEXT static ppnum_t
6790 pmap_find_phys_internal(
6791 pmap_t pmap,
6792 addr64_t va)
6793 {
6794 ppnum_t ppn = 0;
6795
6796 VALIDATE_PMAP(pmap);
6797
6798 if (pmap != kernel_pmap) {
6799 PMAP_LOCK(pmap);
6800 }
6801
6802 ppn = pmap_vtophys(pmap, va);
6803
6804 if (pmap != kernel_pmap) {
6805 PMAP_UNLOCK(pmap);
6806 }
6807
6808 return ppn;
6809 }
6810
6811 ppnum_t
6812 pmap_find_phys(
6813 pmap_t pmap,
6814 addr64_t va)
6815 {
6816 pmap_paddr_t pa = 0;
6817
6818 if (pmap == kernel_pmap) {
6819 pa = mmu_kvtop(va);
6820 } else if ((current_thread()->map) && (pmap == vm_map_pmap(current_thread()->map))) {
6821 pa = mmu_uvtop(va);
6822 }
6823
6824 if (pa) {
6825 return (ppnum_t)(pa >> PAGE_SHIFT);
6826 }
6827
6828 if (not_in_kdp) {
6829 return pmap_find_phys_internal(pmap, va);
6830 } else {
6831 return pmap_vtophys(pmap, va);
6832 }
6833 }
6834
6835 pmap_paddr_t
6836 kvtophys(
6837 vm_offset_t va)
6838 {
6839 pmap_paddr_t pa;
6840
6841 pa = mmu_kvtop(va);
6842 if (pa) {
6843 return pa;
6844 }
6845 pa = ((pmap_paddr_t)pmap_vtophys(kernel_pmap, va)) << PAGE_SHIFT;
6846 if (pa) {
6847 pa |= (va & PAGE_MASK);
6848 }
6849
6850 return (pmap_paddr_t)pa;
6851 }
6852
6853 ppnum_t
6854 pmap_vtophys(
6855 pmap_t pmap,
6856 addr64_t va)
6857 {
6858 if ((va < pmap->min) || (va >= pmap->max)) {
6859 return 0;
6860 }
6861
6862 #if (__ARM_VMSA__ == 7)
6863 tt_entry_t *tte_p, tte;
6864 pt_entry_t *pte_p;
6865 ppnum_t ppn;
6866
6867 tte_p = pmap_tte(pmap, va);
6868 if (tte_p == (tt_entry_t *) NULL) {
6869 return (ppnum_t) 0;
6870 }
6871
6872 tte = *tte_p;
6873 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
6874 pte_p = (pt_entry_t *) ttetokv(tte) + ptenum(va);
6875 ppn = (ppnum_t) atop(pte_to_pa(*pte_p) | (va & ARM_PGMASK));
6876 #if DEVELOPMENT || DEBUG
6877 if (ppn != 0 &&
6878 ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
6879 panic("pmap_vtophys(%p,0x%llx): compressed pte_p=%p 0x%llx with ppn=0x%x\n",
6880 pmap, va, pte_p, (uint64_t) (*pte_p), ppn);
6881 }
6882 #endif /* DEVELOPMENT || DEBUG */
6883 } else if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
6884 if ((tte & ARM_TTE_BLOCK_SUPER) == ARM_TTE_BLOCK_SUPER) {
6885 ppn = (ppnum_t) atop(suptte_to_pa(tte) | (va & ARM_TT_L1_SUPER_OFFMASK));
6886 } else {
6887 ppn = (ppnum_t) atop(sectte_to_pa(tte) | (va & ARM_TT_L1_BLOCK_OFFMASK));
6888 }
6889 } else {
6890 ppn = 0;
6891 }
6892 #else
6893 tt_entry_t *ttp;
6894 tt_entry_t tte;
6895 ppnum_t ppn = 0;
6896
6897 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
6898
6899 /* Level 0 currently unused */
6900
6901 /* Get first-level (1GB) entry */
6902 ttp = pmap_tt1e(pmap, va);
6903 tte = *ttp;
6904 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
6905 return ppn;
6906 }
6907
6908 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, va)];
6909
6910 if ((tte & ARM_TTE_VALID) != (ARM_TTE_VALID)) {
6911 return ppn;
6912 }
6913
6914 if ((tte & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_BLOCK) {
6915 ppn = (ppnum_t) atop((tte & ARM_TTE_BLOCK_L2_MASK) | (va & ARM_TT_L2_OFFMASK));
6916 return ppn;
6917 }
6918 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, va)];
6919 ppn = (ppnum_t) atop((tte & ARM_PTE_MASK) | (va & ARM_TT_L3_OFFMASK));
6920 #endif
6921
6922 return ppn;
6923 }
6924
6925 MARK_AS_PMAP_TEXT static vm_offset_t
6926 pmap_extract_internal(
6927 pmap_t pmap,
6928 vm_map_address_t va)
6929 {
6930 pmap_paddr_t pa = 0;
6931 ppnum_t ppn = 0;
6932
6933 if (pmap == NULL) {
6934 return 0;
6935 }
6936
6937 VALIDATE_PMAP(pmap);
6938
6939 PMAP_LOCK(pmap);
6940
6941 ppn = pmap_vtophys(pmap, va);
6942
6943 if (ppn != 0) {
6944 pa = ptoa(ppn) | ((va) & PAGE_MASK);
6945 }
6946
6947 PMAP_UNLOCK(pmap);
6948
6949 return pa;
6950 }
6951
6952 /*
6953 * Routine: pmap_extract
6954 * Function:
6955 * Extract the physical page address associated
6956 * with the given map/virtual_address pair.
6957 *
6958 */
6959 vm_offset_t
6960 pmap_extract(
6961 pmap_t pmap,
6962 vm_map_address_t va)
6963 {
6964 pmap_paddr_t pa = 0;
6965
6966 if (pmap == kernel_pmap) {
6967 pa = mmu_kvtop(va);
6968 } else if (pmap == vm_map_pmap(current_thread()->map)) {
6969 pa = mmu_uvtop(va);
6970 }
6971
6972 if (pa) {
6973 return pa;
6974 }
6975
6976 return pmap_extract_internal(pmap, va);
6977 }
6978
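/*
 * Usage sketch (illustrative only, not compiled): translating a virtual
 * address to a physical one.  pmap_extract() returns the full physical
 * address including the byte offset within the page, or 0 if nothing is
 * mapped; pmap_find_phys() returns just the page number.  The caller
 * below is an assumption for illustration.
 */
#if 0
static void
example_translate(pmap_t pmap, vm_map_address_t va)
{
    vm_offset_t pa = pmap_extract(pmap, va);
    if (pa != 0) {
        assert((pa & PAGE_MASK) == (va & PAGE_MASK));
    }
}
#endif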
6979 /*
6980 * pmap_init_pte_page - Initialize a page table page.
6981 */
6982 void
6983 pmap_init_pte_page(
6984 pmap_t pmap,
6985 pt_entry_t *pte_p,
6986 vm_offset_t va,
6987 unsigned int ttlevel,
6988 boolean_t alloc_ptd,
6989 boolean_t clear)
6990 {
6991 pt_desc_t *ptdp = NULL;
6992 vm_offset_t *pvh;
6993
6994 pvh = (vm_offset_t *)(pai_to_pvh(pa_index(ml_static_vtop((vm_offset_t)pte_p))));
6995
6996 if (pvh_test_type(pvh, PVH_TYPE_NULL)) {
6997 if (alloc_ptd) {
6998 /*
6999 * This path should only be invoked from arm_vm_init. If we are emulating 16KB pages
7000 * on 4KB hardware, we may already have allocated a page table descriptor for a
7001 * bootstrap request, so we check for an existing PTD here.
7002 */
7003 ptdp = ptd_alloc(pmap, true);
7004 pvh_update_head_unlocked(pvh, ptdp, PVH_TYPE_PTDP);
7005 } else {
7006 panic("pmap_init_pte_page(): pte_p %p", pte_p);
7007 }
7008 } else if (pvh_test_type(pvh, PVH_TYPE_PTDP)) {
7009 ptdp = (pt_desc_t*)(pvh_list(pvh));
7010 } else {
7011 panic("pmap_init_pte_page(): invalid PVH type for pte_p %p", pte_p);
7012 }
7013
7014 if (clear) {
7015 bzero(pte_p, ARM_PGBYTES);
// The barrier below ensures the page zeroing is visible to the page
// table walker before this page is linked into the previous-level PTE.
7018 __builtin_arm_dmb(DMB_ISHST);
7019 }
7020 ptd_init(ptdp, pmap, va, ttlevel, pte_p);
7021 }
7022
7023 /*
7024 * Routine: pmap_expand
7025 *
7026 * Expands a pmap to be able to map the specified virtual address.
7027 *
7028 * Allocates new memory for the default (COARSE) translation table
7029 * entry, initializes all the pte entries to ARM_PTE_TYPE_FAULT and
7030 * also allocates space for the corresponding pv entries.
7031 *
7032 * Nothing should be locked.
7033 */
7034 static kern_return_t
7035 pmap_expand(
7036 pmap_t pmap,
7037 vm_map_address_t v,
7038 unsigned int options,
7039 unsigned int level)
7040 {
7041 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
7042
7043 #if (__ARM_VMSA__ == 7)
7044 vm_offset_t pa;
7045 tt_entry_t *tte_p;
7046 tt_entry_t *tt_p;
7047 unsigned int i;
7048
7049 while (tte_index(pmap, pt_attr, v) >= pmap->tte_index_max) {
7050 tte_p = pmap_tt1_allocate(pmap, 2 * ARM_PGBYTES, ((options & PMAP_OPTIONS_NOWAIT)? PMAP_TT_ALLOCATE_NOWAIT : 0));
7051 if (tte_p == (tt_entry_t *)0) {
7052 return KERN_RESOURCE_SHORTAGE;
7053 }
7054
7055 PMAP_LOCK(pmap);
7056 if (pmap->tte_index_max > NTTES) {
7057 pmap_tt1_deallocate(pmap, tte_p, 2 * ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
7058 PMAP_UNLOCK(pmap);
7059 break;
7060 }
7061
7062 pmap_simple_lock(&pmap->tt1_lock);
7063 for (i = 0; i < pmap->tte_index_max; i++) {
7064 tte_p[i] = pmap->tte[i];
7065 }
7066 for (i = NTTES; i < 2 * NTTES; i++) {
7067 tte_p[i] = ARM_TTE_TYPE_FAULT;
7068 }
7069
7070 FLUSH_PTE_RANGE(tte_p, tte_p + (2 * NTTES)); // DMB
7071
7072 /* Order is important here, so that pmap_switch_user_ttb() sees things
7073 * in the correct sequence.
7074 * --update of pmap->tte[p] must happen prior to updating pmap->tte_index_max,
7075 * separated by at least a DMB, so that context switch does not see a 1 GB
7076 * L1 table with a 2GB size.
7077 * --update of pmap->tte[p] must also happen prior to setting pmap->prev_tte,
7078 * separated by at least a DMB, so that context switch does not see an L1
7079 * table to be freed without also seeing its replacement.*/
7080
7081 tt_entry_t *prev_tte = pmap->tte;
7082
7083 pmap->tte = tte_p;
7084 pmap->ttep = ml_static_vtop((vm_offset_t)pmap->tte);
7085
7086 __builtin_arm_dmb(DMB_ISH);
7087
7088 pmap->tte_index_max = 2 * NTTES;
7089 pmap->stamp = os_atomic_inc(&pmap_stamp, relaxed);
7090
7091 for (i = 0; i < NTTES; i++) {
7092 prev_tte[i] = ARM_TTE_TYPE_FAULT;
7093 }
7094
7095 /* We need a strong flush here because a TLB flush will be
7096 * issued from pmap_switch_user_ttb() as soon as this pmap
7097 * is no longer active on any CPU. We need to ensure all
7098 * prior stores to the TTE region have retired before that. */
7099 FLUSH_PTE_RANGE_STRONG(prev_tte, prev_tte + NTTES); // DSB
7100 pmap->prev_tte = prev_tte;
7101
7102 pmap_simple_unlock(&pmap->tt1_lock);
7103 PMAP_UNLOCK(pmap);
7104 if (current_pmap() == pmap) {
7105 pmap_set_pmap(pmap, current_thread());
7106 }
7107 }
7108
7109 if (level == 1) {
7110 return KERN_SUCCESS;
7111 }
7112
7113 {
7114 tt_entry_t *tte_next_p;
7115
7116 PMAP_LOCK(pmap);
7117 pa = 0;
7118 if (pmap_pte(pmap, v) != PT_ENTRY_NULL) {
7119 PMAP_UNLOCK(pmap);
7120 return KERN_SUCCESS;
7121 }
7122 tte_p = &pmap->tte[ttenum(v & ~ARM_TT_L1_PT_OFFMASK)];
7123 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
7124 if (tte_to_pa(*tte_next_p)) {
7125 pa = tte_to_pa(*tte_next_p);
7126 break;
7127 }
7128 tte_next_p++;
7129 }
7130 pa = pa & ~PAGE_MASK;
7131 if (pa) {
7132 tte_p = &pmap->tte[ttenum(v)];
7133 *tte_p = pa_to_tte(pa) | (((v >> ARM_TT_L1_SHIFT) & 0x3) << 10) | ARM_TTE_TYPE_TABLE;
7134 FLUSH_PTE(tte_p);
7135 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~ARM_TT_L1_OFFMASK),
7136 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_OFFMASK) + ARM_TT_L1_SIZE), *tte_p);
7137 PMAP_UNLOCK(pmap);
7138 return KERN_SUCCESS;
7139 }
7140 PMAP_UNLOCK(pmap);
7141 }
7142 v = v & ~ARM_TT_L1_PT_OFFMASK;
7143
7144
7145 while (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
7146 /*
7147 * Allocate a VM page for the level 2 page table entries.
7148 */
7149 while (pmap_tt_allocate(pmap, &tt_p, PMAP_TT_L2_LEVEL, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
7150 if (options & PMAP_OPTIONS_NOWAIT) {
7151 return KERN_RESOURCE_SHORTAGE;
7152 }
7153 VM_PAGE_WAIT();
7154 }
7155
7156 PMAP_LOCK(pmap);
7157 /*
7158 * See if someone else expanded us first
7159 */
7160 if (pmap_pte(pmap, v) == PT_ENTRY_NULL) {
7161 tt_entry_t *tte_next_p;
7162
7163 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, PMAP_TT_L2_LEVEL, FALSE, TRUE);
7164 pa = kvtophys((vm_offset_t)tt_p);
7165 tte_p = &pmap->tte[ttenum(v)];
7166 for (i = 0, tte_next_p = tte_p; i < 4; i++) {
7167 *tte_next_p = pa_to_tte(pa) | ARM_TTE_TYPE_TABLE;
7168 PMAP_TRACE(3, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + (i * ARM_TT_L1_SIZE)),
7169 VM_KERNEL_ADDRHIDE((v & ~ARM_TT_L1_PT_OFFMASK) + ((i + 1) * ARM_TT_L1_SIZE)), *tte_p);
7170 tte_next_p++;
7171 pa = pa + 0x400;
7172 }
7173 FLUSH_PTE_RANGE(tte_p, tte_p + 4);
7174
7175 pa = 0x0ULL;
7176 tt_p = (tt_entry_t *)NULL;
7177 }
7178 PMAP_UNLOCK(pmap);
7179 if (tt_p != (tt_entry_t *)NULL) {
7180 pmap_tt_deallocate(pmap, tt_p, PMAP_TT_L2_LEVEL);
7181 tt_p = (tt_entry_t *)NULL;
7182 }
7183 }
7184 return KERN_SUCCESS;
7185 #else
7186 pmap_paddr_t pa;
7187 unsigned int ttlevel = pt_attr_root_level(pt_attr);
7188 tt_entry_t *tte_p;
7189 tt_entry_t *tt_p;
7190
7191 pa = 0x0ULL;
7192 tt_p = (tt_entry_t *)NULL;
7193
7194 for (; ttlevel < level; ttlevel++) {
7195 PMAP_LOCK(pmap);
7196
7197 if (pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL) {
7198 PMAP_UNLOCK(pmap);
7199 while (pmap_tt_allocate(pmap, &tt_p, ttlevel + 1, ((options & PMAP_TT_ALLOCATE_NOWAIT)? PMAP_PAGES_ALLOCATE_NOWAIT : 0)) != KERN_SUCCESS) {
7200 if (options & PMAP_OPTIONS_NOWAIT) {
7201 return KERN_RESOURCE_SHORTAGE;
7202 }
7203 VM_PAGE_WAIT();
7204 }
7205 PMAP_LOCK(pmap);
7206 if ((pmap_ttne(pmap, ttlevel + 1, v) == PT_ENTRY_NULL)) {
7207 pmap_init_pte_page(pmap, (pt_entry_t *) tt_p, v, ttlevel + 1, FALSE, TRUE);
7208 pa = kvtophys((vm_offset_t)tt_p);
7209 tte_p = pmap_ttne(pmap, ttlevel, v);
7210 *tte_p = (pa & ARM_TTE_TABLE_MASK) | ARM_TTE_TYPE_TABLE | ARM_TTE_VALID;
7211 PMAP_TRACE(ttlevel + 1, PMAP_CODE(PMAP__TTE), VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(v & ~pt_attr_ln_offmask(pt_attr, ttlevel)),
7212 VM_KERNEL_ADDRHIDE((v & ~pt_attr_ln_offmask(pt_attr, ttlevel)) + pt_attr_ln_size(pt_attr, ttlevel)), *tte_p);
7213 pa = 0x0ULL;
7214 tt_p = (tt_entry_t *)NULL;
7215 }
7216 }
7217
7218 PMAP_UNLOCK(pmap);
7219
7220 if (tt_p != (tt_entry_t *)NULL) {
7221 pmap_tt_deallocate(pmap, tt_p, ttlevel + 1);
7222 tt_p = (tt_entry_t *)NULL;
7223 }
7224 }
7225
7226 return KERN_SUCCESS;
7227 #endif
7228 }
7229
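/*
 * Usage sketch (illustrative only, not compiled): the expand-and-retry
 * pattern used by pmap_enter_options_internal() above.  The pmap lock
 * must be dropped around pmap_expand(), so the PTE lookup is retried
 * after reacquiring it.
 */
#if 0
while ((pte_p = pmap_pte(pmap, v)) == PT_ENTRY_NULL) {
    PMAP_UNLOCK(pmap);
    kr = pmap_expand(pmap, v, options, PMAP_TT_MAX_LEVEL);
    if (kr != KERN_SUCCESS) {
        return kr;
    }
    PMAP_LOCK(pmap);
}
#endif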
7230 /*
7231 * Routine: pmap_collect
7232 * Function:
7233 * Garbage collects the physical map system for
7234 * pages which are no longer used.
7235 * Success need not be guaranteed -- that is, there
7236 * may well be pages which are not referenced, but
7237 * others may be collected.
7238 */
7239 void
7240 pmap_collect(pmap_t pmap)
7241 {
7242 if (pmap == PMAP_NULL) {
7243 return;
7244 }
7245
7246 #if 0
7247 PMAP_LOCK(pmap);
7248 if ((pmap->nested == FALSE) && (pmap != kernel_pmap)) {
7249 /* TODO: Scan for vm page assigned to top level page tables with no reference */
7250 }
7251 PMAP_UNLOCK(pmap);
7252 #endif
7253
7254 return;
7255 }
7256
7257 /*
7258 * Routine: pmap_gc
7259 * Function:
7260 * Pmap garbage collection
7261 * Called by the pageout daemon when pages are scarce.
7262 *
7263 */
7264 void
7265 pmap_gc(
7266 void)
7267 {
7268 pmap_t pmap, pmap_next;
7269 boolean_t gc_wait;
7270
7271 if (pmap_gc_allowed &&
7272 (pmap_gc_allowed_by_time_throttle ||
7273 pmap_gc_forced)) {
7274 pmap_gc_forced = FALSE;
7275 pmap_gc_allowed_by_time_throttle = FALSE;
7276 pmap_simple_lock(&pmaps_lock);
7277 pmap = CAST_DOWN_EXPLICIT(pmap_t, queue_first(&map_pmap_list));
7278 while (!queue_end(&map_pmap_list, (queue_entry_t)pmap)) {
7279 if (!(pmap->gc_status & PMAP_GC_INFLIGHT)) {
7280 pmap->gc_status |= PMAP_GC_INFLIGHT;
7281 }
7282 pmap_simple_unlock(&pmaps_lock);
7283
7284 pmap_collect(pmap);
7285
7286 pmap_simple_lock(&pmaps_lock);
7287 gc_wait = (pmap->gc_status & PMAP_GC_WAIT);
7288 pmap->gc_status &= ~(PMAP_GC_INFLIGHT | PMAP_GC_WAIT);
7289 pmap_next = CAST_DOWN_EXPLICIT(pmap_t, queue_next(&pmap->pmaps));
7290 if (gc_wait) {
7291 if (!queue_end(&map_pmap_list, (queue_entry_t)pmap_next)) {
7292 pmap_next->gc_status |= PMAP_GC_INFLIGHT;
7293 }
7294 pmap_simple_unlock(&pmaps_lock);
7295 thread_wakeup((event_t) &pmap->gc_status);
7296 pmap_simple_lock(&pmaps_lock);
7297 }
7298 pmap = pmap_next;
7299 }
7300 pmap_simple_unlock(&pmaps_lock);
7301 }
7302 }
7303
7304 /*
7305 * Called by the VM to reclaim pages that we can reclaim quickly and cheaply.
7306 */
7307 uint64_t
7308 pmap_release_pages_fast(void)
7309 {
7310 return 0;
7311 }
7312
7313 /*
7314 * By default, don't attempt pmap GC more frequently
* than once per minute.
7316 */
7317
7318 void
7319 compute_pmap_gc_throttle(
7320 void *arg __unused)
7321 {
7322 pmap_gc_allowed_by_time_throttle = TRUE;
7323 }
7324
7325 /*
7326 * pmap_attribute_cache_sync(vm_offset_t pa)
7327 *
7328 * Invalidates all of the instruction cache on a physical page and
7329 * pushes any dirty data from the data cache for the same physical page
7330 */
7331
7332 kern_return_t
7333 pmap_attribute_cache_sync(
7334 ppnum_t pp,
7335 vm_size_t size,
7336 __unused vm_machine_attribute_t attribute,
7337 __unused vm_machine_attribute_val_t * value)
7338 {
7339 if (size > PAGE_SIZE) {
7340 panic("pmap_attribute_cache_sync size: 0x%llx\n", (uint64_t)size);
7341 } else {
7342 cache_sync_page(pp);
7343 }
7344
7345 return KERN_SUCCESS;
7346 }
7347
7348 /*
7349 * pmap_sync_page_data_phys(ppnum_t pp)
7350 *
7351 * Invalidates all of the instruction cache on a physical page and
7352 * pushes any dirty data from the data cache for the same physical page
7353 */
7354 void
7355 pmap_sync_page_data_phys(
7356 ppnum_t pp)
7357 {
7358 cache_sync_page(pp);
7359 }
7360
7361 /*
7362 * pmap_sync_page_attributes_phys(ppnum_t pp)
7363 *
7364 * Write back and invalidate all cachelines on a physical page.
7365 */
7366 void
7367 pmap_sync_page_attributes_phys(
7368 ppnum_t pp)
7369 {
7370 flush_dcache((vm_offset_t) (pp << PAGE_SHIFT), PAGE_SIZE, TRUE);
7371 }
7372
7373 #if CONFIG_COREDUMP
7374 /* temporary workaround */
7375 boolean_t
7376 coredumpok(
7377 vm_map_t map,
7378 vm_offset_t va)
7379 {
7380 pt_entry_t *pte_p;
7381 pt_entry_t spte;
7382
7383 pte_p = pmap_pte(map->pmap, va);
7384 if (0 == pte_p) {
7385 return FALSE;
7386 }
7387 spte = *pte_p;
7388 return (spte & ARM_PTE_ATTRINDXMASK) == ARM_PTE_ATTRINDX(CACHE_ATTRINDX_DEFAULT);
7389 }
7390 #endif
7391
7392 void
7393 fillPage(
7394 ppnum_t pn,
7395 unsigned int fill)
7396 {
7397 unsigned int *addr;
7398 int count;
7399
7400 addr = (unsigned int *) phystokv(ptoa(pn));
7401 count = PAGE_SIZE / sizeof(unsigned int);
7402 while (count--) {
7403 *addr++ = fill;
7404 }
7405 }
7406
7407 extern void mapping_set_mod(ppnum_t pn);
7408
7409 void
7410 mapping_set_mod(
7411 ppnum_t pn)
7412 {
7413 pmap_set_modify(pn);
7414 }
7415
7416 extern void mapping_set_ref(ppnum_t pn);
7417
7418 void
7419 mapping_set_ref(
7420 ppnum_t pn)
7421 {
7422 pmap_set_reference(pn);
7423 }
7424
7425 /*
7426 * Clear specified attribute bits.
7427 *
7428 * Try to force an arm_fast_fault() for all mappings of
7429 * the page - to force attributes to be set again at fault time.
7430 * If the forcing succeeds, clear the cached bits at the head.
7431 * Otherwise, something must have been wired, so leave the cached
7432 * attributes alone.
7433 */
7434 MARK_AS_PMAP_TEXT static void
7435 phys_attribute_clear_internal(
7436 ppnum_t pn,
7437 unsigned int bits,
7438 int options,
7439 void *arg)
7440 {
7441 pmap_paddr_t pa = ptoa(pn);
7442 vm_prot_t allow_mode = VM_PROT_ALL;
7443
7444
7445 if ((bits & PP_ATTR_MODIFIED) &&
7446 (options & PMAP_OPTIONS_NOFLUSH) &&
7447 (arg == NULL)) {
7448 panic("phys_attribute_clear(0x%x,0x%x,0x%x,%p): "
7449 "should not clear 'modified' without flushing TLBs\n",
7450 pn, bits, options, arg);
7451 }
7452
7453 assert(pn != vm_page_fictitious_addr);
7454
7455 if (options & PMAP_OPTIONS_CLEAR_WRITE) {
7456 assert(bits == PP_ATTR_MODIFIED);
7457
7458 pmap_page_protect_options_internal(pn, (VM_PROT_ALL & ~VM_PROT_WRITE), 0);
7459 /*
7460 * We short circuit this case; it should not need to
7461 * invoke arm_force_fast_fault, so just clear the modified bit.
7462 * pmap_page_protect has taken care of resetting
7463 * the state so that we'll see the next write as a fault to
7464 * the VM (i.e. we don't want a fast fault).
7465 */
7466 pa_clear_bits(pa, bits);
7467 return;
7468 }
7469 if (bits & PP_ATTR_REFERENCED) {
7470 allow_mode &= ~(VM_PROT_READ | VM_PROT_EXECUTE);
7471 }
7472 if (bits & PP_ATTR_MODIFIED) {
7473 allow_mode &= ~VM_PROT_WRITE;
7474 }
7475
7476 if (bits == PP_ATTR_NOENCRYPT) {
7477 /*
7478 * We short circuit this case; it should not need to
7479 * invoke arm_force_fast_fault, so just clear and
7480 * return. On ARM, this bit is just a debugging aid.
7481 */
7482 pa_clear_bits(pa, bits);
7483 return;
7484 }
7485
7486 if (arm_force_fast_fault_internal(pn, allow_mode, options)) {
7487 pa_clear_bits(pa, bits);
7488 }
7489 return;
7490 }
7491
7492 static void
7493 phys_attribute_clear(
7494 ppnum_t pn,
7495 unsigned int bits,
7496 int options,
7497 void *arg)
7498 {
7499 /*
7500 * Do we really want this tracepoint? It will be extremely chatty.
7501 * Also, should we have a corresponding trace point for the set path?
7502 */
7503 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_START, pn, bits);
7504
7505 phys_attribute_clear_internal(pn, bits, options, arg);
7506
7507 PMAP_TRACE(3, PMAP_CODE(PMAP__ATTRIBUTE_CLEAR) | DBG_FUNC_END);
7508 }
7509
7510 /*
7511 * Set specified attribute bits.
7512 *
7513 * Set cached value in the pv head because we have
7514 * no per-mapping hardware support for referenced and
7515 * modify bits.
7516 */
7517 MARK_AS_PMAP_TEXT static void
7518 phys_attribute_set_internal(
7519 ppnum_t pn,
7520 unsigned int bits)
7521 {
7522 pmap_paddr_t pa = ptoa(pn);
7523 assert(pn != vm_page_fictitious_addr);
7524
7525
7526 pa_set_bits(pa, bits);
7527
7528 return;
7529 }
7530
7531 static void
7532 phys_attribute_set(
7533 ppnum_t pn,
7534 unsigned int bits)
7535 {
7536 phys_attribute_set_internal(pn, bits);
7537 }
7538
7539
7540 /*
7541 * Check specified attribute bits.
7542 *
7543 * use the software cached bits (since no hw support).
7544 */
7545 static boolean_t
7546 phys_attribute_test(
7547 ppnum_t pn,
7548 unsigned int bits)
7549 {
7550 pmap_paddr_t pa = ptoa(pn);
7551 assert(pn != vm_page_fictitious_addr);
7552 return pa_test_bits(pa, bits);
7553 }
7554
7555
7556 /*
7557 * Set the modify/reference bits on the specified physical page.
7558 */
7559 void
7560 pmap_set_modify(ppnum_t pn)
7561 {
7562 phys_attribute_set(pn, PP_ATTR_MODIFIED);
7563 }
7564
7565
7566 /*
7567 * Clear the modify bits on the specified physical page.
7568 */
7569 void
7570 pmap_clear_modify(
7571 ppnum_t pn)
7572 {
7573 phys_attribute_clear(pn, PP_ATTR_MODIFIED, 0, NULL);
7574 }
7575
7576
7577 /*
7578 * pmap_is_modified:
7579 *
7580 * Return whether or not the specified physical page is modified
7581 * by any physical maps.
7582 */
7583 boolean_t
7584 pmap_is_modified(
7585 ppnum_t pn)
7586 {
7587 return phys_attribute_test(pn, PP_ATTR_MODIFIED);
7588 }
7589
7590
7591 /*
7592 * Set the reference bit on the specified physical page.
7593 */
7594 static void
7595 pmap_set_reference(
7596 ppnum_t pn)
7597 {
7598 phys_attribute_set(pn, PP_ATTR_REFERENCED);
7599 }
7600
7601 /*
7602 * Clear the reference bits on the specified physical page.
7603 */
7604 void
7605 pmap_clear_reference(
7606 ppnum_t pn)
7607 {
7608 phys_attribute_clear(pn, PP_ATTR_REFERENCED, 0, NULL);
7609 }
7610
7611
7612 /*
7613 * pmap_is_referenced:
7614 *
7615 * Return whether or not the specified physical page is referenced
7616 * by any physical maps.
7617 */
7618 boolean_t
7619 pmap_is_referenced(
7620 ppnum_t pn)
7621 {
7622 return phys_attribute_test(pn, PP_ATTR_REFERENCED);
7623 }
7624
7625 /*
7626 * pmap_get_refmod(phys)
7627 * returns the referenced and modified bits of the specified
7628 * physical page.
7629 */
7630 unsigned int
7631 pmap_get_refmod(
7632 ppnum_t pn)
7633 {
7634 return ((phys_attribute_test(pn, PP_ATTR_MODIFIED)) ? VM_MEM_MODIFIED : 0)
7635 | ((phys_attribute_test(pn, PP_ATTR_REFERENCED)) ? VM_MEM_REFERENCED : 0);
7636 }
7637
7638 /*
7639 * pmap_clear_refmod(phys, mask)
7640 * clears the referenced and modified bits as specified by the mask
7641 * of the specified physical page.
7642 */
7643 void
7644 pmap_clear_refmod_options(
7645 ppnum_t pn,
7646 unsigned int mask,
7647 unsigned int options,
7648 void *arg)
7649 {
7650 unsigned int bits;
7651
7652 bits = ((mask & VM_MEM_MODIFIED) ? PP_ATTR_MODIFIED : 0) |
7653 ((mask & VM_MEM_REFERENCED) ? PP_ATTR_REFERENCED : 0);
7654 phys_attribute_clear(pn, bits, options, arg);
7655 }
7656
7657 void
7658 pmap_clear_refmod(
7659 ppnum_t pn,
7660 unsigned int mask)
7661 {
7662 pmap_clear_refmod_options(pn, mask, 0, NULL);
7663 }
7664
7665 unsigned int
7666 pmap_disconnect_options(
7667 ppnum_t pn,
7668 unsigned int options,
7669 void *arg)
7670 {
7671 if ((options & PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED)) {
7672 /*
7673 * On ARM, the "modified" bit is managed by software, so
7674 * we know up-front if the physical page is "modified",
7675 * without having to scan all the PTEs pointing to it.
* The caller should have made the VM page "busy" so no one
7677 * should be able to establish any new mapping and "modify"
7678 * the page behind us.
7679 */
7680 if (pmap_is_modified(pn)) {
7681 /*
7682 * The page has been modified and will be sent to
7683 * the VM compressor.
7684 */
7685 options |= PMAP_OPTIONS_COMPRESSOR;
7686 } else {
7687 /*
7688 * The page hasn't been modified and will be freed
7689 * instead of compressed.
7690 */
7691 }
7692 }
7693
7694 /* disconnect the page */
7695 pmap_page_protect_options(pn, 0, options, arg);
7696
7697 /* return ref/chg status */
7698 return pmap_get_refmod(pn);
7699 }
7700
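/*
 * Usage sketch (illustrative only, not compiled): disconnecting a page
 * that is headed to the VM compressor, but only accounting it as
 * compressed if it was actually modified.  The caller below is an
 * assumption for illustration.
 */
#if 0
static boolean_t
example_disconnect_for_compressor(ppnum_t pn)
{
    unsigned int refmod = pmap_disconnect_options(pn,
        PMAP_OPTIONS_COMPRESSOR_IFF_MODIFIED, NULL);
    /* If the page was dirty, its contents must be preserved. */
    return (refmod & VM_MEM_MODIFIED) ? TRUE : FALSE;
}
#endif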
7701 /*
7702 * Routine:
7703 * pmap_disconnect
7704 *
7705 * Function:
7706 * Disconnect all mappings for this page and return reference and change status
7707 * in generic format.
7708 *
7709 */
7710 unsigned int
7711 pmap_disconnect(
7712 ppnum_t pn)
7713 {
7714 pmap_page_protect(pn, 0); /* disconnect the page */
7715 return pmap_get_refmod(pn); /* return ref/chg status */
7716 }
7717
7718 boolean_t
7719 pmap_has_managed_page(ppnum_t first, ppnum_t last)
7720 {
7721 if (ptoa(first) >= vm_last_phys) {
7722 return FALSE;
7723 }
7724 if (ptoa(last) < vm_first_phys) {
7725 return FALSE;
7726 }
7727
7728 return TRUE;
7729 }
7730
7731 /*
7732 * The state maintained by the noencrypt functions is used as a
7733 * debugging aid on ARM. This incurs some overhead on the part
7734 * of the caller. A special case check in phys_attribute_clear
7735 * (the most expensive path) currently minimizes this overhead,
7736 * but stubbing these functions out on RELEASE kernels yields
7737 * further wins.
7738 */
7739 boolean_t
7740 pmap_is_noencrypt(
7741 ppnum_t pn)
7742 {
7743 #if DEVELOPMENT || DEBUG
7744 boolean_t result = FALSE;
7745
7746 if (!pa_valid(ptoa(pn))) {
7747 return FALSE;
7748 }
7749
7750 result = (phys_attribute_test(pn, PP_ATTR_NOENCRYPT));
7751
7752 return result;
7753 #else
7754 #pragma unused(pn)
7755 return FALSE;
7756 #endif
7757 }
7758
7759 void
7760 pmap_set_noencrypt(
7761 ppnum_t pn)
7762 {
7763 #if DEVELOPMENT || DEBUG
7764 if (!pa_valid(ptoa(pn))) {
7765 return;
7766 }
7767
7768 phys_attribute_set(pn, PP_ATTR_NOENCRYPT);
7769 #else
7770 #pragma unused(pn)
7771 #endif
7772 }
7773
7774 void
7775 pmap_clear_noencrypt(
7776 ppnum_t pn)
7777 {
7778 #if DEVELOPMENT || DEBUG
7779 if (!pa_valid(ptoa(pn))) {
7780 return;
7781 }
7782
7783 phys_attribute_clear(pn, PP_ATTR_NOENCRYPT, 0, NULL);
7784 #else
7785 #pragma unused(pn)
7786 #endif
7787 }
7788
7789
7790 void
7791 pmap_lock_phys_page(ppnum_t pn)
7792 {
7793 int pai;
7794 pmap_paddr_t phys = ptoa(pn);
7795
7796 if (pa_valid(phys)) {
7797 pai = (int)pa_index(phys);
7798 LOCK_PVH(pai);
} else {
simple_lock(&phys_backup_lock, LCK_GRP_NULL);
}
7801 }
7802
7803
7804 void
7805 pmap_unlock_phys_page(ppnum_t pn)
7806 {
7807 int pai;
7808 pmap_paddr_t phys = ptoa(pn);
7809
7810 if (pa_valid(phys)) {
7811 pai = (int)pa_index(phys);
7812 UNLOCK_PVH(pai);
} else {
simple_unlock(&phys_backup_lock);
}
7815 }
7816
7817 MARK_AS_PMAP_TEXT static void
7818 pmap_switch_user_ttb_internal(
7819 pmap_t pmap)
7820 {
7821 VALIDATE_PMAP(pmap);
7822 pmap_cpu_data_t *cpu_data_ptr;
7823 cpu_data_ptr = pmap_get_cpu_data();
7824
7825 #if (__ARM_VMSA__ == 7)
7826
7827 if ((cpu_data_ptr->cpu_user_pmap != PMAP_NULL)
7828 && (cpu_data_ptr->cpu_user_pmap != kernel_pmap)) {
7829 unsigned int c;
7830 tt_entry_t *tt_entry = cpu_data_ptr->cpu_user_pmap->prev_tte;
7831
7832 c = os_atomic_dec(&cpu_data_ptr->cpu_user_pmap->cpu_ref, acq_rel);
7833 if ((c == 0) && (tt_entry != NULL)) {
7834 /* We saved off the old 1-page tt1 in pmap_expand() in case other cores were still using it.
7835 * Now that the user pmap's cpu_ref is 0, we should be able to safely free it.*/
7836
7837 cpu_data_ptr->cpu_user_pmap->prev_tte = NULL;
7838 #if !__ARM_USER_PROTECT__
7839 set_mmu_ttb(kernel_pmap->ttep);
7840 set_context_id(kernel_pmap->hw_asid);
7841 #endif
7842 /* Now that we can guarantee the old 1-page L1 table is no longer active on any CPU,
7843 * flush any cached intermediate translations that may point to it. Note that to be truly
7844 * safe from prefetch-related issues, this table PA must have been cleared from TTBR0 prior
7845 * to this call. __ARM_USER_PROTECT__ effectively guarantees that for all current configurations.*/
7846 flush_mmu_tlb_asid(cpu_data_ptr->cpu_user_pmap->hw_asid);
7847 pmap_tt1_deallocate(cpu_data_ptr->cpu_user_pmap, tt_entry, ARM_PGBYTES, PMAP_TT_DEALLOCATE_NOBLOCK);
7848 }
7849 }
7850 cpu_data_ptr->cpu_user_pmap = pmap;
7851 cpu_data_ptr->cpu_user_pmap_stamp = pmap->stamp;
7852 os_atomic_inc(&pmap->cpu_ref, acq_rel);
7853
7854 #if MACH_ASSERT && __ARM_USER_PROTECT__
7855 {
7856 unsigned int ttbr0_val, ttbr1_val;
7857 __asm__ volatile ("mrc p15,0,%0,c2,c0,0\n" : "=r"(ttbr0_val));
7858 __asm__ volatile ("mrc p15,0,%0,c2,c0,1\n" : "=r"(ttbr1_val));
7859 if (ttbr0_val != ttbr1_val) {
panic("TTBR0/TTBR1 mismatch: ttbr0 %08X ttbr1 %08X\n", ttbr0_val, ttbr1_val);
7861 }
7862 }
7863 #endif
7864 if (pmap->tte_index_max == NTTES) {
7865 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x40000000 */
7866 __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(2));
7867 __builtin_arm_isb(ISB_SY);
7868 #if !__ARM_USER_PROTECT__
7869 set_mmu_ttb(pmap->ttep);
7870 #endif
7871 } else {
7872 #if !__ARM_USER_PROTECT__
7873 set_mmu_ttb(pmap->ttep);
7874 #endif
7875 /* Setting TTBCR.N for TTBR0 TTBR1 boundary at 0x80000000 */
7876 __asm__ volatile ("mcr p15,0,%0,c2,c0,2" : : "r"(1));
7877 __builtin_arm_isb(ISB_SY);
7878 #if MACH_ASSERT && __ARM_USER_PROTECT__
7879 if (pmap->ttep & 0x1000) {
7880 panic("Misaligned ttbr0 %08X\n", pmap->ttep);
7881 }
7882 #endif
7883 }
7884
7885 #if !__ARM_USER_PROTECT__
7886 set_context_id(pmap->hw_asid);
7887 #endif
7888
7889 #else /* (__ARM_VMSA__ == 7) */
7890
7891 if (pmap != kernel_pmap) {
7892 cpu_data_ptr->cpu_nested_pmap = pmap->nested_pmap;
7893 }
7894
7895 if (pmap == kernel_pmap) {
7896 pmap_clear_user_ttb_internal();
7897 } else {
7898 set_mmu_ttb((pmap->ttep & TTBR_BADDR_MASK) | (((uint64_t)pmap->hw_asid) << TTBR_ASID_SHIFT));
7899 }
7900
7901 #if defined(HAS_APPLE_PAC) && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__)
7902 if (!(BootArgs->bootFlags & kBootFlagsDisableJOP) && !(BootArgs->bootFlags & kBootFlagsDisableUserJOP)) {
7903 uint64_t sctlr = __builtin_arm_rsr64("SCTLR_EL1");
7904 bool jop_enabled = sctlr & SCTLR_JOP_KEYS_ENABLED;
7905 if (!jop_enabled && !pmap->disable_jop) {
7906 // turn on JOP
7907 sctlr |= SCTLR_JOP_KEYS_ENABLED;
7908 __builtin_arm_wsr64("SCTLR_EL1", sctlr);
7909 // no ISB necessary because this won't take effect until eret returns to EL0
7910 } else if (jop_enabled && pmap->disable_jop) {
7911 // turn off JOP
7912 sctlr &= ~SCTLR_JOP_KEYS_ENABLED;
7913 __builtin_arm_wsr64("SCTLR_EL1", sctlr);
7914 }
7915 }
7916 #endif /* HAS_APPLE_PAC && (__APCFG_SUPPORTED__ || __APSTS_SUPPORTED__) */
7917 #endif /* (__ARM_VMSA__ == 7) */
7918 }
7919
7920 void
7921 pmap_switch_user_ttb(
7922 pmap_t pmap)
7923 {
7924 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_START, VM_KERNEL_ADDRHIDE(pmap), PMAP_VASID(pmap), pmap->hw_asid);
7925 pmap_switch_user_ttb_internal(pmap);
7926 PMAP_TRACE(1, PMAP_CODE(PMAP__SWITCH_USER_TTB) | DBG_FUNC_END);
7927 }
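
/*
 * Usage sketch (illustrative): the context-switch path activates the
 * incoming task's address space by handing its pmap to
 * pmap_switch_user_ttb(); 'new_task_pmap' is a hypothetical variable
 * standing in for the pmap of the task being switched to:
 *
 *	pmap_switch_user_ttb(new_task_pmap);
 *
 * On arm64 this reprograms TTBR0 with the pmap's table base and ASID; on
 * 32-bit ARM (__ARM_VMSA__ == 7) it also manages the per-CPU user pmap
 * reference and the TTBCR.N boundary handled above.
 */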
7928
7929 MARK_AS_PMAP_TEXT static void
7930 pmap_clear_user_ttb_internal(void)
7931 {
7932 #if (__ARM_VMSA__ > 7)
7933 set_mmu_ttb(invalid_ttep & TTBR_BADDR_MASK);
7934 #else
7935 set_mmu_ttb(kernel_pmap->ttep);
7936 #endif
7937 }
7938
7939 void
7940 pmap_clear_user_ttb(void)
7941 {
7942 pmap_clear_user_ttb_internal();
7943 }
7944
7945 /*
7946 * Routine: arm_force_fast_fault
7947 *
7948 * Function:
7949 * Force all mappings for this page to fault according
7950 * to the access modes allowed, so we can gather ref/modify
7951 * bits again.
7952 */
7953 MARK_AS_PMAP_TEXT static boolean_t
7954 arm_force_fast_fault_internal(
7955 ppnum_t ppnum,
7956 vm_prot_t allow_mode,
7957 int options)
7958 {
7959 pmap_paddr_t phys = ptoa(ppnum);
7960 pv_entry_t *pve_p;
7961 pt_entry_t *pte_p;
7962 int pai;
7963 boolean_t result;
7964 pv_entry_t **pv_h;
7965 boolean_t is_reusable, is_internal;
7966 boolean_t tlb_flush_needed = FALSE;
7967 boolean_t ref_fault;
7968 boolean_t mod_fault;
7969
7970 assert(ppnum != vm_page_fictitious_addr);
7971
7972 if (!pa_valid(phys)) {
7973 return FALSE; /* Not a managed page. */
7974 }
7975
7976 result = TRUE;
7977 ref_fault = FALSE;
7978 mod_fault = FALSE;
7979 pai = (int)pa_index(phys);
7980 LOCK_PVH(pai);
7981 pv_h = pai_to_pvh(pai);
7982
7983 pte_p = PT_ENTRY_NULL;
7984 pve_p = PV_ENTRY_NULL;
7985 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
7986 pte_p = pvh_ptep(pv_h);
7987 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
7988 pve_p = pvh_list(pv_h);
7989 }
7990
7991 is_reusable = IS_REUSABLE_PAGE(pai);
7992 is_internal = IS_INTERNAL_PAGE(pai);
7993
7994 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
7995 vm_map_address_t va;
7996 pt_entry_t spte;
7997 pt_entry_t tmplate;
7998 pmap_t pmap;
7999 boolean_t update_pte;
8000
8001 if (pve_p != PV_ENTRY_NULL) {
8002 pte_p = pve_get_ptep(pve_p);
8003 }
8004
8005 if (pte_p == PT_ENTRY_NULL) {
8006 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
8007 }
8008 #ifdef PVH_FLAG_IOMMU
8009 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
8010 goto fff_skip_pve;
8011 }
8012 #endif
8013 if (*pte_p == ARM_PTE_EMPTY) {
8014 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
8015 }
8016 if (ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
8017 panic("pte is COMPRESSED: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
8018 }
8019
8020 pmap = ptep_get_pmap(pte_p);
8021 va = ptep_get_va(pte_p);
8022
8023 assert(va >= pmap->min && va < pmap->max);
8024
8025 if (pte_is_wired(*pte_p) || pmap == kernel_pmap) {
8026 result = FALSE;
8027 break;
8028 }
8029
8030 spte = *pte_p;
8031 tmplate = spte;
8032 update_pte = FALSE;
8033
8034 if ((allow_mode & VM_PROT_READ) != VM_PROT_READ) {
8035 /* read protection sets the pte to fault */
8036 tmplate = tmplate & ~ARM_PTE_AF;
8037 update_pte = TRUE;
8038 ref_fault = TRUE;
8039 }
8040 if ((allow_mode & VM_PROT_WRITE) != VM_PROT_WRITE) {
8041 /* take away write permission if set */
8042 if (pmap == kernel_pmap) {
8043 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWNA)) {
8044 tmplate = ((tmplate & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RONA));
8045 pte_set_was_writeable(tmplate, true);
8046 update_pte = TRUE;
8047 mod_fault = TRUE;
8048 }
8049 } else {
8050 if ((tmplate & ARM_PTE_APMASK) == ARM_PTE_AP(AP_RWRW)) {
8051 tmplate = ((tmplate & ~ARM_PTE_APMASK) | pt_attr_leaf_ro(pmap_get_pt_attr(pmap)));
8052 pte_set_was_writeable(tmplate, true);
8053 update_pte = TRUE;
8054 mod_fault = TRUE;
8055 }
8056 }
8057 }
8058
8059
8060 if (update_pte) {
8061 if (*pte_p != ARM_PTE_TYPE_FAULT &&
8062 !ARM_PTE_IS_COMPRESSED(*pte_p, pte_p)) {
8063 WRITE_PTE_STRONG(pte_p, tmplate);
8064 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
8065 tlb_flush_needed = TRUE;
8066 } else {
8067 WRITE_PTE(pte_p, tmplate);
8068 __builtin_arm_isb(ISB_SY);
8069 }
8070 }
8071
8072 /* update pmap stats and ledgers */
8073 if (IS_ALTACCT_PAGE(pai, pve_p)) {
8074 /*
8075 * We do not track "reusable" status for
8076 * "alternate accounting" mappings.
8077 */
8078 } else if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
8079 is_reusable &&
8080 is_internal &&
8081 pmap != kernel_pmap) {
8082 /* one less "reusable" */
8083 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
8084 OSAddAtomic(-1, &pmap->stats.reusable);
8085 /* one more "internal" */
8086 OSAddAtomic(+1, &pmap->stats.internal);
8087 PMAP_STATS_PEAK(pmap->stats.internal);
8088 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
8089 pmap_ledger_credit(pmap, task_ledgers.internal, machine_ptob(1));
8090 assert(!IS_ALTACCT_PAGE(pai, pve_p));
8091 assert(IS_INTERNAL_PAGE(pai));
8092 pmap_ledger_credit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
8093
8094 /*
8095 * Avoid the cost of another trap to handle the fast
8096 * fault when we next write to this page: let's just
8097 * handle that now since we already have all the
8098 * necessary information.
8099 */
8100 {
8101 arm_clear_fast_fault(ppnum, VM_PROT_WRITE);
8102 }
8103 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
8104 !is_reusable &&
8105 is_internal &&
8106 pmap != kernel_pmap) {
8107 /* one more "reusable" */
8108 OSAddAtomic(+1, &pmap->stats.reusable);
8109 PMAP_STATS_PEAK(pmap->stats.reusable);
8110 PMAP_STATS_ASSERTF(pmap->stats.reusable > 0, pmap, "stats.reusable %d", pmap->stats.reusable);
8111 /* one less "internal" */
8112 PMAP_STATS_ASSERTF(pmap->stats.internal > 0, pmap, "stats.internal %d", pmap->stats.internal);
8113 OSAddAtomic(-1, &pmap->stats.internal);
8114 pmap_ledger_debit(pmap, task_ledgers.internal, machine_ptob(1));
8115 assert(!IS_ALTACCT_PAGE(pai, pve_p));
8116 assert(IS_INTERNAL_PAGE(pai));
8117 pmap_ledger_debit(pmap, task_ledgers.phys_footprint, machine_ptob(1));
8118 }
8119
8120 #ifdef PVH_FLAG_IOMMU
8121 fff_skip_pve:
8122 #endif
8123 pte_p = PT_ENTRY_NULL;
8124 if (pve_p != PV_ENTRY_NULL) {
8125 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
8126 }
8127 }
8128
8129 if (tlb_flush_needed) {
8130 sync_tlb_flush();
8131 }
8132
8133 /* update global "reusable" status for this page */
8134 if (is_internal) {
8135 if ((options & PMAP_OPTIONS_CLEAR_REUSABLE) &&
8136 is_reusable) {
8137 CLR_REUSABLE_PAGE(pai);
8138 } else if ((options & PMAP_OPTIONS_SET_REUSABLE) &&
8139 !is_reusable) {
8140 SET_REUSABLE_PAGE(pai);
8141 }
8142 }
8143
8144 if (mod_fault) {
8145 SET_MODFAULT_PAGE(pai);
8146 }
8147 if (ref_fault) {
8148 SET_REFFAULT_PAGE(pai);
8149 }
8150
8151 UNLOCK_PVH(pai);
8152 return result;
8153 }
8154
8155 boolean_t
8156 arm_force_fast_fault(
8157 ppnum_t ppnum,
8158 vm_prot_t allow_mode,
8159 int options,
8160 __unused void *arg)
8161 {
8162 pmap_paddr_t phys = ptoa(ppnum);
8163
8164 assert(ppnum != vm_page_fictitious_addr);
8165
8166 if (!pa_valid(phys)) {
8167 return FALSE; /* Not a managed page. */
8168 }
8169
8170 return arm_force_fast_fault_internal(ppnum, allow_mode, options);
8171 }
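
/*
 * Usage sketch (illustrative): the VM layer re-arms software ref/mod
 * tracking for a page by revoking access so that the next touch faults:
 *
 *	// Fault on the next read or write, to re-gather both bits:
 *	arm_force_fast_fault(pn, VM_PROT_NONE, 0, NULL);
 *
 *	// Allow reads, but fault on the next write:
 *	arm_force_fast_fault(pn, VM_PROT_READ, 0, NULL);
 *
 * The resulting fault is resolved by arm_fast_fault()/arm_clear_fast_fault()
 * below, which restore access and set PP_ATTR_REFERENCED/PP_ATTR_MODIFIED.
 */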
8172
8173 /*
8174 * Routine: arm_clear_fast_fault
8175 *
8176 * Function:
8177 * Clear pending force fault for all mappings for this page based on
8178 * the observed fault type, update ref/modify bits.
8179 */
8180 boolean_t
8181 arm_clear_fast_fault(
8182 ppnum_t ppnum,
8183 vm_prot_t fault_type)
8184 {
8185 pmap_paddr_t pa = ptoa(ppnum);
8186 pv_entry_t *pve_p;
8187 pt_entry_t *pte_p;
8188 int pai;
8189 boolean_t result;
8190 boolean_t tlb_flush_needed = FALSE;
8191 pv_entry_t **pv_h;
8192
8193 assert(ppnum != vm_page_fictitious_addr);
8194
8195 if (!pa_valid(pa)) {
8196 return FALSE; /* Not a managed page. */
8197 }
8198
8199 result = FALSE;
8200 pai = (int)pa_index(pa);
8201 ASSERT_PVH_LOCKED(pai);
8202 pv_h = pai_to_pvh(pai);
8203
8204 pte_p = PT_ENTRY_NULL;
8205 pve_p = PV_ENTRY_NULL;
8206 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
8207 pte_p = pvh_ptep(pv_h);
8208 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
8209 pve_p = pvh_list(pv_h);
8210 }
8211
8212 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
8213 vm_map_address_t va;
8214 pt_entry_t spte;
8215 pt_entry_t tmplate;
8216 pmap_t pmap;
8217
8218 if (pve_p != PV_ENTRY_NULL) {
8219 pte_p = pve_get_ptep(pve_p);
8220 }
8221
8222 if (pte_p == PT_ENTRY_NULL) {
8223 panic("pte_p is NULL: pve_p=%p ppnum=0x%x\n", pve_p, ppnum);
8224 }
8225 #ifdef PVH_FLAG_IOMMU
8226 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
8227 goto cff_skip_pve;
8228 }
8229 #endif
8230 if (*pte_p == ARM_PTE_EMPTY) {
8231 panic("pte is NULL: pte_p=%p ppnum=0x%x\n", pte_p, ppnum);
8232 }
8233
8234 pmap = ptep_get_pmap(pte_p);
8235 va = ptep_get_va(pte_p);
8236
8237 assert(va >= pmap->min && va < pmap->max);
8238
8239 spte = *pte_p;
8240 tmplate = spte;
8241
8242 if ((fault_type & VM_PROT_WRITE) && (pte_was_writeable(spte))) {
8243 {
8244 if (pmap == kernel_pmap) {
8245 tmplate = ((spte & ~ARM_PTE_APMASK) | ARM_PTE_AP(AP_RWNA));
8246 } else {
8247 tmplate = ((spte & ~ARM_PTE_APMASK) | pt_attr_leaf_rw(pmap_get_pt_attr(pmap)));
8248 }
8249 }
8250
8251 tmplate |= ARM_PTE_AF;
8252
8253 pte_set_was_writeable(tmplate, false);
8254 pa_set_bits(pa, PP_ATTR_REFERENCED | PP_ATTR_MODIFIED);
8255 } else if ((fault_type & VM_PROT_READ) && ((spte & ARM_PTE_AF) != ARM_PTE_AF)) {
8256 tmplate = spte | ARM_PTE_AF;
8257
8258 {
8259 pa_set_bits(pa, PP_ATTR_REFERENCED);
8260 }
8261 }
8262
8263
8264 if (spte != tmplate) {
8265 if (spte != ARM_PTE_TYPE_FAULT) {
8266 WRITE_PTE_STRONG(pte_p, tmplate);
8267 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
8268 tlb_flush_needed = TRUE;
8269 } else {
8270 WRITE_PTE(pte_p, tmplate);
8271 __builtin_arm_isb(ISB_SY);
8272 }
8273 result = TRUE;
8274 }
8275
8276 #ifdef PVH_FLAG_IOMMU
8277 cff_skip_pve:
8278 #endif
8279 pte_p = PT_ENTRY_NULL;
8280 if (pve_p != PV_ENTRY_NULL) {
8281 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
8282 }
8283 }
8284 if (tlb_flush_needed) {
8285 sync_tlb_flush();
8286 }
8287 return result;
8288 }
8289
8290 /*
8291 * Determine if the fault was induced by software tracking of
8292 * modify/reference bits. If so, re-enable the mapping (and set
8293 * the appropriate bits).
8294 *
8295 * Returns KERN_SUCCESS if the fault was induced and was
8296 * successfully handled.
8297 *
8298 * Returns KERN_FAILURE if the fault was not induced and
8299 * the function was unable to deal with it.
8300 *
8301 * Returns KERN_PROTECTION_FAILURE if the pmap layer explicitly
8302 * disallows this type of access.
8303 */
8304 MARK_AS_PMAP_TEXT static kern_return_t
8305 arm_fast_fault_internal(
8306 pmap_t pmap,
8307 vm_map_address_t va,
8308 vm_prot_t fault_type,
8309 __unused bool was_af_fault,
8310 __unused bool from_user)
8311 {
8312 kern_return_t result = KERN_FAILURE;
8313 pt_entry_t *ptep;
8314 pt_entry_t spte = ARM_PTE_TYPE_FAULT;
8315 int pai;
8316 pmap_paddr_t pa;
8317 VALIDATE_PMAP(pmap);
8318
8319 PMAP_LOCK(pmap);
8320
8321 /*
8322 * If the entry doesn't exist, is completely invalid, or is already
8323 * valid, we can't fix it here.
8324 */
8325
8326 ptep = pmap_pte(pmap, va);
8327 if (ptep != PT_ENTRY_NULL) {
8328 while (true) {
8329 spte = *ptep;
8330
8331 pa = pte_to_pa(spte);
8332
8333 if ((spte == ARM_PTE_TYPE_FAULT) ||
8334 ARM_PTE_IS_COMPRESSED(spte, ptep)) {
8335 PMAP_UNLOCK(pmap);
8336 return result;
8337 }
8338
8339 if (!pa_valid(pa)) {
8340 PMAP_UNLOCK(pmap);
8341 return result;
8342 }
8343 pai = (int)pa_index(pa);
8344 LOCK_PVH(pai);
8345 break;
8346 }
8347 } else {
8348 PMAP_UNLOCK(pmap);
8349 return result;
8350 }
8351
8352
8353 if ((IS_REFFAULT_PAGE(pai)) ||
8354 ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai))) {
8355 /*
8356 * An attempted access will always clear ref/mod fault state, as
8357 * appropriate for the fault type. arm_clear_fast_fault will
8358 * update the associated PTEs for the page as appropriate; if
8359 * any PTEs are updated, we redrive the access. If the mapping
8360 * does not actually allow for the attempted access, the
8361 * following fault will (hopefully) fail to update any PTEs, and
8362 * thus cause arm_fast_fault to decide that it failed to handle
8363 * the fault.
8364 */
8365 if (IS_REFFAULT_PAGE(pai)) {
8366 CLR_REFFAULT_PAGE(pai);
8367 }
8368 if ((fault_type & VM_PROT_WRITE) && IS_MODFAULT_PAGE(pai)) {
8369 CLR_MODFAULT_PAGE(pai);
8370 }
8371
8372 if (arm_clear_fast_fault((ppnum_t)atop(pa), fault_type)) {
8373 /*
8374 * Should this preserve KERN_PROTECTION_FAILURE? The
8375 * cost of not doing so is another fault in a case
8376 * that should already result in an exception.
8377 */
8378 result = KERN_SUCCESS;
8379 }
8380 }
8381
8382 UNLOCK_PVH(pai);
8383 PMAP_UNLOCK(pmap);
8384 return result;
8385 }
8386
8387 kern_return_t
8388 arm_fast_fault(
8389 pmap_t pmap,
8390 vm_map_address_t va,
8391 vm_prot_t fault_type,
8392 bool was_af_fault,
8393 __unused bool from_user)
8394 {
8395 kern_return_t result = KERN_FAILURE;
8396
8397 if (va < pmap->min || va >= pmap->max) {
8398 return result;
8399 }
8400
8401 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_START,
8402 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(va), fault_type,
8403 from_user);
8404
8405 #if (__ARM_VMSA__ == 7)
8406 if (pmap != kernel_pmap) {
8407 pmap_cpu_data_t *cpu_data_ptr = pmap_get_cpu_data();
8408 pmap_t cur_pmap;
8409 pmap_t cur_user_pmap;
8410
8411 cur_pmap = current_pmap();
8412 cur_user_pmap = cpu_data_ptr->cpu_user_pmap;
8413
8414 if ((cur_user_pmap == cur_pmap) && (cur_pmap == pmap)) {
8415 if (cpu_data_ptr->cpu_user_pmap_stamp != pmap->stamp) {
8416 pmap_set_pmap(pmap, current_thread());
8417 result = KERN_SUCCESS;
8418 goto done;
8419 }
8420 }
8421 }
8422 #endif
8423
8424 result = arm_fast_fault_internal(pmap, va, fault_type, was_af_fault, from_user);
8425
8426 #if (__ARM_VMSA__ == 7)
8427 done:
8428 #endif
8429
8430 PMAP_TRACE(3, PMAP_CODE(PMAP__FAST_FAULT) | DBG_FUNC_END, result);
8431
8432 return result;
8433 }
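
/*
 * Usage sketch (illustrative): the data-abort path gives the pmap layer a
 * chance to resolve a software ref/mod fault before involving the VM layer;
 * 'fault_pmap', 'fault_addr', 'fault_type', 'was_af_fault' and 'from_user'
 * are hypothetical locals of the abort handler:
 *
 *	if (arm_fast_fault(fault_pmap, fault_addr, fault_type,
 *	    was_af_fault, from_user) == KERN_SUCCESS) {
 *		return;		// handled; retry the faulting access
 *	}
 *	// otherwise fall through to the general VM fault path
 */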
8434
8435 void
8436 pmap_copy_page(
8437 ppnum_t psrc,
8438 ppnum_t pdst)
8439 {
8440 bcopy_phys((addr64_t) (ptoa(psrc)),
8441 (addr64_t) (ptoa(pdst)),
8442 PAGE_SIZE);
8443 }
8444
8445
8446 /*
8447 * pmap_copy_part_page copies the specified portions of the given (machine independent) pages.
8448 */
8449 void
8450 pmap_copy_part_page(
8451 ppnum_t psrc,
8452 vm_offset_t src_offset,
8453 ppnum_t pdst,
8454 vm_offset_t dst_offset,
8455 vm_size_t len)
8456 {
8457 bcopy_phys((addr64_t) (ptoa(psrc) + src_offset),
8458 (addr64_t) (ptoa(pdst) + dst_offset),
8459 len);
8460 }
8461
8462
8463 /*
8464 * pmap_zero_page zeros the specified (machine independent) page.
8465 */
8466 void
8467 pmap_zero_page(
8468 ppnum_t pn)
8469 {
8470 assert(pn != vm_page_fictitious_addr);
8471 bzero_phys((addr64_t) ptoa(pn), PAGE_SIZE);
8472 }
8473
8474 /*
8475 * pmap_zero_part_page
8476 * zeros the specified (machine independent) part of a page.
8477 */
8478 void
8479 pmap_zero_part_page(
8480 ppnum_t pn,
8481 vm_offset_t offset,
8482 vm_size_t len)
8483 {
8484 assert(pn != vm_page_fictitious_addr);
8485 assert(offset + len <= PAGE_SIZE);
8486 bzero_phys((addr64_t) (ptoa(pn) + offset), len);
8487 }
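
/*
 * Usage sketch (illustrative): the physical copy/zero helpers above take
 * page numbers rather than mapped addresses:
 *
 *	pmap_zero_page(dst_pn);				// clear a whole page
 *	pmap_copy_part_page(src_pn, 0, dst_pn, 0x200, 0x100);
 *						// copy 256 bytes from the start of
 *						// src into dst at offset 0x200
 *
 * They all bottom out in bcopy_phys()/bzero_phys(), so the caller does not
 * need to establish a temporary kernel mapping.
 */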
8488
8489
8490 /*
8491 * nop in current arm implementation
8492 */
8493 void
8494 inval_copy_windows(
8495 __unused thread_t t)
8496 {
8497 }
8498
8499 void
8500 pmap_map_globals(
8501 void)
8502 {
8503 pt_entry_t *ptep, pte;
8504
8505 ptep = pmap_pte(kernel_pmap, LOWGLOBAL_ALIAS);
8506 assert(ptep != PT_ENTRY_NULL);
8507 assert(*ptep == ARM_PTE_EMPTY);
8508
8509 pte = pa_to_pte(ml_static_vtop((vm_offset_t)&lowGlo)) | AP_RONA | ARM_PTE_NX | ARM_PTE_PNX | ARM_PTE_AF | ARM_PTE_TYPE;
8510 #if __ARM_KERNEL_PROTECT__
8511 pte |= ARM_PTE_NG;
8512 #endif /* __ARM_KERNEL_PROTECT__ */
8513 pte |= ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK);
8514 #if (__ARM_VMSA__ > 7)
8515 pte |= ARM_PTE_SH(SH_OUTER_MEMORY);
8516 #else
8517 pte |= ARM_PTE_SH;
8518 #endif
8519 *ptep = pte;
8520 FLUSH_PTE_RANGE(ptep, (ptep + 1));
8521 PMAP_UPDATE_TLBS(kernel_pmap, LOWGLOBAL_ALIAS, LOWGLOBAL_ALIAS + PAGE_SIZE, false);
8522 }
8523
8524 vm_offset_t
8525 pmap_cpu_windows_copy_addr(int cpu_num, unsigned int index)
8526 {
8527 if (__improbable(index >= CPUWINDOWS_MAX)) {
8528 panic("%s: invalid index %u", __func__, index);
8529 }
8530 return (vm_offset_t)(CPUWINDOWS_BASE + (PAGE_SIZE * ((CPUWINDOWS_MAX * cpu_num) + index)));
8531 }
8532
8533 MARK_AS_PMAP_TEXT static unsigned int
8534 pmap_map_cpu_windows_copy_internal(
8535 ppnum_t pn,
8536 vm_prot_t prot,
8537 unsigned int wimg_bits)
8538 {
8539 pt_entry_t *ptep = NULL, pte;
8540 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
8541 unsigned int cpu_num;
8542 unsigned int i;
8543 vm_offset_t cpu_copywindow_vaddr = 0;
8544 bool need_strong_sync = false;
8545
8546
8547 cpu_num = pmap_cpu_data->cpu_number;
8548
8549 for (i = 0; i < CPUWINDOWS_MAX; i++) {
8550 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, i);
8551 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
8552 assert(!ARM_PTE_IS_COMPRESSED(*ptep, ptep));
8553 if (*ptep == ARM_PTE_TYPE_FAULT) {
8554 break;
8555 }
8556 }
8557 if (i == CPUWINDOWS_MAX) {
8558 panic("pmap_map_cpu_windows_copy: out of copy windows\n");
8559 }
8560
8561 pte = pa_to_pte(ptoa(pn)) | ARM_PTE_TYPE | ARM_PTE_AF | ARM_PTE_NX | ARM_PTE_PNX;
8562 #if __ARM_KERNEL_PROTECT__
8563 pte |= ARM_PTE_NG;
8564 #endif /* __ARM_KERNEL_PROTECT__ */
8565
8566 pte |= wimg_to_pte(wimg_bits);
8567
8568 if (prot & VM_PROT_WRITE) {
8569 pte |= ARM_PTE_AP(AP_RWNA);
8570 } else {
8571 pte |= ARM_PTE_AP(AP_RONA);
8572 }
8573
8574 WRITE_PTE_FAST(ptep, pte);
8575 /*
8576 * Invalidate the TLB. Nested use of cpu_copywindow_vaddr by an interrupted context is
8577 * covered in pmap_unmap_cpu_windows_copy(), after the pte is cleared and before the TLB invalidate.
8578 */
8579 FLUSH_PTE_STRONG(ptep);
8580 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[i]);
8581 pmap_cpu_data->copywindow_strong_sync[i] = need_strong_sync;
8582
8583 return i;
8584 }
8585
8586 unsigned int
8587 pmap_map_cpu_windows_copy(
8588 ppnum_t pn,
8589 vm_prot_t prot,
8590 unsigned int wimg_bits)
8591 {
8592 return pmap_map_cpu_windows_copy_internal(pn, prot, wimg_bits);
8593 }
8594
8595 MARK_AS_PMAP_TEXT static void
8596 pmap_unmap_cpu_windows_copy_internal(
8597 unsigned int index)
8598 {
8599 pt_entry_t *ptep;
8600 unsigned int cpu_num;
8601 vm_offset_t cpu_copywindow_vaddr = 0;
8602 pmap_cpu_data_t *pmap_cpu_data = pmap_get_cpu_data();
8603
8604 cpu_num = pmap_cpu_data->cpu_number;
8605
8606 cpu_copywindow_vaddr = pmap_cpu_windows_copy_addr(cpu_num, index);
8607 /* Issue full-system DSB to ensure prior operations on the per-CPU window
8608 * (which are likely to have been on I/O memory) are complete before
8609 * tearing down the mapping. */
8610 __builtin_arm_dsb(DSB_SY);
8611 ptep = pmap_pte(kernel_pmap, cpu_copywindow_vaddr);
8612 WRITE_PTE_STRONG(ptep, ARM_PTE_TYPE_FAULT);
8613 PMAP_UPDATE_TLBS(kernel_pmap, cpu_copywindow_vaddr, cpu_copywindow_vaddr + PAGE_SIZE, pmap_cpu_data->copywindow_strong_sync[index]);
8614 }
8615
8616 void
8617 pmap_unmap_cpu_windows_copy(
8618 unsigned int index)
8619 {
8620 return pmap_unmap_cpu_windows_copy_internal(index);
8621 }
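
/*
 * Usage sketch (illustrative): a per-CPU copy window is mapped, used and
 * torn down around a short physical-page access; 'wimg_bits' is whatever
 * cacheability the caller needs:
 *
 *	unsigned int i = pmap_map_cpu_windows_copy(pn,
 *	    VM_PROT_READ | VM_PROT_WRITE, wimg_bits);
 *	vm_offset_t va = pmap_cpu_windows_copy_addr(cpu_number(), i);
 *	... access the page through va ...
 *	pmap_unmap_cpu_windows_copy(i);
 *
 * The window belongs to the current CPU, so the caller is expected to keep
 * preemption disabled while the mapping is in use.
 */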
8622
8623 /*
8624 * Indicate that a pmap is intended to be used as a nested pmap
8625 * within one or more larger address spaces. This must be set
8626 * before pmap_nest() is called with this pmap as the 'subordinate'.
8627 */
8628 MARK_AS_PMAP_TEXT static void
8629 pmap_set_nested_internal(
8630 pmap_t pmap)
8631 {
8632 VALIDATE_PMAP(pmap);
8633 pmap->nested = TRUE;
8634 }
8635
8636 void
8637 pmap_set_nested(
8638 pmap_t pmap)
8639 {
8640 pmap_set_nested_internal(pmap);
8641 }
8642
8643 /*
8644 * pmap_trim_range(pmap, start, end)
8645 *
8646 * pmap = pmap to operate on
8647 * start = start of the range
8648 * end = end of the range
8649 *
8650 * Attempts to deallocate TTEs for the given range within the pmap's nested region.
8651 */
8652 MARK_AS_PMAP_TEXT static void
8653 pmap_trim_range(
8654 pmap_t pmap,
8655 addr64_t start,
8656 addr64_t end)
8657 {
8658 addr64_t cur;
8659 addr64_t nested_region_start;
8660 addr64_t nested_region_end;
8661 addr64_t adjusted_start;
8662 addr64_t adjusted_end;
8663 addr64_t adjust_offmask;
8664 tt_entry_t * tte_p;
8665 pt_entry_t * pte_p;
8666 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
8667
8668 if (__improbable(end < start)) {
8669 panic("%s: invalid address range, "
8670 "pmap=%p, start=%p, end=%p",
8671 __func__,
8672 pmap, (void*)start, (void*)end);
8673 }
8674
8675 nested_region_start = pmap->nested_region_subord_addr;
8676 nested_region_end = nested_region_start + pmap->nested_region_size;
8677
8678 if (__improbable((start < nested_region_start) || (end > nested_region_end))) {
8679 panic("%s: range outside nested region %p-%p, "
8680 "pmap=%p, start=%p, end=%p",
8681 __func__, (void *)nested_region_start, (void *)nested_region_end,
8682 pmap, (void*)start, (void*)end);
8683 }
8684
8685 /* Contract the range to TT page boundaries. */
8686 adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
8687 adjusted_start = ((start + adjust_offmask) & ~adjust_offmask);
8688 adjusted_end = end & ~adjust_offmask;
8689 bool modified = false;
8690
8691 /* Iterate over the range, trying to remove TTEs. */
8692 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += pt_attr_twig_size(pt_attr)) {
8693 PMAP_LOCK(pmap);
8694
8695 tte_p = pmap_tte(pmap, cur);
8696
8697 if (tte_p == (tt_entry_t *) NULL) {
8698 goto done;
8699 }
8700
8701 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
8702 pte_p = (pt_entry_t *) ttetokv(*tte_p);
8703
8704 if ((ptep_get_ptd(pte_p)->ptd_info[ARM_PT_DESC_INDEX(pte_p)].refcnt == 0) &&
8705 (pmap != kernel_pmap)) {
8706 if (pmap->nested == TRUE) {
8707 /* Deallocate for the nested map. */
8708 pmap_tte_deallocate(pmap, tte_p, pt_attr_twig_level(pt_attr));
8709 } else {
8710 /* Just remove for the parent map. */
8711 pmap_tte_remove(pmap, tte_p, pt_attr_twig_level(pt_attr));
8712 }
8713
8714 pmap_get_pt_ops(pmap)->flush_tlb_tte_async(cur, pmap);
8715 modified = true;
8716 }
8717 }
8718
8719 done:
8720 PMAP_UNLOCK(pmap);
8721 }
8722
8723 if (modified) {
8724 sync_tlb_flush();
8725 }
8726
8727 #if (__ARM_VMSA__ > 7)
8728 /* Remove empty L2 TTs. */
8729 adjusted_start = ((start + ARM_TT_L1_OFFMASK) & ~ARM_TT_L1_OFFMASK);
8730 adjusted_end = end & ~ARM_TT_L1_OFFMASK;
8731
8732 for (cur = adjusted_start; (cur < adjusted_end) && (cur >= adjusted_start); cur += ARM_TT_L1_SIZE) {
8733 /* For each L1 entry in our range... */
8734 PMAP_LOCK(pmap);
8735
8736 bool remove_tt1e = true;
8737 tt_entry_t * tt1e_p = pmap_tt1e(pmap, cur);
8738 tt_entry_t * tt2e_start;
8739 tt_entry_t * tt2e_end;
8740 tt_entry_t * tt2e_p;
8741 tt_entry_t tt1e;
8742
8743 if (tt1e_p == NULL) {
8744 PMAP_UNLOCK(pmap);
8745 continue;
8746 }
8747
8748 tt1e = *tt1e_p;
8749
8750 if (tt1e == ARM_TTE_TYPE_FAULT) {
8751 PMAP_UNLOCK(pmap);
8752 continue;
8753 }
8754
8755 tt2e_start = &((tt_entry_t*) phystokv(tt1e & ARM_TTE_TABLE_MASK))[0];
8756 tt2e_end = &tt2e_start[TTE_PGENTRIES];
8757
8758 for (tt2e_p = tt2e_start; tt2e_p < tt2e_end; tt2e_p++) {
8759 if (*tt2e_p != ARM_TTE_TYPE_FAULT) {
8760 /*
8761 * If any TTEs are populated, don't remove the
8762 * L1 TT.
8763 */
8764 remove_tt1e = false;
8765 }
8766 }
8767
8768 if (remove_tt1e) {
8769 pmap_tte_deallocate(pmap, tt1e_p, PMAP_TT_L1_LEVEL);
8770 PMAP_UPDATE_TLBS(pmap, cur, cur + PAGE_SIZE, false);
8771 }
8772
8773 PMAP_UNLOCK(pmap);
8774 }
8775 #endif /* (__ARM_VMSA__ > 7) */
8776 }
8777
8778 /*
8779 * pmap_trim_internal(grand, subord, vstart, nstart, size)
8780 *
8781 * grand = pmap subord is nested in
8782 * subord = nested pmap
8783 * vstart = start of the used range in grand
8784 * nstart = start of the used range in subord
8785 * size = size of the used range
8786 *
8787 * Attempts to trim the shared region page tables down to only cover the given
8788 * range in subord and grand.
8789 */
8790 MARK_AS_PMAP_TEXT static void
8791 pmap_trim_internal(
8792 pmap_t grand,
8793 pmap_t subord,
8794 addr64_t vstart,
8795 addr64_t nstart,
8796 uint64_t size)
8797 {
8798 addr64_t vend, nend;
8799 addr64_t adjust_offmask;
8800
8801 if (__improbable(os_add_overflow(vstart, size, &vend))) {
8802 panic("%s: grand addr wraps around, "
8803 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8804 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8805 }
8806
8807 if (__improbable(os_add_overflow(nstart, size, &nend))) {
8808 panic("%s: nested addr wraps around, "
8809 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8810 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8811 }
8812
8813 VALIDATE_PMAP(grand);
8814 VALIDATE_PMAP(subord);
8815
8816 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
8817
8818 PMAP_LOCK(subord);
8819
8820 if (!subord->nested) {
8821 panic("%s: subord is not nestable, "
8822 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8823 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8824 }
8825
8826 if (grand->nested) {
8827 panic("%s: grand is nestable, "
8828 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8829 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8830 }
8831
8832 if (grand->nested_pmap != subord) {
8833 panic("%s: grand->nested != subord, "
8834 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8835 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8836 }
8837
8838 if (size != 0) {
8839 if ((vstart < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
8840 panic("%s: grand range not in nested region, "
8841 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8842 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8843 }
8844
8845 if ((nstart < grand->nested_region_grand_addr) || (nend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
8846 panic("%s: subord range not in nested region, "
8847 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8848 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8849 }
8850 }
8851
8852
8853 if (!grand->nested_has_no_bounds_ref) {
8854 assert(subord->nested_bounds_set);
8855
8856 if (!grand->nested_bounds_set) {
8857 /* Inherit the bounds from subord. */
8858 grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8859 grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8860 grand->nested_bounds_set = true;
8861 }
8862
8863 PMAP_UNLOCK(subord);
8864 return;
8865 }
8866
8867 if ((!subord->nested_bounds_set) && size) {
8868 adjust_offmask = pt_attr_leaf_table_offmask(pt_attr);
8869
8870 subord->nested_region_true_start = nstart;
8871 subord->nested_region_true_end = nend;
8872 subord->nested_region_true_start &= ~adjust_offmask;
8873
8874 if (__improbable(os_add_overflow(subord->nested_region_true_end, adjust_offmask, &subord->nested_region_true_end))) {
8875 panic("%s: padded true end wraps around, "
8876 "grand=%p, subord=%p, vstart=%p, nstart=%p, size=%#llx",
8877 __func__, grand, subord, (void*)vstart, (void*)nstart, size);
8878 }
8879
8880 subord->nested_region_true_end &= ~adjust_offmask;
8881 subord->nested_bounds_set = true;
8882 }
8883
8884 if (subord->nested_bounds_set) {
8885 /* Inherit the bounds from subord. */
8886 grand->nested_region_true_start = (subord->nested_region_true_start - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8887 grand->nested_region_true_end = (subord->nested_region_true_end - grand->nested_region_subord_addr) + grand->nested_region_grand_addr;
8888 grand->nested_bounds_set = true;
8889
8890 /* If we know the bounds, we can trim the pmap. */
8891 grand->nested_has_no_bounds_ref = false;
8892 PMAP_UNLOCK(subord);
8893 } else {
8894 /* Don't trim if we don't know the bounds. */
8895 PMAP_UNLOCK(subord);
8896 return;
8897 }
8898
8899 /* Trim grand to only cover the given range. */
8900 pmap_trim_range(grand, grand->nested_region_grand_addr, grand->nested_region_true_start);
8901 pmap_trim_range(grand, grand->nested_region_true_end, (grand->nested_region_grand_addr + grand->nested_region_size));
8902
8903 /* Try to trim subord. */
8904 pmap_trim_subord(subord);
8905 }
8906
8907 MARK_AS_PMAP_TEXT static void
8908 pmap_trim_self(pmap_t pmap)
8909 {
8910 if (pmap->nested_has_no_bounds_ref && pmap->nested_pmap) {
8911 /* If we have a no bounds ref, we need to drop it. */
8912 PMAP_LOCK(pmap->nested_pmap);
8913 pmap->nested_has_no_bounds_ref = false;
8914 boolean_t nested_bounds_set = pmap->nested_pmap->nested_bounds_set;
8915 vm_map_offset_t nested_region_true_start = (pmap->nested_pmap->nested_region_true_start - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
8916 vm_map_offset_t nested_region_true_end = (pmap->nested_pmap->nested_region_true_end - pmap->nested_region_subord_addr) + pmap->nested_region_grand_addr;
8917 PMAP_UNLOCK(pmap->nested_pmap);
8918
8919 if (nested_bounds_set) {
8920 pmap_trim_range(pmap, pmap->nested_region_grand_addr, nested_region_true_start);
8921 pmap_trim_range(pmap, nested_region_true_end, (pmap->nested_region_grand_addr + pmap->nested_region_size));
8922 }
8923 /*
8924 * Try trimming the nested pmap, in case we had the
8925 * last reference.
8926 */
8927 pmap_trim_subord(pmap->nested_pmap);
8928 }
8929 }
8930
8931 /*
8932 * pmap_trim_subord(subord)
8933 *
8934 * subord = nested pmap we are attempting to trim
8935 *
8936 * Trims subord if possible, i.e. once the last no-bounds reference
8937 * has been dropped and subord's true bounds are known.
8938 */
8939 MARK_AS_PMAP_TEXT static void
8940 pmap_trim_subord(pmap_t subord)
8941 {
8942 bool contract_subord = false;
8943
8944 PMAP_LOCK(subord);
8945
8946 subord->nested_no_bounds_refcnt--;
8947
8948 if ((subord->nested_no_bounds_refcnt == 0) && (subord->nested_bounds_set)) {
8949 /* If this was the last no bounds reference, trim subord. */
8950 contract_subord = true;
8951 }
8952
8953 PMAP_UNLOCK(subord);
8954
8955 if (contract_subord) {
8956 pmap_trim_range(subord, subord->nested_region_subord_addr, subord->nested_region_true_start);
8957 pmap_trim_range(subord, subord->nested_region_true_end, subord->nested_region_subord_addr + subord->nested_region_size);
8958 }
8959 }
8960
8961 void
8962 pmap_trim(
8963 pmap_t grand,
8964 pmap_t subord,
8965 addr64_t vstart,
8966 addr64_t nstart,
8967 uint64_t size)
8968 {
8969 pmap_trim_internal(grand, subord, vstart, nstart, size);
8970 }
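
/*
 * Usage sketch (illustrative): once the VM layer knows how much of the
 * shared region a task actually uses, it can release the unused nested
 * page tables:
 *
 *	pmap_trim(task_pmap, shared_region_pmap,
 *	    used_start_in_grand, used_start_in_subord, used_size);
 *
 * The recorded bounds are widened to leaf-table boundaries internally, and
 * trimming is deferred until the bounds are known on both the grand and
 * subord side.
 */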
8971
8972
8973 /*
8974 * kern_return_t pmap_nest(grand, subord, vstart, nstart, size)
8975 *
8976 * grand = the pmap that we will nest subord into
8977 * subord = the pmap that goes into the grand
8978 * vstart = start of the range in grand where subord will be nested
8979 * nstart = start of the corresponding range in subord
8980 * size = Size of nest area (up to 16TB)
8981 *
8982 * Inserts a pmap into another. This is used to implement shared segments.
8983 *
8984 */
8985
8986 MARK_AS_PMAP_TEXT static kern_return_t
8987 pmap_nest_internal(
8988 pmap_t grand,
8989 pmap_t subord,
8990 addr64_t vstart,
8991 addr64_t nstart,
8992 uint64_t size)
8993 {
8994 kern_return_t kr = KERN_FAILURE;
8995 vm_map_offset_t vaddr, nvaddr;
8996 tt_entry_t *stte_p;
8997 tt_entry_t *gtte_p;
8998 unsigned int i;
8999 unsigned int num_tte;
9000 unsigned int nested_region_asid_bitmap_size;
9001 unsigned int* nested_region_asid_bitmap;
9002 int expand_options = 0;
9003
9004 addr64_t vend, nend;
9005 if (__improbable(os_add_overflow(vstart, size, &vend))) {
9006 panic("%s: %p grand addr wraps around: 0x%llx + 0x%llx", __func__, grand, vstart, size);
9007 }
9008 if (__improbable(os_add_overflow(nstart, size, &nend))) {
9009 panic("%s: %p nested addr wraps around: 0x%llx + 0x%llx", __func__, subord, nstart, size);
9010 }
9011
9012 VALIDATE_PMAP(grand);
9013 VALIDATE_PMAP(subord);
9014
9015 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
9016 assert(pmap_get_pt_attr(subord) == pt_attr);
9017
9018
9019 if (((size | vstart | nstart) & (pt_attr_leaf_table_offmask(pt_attr))) != 0x0ULL) {
9020 panic("pmap_nest() pmap %p unaligned nesting request 0x%llx, 0x%llx, 0x%llx\n", grand, vstart, nstart, size);
9021 }
9022
9023 if (!subord->nested) {
9024 panic("%s: subordinate pmap %p is not nestable", __func__, subord);
9025 }
9026
9027 if ((grand->nested_pmap != PMAP_NULL) && (grand->nested_pmap != subord)) {
9028 panic("pmap_nest() pmap %p has a nested pmap\n", grand);
9029 }
9030
9031 if (subord->nested_region_asid_bitmap == NULL) {
9032 nested_region_asid_bitmap_size = (unsigned int)(size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY);
9033
9034 nested_region_asid_bitmap = kalloc(nested_region_asid_bitmap_size * sizeof(unsigned int));
9035 bzero(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
9036
9037 PMAP_LOCK(subord);
9038 if (subord->nested_region_asid_bitmap == NULL) {
9039 subord->nested_region_asid_bitmap = nested_region_asid_bitmap;
9040 subord->nested_region_asid_bitmap_size = nested_region_asid_bitmap_size;
9041 subord->nested_region_subord_addr = nstart;
9042 subord->nested_region_size = (mach_vm_offset_t) size;
9043 nested_region_asid_bitmap = NULL;
9044 }
9045 PMAP_UNLOCK(subord);
9046 if (nested_region_asid_bitmap != NULL) {
9047 kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));
9048 }
9049 }
9050 if ((subord->nested_region_subord_addr + subord->nested_region_size) < nend) {
9051 uint64_t new_size;
9052 unsigned int new_nested_region_asid_bitmap_size;
9053 unsigned int* new_nested_region_asid_bitmap;
9054
9055 nested_region_asid_bitmap = NULL;
9056 nested_region_asid_bitmap_size = 0;
9057 new_size = nend - subord->nested_region_subord_addr;
9058
9059 /* We explicitly add 1 to the bitmap allocation size in order to avoid issues with truncation. */
9060 new_nested_region_asid_bitmap_size = (unsigned int)((new_size >> pt_attr_twig_shift(pt_attr)) / (sizeof(unsigned int) * NBBY)) + 1;
9061
9062 new_nested_region_asid_bitmap = kalloc(new_nested_region_asid_bitmap_size * sizeof(unsigned int));
9063 PMAP_LOCK(subord);
9064 if (subord->nested_region_size < new_size) {
9065 bzero(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));
9066 bcopy(subord->nested_region_asid_bitmap, new_nested_region_asid_bitmap, subord->nested_region_asid_bitmap_size);
9067 nested_region_asid_bitmap_size = subord->nested_region_asid_bitmap_size;
9068 nested_region_asid_bitmap = subord->nested_region_asid_bitmap;
9069 subord->nested_region_asid_bitmap = new_nested_region_asid_bitmap;
9070 subord->nested_region_asid_bitmap_size = new_nested_region_asid_bitmap_size;
9071 subord->nested_region_size = new_size;
9072 new_nested_region_asid_bitmap = NULL;
9073 }
9074 PMAP_UNLOCK(subord);
9075 if (nested_region_asid_bitmap != NULL)
9076 { kfree(nested_region_asid_bitmap, nested_region_asid_bitmap_size * sizeof(unsigned int));}
9077 if (new_nested_region_asid_bitmap != NULL)
9078 { kfree(new_nested_region_asid_bitmap, new_nested_region_asid_bitmap_size * sizeof(unsigned int));}
9079 }
9080
9081 PMAP_LOCK(subord);
9082 if (grand->nested_pmap == PMAP_NULL) {
9083 grand->nested_pmap = subord;
9084
9085 if (!subord->nested_bounds_set) {
9086 /*
9087 * We are nesting without the shared region's bounds
9088 * being known. We'll have to trim the pmap later.
9089 */
9090 grand->nested_has_no_bounds_ref = true;
9091 subord->nested_no_bounds_refcnt++;
9092 }
9093
9094 grand->nested_region_grand_addr = vstart;
9095 grand->nested_region_subord_addr = nstart;
9096 grand->nested_region_size = (mach_vm_offset_t) size;
9097 } else {
9098 if ((grand->nested_region_grand_addr > vstart)) {
9099 panic("pmap_nest() pmap %p : attempt to nest outside the nested region\n", grand);
9100 } else if ((grand->nested_region_grand_addr + grand->nested_region_size) < vend) {
9101 grand->nested_region_size = (mach_vm_offset_t)(vstart - grand->nested_region_grand_addr + size);
9102 }
9103 }
9104
9105 #if (__ARM_VMSA__ == 7)
9106 nvaddr = (vm_map_offset_t) nstart;
9107 vaddr = (vm_map_offset_t) vstart;
9108 num_tte = size >> ARM_TT_L1_SHIFT;
9109
9110 for (i = 0; i < num_tte; i++) {
9111 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
9112 goto expand_next;
9113 }
9114
9115 stte_p = pmap_tte(subord, nvaddr);
9116 if ((stte_p == (tt_entry_t *)NULL) || (((*stte_p) & ARM_TTE_TYPE_MASK) != ARM_TTE_TYPE_TABLE)) {
9117 PMAP_UNLOCK(subord);
9118 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_L2_LEVEL);
9119
9120 if (kr != KERN_SUCCESS) {
9121 PMAP_LOCK(grand);
9122 goto done;
9123 }
9124
9125 PMAP_LOCK(subord);
9126 }
9127 PMAP_UNLOCK(subord);
9128 PMAP_LOCK(grand);
9129 stte_p = pmap_tte(grand, vaddr);
9130 if (stte_p == (tt_entry_t *)NULL) {
9131 PMAP_UNLOCK(grand);
9132 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_L1_LEVEL);
9133
9134 if (kr != KERN_SUCCESS) {
9135 PMAP_LOCK(grand);
9136 goto done;
9137 }
9138 } else {
9139 PMAP_UNLOCK(grand);
9140 kr = KERN_SUCCESS;
9141 }
9142 PMAP_LOCK(subord);
9143
9144 expand_next:
9145 nvaddr += ARM_TT_L1_SIZE;
9146 vaddr += ARM_TT_L1_SIZE;
9147 }
9148
9149 #else
9150 nvaddr = (vm_map_offset_t) nstart;
9151 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
9152
9153 for (i = 0; i < num_tte; i++) {
9154 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
9155 goto expand_next;
9156 }
9157
9158 stte_p = pmap_tte(subord, nvaddr);
9159 if (stte_p == PT_ENTRY_NULL || *stte_p == ARM_TTE_EMPTY) {
9160 PMAP_UNLOCK(subord);
9161 kr = pmap_expand(subord, nvaddr, expand_options, PMAP_TT_LEAF_LEVEL);
9162
9163 if (kr != KERN_SUCCESS) {
9164 PMAP_LOCK(grand);
9165 goto done;
9166 }
9167
9168 PMAP_LOCK(subord);
9169 }
9170 expand_next:
9171 nvaddr += pt_attr_twig_size(pt_attr);
9172 }
9173 #endif
9174 PMAP_UNLOCK(subord);
9175
9176 /*
9177 * copy TTEs from the subord pmap into the grand pmap
9178 */
9179
9180 PMAP_LOCK(grand);
9181 nvaddr = (vm_map_offset_t) nstart;
9182 vaddr = (vm_map_offset_t) vstart;
9183
9184
9185 #if (__ARM_VMSA__ == 7)
9186 for (i = 0; i < num_tte; i++) {
9187 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
9188 goto nest_next;
9189 }
9190
9191 stte_p = pmap_tte(subord, nvaddr);
9192 gtte_p = pmap_tte(grand, vaddr);
9193 *gtte_p = *stte_p;
9194
9195 nest_next:
9196 nvaddr += ARM_TT_L1_SIZE;
9197 vaddr += ARM_TT_L1_SIZE;
9198 }
9199 #else
9200 for (i = 0; i < num_tte; i++) {
9201 if (((subord->nested_region_true_start) > nvaddr) || ((subord->nested_region_true_end) <= nvaddr)) {
9202 goto nest_next;
9203 }
9204
9205 stte_p = pmap_tte(subord, nvaddr);
9206 gtte_p = pmap_tte(grand, vaddr);
9207 if (gtte_p == PT_ENTRY_NULL) {
9208 PMAP_UNLOCK(grand);
9209 kr = pmap_expand(grand, vaddr, expand_options, PMAP_TT_TWIG_LEVEL);
9210 PMAP_LOCK(grand);
9211
9212 if (kr != KERN_SUCCESS) {
9213 goto done;
9214 }
9215
9216 gtte_p = pmap_tt2e(grand, vaddr);
9217 }
9218 *gtte_p = *stte_p;
9219
9220 nest_next:
9221 vaddr += pt_attr_twig_size(pt_attr);
9222 nvaddr += pt_attr_twig_size(pt_attr);
9223 }
9224 #endif
9225
9226 kr = KERN_SUCCESS;
9227 done:
9228
9229 stte_p = pmap_tte(grand, vstart);
9230 FLUSH_PTE_RANGE_STRONG(stte_p, stte_p + num_tte);
9231
9232 #if (__ARM_VMSA__ > 7)
9233 /*
9234 * check for overflow on LP64 arch
9235 */
9236 assert((size & 0xFFFFFFFF00000000ULL) == 0);
9237 #endif
9238 PMAP_UPDATE_TLBS(grand, vstart, vend, false);
9239
9240 PMAP_UNLOCK(grand);
9241 return kr;
9242 }
9243
9244 kern_return_t
9245 pmap_nest(
9246 pmap_t grand,
9247 pmap_t subord,
9248 addr64_t vstart,
9249 addr64_t nstart,
9250 uint64_t size)
9251 {
9252 kern_return_t kr = KERN_FAILURE;
9253
9254 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_START,
9255 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(subord),
9256 VM_KERNEL_ADDRHIDE(vstart));
9257
9258 kr = pmap_nest_internal(grand, subord, vstart, nstart, size);
9259
9260 PMAP_TRACE(2, PMAP_CODE(PMAP__NEST) | DBG_FUNC_END, kr);
9261
9262 return kr;
9263 }
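
/*
 * Usage sketch (illustrative): setting up a shared-region pmap and nesting
 * it into a task pmap; SHARED_BASE and SHARED_SIZE stand in for the
 * platform's shared-region constants:
 *
 *	pmap_set_nested(shared_pmap);		// mark as nestable (subord)
 *	kr = pmap_nest(task_pmap, shared_pmap,
 *	    SHARED_BASE, SHARED_BASE, SHARED_SIZE);
 *
 * vstart, nstart and size must be aligned to the span covered by a leaf
 * page table (pt_attr_leaf_table_offmask()), or pmap_nest_internal()
 * panics.
 */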
9264
9265 /*
9266 * kern_return_t pmap_unnest(grand, vaddr, size)
9267 *
9268 * grand = the pmap that will have the virtual range unnested
9269 * vaddr = start of range in pmap to be unnested
9270 * size = size of range in pmap to be unnested
9271 *
9272 */
9273
9274 kern_return_t
9275 pmap_unnest(
9276 pmap_t grand,
9277 addr64_t vaddr,
9278 uint64_t size)
9279 {
9280 return pmap_unnest_options(grand, vaddr, size, 0);
9281 }
9282
9283 MARK_AS_PMAP_TEXT static kern_return_t
9284 pmap_unnest_options_internal(
9285 pmap_t grand,
9286 addr64_t vaddr,
9287 uint64_t size,
9288 unsigned int option)
9289 {
9290 vm_map_offset_t start;
9291 vm_map_offset_t addr;
9292 tt_entry_t *tte_p;
9293 unsigned int current_index;
9294 unsigned int start_index;
9295 unsigned int max_index;
9296 unsigned int num_tte;
9297 unsigned int i;
9298
9299 addr64_t vend;
9300 if (__improbable(os_add_overflow(vaddr, size, &vend))) {
9301 panic("%s: %p vaddr wraps around: 0x%llx + 0x%llx", __func__, grand, vaddr, size);
9302 }
9303
9304 VALIDATE_PMAP(grand);
9305
9306 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(grand);
9307
9308 if (((size | vaddr) & pt_attr_twig_offmask(pt_attr)) != 0x0ULL) {
9309 panic("pmap_unnest(): unaligned request");
9310 }
9311
9312 if ((option & PMAP_UNNEST_CLEAN) == 0) {
9313 if (grand->nested_pmap == NULL) {
9314 panic("%s: %p has no nested pmap", __func__, grand);
9315 }
9316
9317 if ((vaddr < grand->nested_region_grand_addr) || (vend > (grand->nested_region_grand_addr + grand->nested_region_size))) {
9318 panic("%s: %p: unnest request covers a range not fully contained in the nested region [%p, %p)", __func__, grand, (void*)vaddr, (void*)vend);
9319 }
9320
9321 PMAP_LOCK(grand->nested_pmap);
9322
9323 start = vaddr - grand->nested_region_grand_addr + grand->nested_region_subord_addr;
9324 start_index = (unsigned int)((vaddr - grand->nested_region_grand_addr) >> pt_attr_twig_shift(pt_attr));
9325 max_index = (unsigned int)(start_index + (size >> pt_attr_twig_shift(pt_attr)));
9326 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
9327
9328 for (current_index = start_index, addr = start; current_index < max_index; current_index++, addr += pt_attr_twig_size(pt_attr)) {
9329 pt_entry_t *bpte, *epte, *cpte;
9330
9331 if (addr < grand->nested_pmap->nested_region_true_start) {
9332 /* We haven't reached the interesting range. */
9333 continue;
9334 }
9335
9336 if (addr >= grand->nested_pmap->nested_region_true_end) {
9337 /* We're done with the interesting range. */
9338 break;
9339 }
9340
9341 bpte = pmap_pte(grand->nested_pmap, addr);
9342 epte = bpte + (pt_attr_leaf_index_mask(pt_attr) >> pt_attr_leaf_shift(pt_attr));
9343
9344 if (!testbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap)) {
9345 setbit(current_index, (int *)grand->nested_pmap->nested_region_asid_bitmap);
9346
9347 for (cpte = bpte; cpte <= epte; cpte++) {
9348 pmap_paddr_t pa;
9349 int pai = 0;
9350 boolean_t managed = FALSE;
9351 pt_entry_t spte;
9352
9353 if ((*cpte != ARM_PTE_TYPE_FAULT)
9354 && (!ARM_PTE_IS_COMPRESSED(*cpte, cpte))) {
9355 spte = *cpte;
9356 while (!managed) {
9357 pa = pte_to_pa(spte);
9358 if (!pa_valid(pa)) {
9359 break;
9360 }
9361 pai = (int)pa_index(pa);
9362 LOCK_PVH(pai);
9363 spte = *cpte;
9364 pa = pte_to_pa(spte);
9365 if (pai == (int)pa_index(pa)) {
9366 managed = TRUE;
9367 break; // Leave the PVH locked as we'll unlock it after we update the PTE
9368 }
9369 UNLOCK_PVH(pai);
9370 }
9371
9372 if (((spte & ARM_PTE_NG) != ARM_PTE_NG)) {
9373 WRITE_PTE_FAST(cpte, (spte | ARM_PTE_NG));
9374 }
9375
9376 if (managed) {
9377 ASSERT_PVH_LOCKED(pai);
9378 UNLOCK_PVH(pai);
9379 }
9380 }
9381 }
9382 }
9383
9384 FLUSH_PTE_RANGE_STRONG(bpte, epte);
9385 flush_mmu_tlb_region_asid_async(start, (unsigned)size, grand->nested_pmap);
9386 }
9387
9388 sync_tlb_flush();
9389
9390 PMAP_UNLOCK(grand->nested_pmap);
9391 }
9392
9393 PMAP_LOCK(grand);
9394
9395 /*
9396 * invalidate all pdes for segment at vaddr in pmap grand
9397 */
9398 start = vaddr;
9399 addr = vaddr;
9400
9401 num_tte = (unsigned int)(size >> pt_attr_twig_shift(pt_attr));
9402
9403 for (i = 0; i < num_tte; i++, addr += pt_attr_twig_size(pt_attr)) {
9404 if (addr < grand->nested_pmap->nested_region_true_start) {
9405 /* We haven't reached the interesting range. */
9406 continue;
9407 }
9408
9409 if (addr >= grand->nested_pmap->nested_region_true_end) {
9410 /* We're done with the interesting range. */
9411 break;
9412 }
9413
9414 tte_p = pmap_tte(grand, addr);
9415 *tte_p = ARM_TTE_TYPE_FAULT;
9416 }
9417
9418 tte_p = pmap_tte(grand, start);
9419 FLUSH_PTE_RANGE_STRONG(tte_p, tte_p + num_tte);
9420 PMAP_UPDATE_TLBS(grand, start, vend, false);
9421
9422 PMAP_UNLOCK(grand);
9423
9424 return KERN_SUCCESS;
9425 }
9426
9427 kern_return_t
9428 pmap_unnest_options(
9429 pmap_t grand,
9430 addr64_t vaddr,
9431 uint64_t size,
9432 unsigned int option)
9433 {
9434 kern_return_t kr = KERN_FAILURE;
9435
9436 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_START,
9437 VM_KERNEL_ADDRHIDE(grand), VM_KERNEL_ADDRHIDE(vaddr));
9438
9439 kr = pmap_unnest_options_internal(grand, vaddr, size, option);
9440
9441 PMAP_TRACE(2, PMAP_CODE(PMAP__UNNEST) | DBG_FUNC_END, kr);
9442
9443 return kr;
9444 }
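
/*
 * Usage sketch (illustrative): tearing down the nested range when a task's
 * address space is dismantled; SHARED_BASE and SHARED_SIZE stand in for
 * the platform's shared-region constants:
 *
 *	kr = pmap_unnest(task_pmap, SHARED_BASE, SHARED_SIZE);
 *
 * The range must be twig-aligned. Unless PMAP_UNNEST_CLEAN is passed via
 * pmap_unnest_options(), the shared PTEs are first marked non-global
 * (ARM_PTE_NG) in the nested pmap so stale global TLB entries cannot
 * outlive the unnesting.
 */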
9445
9446 boolean_t
9447 pmap_adjust_unnest_parameters(
9448 __unused pmap_t p,
9449 __unused vm_map_offset_t *s,
9450 __unused vm_map_offset_t *e)
9451 {
9452 return TRUE; /* to get to log_unnest_badness()... */
9453 }
9454
9455 /*
9456 * disable no-execute capability on
9457 * the specified pmap
9458 */
9459 #if DEVELOPMENT || DEBUG
9460 void
9461 pmap_disable_NX(
9462 pmap_t pmap)
9463 {
9464 pmap->nx_enabled = FALSE;
9465 }
9466 #else
9467 void
9468 pmap_disable_NX(
9469 __unused pmap_t pmap)
9470 {
9471 }
9472 #endif
9473
9474 void
9475 pt_fake_zone_init(
9476 int zone_index)
9477 {
9478 pt_fake_zone_index = zone_index;
9479 }
9480
9481 void
9482 pt_fake_zone_info(
9483 int *count,
9484 vm_size_t *cur_size, vm_size_t *max_size, vm_size_t *elem_size, vm_size_t *alloc_size,
9485 uint64_t *sum_size, int *collectable, int *exhaustable, int *caller_acct)
9486 {
9487 *count = inuse_pmap_pages_count;
9488 *cur_size = PAGE_SIZE * (inuse_pmap_pages_count);
9489 *max_size = PAGE_SIZE * (inuse_pmap_pages_count + vm_page_inactive_count + vm_page_active_count + vm_page_free_count);
9490 *elem_size = PAGE_SIZE;
9491 *alloc_size = PAGE_SIZE;
9492 *sum_size = (alloc_pmap_pages_count) * PAGE_SIZE;
9493
9494 *collectable = 1;
9495 *exhaustable = 0;
9496 *caller_acct = 1;
9497 }
9498
9499 /*
9500 * flush a range of hardware TLB entries.
9501 * NOTE: assumes the smallest TLB entry in use will be for
9502 * an ARM small page (4K).
9503 */
9504
9505 #define ARM_FULL_TLB_FLUSH_THRESHOLD 64
9506
9507 #if __ARM_RANGE_TLBI__
9508 #define ARM64_RANGE_TLB_FLUSH_THRESHOLD 1
9509 #define ARM64_FULL_TLB_FLUSH_THRESHOLD ARM64_16K_TLB_RANGE_PAGES
9510 #else
9511 #define ARM64_FULL_TLB_FLUSH_THRESHOLD 256
9512 #endif // __ARM_RANGE_TLBI__
9513
9514 static void
9515 flush_mmu_tlb_region_asid_async(
9516 vm_offset_t va,
9517 unsigned length,
9518 pmap_t pmap)
9519 {
9520 #if (__ARM_VMSA__ == 7)
9521 vm_offset_t end = va + length;
9522 uint32_t asid;
9523
9524 asid = pmap->hw_asid;
9525
9526 if (length / ARM_SMALL_PAGE_SIZE > ARM_FULL_TLB_FLUSH_THRESHOLD) {
9527 boolean_t flush_all = FALSE;
9528
9529 if ((asid == 0) || (pmap->nested == TRUE)) {
9530 flush_all = TRUE;
9531 }
9532 if (flush_all) {
9533 flush_mmu_tlb_async();
9534 } else {
9535 flush_mmu_tlb_asid_async(asid);
9536 }
9537
9538 return;
9539 }
9540 if (pmap->nested == TRUE) {
9541 #if !__ARM_MP_EXT__
9542 flush_mmu_tlb();
9543 #else
9544 va = arm_trunc_page(va);
9545 while (va < end) {
9546 flush_mmu_tlb_mva_entries_async(va);
9547 va += ARM_SMALL_PAGE_SIZE;
9548 }
9549 #endif
9550 return;
9551 }
9552 va = arm_trunc_page(va) | (asid & 0xff);
9553 flush_mmu_tlb_entries_async(va, end);
9554
9555 #else
9556 unsigned npages = length >> pt_attr_leaf_shift(pmap_get_pt_attr(pmap));
9557 uint32_t asid;
9558
9559 asid = pmap->hw_asid;
9560
9561 if (npages > ARM64_FULL_TLB_FLUSH_THRESHOLD) {
9562 boolean_t flush_all = FALSE;
9563
9564 if ((asid == 0) || (pmap->nested == TRUE)) {
9565 flush_all = TRUE;
9566 }
9567 if (flush_all) {
9568 flush_mmu_tlb_async();
9569 } else {
9570 flush_mmu_tlb_asid_async((uint64_t)asid << TLBI_ASID_SHIFT);
9571 }
9572 return;
9573 }
9574 #if __ARM_RANGE_TLBI__
9575 if (npages > ARM64_RANGE_TLB_FLUSH_THRESHOLD) {
9576 va = generate_rtlbi_param(npages, asid, va);
9577 if (pmap->nested == TRUE) {
9578 flush_mmu_tlb_allrange_async(va);
9579 } else {
9580 flush_mmu_tlb_range_async(va);
9581 }
9582 return;
9583 }
9584 #endif
9585 vm_offset_t end = tlbi_asid(asid) | tlbi_addr(va + length);
9586 va = tlbi_asid(asid) | tlbi_addr(va);
9587 if (pmap->nested == TRUE) {
9588 flush_mmu_tlb_allentries_async(va, end);
9589 } else {
9590 flush_mmu_tlb_entries_async(va, end);
9591 }
9592
9593 #endif
9594 }
9595
9596 MARK_AS_PMAP_TEXT static void
9597 flush_mmu_tlb_tte_asid_async(vm_offset_t va, pmap_t pmap)
9598 {
9599 #if (__ARM_VMSA__ == 7)
9600 flush_mmu_tlb_entry_async((va & ~ARM_TT_L1_PT_OFFMASK) | (pmap->hw_asid & 0xff));
9601 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
9602 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 2 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
9603 flush_mmu_tlb_entry_async(((va & ~ARM_TT_L1_PT_OFFMASK) + 3 * ARM_TT_L1_SIZE) | (pmap->hw_asid & 0xff));
9604 #else
9605 flush_mmu_tlb_entry_async(tlbi_addr(va & ~pt_attr_twig_offmask(pmap_get_pt_attr(pmap))) | tlbi_asid(pmap->hw_asid));
9606 #endif
9607 }
9608
9609 MARK_AS_PMAP_TEXT static void
9610 flush_mmu_tlb_full_asid_async(pmap_t pmap)
9611 {
9612 #if (__ARM_VMSA__ == 7)
9613 flush_mmu_tlb_asid_async(pmap->hw_asid);
9614 #else /* (__ARM_VMSA__ == 7) */
9615 flush_mmu_tlb_asid_async((uint64_t)(pmap->hw_asid) << TLBI_ASID_SHIFT);
9616 #endif /* (__ARM_VMSA__ == 7) */
9617 }
9618
9619 void
9620 flush_mmu_tlb_region(
9621 vm_offset_t va,
9622 unsigned length)
9623 {
9624 flush_mmu_tlb_region_asid_async(va, length, kernel_pmap);
9625 sync_tlb_flush();
9626 }
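
/*
 * Usage sketch (illustrative): external callers flush a kernel VA range
 * with the synchronous wrapper above:
 *
 *	flush_mmu_tlb_region(va, (unsigned)size);
 *
 * pmap-internal paths instead queue the asynchronous variant and batch the
 * barrier through sync_tlb_flush(), as PMAP_UPDATE_TLBS() does.
 */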
9627
9628 static pmap_io_range_t*
9629 pmap_find_io_attr(pmap_paddr_t paddr)
9630 {
9631 pmap_io_range_t find_range = {.addr = paddr & ~PAGE_MASK, .len = PAGE_SIZE};
9632 unsigned int begin = 0, end = num_io_rgns - 1;
9633 if ((num_io_rgns == 0) || (paddr < io_attr_table[begin].addr) ||
9634 (paddr >= (io_attr_table[end].addr + io_attr_table[end].len))) {
9635 return NULL;
9636 }
9637
9638 for (;;) {
9639 unsigned int middle = (begin + end) / 2;
9640 int cmp = cmp_io_rgns(&find_range, &io_attr_table[middle]);
9641 if (cmp == 0) {
9642 return &io_attr_table[middle];
9643 } else if (begin == end) {
9644 break;
9645 } else if (cmp > 0) {
9646 begin = middle + 1;
9647 } else {
9648 end = middle;
9649 }
9650 }
9651
9652 return NULL;
9653 }
9654
9655 unsigned int
9656 pmap_cache_attributes(
9657 ppnum_t pn)
9658 {
9659 pmap_paddr_t paddr;
9660 int pai;
9661 unsigned int result;
9662 pp_attr_t pp_attr_current;
9663
9664 paddr = ptoa(pn);
9665
9666 assert(vm_last_phys > vm_first_phys); // Check that pmap has been bootstrapped
9667
9668 if (!pa_valid(paddr)) {
9669 pmap_io_range_t *io_rgn = pmap_find_io_attr(paddr);
9670 return (io_rgn == NULL) ? VM_WIMG_IO : io_rgn->wimg;
9671 }
9672
9673 result = VM_WIMG_DEFAULT;
9674
9675 pai = (int)pa_index(paddr);
9676
9677 pp_attr_current = pp_attr_table[pai];
9678 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
9679 result = pp_attr_current & PP_ATTR_WIMG_MASK;
9680 }
9681 return result;
9682 }
9683
9684 MARK_AS_PMAP_TEXT static void
9685 pmap_sync_wimg(ppnum_t pn, unsigned int wimg_bits_prev, unsigned int wimg_bits_new)
9686 {
9687 if ((wimg_bits_prev != wimg_bits_new)
9688 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
9689 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
9690 && (wimg_bits_new != VM_WIMG_COPYBACK))
9691 || ((wimg_bits_prev == VM_WIMG_WTHRU)
9692 && ((wimg_bits_new != VM_WIMG_COPYBACK) || (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
9693 pmap_sync_page_attributes_phys(pn);
9694 }
9695
9696 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
9697 pmap_force_dcache_clean(phystokv(ptoa(pn)), PAGE_SIZE);
9698 }
9699 }
9700
9701 MARK_AS_PMAP_TEXT static __unused void
9702 pmap_update_compressor_page_internal(ppnum_t pn, unsigned int prev_cacheattr, unsigned int new_cacheattr)
9703 {
9704 pmap_paddr_t paddr = ptoa(pn);
9705 int pai = (int)pa_index(paddr);
9706
9707 if (__improbable(!pa_valid(paddr))) {
9708 panic("%s called on non-managed page 0x%08x", __func__, pn);
9709 }
9710
9711 LOCK_PVH(pai);
9712
9713
9714 pmap_update_cache_attributes_locked(pn, new_cacheattr);
9715
9716 UNLOCK_PVH(pai);
9717
9718 pmap_sync_wimg(pn, prev_cacheattr & VM_WIMG_MASK, new_cacheattr & VM_WIMG_MASK);
9719 }
9720
9721 void *
9722 pmap_map_compressor_page(ppnum_t pn)
9723 {
9724 #if __ARM_PTE_PHYSMAP__
9725 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
9726 if (cacheattr != VM_WIMG_DEFAULT) {
9727 pmap_update_compressor_page_internal(pn, cacheattr, VM_WIMG_DEFAULT);
9728 }
9729 #endif
9730 return (void*)phystokv(ptoa(pn));
9731 }
9732
9733 void
9734 pmap_unmap_compressor_page(ppnum_t pn __unused, void *kva __unused)
9735 {
9736 #if __ARM_PTE_PHYSMAP__
9737 unsigned int cacheattr = pmap_cache_attributes(pn) & VM_WIMG_MASK;
9738 if (cacheattr != VM_WIMG_DEFAULT) {
9739 pmap_update_compressor_page_internal(pn, VM_WIMG_DEFAULT, cacheattr);
9740 }
9741 #endif
9742 }
9743
9744 MARK_AS_PMAP_TEXT static boolean_t
9745 pmap_batch_set_cache_attributes_internal(
9746 ppnum_t pn,
9747 unsigned int cacheattr,
9748 unsigned int page_cnt,
9749 unsigned int page_index,
9750 boolean_t doit,
9751 unsigned int *res)
9752 {
9753 pmap_paddr_t paddr;
9754 int pai;
9755 pp_attr_t pp_attr_current;
9756 pp_attr_t pp_attr_template;
9757 unsigned int wimg_bits_prev, wimg_bits_new;
9758
9759 if (cacheattr & VM_WIMG_USE_DEFAULT) {
9760 cacheattr = VM_WIMG_DEFAULT;
9761 }
9762
9763 if ((doit == FALSE) && (*res == 0)) {
9764 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
9765 *res = page_cnt;
9766 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
9767 if (platform_cache_batch_wimg(cacheattr & (VM_WIMG_MASK), page_cnt << PAGE_SHIFT) == FALSE) {
9768 return FALSE;
9769 }
9770 }
9771
9772 paddr = ptoa(pn);
9773
9774 if (!pa_valid(paddr)) {
9775 panic("pmap_batch_set_cache_attributes(): pn 0x%08x not managed", pn);
9776 }
9777
9778 pai = (int)pa_index(paddr);
9779
9780 if (doit) {
9781 LOCK_PVH(pai);
9782 }
9783
9784 do {
9785 pp_attr_current = pp_attr_table[pai];
9786 wimg_bits_prev = VM_WIMG_DEFAULT;
9787 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
9788 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
9789 }
9790
9791 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
9792
9793 if (!doit) {
9794 break;
9795 }
9796
9797 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
9798 * to avoid losing simultaneous updates to other bits like refmod. */
9799 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
9800
9801 wimg_bits_new = VM_WIMG_DEFAULT;
9802 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
9803 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
9804 }
9805
9806 if (doit) {
9807 if (wimg_bits_new != wimg_bits_prev) {
9808 pmap_update_cache_attributes_locked(pn, cacheattr);
9809 }
9810 UNLOCK_PVH(pai);
9811 if ((wimg_bits_new == VM_WIMG_RT) && (wimg_bits_prev != VM_WIMG_RT)) {
9812 pmap_force_dcache_clean(phystokv(paddr), PAGE_SIZE);
9813 }
9814 } else {
9815 if (wimg_bits_new == VM_WIMG_COPYBACK) {
9816 return FALSE;
9817 }
9818 if (wimg_bits_prev == wimg_bits_new) {
9819 pmap_pin_kernel_pages((vm_offset_t)res, sizeof(*res));
9820 *res = *res - 1;
9821 pmap_unpin_kernel_pages((vm_offset_t)res, sizeof(*res));
9822 if (!platform_cache_batch_wimg(wimg_bits_new, (*res) << PAGE_SHIFT)) {
9823 return FALSE;
9824 }
9825 }
9826 return TRUE;
9827 }
9828
9829 if (page_cnt == (page_index + 1)) {
9830 wimg_bits_prev = VM_WIMG_COPYBACK;
9831 if (((wimg_bits_prev != wimg_bits_new))
9832 && ((wimg_bits_prev == VM_WIMG_COPYBACK)
9833 || ((wimg_bits_prev == VM_WIMG_INNERWBACK)
9834 && (wimg_bits_new != VM_WIMG_COPYBACK))
9835 || ((wimg_bits_prev == VM_WIMG_WTHRU)
9836 && ((wimg_bits_new != VM_WIMG_COPYBACK) && (wimg_bits_new != VM_WIMG_INNERWBACK))))) {
9837 platform_cache_flush_wimg(wimg_bits_new);
9838 }
9839 }
9840
9841 return TRUE;
9842 }
9843
9844 boolean_t
9845 pmap_batch_set_cache_attributes(
9846 ppnum_t pn,
9847 unsigned int cacheattr,
9848 unsigned int page_cnt,
9849 unsigned int page_index,
9850 boolean_t doit,
9851 unsigned int *res)
9852 {
9853 return pmap_batch_set_cache_attributes_internal(pn, cacheattr, page_cnt, page_index, doit, res);
9854 }
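
/*
 * Illustrative sketch (hypothetical caller, not from the original source):
 * pmap_batch_set_cache_attributes() is meant to be driven in two passes over
 * the same page list -- a dry run with doit == FALSE that primes *res and
 * reports whether a batched cache operation is worthwhile, then a pass with
 * doit == TRUE that actually rewrites the WIMG bits and performs per-page
 * maintenance. The page array and fallback policy shown here are assumptions.
 */
#if 0 /* illustrative only */
static void
example_batch_set_wimg(ppnum_t *pages, unsigned int page_cnt, unsigned int cacheattr)
{
    unsigned int res = 0;
    unsigned int i;
    boolean_t batch_ok = TRUE;

    /* Pass 1: dry run. *res is set to page_cnt on the first call. */
    for (i = 0; i < page_cnt; i++) {
        if (!pmap_batch_set_cache_attributes(pages[i], cacheattr, page_cnt, i, FALSE, &res)) {
            batch_ok = FALSE;
        }
    }

    /* Pass 2: apply the attribute change. */
    for (i = 0; i < page_cnt; i++) {
        pmap_batch_set_cache_attributes(pages[i], cacheattr, page_cnt, i, TRUE, &res);
    }

    if (!batch_ok) {
        /* A caller might fall back to explicit per-page cache flushes here. */
    }
}
#endif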
9855
9856 MARK_AS_PMAP_TEXT static void
9857 pmap_set_cache_attributes_priv(
9858 ppnum_t pn,
9859 unsigned int cacheattr,
9860 boolean_t external __unused)
9861 {
9862 pmap_paddr_t paddr;
9863 int pai;
9864 pp_attr_t pp_attr_current;
9865 pp_attr_t pp_attr_template;
9866 unsigned int wimg_bits_prev, wimg_bits_new;
9867
9868 paddr = ptoa(pn);
9869
9870 if (!pa_valid(paddr)) {
9871 return; /* Not a managed page. */
9872 }
9873
9874 if (cacheattr & VM_WIMG_USE_DEFAULT) {
9875 cacheattr = VM_WIMG_DEFAULT;
9876 }
9877
9878 pai = (int)pa_index(paddr);
9879
9880 LOCK_PVH(pai);
9881
9882
9883 do {
9884 pp_attr_current = pp_attr_table[pai];
9885 wimg_bits_prev = VM_WIMG_DEFAULT;
9886 if (pp_attr_current & PP_ATTR_WIMG_MASK) {
9887 wimg_bits_prev = pp_attr_current & PP_ATTR_WIMG_MASK;
9888 }
9889
9890 pp_attr_template = (pp_attr_current & ~PP_ATTR_WIMG_MASK) | PP_ATTR_WIMG(cacheattr & (VM_WIMG_MASK));
9891
9892 /* WIMG bits should only be updated under the PVH lock, but we should do this in a CAS loop
9893 * to avoid losing simultaneous updates to other bits like refmod. */
9894 } while (!OSCompareAndSwap16(pp_attr_current, pp_attr_template, &pp_attr_table[pai]));
9895
9896 wimg_bits_new = VM_WIMG_DEFAULT;
9897 if (pp_attr_template & PP_ATTR_WIMG_MASK) {
9898 wimg_bits_new = pp_attr_template & PP_ATTR_WIMG_MASK;
9899 }
9900
9901 if (wimg_bits_new != wimg_bits_prev) {
9902 pmap_update_cache_attributes_locked(pn, cacheattr);
9903 }
9904
9905 UNLOCK_PVH(pai);
9906
9907 pmap_sync_wimg(pn, wimg_bits_prev, wimg_bits_new);
9908 }
9909
9910 MARK_AS_PMAP_TEXT static void
9911 pmap_set_cache_attributes_internal(
9912 ppnum_t pn,
9913 unsigned int cacheattr)
9914 {
9915 pmap_set_cache_attributes_priv(pn, cacheattr, TRUE);
9916 }
9917
9918 void
9919 pmap_set_cache_attributes(
9920 ppnum_t pn,
9921 unsigned int cacheattr)
9922 {
9923 pmap_set_cache_attributes_internal(pn, cacheattr);
9924 }
9925
9926 MARK_AS_PMAP_TEXT void
9927 pmap_update_cache_attributes_locked(
9928 ppnum_t ppnum,
9929 unsigned attributes)
9930 {
9931 pmap_paddr_t phys = ptoa(ppnum);
9932 pv_entry_t *pve_p;
9933 pt_entry_t *pte_p;
9934 pv_entry_t **pv_h;
9935 pt_entry_t tmplate;
9936 unsigned int pai;
9937 boolean_t tlb_flush_needed = FALSE;
9938
9939 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_START, ppnum, attributes);
9940
9941 #if __ARM_PTE_PHYSMAP__
9942 vm_offset_t kva = phystokv(phys);
9943 pte_p = pmap_pte(kernel_pmap, kva);
9944
9945 tmplate = *pte_p;
9946 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
9947 tmplate |= wimg_to_pte(attributes);
9948 #if (__ARM_VMSA__ > 7)
9949 if (tmplate & ARM_PTE_HINT_MASK) {
9950 panic("%s: physical aperture PTE %p has hint bit set, va=%p, pte=0x%llx",
9951 __FUNCTION__, pte_p, (void *)kva, tmplate);
9952 }
9953 #endif
9954 WRITE_PTE_STRONG(pte_p, tmplate);
9955 flush_mmu_tlb_region_asid_async(kva, PAGE_SIZE, kernel_pmap);
9956 tlb_flush_needed = TRUE;
9957 #endif
9958
9959 pai = (unsigned int)pa_index(phys);
9960
9961 pv_h = pai_to_pvh(pai);
9962
9963 pte_p = PT_ENTRY_NULL;
9964 pve_p = PV_ENTRY_NULL;
9965 if (pvh_test_type(pv_h, PVH_TYPE_PTEP)) {
9966 pte_p = pvh_ptep(pv_h);
9967 } else if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
9968 pve_p = pvh_list(pv_h);
9969 pte_p = PT_ENTRY_NULL;
9970 }
9971
9972 while ((pve_p != PV_ENTRY_NULL) || (pte_p != PT_ENTRY_NULL)) {
9973 vm_map_address_t va;
9974 pmap_t pmap;
9975
9976 if (pve_p != PV_ENTRY_NULL) {
9977 pte_p = pve_get_ptep(pve_p);
9978 }
9979 #ifdef PVH_FLAG_IOMMU
9980 if ((vm_offset_t)pte_p & PVH_FLAG_IOMMU) {
9981 goto cache_skip_pve;
9982 }
9983 #endif
9984 pmap = ptep_get_pmap(pte_p);
9985 va = ptep_get_va(pte_p);
9986
9987 tmplate = *pte_p;
9988 tmplate &= ~(ARM_PTE_ATTRINDXMASK | ARM_PTE_SHMASK);
9989 tmplate |= pmap_get_pt_ops(pmap)->wimg_to_pte(attributes);
9990
9991 WRITE_PTE_STRONG(pte_p, tmplate);
9992 pmap_get_pt_ops(pmap)->flush_tlb_region_async(va, PAGE_SIZE, pmap);
9993 tlb_flush_needed = TRUE;
9994
9995 #ifdef PVH_FLAG_IOMMU
9996 cache_skip_pve:
9997 #endif
9998 pte_p = PT_ENTRY_NULL;
9999 if (pve_p != PV_ENTRY_NULL) {
10000 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
10001 }
10002 }
10003 if (tlb_flush_needed) {
10004 sync_tlb_flush();
10005 }
10006
10007 PMAP_TRACE(2, PMAP_CODE(PMAP__UPDATE_CACHING) | DBG_FUNC_END, ppnum, attributes);
10008 }
10009
10010 #if (__ARM_VMSA__ == 7)
10011 vm_map_address_t
10012 pmap_create_sharedpage(
10013 void)
10014 {
10015 pmap_paddr_t pa;
10016 kern_return_t kr;
10017
10018 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
10019 memset((char *) phystokv(pa), 0, PAGE_SIZE);
10020
10021 kr = pmap_enter(kernel_pmap, _COMM_PAGE_BASE_ADDRESS, atop(pa), VM_PROT_READ | VM_PROT_WRITE, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
10022 assert(kr == KERN_SUCCESS);
10023
10024 return (vm_map_address_t)phystokv(pa);
10025 }
10026 #else
10027 static void
10028 pmap_update_tt3e(
10029 pmap_t pmap,
10030 vm_address_t address,
10031 tt_entry_t template)
10032 {
10033 tt_entry_t *ptep, pte;
10034
10035 ptep = pmap_tt3e(pmap, address);
10036 if (ptep == NULL) {
10037 panic("%s: no ptep?\n", __FUNCTION__);
10038 }
10039
10040 pte = *ptep;
10041 pte = tte_to_pa(pte) | template;
10042 WRITE_PTE_STRONG(ptep, pte);
10043 }
10044
10045 /* Note absence of non-global bit */
10046 #define PMAP_COMM_PAGE_PTE_TEMPLATE (ARM_PTE_TYPE_VALID \
10047 | ARM_PTE_ATTRINDX(CACHE_ATTRINDX_WRITEBACK) \
10048 | ARM_PTE_SH(SH_INNER_MEMORY) | ARM_PTE_NX \
10049 | ARM_PTE_PNX | ARM_PTE_AP(AP_RORO) | ARM_PTE_AF)
10050
10051 vm_map_address_t
10052 pmap_create_sharedpage(
10053 void
10054 )
10055 {
10056 kern_return_t kr;
10057 pmap_paddr_t pa = 0;
10058
10059
10060 (void) pmap_pages_alloc(&pa, PAGE_SIZE, 0);
10061
10062 memset((char *) phystokv(pa), 0, PAGE_SIZE);
10063
10064 #ifdef CONFIG_XNUPOST
10065 /*
10066 * The kernel pmap maintains a user accessible mapping of the commpage
10067 * to test PAN.
10068 */
10069 kr = pmap_enter(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
10070 assert(kr == KERN_SUCCESS);
10071
10072 /*
10073 * This mapping should not be global (as we only expect to reference it
10074 * during testing).
10075 */
10076 pmap_update_tt3e(kernel_pmap, _COMM_HIGH_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE | ARM_PTE_NG);
10077
10078 #if KASAN
10079 kasan_map_shadow(_COMM_HIGH_PAGE64_BASE_ADDRESS, PAGE_SIZE, true);
10080 #endif
10081 #endif /* CONFIG_XNUPOST */
10082
10083 /*
10084 * In order to avoid burning extra pages on mapping the shared page, we
10085 * create a dedicated pmap for the shared page. We forcibly nest the
10086 * translation tables from this pmap into other pmaps. The level we
10087 * will nest at depends on the MMU configuration (page size, TTBR range,
10088 * etc).
10089 *
10090 * Note that this is NOT "the nested pmap" (which is used to nest the
10091 * shared cache).
10092 *
10093 * Note that we update parameters of the entry for our unique needs (NG
10094 * entry, etc.).
10095 */
10096 sharedpage_pmap = pmap_create_options(NULL, 0x0, 0);
10097 assert(sharedpage_pmap != NULL);
10098
10099 /* The user 64-bit mapping... */
10100 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
10101 assert(kr == KERN_SUCCESS);
10102 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE64_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
10103
10104 /* ...and the user 32-bit mapping. */
10105 kr = pmap_enter(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, (ppnum_t)atop(pa), VM_PROT_READ, VM_PROT_NONE, VM_WIMG_USE_DEFAULT, TRUE);
10106 assert(kr == KERN_SUCCESS);
10107 pmap_update_tt3e(sharedpage_pmap, _COMM_PAGE32_BASE_ADDRESS, PMAP_COMM_PAGE_PTE_TEMPLATE);
10108
10109 /* For manipulation in kernel, go straight to physical page */
10110 return (vm_map_address_t)phystokv(pa);
10111 }
10112
10113 /*
10114 * Asserts to ensure that the TTEs we nest to map the shared page do not overlap
10115 * with user controlled TTEs.
10116 */
10117 #if (ARM_PGSHIFT == 14)
10118 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= MACH_VM_MAX_ADDRESS);
10119 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L2_OFFMASK) >= VM_MAX_ADDRESS);
10120 #elif (ARM_PGSHIFT == 12)
10121 static_assert((_COMM_PAGE64_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= MACH_VM_MAX_ADDRESS);
10122 static_assert((_COMM_PAGE32_BASE_ADDRESS & ~ARM_TT_L1_OFFMASK) >= VM_MAX_ADDRESS);
10123 #else
10124 #error Nested shared page mapping is unsupported on this config
10125 #endif
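
/*
 * Worked example of the overlap check above (the commpage address used here is
 * hypothetical): with 16KB pages the nested TTE covers a full 32MB L2 block,
 * so the assert requires that rounding the commpage address down to a 32MB
 * boundary (addr & ~ARM_TT_L2_OFFMASK) still lands at or above
 * MACH_VM_MAX_ADDRESS. For a hypothetical base of 0x0000001000004000, the
 * containing L2 block starts at 0x0000001000000000, and that entire block must
 * lie outside the user-controllable VA range. The 4KB-page case is the same
 * calculation with 1GB L1 blocks.
 */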
10126
10127 MARK_AS_PMAP_TEXT static kern_return_t
10128 pmap_insert_sharedpage_internal(
10129 pmap_t pmap)
10130 {
10131 kern_return_t kr = KERN_SUCCESS;
10132 vm_offset_t sharedpage_vaddr;
10133 pt_entry_t *ttep, *src_ttep;
10134 int options = 0;
10135
10136 VALIDATE_PMAP(pmap);
10137
10138 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
10139 #error We assume a single page.
10140 #endif
10141
10142 if (pmap_is_64bit(pmap)) {
10143 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
10144 } else {
10145 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
10146 }
10147
10148 PMAP_LOCK(pmap);
10149
10150 /*
10151 * For 4KB pages, we can force the commpage to nest at the level one
10152 * page table, as each entry is 1GB (i.e., there will be no overlap
10153 * with regular userspace mappings). For 16KB pages, each level one
10154 * entry is 64GB, so we must go to the second level entry (32MB) in
10155 * order to nest.
10156 */
10157 #if (ARM_PGSHIFT == 12)
10158 (void)options;
10159
10160 /* Just slam in the L1 entry. */
10161 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
10162
10163 if (*ttep != ARM_PTE_EMPTY) {
10164 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
10165 }
10166
10167 src_ttep = pmap_tt1e(sharedpage_pmap, sharedpage_vaddr);
10168 #elif (ARM_PGSHIFT == 14)
10169 /* Allocate for the L2 entry if necessary, and slam it into place. */
10170 /*
10171 * As long as we are using a three-level page table, the first level
10172 * should always exist, so we don't need to check for it.
10173 */
10174 while (*pmap_tt1e(pmap, sharedpage_vaddr) == ARM_PTE_EMPTY) {
10175 PMAP_UNLOCK(pmap);
10176
10177 kr = pmap_expand(pmap, sharedpage_vaddr, options, PMAP_TT_L2_LEVEL);
10178
10179 if (kr != KERN_SUCCESS) {
10180 panic("Failed to pmap_expand for commpage, pmap=%p", pmap);
10183 }
10184
10185 PMAP_LOCK(pmap);
10186 }
10187
10188 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
10189
10190 if (*ttep != ARM_PTE_EMPTY) {
10191 panic("%s: Found something mapped at the commpage address?!", __FUNCTION__);
10192 }
10193
10194 src_ttep = pmap_tt2e(sharedpage_pmap, sharedpage_vaddr);
10195 #endif
10196
10197 *ttep = *src_ttep;
10198 FLUSH_PTE_STRONG(ttep);
10199
10200 /* TODO: Should we flush in the 64-bit case? */
10201 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
10202
10203 #if (ARM_PGSHIFT == 12)
10204 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
10205 #elif (ARM_PGSHIFT == 14)
10206 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
10207 #endif
10208 sync_tlb_flush();
10209
10210 PMAP_UNLOCK(pmap);
10211
10212 return kr;
10213 }
10214
10215 static void
10216 pmap_unmap_sharedpage(
10217 pmap_t pmap)
10218 {
10219 pt_entry_t *ttep;
10220 vm_offset_t sharedpage_vaddr;
10221
10222 #if _COMM_PAGE_AREA_LENGTH != PAGE_SIZE
10223 #error We assume a single page.
10224 #endif
10225
10226 if (pmap_is_64bit(pmap)) {
10227 sharedpage_vaddr = _COMM_PAGE64_BASE_ADDRESS;
10228 } else {
10229 sharedpage_vaddr = _COMM_PAGE32_BASE_ADDRESS;
10230 }
10231
10232 #if (ARM_PGSHIFT == 12)
10233 ttep = pmap_tt1e(pmap, sharedpage_vaddr);
10234
10235 if (ttep == NULL) {
10236 return;
10237 }
10238
10239 /* It had better be mapped to the shared page */
10240 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt1e(sharedpage_pmap, sharedpage_vaddr)) {
10241 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
10242 }
10243 #elif (ARM_PGSHIFT == 14)
10244 ttep = pmap_tt2e(pmap, sharedpage_vaddr);
10245
10246 if (ttep == NULL) {
10247 return;
10248 }
10249
10250 /* It had better be mapped to the shared page */
10251 if (*ttep != ARM_TTE_EMPTY && *ttep != *pmap_tt2e(sharedpage_pmap, sharedpage_vaddr)) {
10252 panic("%s: Something other than commpage mapped in shared page slot?", __FUNCTION__);
10253 }
10254 #endif
10255
10256 *ttep = ARM_TTE_EMPTY;
10257 flush_mmu_tlb_region_asid_async(sharedpage_vaddr, PAGE_SIZE, kernel_pmap);
10258
10259 #if (ARM_PGSHIFT == 12)
10260 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L1_OFFMASK) | tlbi_asid(pmap->hw_asid));
10261 #elif (ARM_PGSHIFT == 14)
10262 flush_mmu_tlb_entry_async(tlbi_addr(sharedpage_vaddr & ~ARM_TT_L2_OFFMASK) | tlbi_asid(pmap->hw_asid));
10263 #endif
10264 sync_tlb_flush();
10265 }
10266
10267 void
10268 pmap_insert_sharedpage(
10269 pmap_t pmap)
10270 {
10271 pmap_insert_sharedpage_internal(pmap);
10272 }
10273
10274 static boolean_t
10275 pmap_is_64bit(
10276 pmap_t pmap)
10277 {
10278 return pmap->is_64bit;
10279 }
10280
10281 #endif
10282
10283 /* ARMTODO -- provide an implementation that accounts for
10284 * holes in the physical map, if any.
10285 */
10286 boolean_t
10287 pmap_valid_page(
10288 ppnum_t pn)
10289 {
10290 return pa_valid(ptoa(pn));
10291 }
10292
10293 MARK_AS_PMAP_TEXT static boolean_t
10294 pmap_is_empty_internal(
10295 pmap_t pmap,
10296 vm_map_offset_t va_start,
10297 vm_map_offset_t va_end)
10298 {
10299 vm_map_offset_t block_start, block_end;
10300 tt_entry_t *tte_p;
10301
10302 if (pmap == NULL) {
10303 return TRUE;
10304 }
10305
10306 VALIDATE_PMAP(pmap);
10307
10308 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
10309 unsigned int initial_not_in_kdp = not_in_kdp;
10310
10311 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
10312 PMAP_LOCK(pmap);
10313 }
10314
10315 #if (__ARM_VMSA__ == 7)
10316 if (tte_index(pmap, pt_attr, va_end) >= pmap->tte_index_max) {
10317 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
10318 PMAP_UNLOCK(pmap);
10319 }
10320 return TRUE;
10321 }
10322 #endif
10323
10324 /* TODO: This will be faster if we increment ttep at each level. */
10325 block_start = va_start;
10326
10327 while (block_start < va_end) {
10328 pt_entry_t *bpte_p, *epte_p;
10329 pt_entry_t *pte_p;
10330
10331 block_end = (block_start + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr);
10332 if (block_end > va_end) {
10333 block_end = va_end;
10334 }
10335
10336 tte_p = pmap_tte(pmap, block_start);
10337 if ((tte_p != PT_ENTRY_NULL)
10338 && ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE)) {
10339 pte_p = (pt_entry_t *) ttetokv(*tte_p);
10340 bpte_p = &pte_p[pte_index(pmap, pt_attr, block_start)];
10341 epte_p = &pte_p[pte_index(pmap, pt_attr, block_end)];
10342
10343 for (pte_p = bpte_p; pte_p < epte_p; pte_p++) {
10344 if (*pte_p != ARM_PTE_EMPTY) {
10345 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
10346 PMAP_UNLOCK(pmap);
10347 }
10348 return FALSE;
10349 }
10350 }
10351 }
10352 block_start = block_end;
10353 }
10354
10355 if ((pmap != kernel_pmap) && (initial_not_in_kdp)) {
10356 PMAP_UNLOCK(pmap);
10357 }
10358
10359 return TRUE;
10360 }
10361
10362 boolean_t
10363 pmap_is_empty(
10364 pmap_t pmap,
10365 vm_map_offset_t va_start,
10366 vm_map_offset_t va_end)
10367 {
10368 return pmap_is_empty_internal(pmap, va_start, va_end);
10369 }
10370
10371 vm_map_offset_t
10372 pmap_max_offset(
10373 boolean_t is64,
10374 unsigned int option)
10375 {
10376 return (is64) ? pmap_max_64bit_offset(option) : pmap_max_32bit_offset(option);
10377 }
10378
10379 vm_map_offset_t
10380 pmap_max_64bit_offset(
10381 __unused unsigned int option)
10382 {
10383 vm_map_offset_t max_offset_ret = 0;
10384
10385 #if defined(__arm64__)
10386 const vm_map_offset_t min_max_offset = SHARED_REGION_BASE_ARM64 + SHARED_REGION_SIZE_ARM64 + 0x20000000; // end of shared region + 512MB for various purposes
10387 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
10388 max_offset_ret = arm64_pmap_max_offset_default;
10389 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
10390 max_offset_ret = min_max_offset;
10391 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
10392 max_offset_ret = MACH_VM_MAX_ADDRESS;
10393 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
10394 if (arm64_pmap_max_offset_default) {
10395 max_offset_ret = arm64_pmap_max_offset_default;
10396 } else if (max_mem > 0xC0000000) {
10397 max_offset_ret = min_max_offset + 0x138000000; // Max offset is 13.375GB for devices with > 3GB of memory
10398 } else if (max_mem > 0x40000000) {
10399 max_offset_ret = min_max_offset + 0x38000000; // Max offset is 9.375GB for devices with > 1GB and <= 3GB of memory
10400 } else {
10401 max_offset_ret = min_max_offset;
10402 }
10403 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
10404 if (arm64_pmap_max_offset_default) {
10405 // Allow the boot-arg to override jumbo size
10406 max_offset_ret = arm64_pmap_max_offset_default;
10407 } else {
10408 max_offset_ret = MACH_VM_MAX_ADDRESS; // Max offset is 64GB for pmaps with special "jumbo" blessing
10409 }
10410 } else {
10411 panic("pmap_max_64bit_offset illegal option 0x%x\n", option);
10412 }
10413
10414 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
10415 assert(max_offset_ret >= min_max_offset);
10416 #else
10417 panic("Can't run pmap_max_64bit_offset on non-64bit architectures\n");
10418 #endif
10419
10420 return max_offset_ret;
10421 }
10422
10423 vm_map_offset_t
10424 pmap_max_32bit_offset(
10425 unsigned int option)
10426 {
10427 vm_map_offset_t max_offset_ret = 0;
10428
10429 if (option == ARM_PMAP_MAX_OFFSET_DEFAULT) {
10430 max_offset_ret = arm_pmap_max_offset_default;
10431 } else if (option == ARM_PMAP_MAX_OFFSET_MIN) {
10432 max_offset_ret = 0x66000000;
10433 } else if (option == ARM_PMAP_MAX_OFFSET_MAX) {
10434 max_offset_ret = VM_MAX_ADDRESS;
10435 } else if (option == ARM_PMAP_MAX_OFFSET_DEVICE) {
10436 if (arm_pmap_max_offset_default) {
10437 max_offset_ret = arm_pmap_max_offset_default;
10438 } else if (max_mem > 0x20000000) {
10439 max_offset_ret = 0x80000000;
10440 } else {
10441 max_offset_ret = 0x66000000;
10442 }
10443 } else if (option == ARM_PMAP_MAX_OFFSET_JUMBO) {
10444 max_offset_ret = 0x80000000;
10445 } else {
10446 panic("pmap_max_32bit_offset illegal option 0x%x\n", option);
10447 }
10448
10449 assert(max_offset_ret <= MACH_VM_MAX_ADDRESS);
10450 return max_offset_ret;
10451 }
10452
10453 #if CONFIG_DTRACE
10454 /*
10455 * Constrain DTrace copyin/copyout actions
10456 */
10457 extern kern_return_t dtrace_copyio_preflight(addr64_t);
10458 extern kern_return_t dtrace_copyio_postflight(addr64_t);
10459
10460 kern_return_t
10461 dtrace_copyio_preflight(
10462 __unused addr64_t va)
10463 {
10464 if (current_map() == kernel_map) {
10465 return KERN_FAILURE;
10466 } else {
10467 return KERN_SUCCESS;
10468 }
10469 }
10470
10471 kern_return_t
10472 dtrace_copyio_postflight(
10473 __unused addr64_t va)
10474 {
10475 return KERN_SUCCESS;
10476 }
10477 #endif /* CONFIG_DTRACE */
10478
10479
10480 void
10481 pmap_flush_context_init(__unused pmap_flush_context *pfc)
10482 {
10483 }
10484
10485
10486 void
10487 pmap_flush(
10488 __unused pmap_flush_context *cpus_to_flush)
10489 {
10490 /* not implemented yet */
10491 return;
10492 }
10493
10494
10495 static void __unused
10496 pmap_pin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
10497 {
10498 }
10499
10500 static void __unused
10501 pmap_unpin_kernel_pages(vm_offset_t kva __unused, size_t nbytes __unused)
10502 {
10503 }
10504
10505
10506
10507 #define PMAP_RESIDENT_INVALID ((mach_vm_size_t)-1)
10508
10509 MARK_AS_PMAP_TEXT static mach_vm_size_t
10510 pmap_query_resident_internal(
10511 pmap_t pmap,
10512 vm_map_address_t start,
10513 vm_map_address_t end,
10514 mach_vm_size_t *compressed_bytes_p)
10515 {
10516 mach_vm_size_t resident_bytes = 0;
10517 mach_vm_size_t compressed_bytes = 0;
10518
10519 pt_entry_t *bpte, *epte;
10520 pt_entry_t *pte_p;
10521 tt_entry_t *tte_p;
10522
10523 if (pmap == NULL) {
10524 return PMAP_RESIDENT_INVALID;
10525 }
10526
10527 VALIDATE_PMAP(pmap);
10528
10529 /* Ensure that this request is valid, and addresses exactly one TTE. */
10530 if (__improbable((start % ARM_PGBYTES) || (end % ARM_PGBYTES))) {
10531 panic("%s: address range %p, %p not page-aligned", __func__, (void*)start, (void*)end);
10532 }
10533
10534 if (__improbable((end < start) || ((end - start) > (PTE_PGENTRIES * ARM_PGBYTES)))) {
10535 panic("%s: invalid address range %p, %p", __func__, (void*)start, (void*)end);
10536 }
10537
10538 PMAP_LOCK(pmap);
10539 tte_p = pmap_tte(pmap, start);
10540 if (tte_p == (tt_entry_t *) NULL) {
10541 PMAP_UNLOCK(pmap);
10542 return PMAP_RESIDENT_INVALID;
10543 }
10544 if ((*tte_p & ARM_TTE_TYPE_MASK) == ARM_TTE_TYPE_TABLE) {
10545 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
10546 pte_p = (pt_entry_t *) ttetokv(*tte_p);
10547 bpte = &pte_p[pte_index(pmap, pt_attr, start)];
10548 epte = &pte_p[pte_index(pmap, pt_attr, end)];
10549
10550 for (; bpte < epte; bpte++) {
10551 if (ARM_PTE_IS_COMPRESSED(*bpte, bpte)) {
10552 compressed_bytes += ARM_PGBYTES;
10553 } else if (pa_valid(pte_to_pa(*bpte))) {
10554 resident_bytes += ARM_PGBYTES;
10555 }
10556 }
10557 }
10558 PMAP_UNLOCK(pmap);
10559
10560 if (compressed_bytes_p) {
10561 pmap_pin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
10562 *compressed_bytes_p += compressed_bytes;
10563 pmap_unpin_kernel_pages((vm_offset_t)compressed_bytes_p, sizeof(*compressed_bytes_p));
10564 }
10565
10566 return resident_bytes;
10567 }
10568
10569 mach_vm_size_t
10570 pmap_query_resident(
10571 pmap_t pmap,
10572 vm_map_address_t start,
10573 vm_map_address_t end,
10574 mach_vm_size_t *compressed_bytes_p)
10575 {
10576 mach_vm_size_t total_resident_bytes;
10577 mach_vm_size_t compressed_bytes;
10578 vm_map_address_t va;
10579
10580
10581 if (pmap == PMAP_NULL) {
10582 if (compressed_bytes_p) {
10583 *compressed_bytes_p = 0;
10584 }
10585 return 0;
10586 }
10587
10588 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
10589
10590 total_resident_bytes = 0;
10591 compressed_bytes = 0;
10592
10593 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_START,
10594 VM_KERNEL_ADDRHIDE(pmap), VM_KERNEL_ADDRHIDE(start),
10595 VM_KERNEL_ADDRHIDE(end));
10596
10597 va = start;
10598 while (va < end) {
10599 vm_map_address_t l;
10600 mach_vm_size_t resident_bytes;
10601
10602 l = ((va + pt_attr_twig_size(pt_attr)) & ~pt_attr_twig_offmask(pt_attr));
10603
10604 if (l > end) {
10605 l = end;
10606 }
10607 resident_bytes = pmap_query_resident_internal(pmap, va, l, compressed_bytes_p);
10608 if (resident_bytes == PMAP_RESIDENT_INVALID) {
10609 break;
10610 }
10611
10612 total_resident_bytes += resident_bytes;
10613
10614 va = l;
10615 }
10616
10617 if (compressed_bytes_p) {
10618 *compressed_bytes_p = compressed_bytes;
10619 }
10620
10621 PMAP_TRACE(3, PMAP_CODE(PMAP__QUERY_RESIDENT) | DBG_FUNC_END,
10622 total_resident_bytes);
10623
10624 return total_resident_bytes;
10625 }
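
/*
 * Illustrative sketch (hypothetical caller, not from the original source):
 * accounting for a VA range with pmap_query_resident(). The return value is
 * the number of resident bytes; compressed bytes are accumulated through the
 * optional out-parameter.
 */
#if 0 /* illustrative only */
static void
example_report_range_usage(pmap_t pmap, vm_map_address_t start, vm_map_address_t end)
{
    mach_vm_size_t compressed = 0;
    mach_vm_size_t resident = pmap_query_resident(pmap, start, end, &compressed);

    kprintf("range [0x%llx, 0x%llx): %llu resident bytes, %llu compressed bytes\n",
        (uint64_t)start, (uint64_t)end, resident, compressed);
}
#endif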
10626
10627 #if MACH_ASSERT
10628 static void
10629 pmap_check_ledgers(
10630 pmap_t pmap)
10631 {
10632 int pid;
10633 char *procname;
10634
10635 if (pmap->pmap_pid == 0) {
10636 /*
10637 * This pmap was not or is no longer fully associated
10638 * with a task (e.g. the old pmap after a fork()/exec() or
10639 * spawn()). Its "ledger" still points at a task that is
10640 * now using a different (and active) address space, so
10641 * we can't check that all the pmap ledgers are balanced here.
10642 *
10643 * If the "pid" is set, that means that we went through
10644 * pmap_set_process() in task_terminate_internal(), so
10645 * this task's ledger should not have been re-used and
10646 * all the pmap ledgers should be back to 0.
10647 */
10648 return;
10649 }
10650
10651 pid = pmap->pmap_pid;
10652 procname = pmap->pmap_procname;
10653
10654 vm_map_pmap_check_ledgers(pmap, pmap->ledger, pid, procname);
10655
10656 PMAP_STATS_ASSERTF(pmap->stats.resident_count == 0, pmap, "stats.resident_count %d", pmap->stats.resident_count);
10657 #if 00
10658 PMAP_STATS_ASSERTF(pmap->stats.wired_count == 0, pmap, "stats.wired_count %d", pmap->stats.wired_count);
10659 #endif
10660 PMAP_STATS_ASSERTF(pmap->stats.device == 0, pmap, "stats.device %d", pmap->stats.device);
10661 PMAP_STATS_ASSERTF(pmap->stats.internal == 0, pmap, "stats.internal %d", pmap->stats.internal);
10662 PMAP_STATS_ASSERTF(pmap->stats.external == 0, pmap, "stats.external %d", pmap->stats.external);
10663 PMAP_STATS_ASSERTF(pmap->stats.reusable == 0, pmap, "stats.reusable %d", pmap->stats.reusable);
10664 PMAP_STATS_ASSERTF(pmap->stats.compressed == 0, pmap, "stats.compressed %lld", pmap->stats.compressed);
10665 }
10666 #endif /* MACH_ASSERT */
10667
10668 void
10669 pmap_advise_pagezero_range(__unused pmap_t p, __unused uint64_t a)
10670 {
10671 }
10672
10673
10674 #if CONFIG_PGTRACE
10675 #define PROF_START uint64_t t, nanot;\
10676 t = mach_absolute_time();
10677
10678 #define PROF_END absolutetime_to_nanoseconds(mach_absolute_time()-t, &nanot);\
10679 kprintf("%s: took %llu ns\n", __func__, nanot);
10680
10681 #define PMAP_PGTRACE_LOCK(p) \
10682 do { \
10683 *(p) = ml_set_interrupts_enabled(false); \
10684 if (simple_lock_try(&(pmap_pgtrace.lock), LCK_GRP_NULL)) break; \
10685 ml_set_interrupts_enabled(*(p)); \
10686 } while (true)
10687
10688 #define PMAP_PGTRACE_UNLOCK(p) \
10689 do { \
10690 simple_unlock(&(pmap_pgtrace.lock)); \
10691 ml_set_interrupts_enabled(*(p)); \
10692 } while (0)
10693
10694 #define PGTRACE_WRITE_PTE(pte_p, pte_entry) \
10695 do { \
10696 *(pte_p) = (pte_entry); \
10697 FLUSH_PTE(pte_p); \
10698 } while (0)
10699
10700 #define PGTRACE_MAX_MAP 16 // maximum supported va to same pa
10701
10702 typedef enum {
10703 UNDEFINED,
10704 PA_UNDEFINED,
10705 VA_UNDEFINED,
10706 DEFINED
10707 } pmap_pgtrace_page_state_t;
10708
10709 typedef struct {
10710 queue_chain_t chain;
10711
10712 /*
10713 * pa - pa
10714 * maps - list of va maps to upper pa
10715 * map_pool - map pool
10716 * map_waste - waste can
10717 * state - state
10718 */
10719 pmap_paddr_t pa;
10720 queue_head_t maps;
10721 queue_head_t map_pool;
10722 queue_head_t map_waste;
10723 pmap_pgtrace_page_state_t state;
10724 } pmap_pgtrace_page_t;
10725
10726 static struct {
10727 /*
10728 * pages - list of tracing page info
10729 */
10730 queue_head_t pages;
10731 decl_simple_lock_data(, lock);
10732 } pmap_pgtrace = {};
10733
10734 static void
10735 pmap_pgtrace_init(void)
10736 {
10737 queue_init(&(pmap_pgtrace.pages));
10738 simple_lock_init(&(pmap_pgtrace.lock), 0);
10739
10740 boolean_t enabled;
10741
10742 if (PE_parse_boot_argn("pgtrace", &enabled, sizeof(enabled))) {
10743 pgtrace_enabled = enabled;
10744 }
10745 }
10746
10747 // find a page with given pa - pmap_pgtrace should be locked
10748 inline static pmap_pgtrace_page_t *
10749 pmap_pgtrace_find_page(pmap_paddr_t pa)
10750 {
10751 queue_head_t *q = &(pmap_pgtrace.pages);
10752 pmap_pgtrace_page_t *p;
10753
10754 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
10755 if (p->state == UNDEFINED) {
10756 continue;
10757 }
10758 if (p->state == PA_UNDEFINED) {
10759 continue;
10760 }
10761 if (p->pa == pa) {
10762 return p;
10763 }
10764 }
10765
10766 return NULL;
10767 }
10768
10769 // enter clone of given pmap, va page and range - pmap should be locked
10770 static bool
10771 pmap_pgtrace_enter_clone(pmap_t pmap, vm_map_offset_t va_page, vm_map_offset_t start, vm_map_offset_t end)
10772 {
10773 bool ints;
10774 queue_head_t *q = &(pmap_pgtrace.pages);
10775 pmap_paddr_t pa_page;
10776 pt_entry_t *ptep, *cptep;
10777 pmap_pgtrace_page_t *p;
10778 bool found = false;
10779
10780 PMAP_ASSERT_LOCKED(pmap);
10781 assert(va_page == arm_trunc_page(va_page));
10782
10783 PMAP_PGTRACE_LOCK(&ints);
10784
10785 ptep = pmap_pte(pmap, va_page);
10786
10787 // target pte should exist
10788 if (!ptep || !(*ptep & ARM_PTE_TYPE_VALID)) {
10789 PMAP_PGTRACE_UNLOCK(&ints);
10790 return false;
10791 }
10792
10793 queue_head_t *mapq;
10794 queue_head_t *mappool;
10795 pmap_pgtrace_map_t *map = NULL;
10796
10797 pa_page = pte_to_pa(*ptep);
10798
10799 // find if we have a page info defined for this
10800 queue_iterate(q, p, pmap_pgtrace_page_t *, chain) {
10801 mapq = &(p->maps);
10802 mappool = &(p->map_pool);
10803
10804 switch (p->state) {
10805 case PA_UNDEFINED:
10806 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10807 if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
10808 p->pa = pa_page;
10809 map->range.start = start;
10810 map->range.end = end;
10811 found = true;
10812 break;
10813 }
10814 }
10815 break;
10816
10817 case VA_UNDEFINED:
10818 if (p->pa != pa_page) {
10819 break;
10820 }
10821 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10822 if (map->cloned == false) {
10823 map->pmap = pmap;
10824 map->ova = va_page;
10825 map->range.start = start;
10826 map->range.end = end;
10827 found = true;
10828 break;
10829 }
10830 }
10831 break;
10832
10833 case DEFINED:
10834 if (p->pa != pa_page) {
10835 break;
10836 }
10837 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10838 if (map->cloned == true && map->pmap == pmap && map->ova == va_page) {
10839 kprintf("%s: skip existing mapping at va=%llx\n", __func__, va_page);
10840 break;
10841 } else if (map->cloned == true && map->pmap == kernel_pmap && map->cva[1] == va_page) {
10842 kprintf("%s: skip clone mapping at va=%llx\n", __func__, va_page);
10843 break;
10844 } else if (map->cloned == false && map->pmap == pmap && map->ova == va_page) {
10845 // range should be already defined as well
10846 found = true;
10847 break;
10848 }
10849 }
10850 break;
10851
10852 default:
10853 panic("invalid state p->state=%x\n", p->state);
10854 }
10855
10856 if (found == true) {
10857 break;
10858 }
10859 }
10860
10861 // do not clone if no page info found
10862 if (found == false) {
10863 PMAP_PGTRACE_UNLOCK(&ints);
10864 return false;
10865 }
10866
10867 // copy pre, target and post ptes to clone ptes
10868 for (int i = 0; i < 3; i++) {
10869 ptep = pmap_pte(pmap, va_page + (i - 1) * ARM_PGBYTES);
10870 cptep = pmap_pte(kernel_pmap, map->cva[i]);
10871 assert(cptep != NULL);
10872 if (ptep == NULL) {
10873 PGTRACE_WRITE_PTE(cptep, (pt_entry_t)NULL);
10874 } else {
10875 PGTRACE_WRITE_PTE(cptep, *ptep);
10876 }
10877 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
10878 }
10879
10880 // get ptes for original and clone
10881 ptep = pmap_pte(pmap, va_page);
10882 cptep = pmap_pte(kernel_pmap, map->cva[1]);
10883
10884 // invalidate original pte and mark it as a pgtrace page
10885 PGTRACE_WRITE_PTE(ptep, (*ptep | ARM_PTE_PGTRACE) & ~ARM_PTE_TYPE_VALID);
10886 PMAP_UPDATE_TLBS(pmap, map->ova, map->ova + ARM_PGBYTES, false);
10887
10888 map->cloned = true;
10889 p->state = DEFINED;
10890
10891 kprintf("%s: pa_page=%llx va_page=%llx cva[1]=%llx pmap=%p ptep=%p cptep=%p\n", __func__, pa_page, va_page, map->cva[1], pmap, ptep, cptep);
10892
10893 PMAP_PGTRACE_UNLOCK(&ints);
10894
10895 return true;
10896 }
10897
10898 // This function removes trace bit and validate pte if applicable. Pmap must be locked.
10899 static void
10900 pmap_pgtrace_remove_clone(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t va)
10901 {
10902 bool ints, found = false;
10903 pmap_pgtrace_page_t *p;
10904 pt_entry_t *ptep;
10905
10906 PMAP_PGTRACE_LOCK(&ints);
10907
10908 // we must have this page info
10909 p = pmap_pgtrace_find_page(pa);
10910 if (p == NULL) {
10911 goto unlock_exit;
10912 }
10913
10914 // find matching map
10915 queue_head_t *mapq = &(p->maps);
10916 queue_head_t *mappool = &(p->map_pool);
10917 pmap_pgtrace_map_t *map;
10918
10919 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
10920 if (map->pmap == pmap && map->ova == va) {
10921 found = true;
10922 break;
10923 }
10924 }
10925
10926 if (!found) {
10927 goto unlock_exit;
10928 }
10929
10930 if (map->cloned == true) {
10931 // Restore back the pte to original state
10932 ptep = pmap_pte(pmap, map->ova);
10933 assert(ptep);
10934 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
10935 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);
10936
10937 // revert clone pages
10938 for (int i = 0; i < 3; i++) {
10939 ptep = pmap_pte(kernel_pmap, map->cva[i]);
10940 assert(ptep != NULL);
10941 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
10942 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
10943 }
10944 }
10945
10946 queue_remove(mapq, map, pmap_pgtrace_map_t *, chain);
10947 map->pmap = NULL;
10948 map->ova = (vm_map_offset_t)NULL;
10949 map->cloned = false;
10950 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
10951
10952 kprintf("%s: p=%p pa=%llx va=%llx\n", __func__, p, pa, va);
10953
10954 unlock_exit:
10955 PMAP_PGTRACE_UNLOCK(&ints);
10956 }
10957
10958 // remove all clones of given pa - pmap must be locked
10959 static void
10960 pmap_pgtrace_remove_all_clone(pmap_paddr_t pa)
10961 {
10962 bool ints;
10963 pmap_pgtrace_page_t *p;
10964 pt_entry_t *ptep;
10965
10966 PMAP_PGTRACE_LOCK(&ints);
10967
10968 // we must have this page info
10969 p = pmap_pgtrace_find_page(pa);
10970 if (p == NULL) {
10971 PMAP_PGTRACE_UNLOCK(&ints);
10972 return;
10973 }
10974
10975 queue_head_t *mapq = &(p->maps);
10976 queue_head_t *mappool = &(p->map_pool);
10977 queue_head_t *mapwaste = &(p->map_waste);
10978 pmap_pgtrace_map_t *map;
10979
10980 // move maps to waste
10981 while (!queue_empty(mapq)) {
10982 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
10983 queue_enter_first(mapwaste, map, pmap_pgtrace_map_t*, chain);
10984 }
10985
10986 PMAP_PGTRACE_UNLOCK(&ints);
10987
10988 // sanitize maps in waste
10989 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
10990 if (map->cloned == true) {
10991 PMAP_LOCK(map->pmap);
10992
10993 // restore back original pte
10994 ptep = pmap_pte(map->pmap, map->ova);
10995 assert(ptep);
10996 PGTRACE_WRITE_PTE(ptep, *ptep | ARM_PTE_TYPE_VALID);
10997 PMAP_UPDATE_TLBS(map->pmap, map->ova, map->ova + ARM_PGBYTES, false);
10998
10999 // revert clone ptes
11000 for (int i = 0; i < 3; i++) {
11001 ptep = pmap_pte(kernel_pmap, map->cva[i]);
11002 assert(ptep != NULL);
11003 PGTRACE_WRITE_PTE(ptep, map->cva_spte[i]);
11004 PMAP_UPDATE_TLBS(kernel_pmap, map->cva[i], map->cva[i] + ARM_PGBYTES, false);
11005 }
11006
11007 PMAP_UNLOCK(map->pmap);
11008 }
11009
11010 map->pmap = NULL;
11011 map->ova = (vm_map_offset_t)NULL;
11012 map->cloned = false;
11013 }
11014
11015 PMAP_PGTRACE_LOCK(&ints);
11016
11017 // recycle maps back to map_pool
11018 while (!queue_empty(mapwaste)) {
11019 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
11020 queue_enter_first(mappool, map, pmap_pgtrace_map_t*, chain);
11021 }
11022
11023 PMAP_PGTRACE_UNLOCK(&ints);
11024 }
11025
11026 inline static void
11027 pmap_pgtrace_get_search_space(pmap_t pmap, vm_map_offset_t *startp, vm_map_offset_t *endp)
11028 {
11029 uint64_t tsz;
11030 vm_map_offset_t end;
11031
11032 if (pmap == kernel_pmap) {
11033 tsz = (get_tcr() >> TCR_T1SZ_SHIFT) & TCR_TSZ_MASK;
11034 *startp = MAX(VM_MIN_KERNEL_ADDRESS, (UINT64_MAX >> (64 - tsz)) << (64 - tsz));
11035 *endp = VM_MAX_KERNEL_ADDRESS;
11036 } else {
11037 tsz = (get_tcr() >> TCR_T0SZ_SHIFT) & TCR_TSZ_MASK;
11038 if (tsz == 64) {
11039 end = 0;
11040 } else {
11041 end = ((uint64_t)1 << (64 - tsz)) - 1;
11042 }
11043
11044 *startp = 0;
11045 *endp = end;
11046 }
11047
11048 assert(*endp > *startp);
11049
11050 return;
11051 }
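
/*
 * Worked example for the TxSZ arithmetic above (the TxSZ value is
 * hypothetical): with T0SZ == 25 the user address space is 64 - 25 = 39 bits
 * wide, so end = (1ULL << 39) - 1 = 0x0000007FFFFFFFFF. On the kernel side,
 * T1SZ == 25 gives (UINT64_MAX >> 39) << 39 = 0xFFFFFF8000000000, i.e. the
 * lowest address whose top 25 bits are all ones, which is then clamped up to
 * VM_MIN_KERNEL_ADDRESS.
 */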
11052
11053 // has pa mapped in given pmap? then clone it
11054 static uint64_t
11055 pmap_pgtrace_clone_from_pa(pmap_t pmap, pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
11056 {
11057 uint64_t ret = 0;
11058 vm_map_offset_t min, max;
11059 vm_map_offset_t cur_page, end_page;
11060 pt_entry_t *ptep;
11061 tt_entry_t *ttep;
11062 tt_entry_t tte;
11063 __unused const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
11064
11065 pmap_pgtrace_get_search_space(pmap, &min, &max);
11066
11067 cur_page = arm_trunc_page(min);
11068 end_page = arm_trunc_page(max);
11069 while (cur_page <= end_page) {
11070 vm_map_offset_t add = 0;
11071
11072 PMAP_LOCK(pmap);
11073
11074 // skip space we are not interested in
11075 if (pmap == kernel_pmap &&
11076 ((vm_kernel_base <= cur_page && cur_page < vm_kernel_top) ||
11077 (vm_kext_base <= cur_page && cur_page < vm_kext_top))) {
11078 add = ARM_PGBYTES;
11079 goto unlock_continue;
11080 }
11081
11082 // check whether we can skip l1
11083 ttep = pmap_tt1e(pmap, cur_page);
11084 assert(ttep);
11085 tte = *ttep;
11086 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
11087 add = ARM_TT_L1_SIZE;
11088 goto unlock_continue;
11089 }
11090
11091 // how about l2
11092 tte = ((tt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt2_index(pmap, pt_attr, cur_page)];
11093
11094 if ((tte & (ARM_TTE_TYPE_MASK | ARM_TTE_VALID)) != (ARM_TTE_TYPE_TABLE | ARM_TTE_VALID)) {
11095 add = ARM_TT_L2_SIZE;
11096 goto unlock_continue;
11097 }
11098
11099 // ptep finally
11100 ptep = &(((pt_entry_t*) phystokv(tte & ARM_TTE_TABLE_MASK))[tt3_index(pmap, pt_attr, cur_page)]);
11101 if (ptep == PT_ENTRY_NULL) {
11102 add = ARM_TT_L3_SIZE;
11103 goto unlock_continue;
11104 }
11105
11106 if (arm_trunc_page(pa) == pte_to_pa(*ptep)) {
11107 if (pmap_pgtrace_enter_clone(pmap, cur_page, start_offset, end_offset) == true) {
11108 ret++;
11109 }
11110 }
11111
11112 add = ARM_PGBYTES;
11113
11114 unlock_continue:
11115 PMAP_UNLOCK(pmap);
11116
11117 //overflow
11118 if (cur_page + add < cur_page) {
11119 break;
11120 }
11121
11122 cur_page += add;
11123 }
11124
11125
11126 return ret;
11127 }
11128
11129 // search pv table and clone vas of given pa
11130 static uint64_t
11131 pmap_pgtrace_clone_from_pvtable(pmap_paddr_t pa, vm_map_offset_t start_offset, vm_map_offset_t end_offset)
11132 {
11133 uint64_t ret = 0;
11134 unsigned long pai;
11135 pv_entry_t **pvh;
11136 pt_entry_t *ptep;
11137 pmap_t pmap;
11138
11139 typedef struct {
11140 queue_chain_t chain;
11141 pmap_t pmap;
11142 vm_map_offset_t va;
11143 } pmap_va_t;
11144
11145 queue_head_t pmapvaq;
11146 pmap_va_t *pmapva;
11147
11148 queue_init(&pmapvaq);
11149
11150 pai = pa_index(pa);
11151 LOCK_PVH(pai);
11152 pvh = pai_to_pvh(pai);
11153
11154 // collect pmap/va pair from pvh
11155 if (pvh_test_type(pvh, PVH_TYPE_PTEP)) {
11156 ptep = pvh_ptep(pvh);
11157 pmap = ptep_get_pmap(ptep);
11158
11159 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
11160 pmapva->pmap = pmap;
11161 pmapva->va = ptep_get_va(ptep);
11162
11163 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
11164 } else if (pvh_test_type(pvh, PVH_TYPE_PVEP)) {
11165 pv_entry_t *pvep;
11166
11167 pvep = pvh_list(pvh);
11168 while (pvep) {
11169 ptep = pve_get_ptep(pvep);
11170 pmap = ptep_get_pmap(ptep);
11171
11172 pmapva = (pmap_va_t *)kalloc(sizeof(pmap_va_t));
11173 pmapva->pmap = pmap;
11174 pmapva->va = ptep_get_va(ptep);
11175
11176 queue_enter_first(&pmapvaq, pmapva, pmap_va_t *, chain);
11177
11178 pvep = PVE_NEXT_PTR(pve_next(pvep));
11179 }
11180 }
11181
11182 UNLOCK_PVH(pai);
11183
11184 // clone them while making sure mapping still exists
11185 queue_iterate(&pmapvaq, pmapva, pmap_va_t *, chain) {
11186 PMAP_LOCK(pmapva->pmap);
11187 ptep = pmap_pte(pmapva->pmap, pmapva->va);
11188 if (pte_to_pa(*ptep) == pa) {
11189 if (pmap_pgtrace_enter_clone(pmapva->pmap, pmapva->va, start_offset, end_offset) == true) {
11190 ret++;
11191 }
11192 }
11193 PMAP_UNLOCK(pmapva->pmap);
11194
11195 kfree(pmapva, sizeof(pmap_va_t));
11196 }
11197
11198 return ret;
11199 }
11200
11201 // allocate a page info
11202 static pmap_pgtrace_page_t *
11203 pmap_pgtrace_alloc_page(void)
11204 {
11205 pmap_pgtrace_page_t *p;
11206 queue_head_t *mapq;
11207 queue_head_t *mappool;
11208 queue_head_t *mapwaste;
11209 pmap_pgtrace_map_t *map;
11210
11211 p = kalloc(sizeof(pmap_pgtrace_page_t));
11212 assert(p);
11213
11214 p->state = UNDEFINED;
11215
11216 mapq = &(p->maps);
11217 mappool = &(p->map_pool);
11218 mapwaste = &(p->map_waste);
11219 queue_init(mapq);
11220 queue_init(mappool);
11221 queue_init(mapwaste);
11222
11223 for (int i = 0; i < PGTRACE_MAX_MAP; i++) {
11224 vm_map_offset_t newcva;
11225 pt_entry_t *cptep;
11226 kern_return_t kr;
11227 vm_map_entry_t entry;
11228
11229 // get a clone va
11230 vm_object_reference(kernel_object);
11231 kr = vm_map_find_space(kernel_map, &newcva, vm_map_round_page(3 * ARM_PGBYTES, PAGE_MASK), 0, 0, VM_MAP_KERNEL_FLAGS_NONE, VM_KERN_MEMORY_DIAG, &entry);
11232 if (kr != KERN_SUCCESS) {
11233 panic("%s VM couldn't find any space kr=%d\n", __func__, kr);
11234 }
11235 VME_OBJECT_SET(entry, kernel_object);
11236 VME_OFFSET_SET(entry, newcva);
11237 vm_map_unlock(kernel_map);
11238
11239 // fill default clone page info and add to pool
11240 map = kalloc(sizeof(pmap_pgtrace_map_t));
11241 for (int j = 0; j < 3; j++) {
11242 vm_map_offset_t addr = newcva + j * ARM_PGBYTES;
11243
11244 // pre-expand the pmap while preemption is enabled
11245 kr = pmap_expand(kernel_pmap, addr, 0, PMAP_TT_MAX_LEVEL);
11246 if (kr != KERN_SUCCESS) {
11247 panic("%s: pmap_expand(kernel_pmap, addr=%llx) returns kr=%d\n", __func__, addr, kr);
11248 }
11249
11250 cptep = pmap_pte(kernel_pmap, addr);
11251 assert(cptep != NULL);
11252
11253 map->cva[j] = addr;
11254 map->cva_spte[j] = *cptep;
11255 }
11256 map->range.start = map->range.end = 0;
11257 map->cloned = false;
11258 queue_enter_first(mappool, map, pmap_pgtrace_map_t *, chain);
11259 }
11260
11261 return p;
11262 }
11263
11264 // free a page info
11265 static void
11266 pmap_pgtrace_free_page(pmap_pgtrace_page_t *p)
11267 {
11268 queue_head_t *mapq;
11269 queue_head_t *mappool;
11270 queue_head_t *mapwaste;
11271 pmap_pgtrace_map_t *map;
11272
11273 assert(p);
11274
11275 mapq = &(p->maps);
11276 mappool = &(p->map_pool);
11277 mapwaste = &(p->map_waste);
11278
11279 while (!queue_empty(mapq)) {
11280 queue_remove_first(mapq, map, pmap_pgtrace_map_t *, chain);
11281 kfree(map, sizeof(pmap_pgtrace_map_t));
11282 }
11283
11284 while (!queue_empty(mappool)) {
11285 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
11286 kfree(map, sizeof(pmap_pgtrace_map_t));
11287 }
11288
11289 while (!queue_empty(mapwaste)) {
11290 queue_remove_first(mapwaste, map, pmap_pgtrace_map_t *, chain);
11291 kfree(map, sizeof(pmap_pgtrace_map_t));
11292 }
11293
11294 kfree(p, sizeof(pmap_pgtrace_page_t));
11295 }
11296
11297 // construct page infos with the given address range
11298 int
11299 pmap_pgtrace_add_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
11300 {
11301 int ret = 0;
11302 pt_entry_t *ptep;
11303 queue_head_t *q = &(pmap_pgtrace.pages);
11304 bool ints;
11305 vm_map_offset_t cur_page, end_page;
11306
11307 if (start > end) {
11308 kprintf("%s: invalid start=%llx > end=%llx\n", __func__, start, end);
11309 return -1;
11310 }
11311
11312 PROF_START
11313
11314 // add each page in given range
11315 cur_page = arm_trunc_page(start);
11316 end_page = arm_trunc_page(end);
11317 while (cur_page <= end_page) {
11318 pmap_paddr_t pa_page = 0;
11319 uint64_t num_cloned = 0;
11320 pmap_pgtrace_page_t *p = NULL, *newp;
11321 bool free_newp = true;
11322 pmap_pgtrace_page_state_t state;
11323
11324 // do all allocations outside of spinlocks
11325 newp = pmap_pgtrace_alloc_page();
11326
11327 // keep lock ordering: pmap, then kernel_pmap, then the pgtrace lock
11328 if (pmap != NULL) {
11329 PMAP_LOCK(pmap);
11330 }
11331 if (pmap != kernel_pmap) {
11332 PMAP_LOCK(kernel_pmap);
11333 }
11334
11335 // addresses are physical if pmap is null
11336 if (pmap == NULL) {
11337 ptep = NULL;
11338 pa_page = cur_page;
11339 state = VA_UNDEFINED;
11340 } else {
11341 ptep = pmap_pte(pmap, cur_page);
11342 if (ptep != NULL) {
11343 pa_page = pte_to_pa(*ptep);
11344 state = DEFINED;
11345 } else {
11346 state = PA_UNDEFINED;
11347 }
11348 }
11349
11350 // search if we have a page info already
11351 PMAP_PGTRACE_LOCK(&ints);
11352 if (state != PA_UNDEFINED) {
11353 p = pmap_pgtrace_find_page(pa_page);
11354 }
11355
11356 // add pre-allocated page info if nothing found
11357 if (p == NULL) {
11358 queue_enter_first(q, newp, pmap_pgtrace_page_t *, chain);
11359 p = newp;
11360 free_newp = false;
11361 }
11362
11363 // now p points to the page info we want
11364 p->state = state;
11365
11366 queue_head_t *mapq = &(p->maps);
11367 queue_head_t *mappool = &(p->map_pool);
11368 pmap_pgtrace_map_t *map;
11369 vm_map_offset_t start_offset, end_offset;
11370
11371 // calculate trace offsets in the page
11372 if (cur_page > start) {
11373 start_offset = 0;
11374 } else {
11375 start_offset = start - cur_page;
11376 }
11377 if (cur_page == end_page) {
11378 end_offset = end - end_page;
11379 } else {
11380 end_offset = ARM_PGBYTES - 1;
11381 }
11382
11383 kprintf("%s: pmap=%p cur_page=%llx ptep=%p state=%d start_offset=%llx end_offset=%llx\n", __func__, pmap, cur_page, ptep, state, start_offset, end_offset);
11384
11385 // fill map info
11386 assert(!queue_empty(mappool));
11387 queue_remove_first(mappool, map, pmap_pgtrace_map_t *, chain);
11388 if (p->state == PA_UNDEFINED) {
11389 map->pmap = pmap;
11390 map->ova = cur_page;
11391 map->range.start = start_offset;
11392 map->range.end = end_offset;
11393 } else if (p->state == VA_UNDEFINED) {
11394 p->pa = pa_page;
11395 map->range.start = start_offset;
11396 map->range.end = end_offset;
11397 } else if (p->state == DEFINED) {
11398 p->pa = pa_page;
11399 map->pmap = pmap;
11400 map->ova = cur_page;
11401 map->range.start = start_offset;
11402 map->range.end = end_offset;
11403 } else {
11404 panic("invalid p->state=%d\n", p->state);
11405 }
11406
11407 // not cloned yet
11408 map->cloned = false;
11409 queue_enter(mapq, map, pmap_pgtrace_map_t *, chain);
11410
11411 // unlock locks
11412 PMAP_PGTRACE_UNLOCK(&ints);
11413 if (pmap != kernel_pmap) {
11414 PMAP_UNLOCK(kernel_pmap);
11415 }
11416 if (pmap != NULL) {
11417 PMAP_UNLOCK(pmap);
11418 }
11419
11420 // now clone it
11421 if (pa_valid(pa_page)) {
11422 num_cloned = pmap_pgtrace_clone_from_pvtable(pa_page, start_offset, end_offset);
11423 }
11424 if (pmap == NULL) {
11425 num_cloned += pmap_pgtrace_clone_from_pa(kernel_pmap, pa_page, start_offset, end_offset);
11426 } else {
11427 num_cloned += pmap_pgtrace_clone_from_pa(pmap, pa_page, start_offset, end_offset);
11428 }
11429
11430 // free pre-allocations if we didn't add it to the q
11431 if (free_newp) {
11432 pmap_pgtrace_free_page(newp);
11433 }
11434
11435 if (num_cloned == 0) {
11436 kprintf("%s: no mapping found for pa_page=%llx but will be added when a page entered\n", __func__, pa_page);
11437 }
11438
11439 ret += num_cloned;
11440
11441 // overflow
11442 if (cur_page + ARM_PGBYTES < cur_page) {
11443 break;
11444 } else {
11445 cur_page += ARM_PGBYTES;
11446 }
11447 }
11448
11449 PROF_END
11450
11451 return ret;
11452 }
11453
11454 // delete page infos for given address range
11455 int
11456 pmap_pgtrace_delete_page(pmap_t pmap, vm_map_offset_t start, vm_map_offset_t end)
11457 {
11458 int ret = 0;
11459 bool ints;
11460 queue_head_t *q = &(pmap_pgtrace.pages);
11461 pmap_pgtrace_page_t *p;
11462 vm_map_offset_t cur_page, end_page;
11463
11464 kprintf("%s start=%llx end=%llx\n", __func__, start, end);
11465
11466 PROF_START
11467
11468 pt_entry_t *ptep;
11469 pmap_paddr_t pa_page;
11470
11471 // remove page info from start to end
11472 cur_page = arm_trunc_page(start);
11473 end_page = arm_trunc_page(end);
11474 while (cur_page <= end_page) {
11475 p = NULL;
11476
11477 if (pmap == NULL) {
11478 pa_page = cur_page;
11479 } else {
11480 PMAP_LOCK(pmap);
11481 ptep = pmap_pte(pmap, cur_page);
11482 if (ptep == NULL) {
11483 PMAP_UNLOCK(pmap);
11484 goto cont;
11485 }
11486 pa_page = pte_to_pa(*ptep);
11487 PMAP_UNLOCK(pmap);
11488 }
11489
11490 // remove all clones and validate
11491 pmap_pgtrace_remove_all_clone(pa_page);
11492
11493 // find page info and delete
11494 PMAP_PGTRACE_LOCK(&ints);
11495 p = pmap_pgtrace_find_page(pa_page);
11496 if (p != NULL) {
11497 queue_remove(q, p, pmap_pgtrace_page_t *, chain);
11498 ret++;
11499 }
11500 PMAP_PGTRACE_UNLOCK(&ints);
11501
11502 // free outside of locks
11503 if (p != NULL) {
11504 pmap_pgtrace_free_page(p);
11505 }
11506
11507 cont:
11508 // overflow
11509 if (cur_page + ARM_PGBYTES < cur_page) {
11510 break;
11511 } else {
11512 cur_page += ARM_PGBYTES;
11513 }
11514 }
11515
11516 PROF_END
11517
11518 return ret;
11519 }
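
/*
 * Illustrative sketch (hypothetical caller, not from the original source):
 * tracing a VA window mapped in a task's pmap. pmap_pgtrace_add_page() clones
 * every existing mapping of the underlying pages; faults on the invalidated
 * originals are then handled by pmap_pgtrace_fault(). The variable names are
 * assumptions.
 */
#if 0 /* illustrative only */
static void
example_trace_window(pmap_t target_pmap, vm_map_offset_t win_start, vm_map_offset_t win_end)
{
    int cloned = pmap_pgtrace_add_page(target_pmap, win_start, win_end);
    kprintf("pgtrace: %d mapping(s) cloned for [%llx, %llx)\n", cloned, win_start, win_end);

    /* ... run the workload that touches the window ... */

    int removed = pmap_pgtrace_delete_page(target_pmap, win_start, win_end);
    kprintf("pgtrace: %d page info(s) removed\n", removed);
}
#endif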
11520
11521 kern_return_t
11522 pmap_pgtrace_fault(pmap_t pmap, vm_map_offset_t va, arm_saved_state_t *ss)
11523 {
11524 pt_entry_t *ptep;
11525 pgtrace_run_result_t res;
11526 pmap_pgtrace_page_t *p;
11527 bool ints, found = false;
11528 pmap_paddr_t pa;
11529
11530 // Quick check if we are interested
11531 ptep = pmap_pte(pmap, va);
11532 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
11533 return KERN_FAILURE;
11534 }
11535
11536 PMAP_PGTRACE_LOCK(&ints);
11537
11538 // Check again since access is serialized
11539 ptep = pmap_pte(pmap, va);
11540 if (!ptep || !(*ptep & ARM_PTE_PGTRACE)) {
11541 PMAP_PGTRACE_UNLOCK(&ints);
11542 return KERN_FAILURE;
11543 } else if ((*ptep & ARM_PTE_TYPE_VALID) == ARM_PTE_TYPE_VALID) {
11544 // Somehow this CPU's TLB has not been updated yet
11545 kprintf("%s: somehow this CPU's TLB has not been updated?\n", __func__);
11546 PMAP_UPDATE_TLBS(pmap, va, va + ARM_PGBYTES, false);
11547
11548 PMAP_PGTRACE_UNLOCK(&ints);
11549 return KERN_SUCCESS;
11550 }
11551
11552 // Find if this pa is what we are tracing
11553 pa = pte_to_pa(*ptep);
11554
11555 p = pmap_pgtrace_find_page(arm_trunc_page(pa));
11556 if (p == NULL) {
11557 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
11558 }
11559
11560 // find if pmap and va are also matching
11561 queue_head_t *mapq = &(p->maps);
11562 queue_head_t *mapwaste = &(p->map_waste);
11563 pmap_pgtrace_map_t *map;
11564
11565 queue_iterate(mapq, map, pmap_pgtrace_map_t *, chain) {
11566 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
11567 found = true;
11568 break;
11569 }
11570 }
11571
11572 // if not found, search map waste as they are still valid
11573 if (!found) {
11574 queue_iterate(mapwaste, map, pmap_pgtrace_map_t *, chain) {
11575 if (map->pmap == pmap && map->ova == arm_trunc_page(va)) {
11576 found = true;
11577 break;
11578 }
11579 }
11580 }
11581
11582 if (!found) {
11583 panic("%s Can't find va=%llx pa=%llx from tracing pages\n", __func__, va, pa);
11584 }
11585
11586 // Decode and run it on the clone map
11587 bzero(&res, sizeof(res));
11588 pgtrace_decode_and_run(*(uint32_t *)get_saved_state_pc(ss), // instruction
11589 va, map->cva, // fault va and clone page vas
11590 ss, &res);
11591
11592 // write a log if in range
11593 vm_map_offset_t offset = va - map->ova;
11594 if (map->range.start <= offset && offset <= map->range.end) {
11595 pgtrace_write_log(res);
11596 }
11597
11598 PMAP_PGTRACE_UNLOCK(&ints);
11599
11600 // Return to next instruction
11601 add_saved_state_pc(ss, sizeof(uint32_t));
11602
11603 return KERN_SUCCESS;
11604 }
11605 #endif
11606
11607 boolean_t
11608 pmap_enforces_execute_only(
11609 #if (__ARM_VMSA__ == 7)
11610 __unused
11611 #endif
11612 pmap_t pmap)
11613 {
11614 #if (__ARM_VMSA__ > 7)
11615 return pmap != kernel_pmap;
11616 #else
11617 return FALSE;
11618 #endif
11619 }
11620
11621 MARK_AS_PMAP_TEXT void
11622 pmap_set_jit_entitled_internal(
11623 __unused pmap_t pmap)
11624 {
11625 return;
11626 }
11627
11628 void
11629 pmap_set_jit_entitled(
11630 pmap_t pmap)
11631 {
11632 pmap_set_jit_entitled_internal(pmap);
11633 }
11634
11635 MARK_AS_PMAP_TEXT static kern_return_t
11636 pmap_query_page_info_internal(
11637 pmap_t pmap,
11638 vm_map_offset_t va,
11639 int *disp_p)
11640 {
11641 pmap_paddr_t pa;
11642 int disp;
11643 int pai;
11644 pt_entry_t *pte;
11645 pv_entry_t **pv_h, *pve_p;
11646
11647 if (pmap == PMAP_NULL || pmap == kernel_pmap) {
11648 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11649 *disp_p = 0;
11650 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11651 return KERN_INVALID_ARGUMENT;
11652 }
11653
11654 disp = 0;
11655
11656 VALIDATE_PMAP(pmap);
11657 PMAP_LOCK(pmap);
11658
11659 pte = pmap_pte(pmap, va);
11660 if (pte == PT_ENTRY_NULL) {
11661 goto done;
11662 }
11663
11664 pa = pte_to_pa(*pte);
11665 if (pa == 0) {
11666 if (ARM_PTE_IS_COMPRESSED(*pte, pte)) {
11667 disp |= PMAP_QUERY_PAGE_COMPRESSED;
11668 if (*pte & ARM_PTE_COMPRESSED_ALT) {
11669 disp |= PMAP_QUERY_PAGE_COMPRESSED_ALTACCT;
11670 }
11671 }
11672 } else {
11673 disp |= PMAP_QUERY_PAGE_PRESENT;
11674 pai = (int) pa_index(pa);
11675 if (!pa_valid(pa)) {
11676 goto done;
11677 }
11678 LOCK_PVH(pai);
11679 pv_h = pai_to_pvh(pai);
11680 pve_p = PV_ENTRY_NULL;
11681 if (pvh_test_type(pv_h, PVH_TYPE_PVEP)) {
11682 pve_p = pvh_list(pv_h);
11683 while (pve_p != PV_ENTRY_NULL &&
11684 pve_get_ptep(pve_p) != pte) {
11685 pve_p = PVE_NEXT_PTR(pve_next(pve_p));
11686 }
11687 }
11688 if (IS_ALTACCT_PAGE(pai, pve_p)) {
11689 disp |= PMAP_QUERY_PAGE_ALTACCT;
11690 } else if (IS_REUSABLE_PAGE(pai)) {
11691 disp |= PMAP_QUERY_PAGE_REUSABLE;
11692 } else if (IS_INTERNAL_PAGE(pai)) {
11693 disp |= PMAP_QUERY_PAGE_INTERNAL;
11694 }
11695 UNLOCK_PVH(pai);
11696 }
11697
11698 done:
11699 PMAP_UNLOCK(pmap);
11700 pmap_pin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11701 *disp_p = disp;
11702 pmap_unpin_kernel_pages((vm_offset_t)disp_p, sizeof(*disp_p));
11703 return KERN_SUCCESS;
11704 }
11705
11706 kern_return_t
11707 pmap_query_page_info(
11708 pmap_t pmap,
11709 vm_map_offset_t va,
11710 int *disp_p)
11711 {
11712 return pmap_query_page_info_internal(pmap, va, disp_p);
11713 }
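
/*
 * Illustrative sketch (hypothetical caller, not from the original source):
 * decoding the disposition bits returned by pmap_query_page_info().
 */
#if 0 /* illustrative only */
static void
example_log_page_disposition(pmap_t pmap, vm_map_offset_t va)
{
    int disp = 0;

    if (pmap_query_page_info(pmap, va, &disp) != KERN_SUCCESS) {
        return;
    }
    if (disp & PMAP_QUERY_PAGE_PRESENT) {
        kprintf("va 0x%llx: resident (altacct=%d reusable=%d internal=%d)\n",
            (uint64_t)va,
            !!(disp & PMAP_QUERY_PAGE_ALTACCT),
            !!(disp & PMAP_QUERY_PAGE_REUSABLE),
            !!(disp & PMAP_QUERY_PAGE_INTERNAL));
    } else if (disp & PMAP_QUERY_PAGE_COMPRESSED) {
        kprintf("va 0x%llx: compressed (altacct=%d)\n",
            (uint64_t)va, !!(disp & PMAP_QUERY_PAGE_COMPRESSED_ALTACCT));
    } else {
        kprintf("va 0x%llx: absent\n", (uint64_t)va);
    }
}
#endif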
11714
11715 MARK_AS_PMAP_TEXT kern_return_t
11716 pmap_return_internal(__unused boolean_t do_panic, __unused boolean_t do_recurse)
11717 {
11718
11719 return KERN_SUCCESS;
11720 }
11721
11722 kern_return_t
11723 pmap_return(boolean_t do_panic, boolean_t do_recurse)
11724 {
11725 return pmap_return_internal(do_panic, do_recurse);
11726 }
11727
11728
11729
11730
11731 MARK_AS_PMAP_TEXT static void
11732 pmap_footprint_suspend_internal(
11733 vm_map_t map,
11734 boolean_t suspend)
11735 {
11736 #if DEVELOPMENT || DEBUG
11737 if (suspend) {
11738 current_thread()->pmap_footprint_suspended = TRUE;
11739 map->pmap->footprint_was_suspended = TRUE;
11740 } else {
11741 current_thread()->pmap_footprint_suspended = FALSE;
11742 }
11743 #else /* DEVELOPMENT || DEBUG */
11744 (void) map;
11745 (void) suspend;
11746 #endif /* DEVELOPMENT || DEBUG */
11747 }
11748
11749 void
11750 pmap_footprint_suspend(
11751 vm_map_t map,
11752 boolean_t suspend)
11753 {
11754 pmap_footprint_suspend_internal(map, suspend);
11755 }
11756
11757 #if defined(__arm64__) && (DEVELOPMENT || DEBUG)
11758
11759 struct page_table_dump_header {
11760 uint64_t pa;
11761 uint64_t num_entries;
11762 uint64_t start_va;
11763 uint64_t end_va;
11764 };
11765
11766 static size_t
11767 pmap_dump_page_tables_recurse(pmap_t pmap,
11768 const tt_entry_t *ttp,
11769 unsigned int cur_level,
11770 uint64_t start_va,
11771 void *bufp,
11772 void *buf_end)
11773 {
11774 size_t bytes_used = 0;
11775 uint64_t num_entries = ARM_PGBYTES / sizeof(*ttp);
11776 const pt_attr_t * const pt_attr = pmap_get_pt_attr(pmap);
11777
11778 uint64_t size = pt_attr->pta_level_info[cur_level].size;
11779 uint64_t valid_mask = pt_attr->pta_level_info[cur_level].valid_mask;
11780 uint64_t type_mask = pt_attr->pta_level_info[cur_level].type_mask;
11781 uint64_t type_block = pt_attr->pta_level_info[cur_level].type_block;
11782
11783 if (cur_level == arm64_root_pgtable_level) {
11784 num_entries = arm64_root_pgtable_num_ttes;
11785 }
11786
11787 uint64_t tt_size = num_entries * sizeof(tt_entry_t);
11788 const tt_entry_t *tt_end = &ttp[num_entries];
11789
11790 if (((vm_offset_t)buf_end - (vm_offset_t)bufp) < (tt_size + sizeof(struct page_table_dump_header))) {
11791 return 0;
11792 }
11793
11794 struct page_table_dump_header *header = (struct page_table_dump_header*)bufp;
11795 header->pa = ml_static_vtop((vm_offset_t)ttp);
11796 header->num_entries = num_entries;
11797 header->start_va = start_va;
11798 header->end_va = start_va + (num_entries * size);
11799
11800 bcopy(ttp, (uint8_t*)bufp + sizeof(*header), tt_size);
11801 bytes_used += (sizeof(*header) + tt_size);
11802 uint64_t current_va = start_va;
11803
11804 for (const tt_entry_t *ttep = ttp; ttep < tt_end; ttep++, current_va += size) {
11805 tt_entry_t tte = *ttep;
11806
11807 if (!(tte & valid_mask)) {
11808 continue;
11809 }
11810
11811 if ((tte & type_mask) == type_block) {
11812 continue;
11813 } else {
11814 if (cur_level >= PMAP_TT_MAX_LEVEL) {
11815 panic("%s: corrupt entry %#llx at %p, "
11816 "ttp=%p, cur_level=%u, bufp=%p, buf_end=%p",
11817 __FUNCTION__, tte, ttep,
11818 ttp, cur_level, bufp, buf_end);
11819 }
11820
11821 const tt_entry_t *next_tt = (const tt_entry_t*)phystokv(tte & ARM_TTE_TABLE_MASK);
11822
11823 size_t recurse_result = pmap_dump_page_tables_recurse(pmap, next_tt, cur_level + 1, current_va, (uint8_t*)bufp + bytes_used, buf_end);
11824
11825 if (recurse_result == 0) {
11826 return 0;
11827 }
11828
11829 bytes_used += recurse_result;
11830 }
11831 }
11832
11833 return bytes_used;
11834 }
11835
11836 size_t
11837 pmap_dump_page_tables(pmap_t pmap, void *bufp, void *buf_end)
11838 {
11839 if (not_in_kdp) {
11840 panic("pmap_dump_page_tables must only be called from kernel debugger context");
11841 }
11842 return pmap_dump_page_tables_recurse(pmap, pmap->tte, arm64_root_pgtable_level, pmap->min, bufp, buf_end);
11843 }
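
/*
 * Illustrative sketch (hypothetical consumer, not from the original source):
 * the dump produced above is a flat sequence of records, each a
 * struct page_table_dump_header immediately followed by that table's
 * num_entries tt_entry_t values, with nested tables appended depth-first, so
 * a debugger-side reader can walk it record by record.
 */
#if 0 /* illustrative only */
static void
example_walk_page_table_dump(const void *buf, size_t bytes_used)
{
    const uint8_t *cur = buf;
    const uint8_t *end = cur + bytes_used;

    while ((size_t)(end - cur) >= sizeof(struct page_table_dump_header)) {
        const struct page_table_dump_header *hdr = (const struct page_table_dump_header *)cur;
        kprintf("table pa 0x%llx: %llu entries, VA [0x%llx, 0x%llx)\n",
            hdr->pa, hdr->num_entries, hdr->start_va, hdr->end_va);
        cur += sizeof(*hdr) + (hdr->num_entries * sizeof(tt_entry_t));
    }
}
#endif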
11844
11845 #else /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */
11846
11847 size_t
11848 pmap_dump_page_tables(pmap_t pmap __unused, void *bufp __unused, void *buf_end __unused)
11849 {
11850 return (size_t)-1;
11851 }
11852
11853 #endif /* defined(__arm64__) && (DEVELOPMENT || DEBUG) */